changeset 4001:4767c78f3504 hs24-b23

Merge
author amurillo
date Sat, 20 Oct 2012 00:08:35 -0700
parents 21a84336cab3 (current diff) 81d17d9ab7c9 (diff)
children f2b48605f612
files .hgtags .jcheck/conf agent/src/share/classes/sun/jvm/hotspot/code/RicochetBlob.java agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCRicochetFrame.java agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86RicochetFrame.java make/hotspot_version make/jprt.properties make/linux/makefiles/gcc.make make/linux/makefiles/vm.make make/solaris/makefiles/defs.make make/windows/makefiles/defs.make src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp src/cpu/x86/vm/c1_LIRAssembler_x86.cpp src/cpu/x86/vm/c1_LIRGenerator_x86.cpp src/cpu/x86/vm/sharedRuntime_x86_64.cpp src/cpu/x86/vm/vm_version_x86.cpp src/cpu/x86/vm/x86_32.ad src/cpu/x86/vm/x86_64.ad src/os/bsd/vm/os_bsd.cpp src/os/linux/vm/os_linux.cpp src/os/solaris/vm/os_solaris.cpp src/os/windows/vm/os_windows.cpp src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp src/share/vm/c1/c1_LIRGenerator.cpp src/share/vm/c1/c1_ValueMap.hpp src/share/vm/classfile/classFileParser.cpp src/share/vm/classfile/symbolTable.cpp src/share/vm/classfile/symbolTable.hpp src/share/vm/classfile/vmSymbols.hpp src/share/vm/compiler/compilerOracle.cpp src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.cpp src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp src/share/vm/gc_implementation/parNew/parGCAllocBuffer.cpp src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp src/share/vm/memory/cardTableModRefBS.hpp src/share/vm/memory/dump.cpp src/share/vm/memory/universe.hpp src/share/vm/oops/instanceKlass.cpp src/share/vm/oops/instanceKlass.hpp src/share/vm/oops/instanceKlassKlass.cpp src/share/vm/opto/callGenerator.cpp src/share/vm/opto/chaitin.cpp src/share/vm/opto/library_call.cpp src/share/vm/opto/loopnode.cpp src/share/vm/opto/stringopts.cpp src/share/vm/opto/type.cpp src/share/vm/prims/jvmtiClassFileReconstituter.cpp src/share/vm/prims/jvmtiEnv.cpp src/share/vm/prims/jvmtiRedefineClasses.cpp src/share/vm/prims/methodHandleWalk.cpp src/share/vm/prims/methodHandleWalk.hpp src/share/vm/runtime/arguments.cpp src/share/vm/runtime/globals.cpp src/share/vm/runtime/globals.hpp src/share/vm/runtime/os.hpp src/share/vm/runtime/vmStructs.cpp src/share/vm/utilities/hashtable.cpp src/share/vm/utilities/hashtable.hpp src/share/vm/utilities/vmError.cpp test/runtime/6294277/SourceDebugExtension.java
diffstat 859 files changed, 63199 insertions(+), 34817 deletions(-)
--- a/.hgtags	Wed Sep 26 21:45:41 2012 -0700
+++ b/.hgtags	Sat Oct 20 00:08:35 2012 -0700
@@ -336,3 +336,53 @@
 042438023396a3886a060ca14a06a4664ef85d9d hs23.6-b03
 4c525a19affa0f69c502a74a01d33f0dd74a1ade jdk7u10-b08
 d14ad18fc5168983f693bb173fa353a3846369ad jdk7u10-b09
+3b24e7e01d20ca590d0f86b1222bb7c3f1a2aa2d jdk8-b27
+975c4105f1e2ef1190a75b77124033f1fd4290b5 hs24-b01
+b183b0863611b85dbac16f3b08b40ba978756d19 jdk8-b28
+030b5306d60f140e822e4a6d301744cb110ff0c8 hs24-b02
+b45b5c564098c58ea69e7cff3f7d341f0254dd1d jdk8-b29
+d61761bf305031c94f7f8eca49abd978b7d3c5da jdk8-b30
+dfae0140457cfb2c381d7679735fbedbae862c62 hs24-b03
+f4767e53d6e0d5da7e3f1775904076cce54247c1 hs24-b04
+0cd147eaa673d1642b2f466f5dc257cf192db524 jdk8-b31
+27863e4586de38be7dd17da4163f542038f4d1d7 hs24-b05
+25410a347ebb0bef166c4338a90d9dea82463a20 jdk8-b32
+cd47da9383cd932cb2b659064057feafa2a91134 hs24-b06
+785bcf415ead2eaa5f6677aaf528481008140bac jdk8-b33
+7c6aba65acd2c334f1c3512b574f9038cddac24b hs24-b07
+f284b08835584517c1ca3dd67341f569e763841f jdk8-b34
+f621660a297baa48fab9dca28e99d318826e8304 jdk8-b35
+dff6e3459210f8dd0430b9b03ccc99280560da30 hs24-b08
+50b4400ca1ecb2ac2fde35f5e53ec8f04b86be7f jdk8-b36
+bfcf92bfefb82da00f7fdbf0d9273feaa0a9456d jdk8-b37
+7d5ec8bf38d1b12e0e09ec381f10976b8beede3b hs24-b09
+637c3f5f068f88fb9ec9c5867341cf59fd5ebedc jdk8-b38
+73147e6c48813b5fee904aa33f79a77103250ff4 hs24-b10
+96a403721094ecdaf6a1f4f52ebd0a82e07df199 jdk8-b39
+14b0e07ab9a6fa1662414496b7e07ac8450cf517 hs24-b11
+ff9decc8235d5af80ea45fda4ecbe643ea252564 jdk8-b40
+785573170238f0eae6dc8e22ecf1050fbc9ea055 hs24-b12
+37add4fa0296705f67481e1fd50e2900cd25e39b jdk8-b41
+bd568544be7fcd12a9327e6c448592198d57b043 hs24-b13
+55954061c6e8750ea39a63523fd65d580db6eeb1 jdk8-b42
+e77b8e0ed1f84e3e268239e276c7ab64fa573baa jdk8-b43
+5ba29a1db46ecb80a321ca873adb56a3fe6ad320 hs24-b14
+831e5c76a20af18f3c08c5a95ed31be0e128a010 jdk8-b44
+9d5f20961bc5846fa8d098d534effafbbdae0a58 jdk8-b45
+40e5a3f2907ed02b335c7caa8ecf068cc801380d hs24-b15
+cf37a594c38db2ea926954154636f9f81da2e032 jdk8-b46
+0c7bb1f4f9c8062b5c5bfa56b3bdca44839b4109 jdk8-b47
+66b0450071c1534e014b131892cc86b63f1d009c hs24-b16
+1e26f61bbb521642639f56fae11326f1932f5a7d jdk8-b48
+bd54fe36b5e50f9ef1e30a5047b27fee5297e268 hs24-b17
+e3619706a7253540a2d94e9e841acaab8ace7038 jdk8-b49
+72e0362c3f0cfacbbac8af8a5b9d2e182f21c17b hs24-b18
+58f237a9e83af6ded0d2e2c81d252cd47c0f4c45 jdk8-b50
+3b3ad16429701b2eb6712851c2f7c5a726eb2cbe hs24-b19
+663fc23da8d51c4c0552cbcb17ffc85f5869d4fd jdk8-b51
+4c8f2a12e757e7a808aa85827573e09f75d7459f hs24-b20
+6d0436885201db3f581523344a734793bb989549 jdk8-b52
+54240c1b8e87758f28da2c6a569a926fd9e0910a jdk8-b53
+9e3ae661284dc04185b029d85440fe7811f1ed07 hs24-b21
+e8fb566b94667f88462164defa654203f0ab6820 jdk8-b54
+09ea7e0752b306b8ae74713aeb4eb6263e1c6836 hs24-b22
--- a/agent/make/saenv.sh	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/make/saenv.sh	Sat Oct 20 00:08:35 2012 -0700
@@ -26,7 +26,7 @@
 # This file sets common environment variables for all SA scripts
 
 OS=`uname`
-STARTDIR=`dirname $0`
+STARTDIR=`(cd \`dirname $0 \`; pwd)`
 ARCH=`uname -m`
 
 if [ "x$SA_JAVA" = "x" ]; then
--- a/agent/make/start-debug-server-proc.sh	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/make/start-debug-server-proc.sh	Sat Oct 20 00:08:35 2012 -0700
@@ -25,10 +25,11 @@
 
 . `dirname $0`/saenv.sh
 
-if [ -f $STARTDIR/sa.jar ] ; then
-  CP=$STARTDIR/sa.jar
+if [ -f $STARTDIR/../lib/sa-jdi.jar ] ; then
+  CP=$STARTDIR/../lib/sa-jdi.jar
 else
   CP=$STARTDIR/../build/classes
 fi
 
-$SA_JAVA -classpath $CP ${OPTIONS} -Djava.rmi.server.codebase=file:/$CP -Djava.security.policy=$STARTDIR\/grantAll.policy sun.jvm.hotspot.DebugServer $*
+$STARTDIR/java -classpath $CP ${OPTIONS} -Djava.rmi.server.codebase=file://$CP -Djava.security.policy=${STARTDIR}/grantAll.policy sun.jvm.hotspot.DebugServer $*
+
--- a/agent/src/os/linux/LinuxDebuggerLocal.c	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/os/linux/LinuxDebuggerLocal.c	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,11 +55,11 @@
 #define THROW_NEW_DEBUGGER_EXCEPTION_(str, value) { throw_new_debugger_exception(env, str); return value; }
 #define THROW_NEW_DEBUGGER_EXCEPTION(str) { throw_new_debugger_exception(env, str); return;}
 
-static void throw_new_debugger_exception(JNIEnv* env, const char* errMsg) {
+void throw_new_debugger_exception(JNIEnv* env, const char* errMsg) {
   (*env)->ThrowNew(env, (*env)->FindClass(env, "sun/jvm/hotspot/debugger/DebuggerException"), errMsg);
 }
 
-static struct ps_prochandle* get_proc_handle(JNIEnv* env, jobject this_obj) {
+struct ps_prochandle* get_proc_handle(JNIEnv* env, jobject this_obj) {
   jlong ptr = (*env)->GetLongField(env, this_obj, p_ps_prochandle_ID);
   return (struct ps_prochandle*)(intptr_t)ptr;
 }
@@ -280,6 +280,7 @@
   return (err == PS_OK)? array : 0;
 }
 
+#if defined(i386) || defined(ia64) || defined(amd64) || defined(sparc) || defined(sparcv9)
 JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0
   (JNIEnv *env, jobject this_obj, jint lwp_id) {
 
@@ -410,3 +411,4 @@
   (*env)->ReleaseLongArrayElements(env, array, regs, JNI_COMMIT);
   return array;
 }
+#endif
--- a/agent/src/os/linux/libproc.h	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/os/linux/libproc.h	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,10 +25,15 @@
 #ifndef _LIBPROC_H_
 #define _LIBPROC_H_
 
+#include <jni.h>
 #include <unistd.h>
 #include <stdint.h>
 #include "proc_service.h"
 
+#if defined(arm) || defined(ppc)
+#include "libproc_md.h"
+#endif
+
 #if defined(sparc) || defined(sparcv9)
 /*
   If _LP64 is defined ptrace.h should be taken from /usr/include/asm-sparc64
@@ -139,4 +144,8 @@
 // address->nearest symbol lookup. return NULL for no symbol
 const char* symbol_for_pc(struct ps_prochandle* ph, uintptr_t addr, uintptr_t* poffset);
 
+struct ps_prochandle* get_proc_handle(JNIEnv* env, jobject this_obj);
+
+void throw_new_debugger_exception(JNIEnv* env, const char* errMsg);
+
 #endif //__LIBPROC_H_
--- a/agent/src/os/linux/ps_core.c	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/os/linux/ps_core.c	Sat Oct 20 00:08:35 2012 -0700
@@ -440,7 +440,7 @@
       int j = 0;
       print_debug("---- sorted virtual address map ----\n");
       for (j = 0; j < ph->core->num_maps; j++) {
-        print_debug("base = 0x%lx\tsize = %d\n", ph->core->map_array[j]->vaddr,
+        print_debug("base = 0x%lx\tsize = %zu\n", ph->core->map_array[j]->vaddr,
                                          ph->core->map_array[j]->memsz);
       }
    }
--- a/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -549,7 +549,13 @@
                     machDesc = new MachineDescriptionSPARC32Bit();
             }
         } else {
-            throw new DebuggerException("Linux only supported on x86/ia64/amd64/sparc/sparc64");
+          try {
+            machDesc = (MachineDescription)
+              Class.forName("sun.jvm.hotspot.debugger.MachineDescription" +
+                            cpu.toUpperCase()).newInstance();
+          } catch (Exception e) {
+            throw new DebuggerException("Linux not supported on machine type " + cpu);
+          }
         }
 
         LinuxDebuggerLocal dbg =
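
Note: the hunk above replaces a hard-coded cpu switch with a reflective lookup keyed on a class-naming convention, and the same pattern recurs in BugSpotAgent, LinuxThreadContextFactory, ProcDebuggerLocal, RemoteDebuggerClient, and Threads below. A minimal, self-contained sketch of the convention follows; the "arm" cpu string and the demo class are illustrative assumptions, not part of the patch, and only the lookup itself is shown:

    import java.lang.reflect.Constructor;

    public class ReflectiveLookupDemo {
        // Builds a class name from a cpu string, e.g. "arm" ->
        // "sun.jvm.hotspot.debugger.linux.arm.LinuxARMThreadContext".
        static String contextClassName(String cpu) {
            return "sun.jvm.hotspot.debugger.linux." + cpu.toLowerCase() +
                   ".Linux" + cpu.toUpperCase() + "ThreadContext";
        }

        public static void main(String[] args) {
            String cpu = "arm";   // hypothetical cpu string
            try {
                Class<?> tcc = Class.forName(contextClassName(cpu));
                // The SA classes pass the debugger to the single public
                // constructor; here we only confirm the lookup resolves.
                Constructor<?>[] ctors = tcc.getConstructors();
                System.out.println("found " + ctors.length + " constructor(s)");
            } catch (ClassNotFoundException e) {
                // Unknown cpus are reported the same way the old code did.
                System.out.println("cpu " + cpu + " is not yet supported");
            }
        }
    }
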
--- a/agent/src/share/classes/sun/jvm/hotspot/bugspot/BugSpotAgent.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/bugspot/BugSpotAgent.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -737,9 +737,16 @@
                machDesc = new MachineDescriptionSPARC32Bit();
             }
         } else {
-            throw new DebuggerException("Linux only supported on x86/ia64/amd64/sparc/sparc64");
+          try {
+            machDesc = (MachineDescription)
+              Class.forName("sun.jvm.hotspot.debugger.MachineDescription" +
+              cpu.toUpperCase()).newInstance();
+          } catch (Exception e) {
+            throw new DebuggerException("unsupported machine type");
+          }
         }
 
+
         // Note we do not use a cache for the local debugger in server
         // mode; it will be taken care of on the client side (once remote
         // debugging is implemented).
--- a/agent/src/share/classes/sun/jvm/hotspot/code/CodeBlob.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/CodeBlob.java	Sat Oct 20 00:08:35 2012 -0700
@@ -93,7 +93,6 @@
   public boolean isUncommonTrapStub()   { return false; }
   public boolean isExceptionStub()      { return false; }
   public boolean isSafepointStub()      { return false; }
-  public boolean isRicochetBlob()       { return false; }
   public boolean isAdapterBlob()        { return false; }
 
   // Fine grain nmethod support: isNmethod() == isJavaMethod() || isNativeMethod() || isOSRMethod()
--- a/agent/src/share/classes/sun/jvm/hotspot/code/CodeCache.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/CodeCache.java	Sat Oct 20 00:08:35 2012 -0700
@@ -57,7 +57,6 @@
     virtualConstructor.addMapping("BufferBlob", BufferBlob.class);
     virtualConstructor.addMapping("nmethod", NMethod.class);
     virtualConstructor.addMapping("RuntimeStub", RuntimeStub.class);
-    virtualConstructor.addMapping("RicochetBlob", RicochetBlob.class);
     virtualConstructor.addMapping("AdapterBlob", AdapterBlob.class);
     virtualConstructor.addMapping("MethodHandlesAdapterBlob", MethodHandlesAdapterBlob.class);
     virtualConstructor.addMapping("SafepointBlob", SafepointBlob.class);
@@ -127,10 +126,6 @@
       Assert.that(result.blobContains(start) || result.blobContains(start.addOffsetTo(8)),
                                                                     "found wrong CodeBlob");
     }
-    if (result.isRicochetBlob()) {
-      // This should probably be done for other SingletonBlobs
-      return VM.getVM().ricochetBlob();
-    }
     return result;
   }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/code/RicochetBlob.java	Wed Sep 26 21:45:41 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.code;
-
-import java.util.*;
-import sun.jvm.hotspot.debugger.*;
-import sun.jvm.hotspot.runtime.*;
-import sun.jvm.hotspot.types.*;
-
-/** RicochetBlob (currently only used by Compiler 2) */
-
-public class RicochetBlob extends SingletonBlob {
-  static {
-    VM.registerVMInitializedObserver(new Observer() {
-        public void update(Observable o, Object data) {
-          initialize(VM.getVM().getTypeDataBase());
-        }
-      });
-  }
-
-  private static void initialize(TypeDataBase db) {
-    Type type = db.lookupType("RicochetBlob");
-
-    bounceOffsetField                = type.getCIntegerField("_bounce_offset");
-    exceptionOffsetField             = type.getCIntegerField("_exception_offset");
-  }
-
-  private static CIntegerField bounceOffsetField;
-  private static CIntegerField exceptionOffsetField;
-
-  public RicochetBlob(Address addr) {
-    super(addr);
-  }
-
-  public boolean isRicochetBlob() {
-    return true;
-  }
-
-  public Address bounceAddr() {
-    return codeBegin().addOffsetTo(bounceOffsetField.getValue(addr));
-  }
-
-  public boolean returnsToBounceAddr(Address pc) {
-    Address bouncePc = bounceAddr();
-    return (pc.equals(bouncePc) || pc.addOffsetTo(Frame.pcReturnOffset()).equals(bouncePc));
-  }
-
-}
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/ThreadContext.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/ThreadContext.java	Sat Oct 20 00:08:35 2012 -0700
@@ -24,6 +24,8 @@
 
 package sun.jvm.hotspot.debugger;
 
+import sun.jvm.hotspot.debugger.cdbg.*;
+
 /** This is a placeholder interface for a thread's context, containing
     only integer registers (no floating-point ones). What it contains
     is platform-dependent. Not all registers are guaranteed to be
@@ -54,4 +56,6 @@
   /** Set the value of the specified register (0..getNumRegisters() -
       1) as an Address */
   public void setRegisterAsAddress(int index, Address value);
+
+  public CFrame getTopFrame(Debugger dbg);
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/amd64/AMD64ThreadContext.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/amd64/AMD64ThreadContext.java	Sat Oct 20 00:08:35 2012 -0700
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.amd64;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.cdbg.*;
 
 /** Specifies the thread context on amd64 platforms; only a sub-portion
  * of the context is guaranteed to be present on all operating
@@ -98,6 +99,10 @@
         return data[index];
     }
 
+    public CFrame getTopFrame(Debugger dbg) {
+        return null;
+    }
+
     /** This can't be implemented in this class since we would have to
      * tie the implementation to, for example, the debugging system */
     public abstract void setRegisterAsAddress(int index, Address value);
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/ia64/IA64ThreadContext.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/ia64/IA64ThreadContext.java	Sat Oct 20 00:08:35 2012 -0700
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.ia64;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.cdbg.*;
 
 /** Specifies the thread context on ia64 platform; only a sub-portion
     of the context is guaranteed to be present on all operating
@@ -172,6 +173,10 @@
     return data[index];
   }
 
+  public CFrame getTopFrame(Debugger dbg) {
+    return null;
+  }
+
   /** This can't be implemented in this class since we would have to
       tie the implementation to, for example, the debugging system */
   public abstract void setRegisterAsAddress(int index, Address value);
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -107,7 +107,9 @@
        if (pc == null) return null;
        return new LinuxSPARCCFrame(dbg, sp, pc, LinuxDebuggerLocal.getAddressSize());
     } else {
-       throw new DebuggerException(cpu + " is not yet supported");
+       // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu
+       ThreadContext context = (ThreadContext) thread.getContext();
+       return context.getTopFrame(dbg);
     }
   }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2006, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
 
 package sun.jvm.hotspot.debugger.linux;
 
+import java.lang.reflect.*;
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.debugger.linux.amd64.*;
 import sun.jvm.hotspot.debugger.linux.ia64.*;
@@ -41,8 +42,16 @@
          return new LinuxIA64ThreadContext(dbg);
       } else if (cpu.equals("sparc")) {
          return new LinuxSPARCThreadContext(dbg);
-      } else {
-         throw new RuntimeException("cpu " + cpu + " is not yet supported");
+      } else  {
+        try {
+          Class tcc = Class.forName("sun.jvm.hotspot.debugger.linux." +
+             cpu.toLowerCase() + ".Linux" + cpu.toUpperCase() +
+             "ThreadContext");
+          Constructor[] ctcc = tcc.getConstructors();
+          return (ThreadContext)ctcc[0].newInstance(dbg);
+        } catch (Exception e) {
+          throw new RuntimeException("cpu " + cpu + " is not yet supported");
+        }
       }
    }
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
 import java.io.*;
 import java.net.*;
 import java.util.*;
+import java.lang.reflect.*;
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.proc.amd64.*;
@@ -86,7 +87,16 @@
             pcRegIndex = AMD64ThreadContext.RIP;
             fpRegIndex = AMD64ThreadContext.RBP;
         } else {
+          try {
+            Class tfc = Class.forName("sun.jvm.hotspot.debugger.proc." +
+               cpu.toLowerCase() + ".Proc" + cpu.toUpperCase() +
+               "ThreadFactory");
+            Constructor[] ctfc = tfc.getConstructors();
+            threadFactory = (ProcThreadFactory)ctfc[0].newInstance(this);
+          } catch (Exception e) {
             throw new RuntimeException("Thread access for CPU architecture " + PlatformInfo.getCPU() + " not yet supported");
+            // Note: pcRegIndex and fpRegIndex do not appear to be referenced
+          }
         }
         if (useCache) {
             // Cache portion of the remote process's address space.
@@ -375,7 +385,11 @@
         int pagesize = getPageSize0();
         if (pagesize == -1) {
             // return the hard coded default value.
-            pagesize = (PlatformInfo.getCPU().equals("x86"))? 4096 : 8192;
+            if (PlatformInfo.getCPU().equals("sparc") ||
+                PlatformInfo.getCPU().equals("amd64") )
+               pagesize = 8196;
+            else
+               pagesize = 4096;
         }
         return pagesize;
     }
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 
 import java.rmi.*;
 import java.util.*;
+import java.lang.reflect.*;
 
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
@@ -70,7 +71,18 @@
         cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize);
         unalignedAccessesOkay = true;
       } else {
-        throw new DebuggerException("Thread access for CPU architecture " + cpu + " not yet supported");
+        try {
+          Class tf = Class.forName("sun.jvm.hotspot.debugger.remote." +
+            cpu.toLowerCase() + ".Remote" + cpu.toUpperCase() +
+            "ThreadFactory");
+          Constructor[] ctf = tf.getConstructors();
+          threadFactory = (RemoteThreadFactory)ctf[0].newInstance(this);
+        } catch (Exception e) {
+          throw new DebuggerException("Thread access for CPU architecture " + cpu + " not yet supported");
+        }
+        cachePageSize = 4096;
+        cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize);
+        unalignedAccessesOkay = false;
       }
 
       // Cache portion of the remote process's address space.
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/sparc/SPARCThreadContext.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/sparc/SPARCThreadContext.java	Sat Oct 20 00:08:35 2012 -0700
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.sparc;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.cdbg.*;
 
 /** Currently provides just the minimal information necessary to get
     stack traces working. FIXME: currently hardwired for v9 -- will
@@ -124,6 +125,10 @@
     return data[index];
   }
 
+  public CFrame getTopFrame(Debugger dbg) {
+    return null;
+  }
+
   /** This can't be implemented in this class since we would have to
       tie the implementation to, for example, the debugging system */
   public abstract void setRegisterAsAddress(int index, Address value);
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/x86/X86ThreadContext.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/x86/X86ThreadContext.java	Sat Oct 20 00:08:35 2012 -0700
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.x86;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.cdbg.*;
 
 /** Specifies the thread context on x86 platforms; only a sub-portion
     of the context is guaranteed to be present on all operating
@@ -109,6 +110,10 @@
     return data[index];
   }
 
+  public CFrame getTopFrame(Debugger dbg) {
+    return null;
+  }
+
   /** This can't be implemented in this class since we would have to
       tie the implementation to, for example, the debugging system */
   public abstract void setRegisterAsAddress(int index, Address value);
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSeq.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSeq.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,7 +42,7 @@
 public class HeapRegionSeq extends VMObject {
     // HeapRegion** _regions;
     static private AddressField regionsField;
-    // size_t _length;
+    // uint _length;
     static private CIntegerField lengthField;
 
     static {
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java	Sat Oct 20 00:08:35 2012 -0700
@@ -40,9 +40,9 @@
 // Mirror class for HeapRegionSetBase. Represents a group of regions.
 
 public class HeapRegionSetBase extends VMObject {
-    // size_t _length;
+    // uint _length;
     static private CIntegerField lengthField;
-    // size_t _region_num;
+    // uint _region_num;
     static private CIntegerField regionNumField;
     // size_t _total_used_bytes;
     static private CIntegerField totalUsedBytesField;
--- a/agent/src/share/classes/sun/jvm/hotspot/jdi/VirtualMachineImpl.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/jdi/VirtualMachineImpl.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -889,15 +889,9 @@
         Klass kls = ((ReferenceTypeImpl)type).ref();
         if (kls instanceof InstanceKlass) {
             InstanceKlass ik = (InstanceKlass) kls;
-            if (ik.isInterface()) {
-                if (ik.nofImplementors() == 0L) {
-                    return new ArrayList(0);
-                }
-            } else {
-                // if the Klass is final or if there are no subklasses loaded yet
-                if (ik.getAccessFlagsObj().isFinal() || ik.getSubklassKlass() == null) {
-                    includeSubtypes = false;
-                }
+            // if the Klass is final or if there are no subklasses loaded yet
+            if (ik.getAccessFlagsObj().isFinal() || ik.getSubklassKlass() == null) {
+                includeSubtypes = false;
             }
         } else {
             // no subtypes for primitive array types
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/AccessFlags.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/AccessFlags.java	Sat Oct 20 00:08:35 2012 -0700
@@ -81,6 +81,7 @@
   // field flags
   public boolean fieldAccessWatched () { return (flags & JVM_ACC_FIELD_ACCESS_WATCHED) != 0; }
   public boolean fieldModificationWatched() { return (flags & JVM_ACC_FIELD_MODIFICATION_WATCHED) != 0; }
+  public boolean fieldHasGenericSignature() { return (flags & JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE)!= 0; }
 
   public void printOn(PrintStream tty) {
     // prints only .class flags and not the hotspot internal flags
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,15 +47,11 @@
   private static int HAS_LINENUMBER_TABLE;
   private static int HAS_CHECKED_EXCEPTIONS;
   private static int HAS_LOCALVARIABLE_TABLE;
+  private static int HAS_EXCEPTION_TABLE;
 
   private static synchronized void initialize(TypeDataBase db) throws WrongTypeException {
     Type type                  = db.lookupType("constMethodOopDesc");
-    // Backpointer to non-const methodOop
-    method                     = new OopField(type.getOopField("_method"), 0);
-    // The exception handler table. 4-tuples of ints [start_pc, end_pc,
-    // handler_pc, catch_type index] For methods with no exceptions the
-    // table is pointing to Universe::the_empty_int_array
-    exceptionTable             = new OopField(type.getOopField("_exception_table"), 0);
+    constants                  = new OopField(type.getOopField("_constants"), 0);
     constMethodSize            = new CIntField(type.getCIntegerField("_constMethod_size"), 0);
     flags                      = new ByteField(type.getJByteField("_flags"), 0);
 
@@ -63,12 +59,14 @@
     HAS_LINENUMBER_TABLE      = db.lookupIntConstant("constMethodOopDesc::_has_linenumber_table").intValue();
     HAS_CHECKED_EXCEPTIONS     = db.lookupIntConstant("constMethodOopDesc::_has_checked_exceptions").intValue();
     HAS_LOCALVARIABLE_TABLE   = db.lookupIntConstant("constMethodOopDesc::_has_localvariable_table").intValue();
+    HAS_EXCEPTION_TABLE       = db.lookupIntConstant("constMethodOopDesc::_has_exception_table").intValue();
 
     // Size of Java bytecodes allocated immediately after constMethodOop.
     codeSize                   = new CIntField(type.getCIntegerField("_code_size"), 0);
     nameIndex                  = new CIntField(type.getCIntegerField("_name_index"), 0);
     signatureIndex             = new CIntField(type.getCIntegerField("_signature_index"), 0);
     genericSignatureIndex      = new CIntField(type.getCIntegerField("_generic_signature_index"),0);
+    idnum                      = new CIntField(type.getCIntegerField("_method_idnum"), 0);
 
     // start of byte code
     bytecodeOffset = type.getSize();
@@ -78,6 +76,9 @@
 
     type                       = db.lookupType("LocalVariableTableElement");
     localVariableTableElementSize = type.getSize();
+
+    type                       = db.lookupType("ExceptionTableElement");
+    exceptionTableElementSize = type.getSize();
   }
 
   ConstMethod(OopHandle handle, ObjectHeap heap) {
@@ -85,28 +86,31 @@
   }
 
   // Fields
-  private static OopField  method;
-  private static OopField  exceptionTable;
+  private static OopField  constants;
   private static CIntField constMethodSize;
   private static ByteField flags;
   private static CIntField codeSize;
   private static CIntField nameIndex;
   private static CIntField signatureIndex;
   private static CIntField genericSignatureIndex;
+  private static CIntField idnum;
 
   // start of bytecode
   private static long bytecodeOffset;
 
   private static long checkedExceptionElementSize;
   private static long localVariableTableElementSize;
+  private static long exceptionTableElementSize;
+
+  public Method getMethod() {
+    InstanceKlass ik = (InstanceKlass)getConstants().getPoolHolder();
+    ObjArray methods = ik.getMethods();
+    return (Method)methods.getObjAt(getIdNum());
+  }
 
   // Accessors for declared fields
-  public Method getMethod() {
-    return (Method) method.getValue(this);
-  }
-
-  public TypeArray getExceptionTable() {
-    return (TypeArray) exceptionTable.getValue(this);
+  public ConstantPool getConstants() {
+    return (ConstantPool) constants.getValue(this);
   }
 
   public long getConstMethodSize() {
@@ -133,6 +137,10 @@
     return genericSignatureIndex.getValue(this);
   }
 
+  public long getIdNum() {
+    return idnum.getValue(this);
+  }
+
   public Symbol getName() {
     return getMethod().getName();
   }
@@ -223,8 +231,7 @@
   public void iterateFields(OopVisitor visitor, boolean doVMFields) {
     super.iterateFields(visitor, doVMFields);
     if (doVMFields) {
-      visitor.doOop(method, true);
-      visitor.doOop(exceptionTable, true);
+      visitor.doOop(constants, true);
       visitor.doCInt(constMethodSize, true);
       visitor.doByte(flags, true);
       visitor.doCInt(codeSize, true);
@@ -315,6 +322,23 @@
     return ret;
   }
 
+  public boolean hasExceptionTable() {
+    return (getFlags() & HAS_EXCEPTION_TABLE) != 0;
+  }
+
+  public ExceptionTableElement[] getExceptionTable() {
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(hasExceptionTable(), "should only be called if table is present");
+    }
+    ExceptionTableElement[] ret = new ExceptionTableElement[getExceptionTableLength()];
+    long offset = offsetOfExceptionTable();
+    for (int i = 0; i < ret.length; i++) {
+      ret[i] = new ExceptionTableElement(getHandle(), offset);
+      offset += exceptionTableElementSize;
+    }
+    return ret;
+  }
+
   public boolean hasCheckedExceptions() {
     return (getFlags() & HAS_CHECKED_EXCEPTIONS) != 0;
   }
@@ -404,7 +428,10 @@
     if (Assert.ASSERTS_ENABLED) {
       Assert.that(hasLocalVariableTable(), "should only be called if table is present");
     }
-    if (hasCheckedExceptions()) {
+
+    if (hasExceptionTable()) {
+      return offsetOfExceptionTable() - 2;
+    } else if (hasCheckedExceptions()) {
       return offsetOfCheckedExceptions() - 2;
     } else {
       return offsetOfLastU2Element();
@@ -421,4 +448,33 @@
     return offset;
   }
 
+  private int getExceptionTableLength() {
+    if (hasExceptionTable()) {
+      return (int) getHandle().getCIntegerAt(offsetOfExceptionTableLength(), 2, true);
+    } else {
+      return 0;
+    }
+  }
+
+  private long offsetOfExceptionTableLength() {
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(hasExceptionTable(), "should only be called if table is present");
+    }
+    if (hasCheckedExceptions()) {
+      return offsetOfCheckedExceptions() - 2;
+    } else {
+      return offsetOfLastU2Element();
+    }
+  }
+
+  private long offsetOfExceptionTable() {
+    long offset = offsetOfExceptionTableLength();
+    long length = getExceptionTableLength();
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(length > 0, "should only be called if table is present");
+    }
+    offset -= length * exceptionTableElementSize;
+    return offset;
+  }
+
 }
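
Note: the new offset helpers above locate the exception table by walking backwards from the tail of the constMethodOop: a u2 length word sits at a known position near the end, and the table's elements occupy length * exceptionTableElementSize bytes immediately before it. A toy sketch of that arithmetic, with all sizes and offsets hypothetical:

    public class TailTableOffsetDemo {
        public static void main(String[] args) {
            long objectEnd = 128;                 // hypothetical end of the constMethod body
            long offsetOfLastU2 = objectEnd - 2;  // last u2 element in the object

            // Suppose only an exception table is present: its length word is
            // the last u2, and its elements precede that word.
            long exceptionTableElementSize = 8;   // illustrative element size
            long lengthWordOffset = offsetOfLastU2;
            int  length = 3;                      // value that would be read at lengthWordOffset
            long tableOffset = lengthWordOffset - length * exceptionTableElementSize;

            System.out.println("length word at " + lengthWordOffset +
                               ", elements start at " + tableOffset);
        }
    }
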
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Sat Oct 20 00:08:35 2012 -0700
@@ -648,7 +648,12 @@
   }
 
   public void printValueOn(PrintStream tty) {
-    tty.print("ConstantPool for " + getPoolHolder().getName().asString());
+    Oop holder = poolHolder.getValue(this);
+    if (holder instanceof Klass) {
+      tty.print("ConstantPool for " + ((Klass)holder).getName().asString());
+    } else {
+      tty.print("ConstantPool for partially loaded class");
+    }
   }
 
   public long getObjectSize() {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ExceptionTableElement.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.oops;
+
+import java.io.*;
+import java.util.*;
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.interpreter.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.types.*;
+import sun.jvm.hotspot.utilities.*;
+
+public class ExceptionTableElement {
+  static {
+    VM.registerVMInitializedObserver(new Observer() {
+        public void update(Observable o, Object data) {
+          initialize(VM.getVM().getTypeDataBase());
+        }
+      });
+  }
+
+  private static synchronized void initialize(TypeDataBase db) throws WrongTypeException {
+    Type type            = db.lookupType("ExceptionTableElement");
+    offsetOfStartPC      = type.getCIntegerField("start_pc").getOffset();
+    offsetOfEndPC        = type.getCIntegerField("end_pc").getOffset();
+    offsetOfHandlerPC    = type.getCIntegerField("handler_pc").getOffset();
+    offsetOfCatchTypeIndex = type.getCIntegerField("catch_type_index").getOffset();
+  }
+
+  private static long offsetOfStartPC;
+  private static long offsetOfEndPC;
+  private static long offsetOfHandlerPC;
+  private static long offsetOfCatchTypeIndex;
+
+  private OopHandle handle;
+  private long      offset;
+
+  public ExceptionTableElement(OopHandle handle, long offset) {
+    this.handle = handle;
+    this.offset = offset;
+  }
+
+  public int getStartPC() {
+    return (int) handle.getCIntegerAt(offset + offsetOfStartPC, 2, true);
+  }
+
+  public int getEndPC() {
+    return (int) handle.getCIntegerAt(offset + offsetOfEndPC, 2, true);
+  }
+
+  public int getHandlerPC() {
+    return (int) handle.getCIntegerAt(offset + offsetOfHandlerPC, 2, true);
+  }
+
+  public int getCatchTypeIndex() {
+    return (int) handle.getCIntegerAt(offset + offsetOfCatchTypeIndex, 2, true);
+  }
+}
+
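
Note: each accessor in the new class reads an unsigned 16-bit field at a fixed offset from the element's base address. A self-contained approximation of that layout parsing, using a ByteBuffer in place of an OopHandle (the sample values are invented; the real element layout and byte order come from the target VM):

    import java.nio.ByteBuffer;

    public class ExceptionTableLayoutDemo {
        public static void main(String[] args) {
            // One ExceptionTableElement: start_pc, end_pc, handler_pc,
            // catch_type_index, each a u2, laid out consecutively.
            ByteBuffer entry = ByteBuffer.allocate(8);
            entry.putShort((short) 0).putShort((short) 12)
                 .putShort((short) 15).putShort((short) 3);
            entry.flip();
            // Mask to 0xffff for an unsigned read, as getCIntegerAt(..., 2, true) does.
            int startPC    = entry.getShort(0) & 0xffff;
            int endPC      = entry.getShort(2) & 0xffff;
            int handlerPC  = entry.getShort(4) & 0xffff;
            int catchIndex = entry.getShort(6) & 0xffff;
            System.out.println(startPC + ".." + endPC + " -> " + handlerPC +
                               " (catch_type_index " + catchIndex + ")");
        }
    }
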
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/GenerateOopMap.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/GenerateOopMap.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -651,10 +651,11 @@
     boolean fellThrough = false;  // False to get first BB marked.
 
     // First mark all exception handlers as start of a basic-block
-    TypeArray excps = method().getExceptionTable();
-    for(int i = 0; i < excps.getLength(); i += 4) {
-      int handler_pc_idx = i+2;
-      markBB(excps.getIntAt(handler_pc_idx), null);
+    if (method().hasExceptionTable()) {
+      ExceptionTableElement[] excps = method().getExceptionTable();
+      for(int i = 0; i < excps.length; i++) {
+        markBB(excps[i].getHandlerPC(), null);
+      }
     }
 
     // Then iterate through the code
@@ -891,14 +892,15 @@
 
     // Mark entry basic block as alive and all exception handlers
     _basic_blocks[0].markAsAlive();
-    TypeArray excps = method().getExceptionTable();
-    for(int i = 0; i < excps.getLength(); i += 4) {
-      int handler_pc_idx = i+2;
-      BasicBlock bb = getBasicBlockAt(excps.getIntAt(handler_pc_idx));
-      // If block is not already alive (due to multiple exception handlers to same bb), then
-      // make it alive
-      if (bb.isDead())
-        bb.markAsAlive();
+    if (method().hasExceptionTable()) {
+      ExceptionTableElement[] excps = method().getExceptionTable();
+      for(int i = 0; i < excps.length; i ++) {
+        BasicBlock bb = getBasicBlockAt(excps[i].getHandlerPC());
+        // If block is not already alive (due to multiple exception handlers to same bb), then
+        // make it alive
+        if (bb.isDead())
+          bb.markAsAlive();
+      }
     }
 
     BytecodeStream bcs = new BytecodeStream(_method);
@@ -1468,12 +1470,12 @@
 
     if (_has_exceptions) {
       int bci = itr.bci();
-      TypeArray exct   = method().getExceptionTable();
-      for(int i = 0; i< exct.getLength(); i+=4) {
-        int start_pc   = exct.getIntAt(i);
-        int end_pc     = exct.getIntAt(i+1);
-        int handler_pc = exct.getIntAt(i+2);
-        int catch_type = exct.getIntAt(i+3);
+      ExceptionTableElement[] exct   = method().getExceptionTable();
+      for(int i = 0; i< exct.length; i++) {
+        int start_pc   = exct[i].getStartPC();
+        int end_pc     = exct[i].getEndPC();
+        int handler_pc = exct[i].getHandlerPC();
+        int catch_type = exct[i].getCatchTypeIndex();
 
         if (start_pc <= bci && bci < end_pc) {
           BasicBlock excBB = getBasicBlockAt(handler_pc);
@@ -2151,7 +2153,7 @@
     _conflict       = false;
     _max_locals     = (int) method().getMaxLocals();
     _max_stack      = (int) method().getMaxStack();
-    _has_exceptions = (method().getExceptionTable().getLength() > 0);
+    _has_exceptions = (method().hasExceptionTable());
     _nof_refval_conflicts = 0;
     _init_vars      = new ArrayList(5);  // There are seldom more than 5 init_vars
     _report_result  = false;
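
Note: the loops above switch from the old flat encoding, four ints per handler packed into one TypeArray, to one typed ExceptionTableElement per handler. A standalone sketch contrasting the two iteration styles over invented data:

    public class ExceptionTableIterationDemo {
        static class Entry {
            final int startPC, endPC, handlerPC, catchTypeIndex;
            Entry(int s, int e, int h, int c) {
                startPC = s; endPC = e; handlerPC = h; catchTypeIndex = c;
            }
        }

        public static void main(String[] args) {
            // Old encoding: one flat int array, four entries per handler
            // (start_pc, end_pc, handler_pc, catch_type index).
            int[] flat = { 0, 12, 15, 3,  20, 30, 33, 0 };
            for (int i = 0; i < flat.length; i += 4) {
                System.out.println("handler at " + flat[i + 2]);
            }
            // New encoding: one typed element per handler.
            Entry[] typed = { new Entry(0, 12, 15, 3), new Entry(20, 30, 33, 0) };
            for (Entry e : typed) {
                System.out.println("handler at " + e.handlerPC);
            }
        }
    }
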
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,9 +50,7 @@
   private static int INITVAL_INDEX_OFFSET;
   private static int LOW_OFFSET;
   private static int HIGH_OFFSET;
-  private static int GENERIC_SIGNATURE_INDEX_OFFSET;
   private static int FIELD_SLOTS;
-  public static int IMPLEMENTORS_LIMIT;
 
   // ClassState constants
   private static int CLASS_STATE_UNPARSABLE_BY_GC;
@@ -70,13 +68,6 @@
     methodOrdering       = new OopField(type.getOopField("_method_ordering"), Oop.getHeaderSize());
     localInterfaces      = new OopField(type.getOopField("_local_interfaces"), Oop.getHeaderSize());
     transitiveInterfaces = new OopField(type.getOopField("_transitive_interfaces"), Oop.getHeaderSize());
-    nofImplementors      = new CIntField(type.getCIntegerField("_nof_implementors"), Oop.getHeaderSize());
-    IMPLEMENTORS_LIMIT   = db.lookupIntConstant("instanceKlass::implementors_limit").intValue();
-    implementors         = new OopField[IMPLEMENTORS_LIMIT];
-    for (int i = 0; i < IMPLEMENTORS_LIMIT; i++) {
-      long arrayOffset = Oop.getHeaderSize() + (i * db.getAddressSize());
-      implementors[i]    = new OopField(type.getOopField("_implementors[0]"), arrayOffset);
-    }
     fields               = new OopField(type.getOopField("_fields"), Oop.getHeaderSize());
     javaFieldsCount      = new CIntField(type.getCIntegerField("_java_fields_count"), Oop.getHeaderSize());
     constants            = new OopField(type.getOopField("_constants"), Oop.getHeaderSize());
@@ -107,7 +98,6 @@
     INITVAL_INDEX_OFFSET           = db.lookupIntConstant("FieldInfo::initval_index_offset").intValue();
     LOW_OFFSET                     = db.lookupIntConstant("FieldInfo::low_offset").intValue();
     HIGH_OFFSET                    = db.lookupIntConstant("FieldInfo::high_offset").intValue();
-    GENERIC_SIGNATURE_INDEX_OFFSET = db.lookupIntConstant("FieldInfo::generic_signature_offset").intValue();
     FIELD_SLOTS                    = db.lookupIntConstant("FieldInfo::field_slots").intValue();
     // read ClassState constants
     CLASS_STATE_UNPARSABLE_BY_GC = db.lookupIntConstant("instanceKlass::unparsable_by_gc").intValue();
@@ -136,8 +126,6 @@
   private static OopField  methodOrdering;
   private static OopField  localInterfaces;
   private static OopField  transitiveInterfaces;
-  private static CIntField nofImplementors;
-  private static OopField[] implementors;
   private static OopField  fields;
   private static CIntField javaFieldsCount;
   private static OopField  constants;
@@ -289,7 +277,25 @@
   }
 
   public short getFieldGenericSignatureIndex(int index) {
-    return getFields().getShortAt(index * FIELD_SLOTS + GENERIC_SIGNATURE_INDEX_OFFSET);
+    int len = (int)getFields().getLength();
+    int allFieldsCount = getAllFieldsCount();
+    int generic_signature_slot = allFieldsCount * FIELD_SLOTS;
+    for (int i = 0; i < allFieldsCount; i++) {
+      short flags = getFieldAccessFlags(i);
+      AccessFlags access = new AccessFlags(flags);
+      if (i == index) {
+        if (access.fieldHasGenericSignature()) {
+          return getFields().getShortAt(generic_signature_slot);
+        } else {
+          return 0;
+        }
+      } else {
+        if (access.fieldHasGenericSignature()) {
+          generic_signature_slot ++;
+        }
+      }
+    }
+    return 0;
   }
 
   public Symbol getFieldGenericSignature(int index) {
@@ -317,12 +323,20 @@
   public TypeArray getMethodOrdering()      { return (TypeArray)    methodOrdering.getValue(this); }
   public ObjArray  getLocalInterfaces()     { return (ObjArray)     localInterfaces.getValue(this); }
   public ObjArray  getTransitiveInterfaces() { return (ObjArray)     transitiveInterfaces.getValue(this); }
-  public long      nofImplementors()        { return                nofImplementors.getValue(this); }
-  public Klass     getImplementor()         { return (Klass)        implementors[0].getValue(this); }
-  public Klass     getImplementor(int i)    { return (Klass)        implementors[i].getValue(this); }
   public TypeArray getFields()              { return (TypeArray)    fields.getValue(this); }
   public int       getJavaFieldsCount()     { return                (int) javaFieldsCount.getValue(this); }
-  public int       getAllFieldsCount()      { return                (int)getFields().getLength() / FIELD_SLOTS; }
+  public int       getAllFieldsCount()      {
+    int len = (int)getFields().getLength();
+    int allFieldsCount = 0;
+    for (; allFieldsCount*FIELD_SLOTS < len; allFieldsCount++) {
+      short flags = getFieldAccessFlags(allFieldsCount);
+      AccessFlags access = new AccessFlags(flags);
+      if (access.fieldHasGenericSignature()) {
+        len --;
+      }
+    }
+    return allFieldsCount;
+  }
   public ConstantPool getConstants()        { return (ConstantPool) constants.getValue(this); }
   public Oop       getClassLoader()         { return                classLoader.getValue(this); }
   public Oop       getProtectionDomain()    { return                protectionDomain.getValue(this); }
@@ -359,6 +373,12 @@
     public static final int innerClassNextOffset = 4;
   };
 
+  public static interface EnclosingMethodAttributeOffset {
+    public static final int enclosing_method_class_index_offset = 0;
+    public static final int enclosing_method_method_index_offset = 1;
+    public static final int enclosing_method_attribute_size = 2;
+  };
+
   // refer to compute_modifier_flags in VM code.
   public long computeModifierFlags() {
     long access = getAccessFlags();
@@ -367,9 +387,14 @@
     int length = ( innerClassList == null)? 0 : (int) innerClassList.getLength();
     if (length > 0) {
        if (Assert.ASSERTS_ENABLED) {
-          Assert.that(length % InnerClassAttributeOffset.innerClassNextOffset == 0, "just checking");
+          Assert.that(length % InnerClassAttributeOffset.innerClassNextOffset == 0 ||
+                      length % InnerClassAttributeOffset.innerClassNextOffset == EnclosingMethodAttributeOffset.enclosing_method_attribute_size,
+                      "just checking");
        }
        for (int i = 0; i < length; i += InnerClassAttributeOffset.innerClassNextOffset) {
+          if (i == length - EnclosingMethodAttributeOffset.enclosing_method_attribute_size) {
+              break;
+          }
           int ioff = innerClassList.getShortAt(i +
                          InnerClassAttributeOffset.innerClassInnerClassInfoOffset);
           // 'ioff' can be zero.
@@ -419,9 +444,14 @@
     int length = ( innerClassList == null)? 0 : (int) innerClassList.getLength();
     if (length > 0) {
        if (Assert.ASSERTS_ENABLED) {
-         Assert.that(length % InnerClassAttributeOffset.innerClassNextOffset == 0, "just checking");
+         Assert.that(length % InnerClassAttributeOffset.innerClassNextOffset == 0 ||
+                     length % InnerClassAttributeOffset.innerClassNextOffset == EnclosingMethodAttributeOffset.enclosing_method_attribute_size,
+                     "just checking");
        }
        for (int i = 0; i < length; i += InnerClassAttributeOffset.innerClassNextOffset) {
+         if (i == length - EnclosingMethodAttributeOffset.enclosing_method_attribute_size) {
+             break;
+         }
          int ioff = innerClassList.getShortAt(i +
                         InnerClassAttributeOffset.innerClassInnerClassInfoOffset);
          // 'ioff' can be zero.
@@ -511,9 +541,6 @@
       visitor.doOop(methodOrdering, true);
       visitor.doOop(localInterfaces, true);
       visitor.doOop(transitiveInterfaces, true);
-      visitor.doCInt(nofImplementors, true);
-      for (int i = 0; i < IMPLEMENTORS_LIMIT; i++)
-        visitor.doOop(implementors[i], true);
       visitor.doOop(fields, true);
       visitor.doOop(constants, true);
       visitor.doOop(classLoader, true);
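
Note: the rewritten getAllFieldsCount and getFieldGenericSignatureIndex above reflect the new packing of the fields array: every field takes FIELD_SLOTS entries, and each field whose access flags include JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE contributes one extra trailing slot. A standalone sketch of the counting recurrence (the FIELD_SLOTS value here is illustrative):

    public class FieldsCountDemo {
        static final int FIELD_SLOTS = 6;                     // slots per field (illustrative)
        static final int HAS_GENERIC_SIGNATURE = 0x00000800;  // JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE

        // len is the total length of the fields array; flags[i] holds the
        // access flags of field i. Mirrors the loop in InstanceKlass.getAllFieldsCount.
        static int allFieldsCount(int len, int[] flags) {
            int count = 0;
            for (; count * FIELD_SLOTS < len; count++) {
                if ((flags[count] & HAS_GENERIC_SIGNATURE) != 0) {
                    len--;   // one trailing generic-signature slot consumes array space
                }
            }
            return count;
        }

        public static void main(String[] args) {
            // Two fields, the second with a generic signature:
            // 2 * FIELD_SLOTS regular slots + 1 trailing slot = 13 entries.
            int[] flags = { 0, HAS_GENERIC_SIGNATURE };
            System.out.println(allFieldsCount(13, flags));   // prints 2
        }
    }
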
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -48,7 +48,6 @@
   private static synchronized void initialize(TypeDataBase db) throws WrongTypeException {
     Type type                  = db.lookupType("methodOopDesc");
     constMethod                = new OopField(type.getOopField("_constMethod"), 0);
-    constants                  = new OopField(type.getOopField("_constants"), 0);
     methodData                 = new OopField(type.getOopField("_method_data"), 0);
     methodSize                 = new CIntField(type.getCIntegerField("_method_size"), 0);
     maxStack                   = new CIntField(type.getCIntegerField("_max_stack"), 0);
@@ -83,7 +82,6 @@
 
   // Fields
   private static OopField  constMethod;
-  private static OopField  constants;
   private static OopField  methodData;
   private static CIntField methodSize;
   private static CIntField maxStack;
@@ -125,9 +123,10 @@
 
   // Accessors for declared fields
   public ConstMethod  getConstMethod()                { return (ConstMethod)  constMethod.getValue(this);       }
-  public ConstantPool getConstants()                  { return (ConstantPool) constants.getValue(this);         }
+  public ConstantPool getConstants()                  {
+    return getConstMethod().getConstants();
+  }
   public MethodData   getMethodData()                 { return (MethodData) methodData.getValue(this);          }
-  public TypeArray    getExceptionTable()             { return getConstMethod().getExceptionTable();            }
   /** WARNING: this is in words, not useful in this system; use getObjectSize() instead */
   public long         getMethodSize()                 { return                methodSize.getValue(this);        }
   public long         getMaxStack()                   { return                maxStack.getValue(this);          }
@@ -281,7 +280,6 @@
     super.iterateFields(visitor, doVMFields);
     if (doVMFields) {
       visitor.doOop(constMethod, true);
-      visitor.doOop(constants, true);
       visitor.doCInt(methodSize, true);
       visitor.doCInt(maxStack, true);
       visitor.doCInt(maxLocals, true);
@@ -329,6 +327,14 @@
     return null;
   }
 
+  public boolean hasExceptionTable() {
+    return getConstMethod().hasExceptionTable();
+  }
+
+  public ExceptionTableElement[] getExceptionTable() {
+    return getConstMethod().getExceptionTable();
+  }
+
   public boolean hasCheckedExceptions() {
     return getConstMethod().hasCheckedExceptions();
   }
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/ClassConstants.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/ClassConstants.java	Sat Oct 20 00:08:35 2012 -0700
@@ -153,6 +153,8 @@
     public static final long JVM_ACC_FIELD_ACCESS_WATCHED         = 0x00002000;
     // field modification is watched by JVMTI
     public static final long JVM_ACC_FIELD_MODIFICATION_WATCHED   = 0x00008000;
+    // field has generic signature
+    public static final long JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE  = 0x00000800;
 
     // flags accepted by set_field_flags
     public static final long JVM_ACC_FIELD_FLAGS = 0x00008000 | JVM_ACC_WRITTEN_FLAGS;
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java	Sat Oct 20 00:08:35 2012 -0700
@@ -147,12 +147,6 @@
     }
   }
 
-  public boolean isRicochetFrame() {
-    CodeBlob cb = VM.getVM().getCodeCache().findBlob(getPC());
-    RicochetBlob rcb = VM.getVM().ricochetBlob();
-    return (cb == rcb && rcb != null && rcb.returnsToBounceAddr(getPC()));
-  }
-
   public boolean isCompiledFrame() {
     if (Assert.ASSERTS_ENABLED) {
       Assert.that(!VM.getVM().isCore(), "noncore builds only");
@@ -216,8 +210,7 @@
   public Frame realSender(RegisterMap map) {
     if (!VM.getVM().isCore()) {
       Frame result = sender(map);
-      while (result.isRuntimeFrame() ||
-             result.isRicochetFrame()) {
+      while (result.isRuntimeFrame()) {
         result = result.sender(map);
       }
       return result;
@@ -631,9 +624,6 @@
     if (Assert.ASSERTS_ENABLED) {
       Assert.that(cb != null, "sanity check");
     }
-    if (cb == VM.getVM().ricochetBlob()) {
-      oopsRicochetDo(oopVisitor, regMap);
-    }
     if (cb.getOopMaps() != null) {
       OopMapSet.oopsDo(this, cb, regMap, oopVisitor, VM.getVM().isDebugging());
 
@@ -650,10 +640,6 @@
     //    }
   }
 
-  private void oopsRicochetDo      (AddressVisitor oopVisitor, RegisterMap regMap) {
-    // XXX Empty for now
-  }
-
   // FIXME: implement the above routines, plus add
   // oops_interpreted_arguments_do and oops_compiled_arguments_do
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -91,6 +91,16 @@
                 access = new LinuxAMD64JavaThreadPDAccess();
             } else if (cpu.equals("sparc")) {
                 access = new LinuxSPARCJavaThreadPDAccess();
+            } else {
+              try {
+                access = (JavaThreadPDAccess)
+                  Class.forName("sun.jvm.hotspot.runtime.linux_" +
+                     cpu.toLowerCase() + ".Linux" + cpu.toUpperCase() +
+                     "JavaThreadPDAccess").newInstance();
+              } catch (Exception e) {
+                throw new RuntimeException("OS/CPU combination " + os + "/" + cpu +
+                                           " not yet supported");
+              }
             }
         } else if (os.equals("bsd")) {
             if (cpu.equals("x86")) {
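
The fallback added here builds the JavaThreadPDAccess class name from the OS/CPU pair instead of requiring a hard-coded branch per port; the lookup only succeeds if such a class is actually on the agent's classpath. For a hypothetical "mips" port:

    // cpu == "mips" resolves to
    //   sun.jvm.hotspot.runtime.linux_mips.LinuxMIPSJavaThreadPDAccess
    String name = "sun.jvm.hotspot.runtime.linux_" + cpu.toLowerCase() +
                  ".Linux" + cpu.toUpperCase() + "JavaThreadPDAccess";
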
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Sat Oct 20 00:08:35 2012 -0700
@@ -87,13 +87,13 @@
   private StubRoutines stubRoutines;
   private Bytes        bytes;
 
-  private RicochetBlob ricochetBlob;
-
   /** Flags indicating whether we are attached to a core, C1, or C2 build */
   private boolean      usingClientCompiler;
   private boolean      usingServerCompiler;
   /** Flag indicating whether UseTLAB is turned on */
   private boolean      useTLAB;
+  /** Flag indicating whether invokedynamic support is on */
+  private boolean      enableInvokeDynamic;
   /** alignment constants */
   private boolean      isLP64;
   private int          bytesPerLong;
@@ -319,6 +319,7 @@
     }
 
     useTLAB = (db.lookupIntConstant("UseTLAB").intValue() != 0);
+    enableInvokeDynamic = (db.lookupIntConstant("EnableInvokeDynamic").intValue() != 0);
 
     if (debugger != null) {
       isLP64 = debugger.getMachineDescription().isLP64();
@@ -554,6 +555,10 @@
     return useTLAB;
   }
 
+  public boolean getEnableInvokeDynamic() {
+    return enableInvokeDynamic;
+  }
+
   public TypeDataBase getTypeDataBase() {
     return db;
   }
@@ -628,18 +633,6 @@
     return stubRoutines;
   }
 
-  public RicochetBlob ricochetBlob() {
-    if (ricochetBlob == null) {
-      Type ricochetType  = db.lookupType("SharedRuntime");
-      AddressField ricochetBlobAddress = ricochetType.getAddressField("_ricochet_blob");
-      Address addr = ricochetBlobAddress.getValue();
-      if (addr != null) {
-        ricochetBlob = new RicochetBlob(addr);
-      }
-    }
-    return ricochetBlob;
-  }
-
   public VMRegImpl getVMRegImplInfo() {
     if (vmregImpl == null) {
       vmregImpl = new VMRegImpl();
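
EnableInvokeDynamic is read the same way as UseTLAB: boolean VM flags are exported to the SA as named integer constants in the type database, so the agent recovers them by comparing against zero. Sketch (db is the TypeDataBase the VM object already holds):

    // Boolean VM flags arrive as int constants; nonzero means "on".
    boolean enableInvokeDynamic =
        db.lookupIntConstant("EnableInvokeDynamic").intValue() != 0;
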
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCFrame.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCFrame.java	Sat Oct 20 00:08:35 2012 -0700
@@ -571,8 +571,6 @@
     //        registers callee-saved, then we will have to copy over
     //        the RegisterMap update logic from the Intel code.
 
-    if (isRicochetFrame()) return senderForRicochetFrame(map);
-
     // The constructor of the sender must know whether this frame is interpreted so it can set the
     // sender's _interpreter_sp_adjustment field.
     if (VM.getVM().getInterpreter().contains(pc)) {
@@ -945,20 +943,6 @@
   }
 
 
-  private Frame senderForRicochetFrame(SPARCRegisterMap map) {
-    if (DEBUG) {
-      System.out.println("senderForRicochetFrame");
-    }
-    //RicochetFrame* f = RicochetFrame::from_frame(fr);
-    // Cf. is_interpreted_frame path of frame::sender
-    Address youngerSP = getSP();
-    Address sp        = getSenderSP();
-    map.makeIntegerRegsUnsaved();
-    map.shiftWindow(sp, youngerSP);
-    boolean thisFrameAdjustedStack = true;  // I5_savedSP is live in this RF
-    return new SPARCFrame(biasSP(sp), biasSP(youngerSP), thisFrameAdjustedStack);
-  }
-
   private Frame senderForEntryFrame(RegisterMap regMap) {
     SPARCRegisterMap map = (SPARCRegisterMap) regMap;
 
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCRicochetFrame.java	Wed Sep 26 21:45:41 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.runtime.sparc;
-
-import java.util.*;
-import sun.jvm.hotspot.asm.sparc.SPARCRegister;
-import sun.jvm.hotspot.asm.sparc.SPARCRegisters;
-import sun.jvm.hotspot.debugger.*;
-import sun.jvm.hotspot.runtime.*;
-import sun.jvm.hotspot.types.*;
-
-public class SPARCRicochetFrame {
-  static {
-    VM.registerVMInitializedObserver(new Observer() {
-        public void update(Observable o, Object data) {
-          initialize(VM.getVM().getTypeDataBase());
-        }
-      });
-  }
-
-  private SPARCFrame frame;
-
-  private static void initialize(TypeDataBase db) {
-    // Type type = db.lookupType("MethodHandles::RicochetFrame");
-
-  }
-
-  static SPARCRicochetFrame fromFrame(SPARCFrame f) {
-    return new SPARCRicochetFrame(f);
-  }
-
-  private SPARCRicochetFrame(SPARCFrame f) {
-    frame = f;
-  }
-
-  private Address registerValue(SPARCRegister reg) {
-    return frame.getSP().addOffsetTo(reg.spOffsetInSavedWindow()).getAddressAt(0);
-  }
-
-  public Address savedArgsBase() {
-    return registerValue(SPARCRegisters.L4);
-  }
-  public Address exactSenderSP() {
-    return registerValue(SPARCRegisters.I5);
-  }
-  public Address senderLink() {
-    return frame.getSenderSP();
-  }
-  public Address senderPC() {
-    return frame.getSenderPC();
-  }
-  public Address extendedSenderSP() {
-    return savedArgsBase();
-  }
-}
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Sat Oct 20 00:08:35 2012 -0700
@@ -269,7 +269,6 @@
 
     if (isEntryFrame())       return senderForEntryFrame(map);
     if (isInterpretedFrame()) return senderForInterpreterFrame(map);
-    if (isRicochetFrame())    return senderForRicochetFrame(map);
 
     if(cb == null) {
       cb = VM.getVM().getCodeCache().findBlob(getPC());
@@ -288,16 +287,6 @@
     return new X86Frame(getSenderSP(), getLink(), getSenderPC());
   }
 
-  private Frame senderForRicochetFrame(X86RegisterMap map) {
-    if (DEBUG) {
-      System.out.println("senderForRicochetFrame");
-    }
-    X86RicochetFrame f = X86RicochetFrame.fromFrame(this);
-    if (map.getUpdateMap())
-      updateMapWithSavedLink(map, f.senderLinkAddress());
-    return new X86Frame(f.extendedSenderSP(), f.exactSenderSP(), f.senderLink(), f.senderPC());
-  }
-
   private Frame senderForEntryFrame(X86RegisterMap map) {
     if (DEBUG) {
       System.out.println("senderForEntryFrame");
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86RicochetFrame.java	Wed Sep 26 21:45:41 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.runtime.x86;
-
-import java.util.*;
-import sun.jvm.hotspot.debugger.*;
-import sun.jvm.hotspot.runtime.*;
-import sun.jvm.hotspot.types.*;
-
-public class X86RicochetFrame extends VMObject {
-  static {
-    VM.registerVMInitializedObserver(new Observer() {
-        public void update(Observable o, Object data) {
-          initialize(VM.getVM().getTypeDataBase());
-        }
-      });
-  }
-
-  private static void initialize(TypeDataBase db) {
-    Type type = db.lookupType("MethodHandles::RicochetFrame");
-
-    senderLinkField    = type.getAddressField("_sender_link");
-    savedArgsBaseField = type.getAddressField("_saved_args_base");
-    exactSenderSPField = type.getAddressField("_exact_sender_sp");
-    senderPCField      = type.getAddressField("_sender_pc");
-  }
-
-  private static AddressField senderLinkField;
-  private static AddressField savedArgsBaseField;
-  private static AddressField exactSenderSPField;
-  private static AddressField senderPCField;
-
-  static X86RicochetFrame fromFrame(X86Frame f) {
-    return new X86RicochetFrame(f.getFP().addOffsetTo(- senderLinkField.getOffset()));
-  }
-
-  private X86RicochetFrame(Address addr) {
-    super(addr);
-  }
-
-  public Address senderLink() {
-    return senderLinkField.getValue(addr);
-  }
-  public Address senderLinkAddress() {
-    return addr.addOffsetTo(senderLinkField.getOffset());
-  }
-  public Address savedArgsBase() {
-    return savedArgsBaseField.getValue(addr);
-  }
-  public Address extendedSenderSP() {
-    return savedArgsBase();
-  }
-  public Address exactSenderSP() {
-    return exactSenderSPField.getValue(addr);
-  }
-  public Address senderPC() {
-    return senderPCField.getValue(addr);
-  }
-}
--- a/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ClassWriter.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ClassWriter.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -504,11 +504,14 @@
                             2           /* exp. table len.  */ +
                             2           /* code attr. count */;
 
-            TypeArray exceptionTable = m.getExceptionTable();
-            final int exceptionTableLen = (int) exceptionTable.getLength();
-            if (exceptionTableLen != 0) {
+            boolean hasExceptionTable = m.hasExceptionTable();
+            ExceptionTableElement[] exceptionTable = null;
+            int exceptionTableLen = 0;
+            if (hasExceptionTable) {
+                exceptionTable = m.getExceptionTable();
+                exceptionTableLen = exceptionTable.length;
                 if (DEBUG) debugMessage("\tmethod has exception table");
-                codeSize += (exceptionTableLen / 4) /* exception table is 4-tuple array */
+                codeSize += exceptionTableLen /* exception table is 4-tuple array */
                                          * (2 /* start_pc     */ +
                                             2 /* end_pc       */ +
                                             2 /* handler_pc   */ +
@@ -586,15 +589,15 @@
             dos.write(code);
 
             // write exception table size
-            dos.writeShort((short) (exceptionTableLen / 4));
-            if (DEBUG) debugMessage("\texception table length = " + (exceptionTableLen / 4));
+            dos.writeShort((short) exceptionTableLen);
+            if (DEBUG) debugMessage("\texception table length = " + exceptionTableLen);
 
             if (exceptionTableLen != 0) {
-                for (int e = 0; e < exceptionTableLen; e += 4) {
-                     dos.writeShort((short) exceptionTable.getIntAt(e));
-                     dos.writeShort((short) exceptionTable.getIntAt(e + 1));
-                     dos.writeShort((short) exceptionTable.getIntAt(e + 2));
-                     dos.writeShort((short) exceptionTable.getIntAt(e + 3));
+                for (int e = 0; e < exceptionTableLen; e++) {
+                     dos.writeShort((short) exceptionTable[e].getStartPC());
+                     dos.writeShort((short) exceptionTable[e].getEndPC());
+                     dos.writeShort((short) exceptionTable[e].getHandlerPC());
+                     dos.writeShort((short) exceptionTable[e].getCatchTypeIndex());
                 }
             }
 
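The unit change above is easy to miss: the old TypeArray length counted individual u2 slots, so the entry count was length / 4, while the new ExceptionTableElement[].length already counts whole entries. Either way each entry contributes four u2 values (8 bytes) to the Code attribute. A worked example, assuming three handlers:

    // New counting: 3 entries, 4 u2 fields each, 2 bytes per u2.
    int exceptionTableLen = 3;
    int tableBytes = exceptionTableLen * (2 + 2 + 2 + 2);   // 24 bytes
    // Old counting: the same table was a TypeArray of length 12 (u2 slots),
    // so the count written to the class file was 12 / 4 = 3.
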
--- a/agent/src/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -204,7 +204,13 @@
       } else if (cpu.equals("ia64")) {
          cpuHelper = new IA64Helper();
       } else {
+        try {
+          cpuHelper = (CPUHelper)Class.forName("sun.jvm.hotspot.asm." +
+             cpu.toLowerCase() + "." + cpu.toUpperCase() +
+             "Helper").newInstance();
+        } catch (Exception e) {
           throw new RuntimeException("cpu '" + cpu + "' is not yet supported!");
+        }
       }
    }
 
@@ -783,37 +789,39 @@
                        });
 
          // display exception table for this method
-         TypeArray exceptionTable = method.getExceptionTable();
-         // exception table is 4 tuple array of shorts
-         int numEntries = (int)exceptionTable.getLength() / 4;
-         if (numEntries != 0) {
-            buf.h4("Exception Table");
-            buf.beginTable(1);
-            buf.beginTag("tr");
-            buf.headerCell("start bci");
-            buf.headerCell("end bci");
-            buf.headerCell("handler bci");
-            buf.headerCell("catch type");
-            buf.endTag("tr");
-
-            for (int e = 0; e < numEntries; e += 4) {
+         boolean hasException = method.hasExceptionTable();
+         if (hasException) {
+            ExceptionTableElement[] exceptionTable = method.getExceptionTable();
+            int numEntries = exceptionTable.length;
+            if (numEntries != 0) {
+               buf.h4("Exception Table");
+               buf.beginTable(1);
                buf.beginTag("tr");
-               buf.cell(Integer.toString(exceptionTable.getIntAt(e)));
-               buf.cell(Integer.toString(exceptionTable.getIntAt(e + 1)));
-               buf.cell(Integer.toString(exceptionTable.getIntAt(e + 2)));
-               short cpIndex = (short) exceptionTable.getIntAt(e + 3);
-               ConstantPool.CPSlot obj = cpIndex == 0? null : cpool.getSlotAt(cpIndex);
-               if (obj == null) {
-                  buf.cell("Any");
-               } else if (obj.isMetaData()) {
-                 buf.cell(obj.getSymbol().asString().replace('/', '.'));
-               } else {
-                 buf.cell(genKlassLink((InstanceKlass)obj.getOop()));
+               buf.headerCell("start bci");
+               buf.headerCell("end bci");
+               buf.headerCell("handler bci");
+               buf.headerCell("catch type");
+               buf.endTag("tr");
+
+               for (int e = 0; e < numEntries; e ++) {
+                  buf.beginTag("tr");
+                  buf.cell(Integer.toString(exceptionTable[e].getStartPC()));
+                  buf.cell(Integer.toString(exceptionTable[e].getEndPC()));
+                  buf.cell(Integer.toString(exceptionTable[e].getHandlerPC()));
+                  short cpIndex = (short) exceptionTable[e].getCatchTypeIndex();
+                  ConstantPool.CPSlot obj = cpIndex == 0? null : cpool.getSlotAt(cpIndex);
+                  if (obj == null) {
+                     buf.cell("Any");
+                  } else if (obj.isMetaData()) {
+                     buf.cell(obj.getSymbol().asString().replace('/', '.'));
+                  } else {
+                     buf.cell(genKlassLink((InstanceKlass)obj.getOop()));
+                  }
+                  buf.endTag("tr");
                }
-               buf.endTag("tr");
+
+               buf.endTable();
             }
-
-            buf.endTable();
          }
 
          // display constant pool hyperlink
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/AltPlatformInfo.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.utilities;
+
+public interface AltPlatformInfo {
+  // Additional cpu types can be tested via this interface
+
+  public boolean knownCPU(String cpu);
+}
\ No newline at end of file
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/BasicHashtable.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/BasicHashtable.java	Sat Oct 20 00:08:35 2012 -0700
@@ -41,10 +41,10 @@
   }
 
   private static synchronized void initialize(TypeDataBase db) {
-    Type type = db.lookupType("BasicHashtable");
+    Type type = db.lookupType("BasicHashtable<mtInternal>");
     tableSizeField = type.getCIntegerField("_table_size");
     bucketsField   = type.getAddressField("_buckets");
-    bucketSize = db.lookupType("HashtableBucket").getSize();
+    bucketSize = db.lookupType("HashtableBucket<mtInternal>").getSize();
   }
 
   // Fields
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/BasicHashtableEntry.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/BasicHashtableEntry.java	Sat Oct 20 00:08:35 2012 -0700
@@ -41,7 +41,7 @@
   }
 
   private static synchronized void initialize(TypeDataBase db) {
-    Type type = db.lookupType("BasicHashtableEntry");
+    Type type = db.lookupType("BasicHashtableEntry<mtInternal>");
     hashField      = type.getCIntegerField("_hash");
     nextField      = type.getAddressField("_next");
   }
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/Hashtable.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/Hashtable.java	Sat Oct 20 00:08:35 2012 -0700
@@ -40,7 +40,7 @@
 
   private static synchronized void initialize(TypeDataBase db) {
     // just to confirm that type exists
-    Type type = db.lookupType("Hashtable<intptr_t>");
+    Type type = db.lookupType("IntptrHashtable");
   }
 
   // derived class may return Class<? extends HashtableEntry>
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/HashtableBucket.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/HashtableBucket.java	Sat Oct 20 00:08:35 2012 -0700
@@ -39,7 +39,7 @@
   }
 
   private static synchronized void initialize(TypeDataBase db) {
-    Type type = db.lookupType("HashtableBucket");
+    Type type = db.lookupType("HashtableBucket<mtInternal>");
     entryField = type.getAddressField("_entry");
   }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/HashtableEntry.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/HashtableEntry.java	Sat Oct 20 00:08:35 2012 -0700
@@ -41,7 +41,7 @@
   }
 
   private static synchronized void initialize(TypeDataBase db) {
-    Type type = db.lookupType("HashtableEntry<intptr_t>");
+    Type type = db.lookupType("IntptrHashtableEntry");
     literalField   = type.getAddressField("_literal");
   }
 
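The renamed lookups in the hashtable wrappers above track the C++ side, where the hashtable classes became templates parameterized by a memory type (mtInternal) for Native Memory Tracking, and where Hashtable<intptr_t> is now exported under the IntptrHashtable typedef so the SA has a non-template name to resolve. The lookup pattern itself is unchanged; sketch:

    // The SA must use the exact instantiation name that vmStructs exports.
    Type type = db.lookupType("BasicHashtable<mtInternal>");
    CIntegerField tableSizeField = type.getCIntegerField("_table_size");
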
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java	Sat Oct 20 00:08:35 2012 -0700
@@ -64,6 +64,13 @@
     } else if (cpu.equals("ia64") || cpu.equals("amd64") || cpu.equals("x86_64")) {
       return cpu;
     } else {
+      try {
+        Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed");
+        AltPlatformInfo api = (AltPlatformInfo)pic.newInstance();
+        if (api.knownCPU(cpu)) {
+          return cpu;
+        }
+      } catch (Exception e) {}
       throw new UnsupportedPlatformException("CPU type " + cpu + " not yet supported");
     }
   }
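
PlatformInfoClosed is probed reflectively so that closed (non-OpenJDK) ports can extend the known-CPU list without touching this file; if the class is absent, the catch falls through to the unsupported-platform error. A hypothetical implementation of the new AltPlatformInfo interface:

    package sun.jvm.hotspot.utilities;

    // Hypothetical closed-port hook; the class name matches the one probed.
    public class PlatformInfoClosed implements AltPlatformInfo {
      public boolean knownCPU(String cpu) {
        return "mips".equals(cpu);   // illustrative extra CPU
      }
    }
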
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/soql/JSJavaFrame.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/soql/JSJavaFrame.java	Sat Oct 20 00:08:35 2012 -0700
@@ -153,7 +153,8 @@
             List visibleVars = new ArrayList(0);
             for (int i = 0; i < localVars.length; i++) {
                 LocalVariableTableElement cur = localVars[i];
-                if (cur.getStartBCI() >= bci && cur.getLength() > 0) {
+                int startBCI = cur.getStartBCI();
+                if (startBCI <= bci && bci < startBCI + cur.getLength()) {
                     visibleVars.add(cur);
                 }
             }
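
The old test accepted any slot whose scope merely started at or after the current bci; the fixed predicate is the proper liveness interval check. A local variable is visible at bci exactly when:

    // bci falls in the half-open range [startBCI, startBCI + length).
    boolean visible = startBCI <= bci && bci < startBCI + cur.getLength();
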
--- a/make/Makefile	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/Makefile	Sat Oct 20 00:08:35 2012 -0700
@@ -288,23 +288,25 @@
 ZERO_DIR=$(ZERO_BASE_DIR)/$(VM_SUBDIR)
 SHARK_DIR=$(SHARK_BASE_DIR)/$(VM_SUBDIR)
 
-# Misc files and generated files need to come from C1 or C2 area
-ifeq ($(ZERO_BUILD), true)
-ifeq ($(SHARK_BUILD), true)
-  MISC_DIR=$(SHARK_DIR)
-  GEN_DIR=$(SHARK_BASE_DIR)/generated
-else
-  MISC_DIR=$(ZERO_DIR)
-  GEN_DIR=$(ZERO_BASE_DIR)/generated
+ifeq ($(JVM_VARIANT_SERVER), true)
+    MISC_DIR=$(C2_DIR)
+    GEN_DIR=$(C2_BASE_DIR)/generated
+endif
+ifeq ($(JVM_VARIANT_CLIENT), true)
+    MISC_DIR=$(C1_DIR)
+    GEN_DIR=$(C1_BASE_DIR)/generated
 endif
-else
-ifeq ($(ARCH_DATA_MODEL), 32)
-  MISC_DIR=$(C1_DIR)
-  GEN_DIR=$(C1_BASE_DIR)/generated
-else
-  MISC_DIR=$(C2_DIR)
-  GEN_DIR=$(C2_BASE_DIR)/generated
+ifeq ($(JVM_VARIANT_KERNEL), true)
+    MISC_DIR=$(C2_DIR)
+    GEN_DIR=$(C2_BASE_DIR)/generated
 endif
+ifeq ($(JVM_VARIANT_ZEROSHARK), true)
+    MISC_DIR=$(SHARK_DIR)
+    GEN_DIR=$(SHARK_BASE_DIR)/generated
+endif
+ifeq ($(JVM_VARIANT_ZERO), true)
+    MISC_DIR=$(ZERO_DIR)
+    GEN_DIR=$(ZERO_BASE_DIR)/generated
 endif
 
 # Bin files (windows)
@@ -357,66 +359,67 @@
 
 # Shared Library
 ifneq ($(OSNAME),windows)
-  ifeq ($(ZERO_BUILD), true)
-    ifeq ($(SHARK_BUILD), true)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(SHARK_DIR)/%.$(LIBRARY_SUFFIX)
-	$(install-file)
-$(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):       $(SHARK_DIR)/%.$(LIBRARY_SUFFIX)
-	$(install-file)
-    else
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(ZERO_DIR)/%.$(LIBRARY_SUFFIX)
-	$(install-file)
-$(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):       $(ZERO_DIR)/%.$(LIBRARY_SUFFIX)
-	$(install-file)
+    ifeq ($(JVM_VARIANT_SERVER), true)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(C2_DIR)/%.$(LIBRARY_SUFFIX)
+		$(install-file)
+        $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):       $(C2_DIR)/%.$(LIBRARY_SUFFIX)
+		$(install-file)
+        $(EXPORT_SERVER_DIR)/64/%.$(LIBRARY_SUFFIX):    $(C2_DIR)/%.$(LIBRARY_SUFFIX)
+		$(install-file)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo: 		$(C2_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_SERVER_DIR)/%.debuginfo:       		$(C2_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_SERVER_DIR)/64/%.debuginfo:    		$(C2_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.diz: 			$(C2_DIR)/%.diz
+		$(install-file)
+        $(EXPORT_SERVER_DIR)/%.diz:       			$(C2_DIR)/%.diz
+		$(install-file)
+        $(EXPORT_SERVER_DIR)/64/%.diz:    			$(C2_DIR)/%.diz
+		$(install-file)
     endif
-  else
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(C1_DIR)/%.$(LIBRARY_SUFFIX)
-	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(C2_DIR)/%.$(LIBRARY_SUFFIX)
-	$(install-file)
-$(EXPORT_CLIENT_DIR)/%.$(LIBRARY_SUFFIX):       $(C1_DIR)/%.$(LIBRARY_SUFFIX)
-	$(install-file)
-$(EXPORT_CLIENT_DIR)/64/%.$(LIBRARY_SUFFIX):    $(C1_DIR)/%.$(LIBRARY_SUFFIX)
-	$(install-file)
-$(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):       $(C2_DIR)/%.$(LIBRARY_SUFFIX)
-	$(install-file)
-$(EXPORT_SERVER_DIR)/64/%.$(LIBRARY_SUFFIX):    $(C2_DIR)/%.$(LIBRARY_SUFFIX)
-	$(install-file)
-
-# Debug info for shared library
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo: $(C1_DIR)/%.debuginfo
-	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo: $(C2_DIR)/%.debuginfo
-	$(install-file)
-$(EXPORT_CLIENT_DIR)/%.debuginfo:       $(C1_DIR)/%.debuginfo
-	$(install-file)
-$(EXPORT_CLIENT_DIR)/64/%.debuginfo:    $(C1_DIR)/%.debuginfo
-	$(install-file)
-$(EXPORT_SERVER_DIR)/%.debuginfo:       $(C2_DIR)/%.debuginfo
-	$(install-file)
-$(EXPORT_SERVER_DIR)/64/%.debuginfo:    $(C2_DIR)/%.debuginfo
-	$(install-file)
-
-# ZIP'ed debug info for shared library
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz: $(C1_DIR)/%.diz
-	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz: $(C2_DIR)/%.diz
-	$(install-file)
-$(EXPORT_CLIENT_DIR)/%.diz:       $(C1_DIR)/%.diz
-	$(install-file)
-$(EXPORT_CLIENT_DIR)/64/%.diz:    $(C1_DIR)/%.diz
-	$(install-file)
-$(EXPORT_SERVER_DIR)/%.diz:       $(C2_DIR)/%.diz
-	$(install-file)
-$(EXPORT_SERVER_DIR)/64/%.diz:    $(C2_DIR)/%.diz
-	$(install-file)
-  endif
+    ifeq ($(JVM_VARIANT_CLIENT), true)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(C1_DIR)/%.$(LIBRARY_SUFFIX)
+		$(install-file)
+        $(EXPORT_CLIENT_DIR)/%.$(LIBRARY_SUFFIX):       $(C1_DIR)/%.$(LIBRARY_SUFFIX)
+		$(install-file)
+        $(EXPORT_CLIENT_DIR)/64/%.$(LIBRARY_SUFFIX):    $(C1_DIR)/%.$(LIBRARY_SUFFIX)
+		$(install-file)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo: 		$(C1_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_CLIENT_DIR)/%.debuginfo:       		$(C1_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_CLIENT_DIR)/64/%.debuginfo:    		$(C1_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.diz: 			$(C1_DIR)/%.diz
+		$(install-file)
+        $(EXPORT_CLIENT_DIR)/%.diz:       			$(C1_DIR)/%.diz
+		$(install-file)
+        $(EXPORT_CLIENT_DIR)/64/%.diz:    			$(C1_DIR)/%.diz
+		$(install-file)
+    endif
+    ifeq ($(JVM_VARIANT_ZEROSHARK), true)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(SHARK_DIR)/%.$(LIBRARY_SUFFIX)
+		$(install-file)
+        $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):       $(SHARK_DIR)/%.$(LIBRARY_SUFFIX)
+		$(install-file)
+    endif
+    ifeq ($(JVM_VARIANT_ZERO), true)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(ZERO_DIR)/%.$(LIBRARY_SUFFIX)
+		$(install-file)
+        $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):       $(ZERO_DIR)/%.$(LIBRARY_SUFFIX)
+		$(install-file)
+    endif
 endif
 
 # Jar file (sa-jdi.jar)
 $(EXPORT_LIB_DIR)/%.jar: $(GEN_DIR)/%.jar
 	$(install-file)
 
+$(EXPORT_JRE_LIB_DIR)/%.jar: $(GEN_DIR)/%.jar
+	$(install-file)
+
 # Include files (jvmti.h, jvmticmlr.h, jni.h, $(JDK_INCLUDE_SUBDIR)/jni_md.h, jmm.h, jfr.h)
 $(EXPORT_INCLUDE_DIR)/%: $(GEN_DIR)/jvmtifiles/%
 	$(install-file)
@@ -486,18 +489,19 @@
 	 ($(CD) $(JDK_IMAGE_DIR) && $(TAR) -xf -)
 
 test_jdk:
-    ifeq ($(ARCH_DATA_MODEL), 32)
-      ifneq ($(ZERO_BUILD), true)
-	$(JDK_IMAGE_DIR)/bin/java -d32 -client -Xinternalversion
-	$(JDK_IMAGE_DIR)/bin/java -d32 -client -version
-      endif
-	$(JDK_IMAGE_DIR)/bin/java -d32 -server -Xinternalversion
-	$(JDK_IMAGE_DIR)/bin/java -d32 -server -version
-    endif
-    ifeq ($(ARCH_DATA_MODEL), 64)
-	$(JDK_IMAGE_DIR)/bin/java -d64 -server -Xinternalversion
-	$(JDK_IMAGE_DIR)/bin/java -d64 -server -version
-    endif
+  ifeq ($(JVM_VARIANT_CLIENT), true)
+	$(JDK_IMAGE_DIR)/bin/java -d$(ARCH_DATA_MODEL) -client -Xinternalversion
+	$(JDK_IMAGE_DIR)/bin/java -d$(ARCH_DATA_MODEL) -client -version
+  endif
+  ifeq ($(findstring true, $(JVM_VARIANT_SERVER)\
+		$(JVM_VARIANT_ZERO)$(JVM_VARIANT_ZEROSHARK)), true)
+	$(JDK_IMAGE_DIR)/bin/java -d$(ARCH_DATA_MODEL) -server -Xinternalversion
+	$(JDK_IMAGE_DIR)/bin/java -d$(ARCH_DATA_MODEL) -server -version
+  endif
+  ifeq ($(JVM_VARIANT_KERNEL), true)
+	$(JDK_IMAGE_DIR)/bin/java -d$(ARCH_DATA_MODEL) -kernel -Xinternalversion
+	$(JDK_IMAGE_DIR)/bin/java -d$(ARCH_DATA_MODEL) -kernel -version
+  endif
 
 copy_product_jdk::
 	$(RM) -r $(JDK_IMAGE_DIR)
--- a/make/bsd/Makefile	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/bsd/Makefile	Sat Oct 20 00:08:35 2012 -0700
@@ -188,7 +188,7 @@
 # in the build.sh script:
 TARGETS           = debug jvmg fastdebug optimized profiled product
 
-ifeq ($(ZERO_BUILD), true)
+ifeq ($(findstring true, $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
   SUBDIR_DOCS     = $(OSNAME)_$(VARIANTARCH)_docs
 else
   SUBDIR_DOCS     = $(OSNAME)_$(BUILDARCH)_docs
--- a/make/bsd/makefiles/buildtree.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/bsd/makefiles/buildtree.make	Sat Oct 20 00:08:35 2012 -0700
@@ -58,6 +58,7 @@
 # needs to be set here since this Makefile doesn't include defs.make
 OS_VENDOR:=$(shell uname -s)
 
+-include $(SPEC)
 include $(GAMMADIR)/make/scm.make
 include $(GAMMADIR)/make/altsrc.make
 
@@ -68,7 +69,7 @@
 # For now, until the compiler is less wobbly:
 TESTFLAGS	= -Xbatch -showversion
 
-ifeq ($(ZERO_BUILD), true)
+ifeq ($(findstring true, $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
   PLATFORM_FILE = $(shell dirname $(shell dirname $(shell pwd)))/platform_zero
 else
   ifdef USE_SUNCC
@@ -247,6 +248,8 @@
 	    echo "HOTSPOT_EXTRA_SYSDEFS\$$(HOTSPOT_EXTRA_SYSDEFS) = $(HOTSPOT_EXTRA_SYSDEFS)" && \
 	    echo "SYSDEFS += \$$(HOTSPOT_EXTRA_SYSDEFS)"; \
 	echo; \
+	[ -n "$(SPEC)" ] && \
+	    echo "include $(SPEC)"; \
 	echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(VARIANT).make"; \
 	echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(COMPILER).make"; \
 	) > $@
--- a/make/bsd/makefiles/defs.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/bsd/makefiles/defs.make	Sat Oct 20 00:08:35 2012 -0700
@@ -38,7 +38,7 @@
 endif
 
 # zero
-ifeq ($(ZERO_BUILD), true)
+ifeq ($(findstring true, $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
   ifeq ($(ARCH_DATA_MODEL), 64)
     MAKE_ARGS      += LP64=1
   endif
@@ -124,6 +124,18 @@
   HS_ARCH          = ppc
 endif
 
+# On 32-bit bsd we build server and client, on 64-bit just server.
+ifeq ($(JVM_VARIANTS),)
+  ifeq ($(ARCH_DATA_MODEL), 32)
+    JVM_VARIANTS:=client,server
+    JVM_VARIANT_CLIENT:=true
+    JVM_VARIANT_SERVER:=true
+  else
+    JVM_VARIANTS:=server
+    JVM_VARIANT_SERVER:=true
+  endif
+endif
+
 JDK_INCLUDE_SUBDIR=bsd
 
 # Library suffix
@@ -144,16 +156,16 @@
 EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server
 EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
 
-ifndef BUILD_CLIENT_ONLY
-EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
-EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.$(LIBRARY_SUFFIX)
+EXPORT_LIST += $(EXPORT_JRE_LIB_DIR)/wb.jar
+
+ifeq ($(findstring true, $(JVM_VARIANT_SERVER) $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
+  EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
+  EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.$(LIBRARY_SUFFIX)
 endif
 
-ifneq ($(ZERO_BUILD), true)
-  ifeq ($(ARCH_DATA_MODEL), 32)
-    EXPORT_LIST += $(EXPORT_CLIENT_DIR)/Xusage.txt
-    EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.$(LIBRARY_SUFFIX)
-  endif
+ifeq ($(JVM_VARIANT_CLIENT),true)
+  EXPORT_LIST += $(EXPORT_CLIENT_DIR)/Xusage.txt
+  EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.$(LIBRARY_SUFFIX)
 endif
 
 # Serviceability Binaries
--- a/make/bsd/makefiles/gcc.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/bsd/makefiles/gcc.make	Sat Oct 20 00:08:35 2012 -0700
@@ -27,53 +27,57 @@
 #------------------------------------------------------------------------
 # CC, CXX & AS
 
-# When cross-compiling the ALT_COMPILER_PATH points
-# to the cross-compilation toolset
-ifdef CROSS_COMPILE_ARCH
- CXX = $(ALT_COMPILER_PATH)/g++
- CC  = $(ALT_COMPILER_PATH)/gcc
- HOSTCXX = g++
- HOSTCC  = gcc
-else ifneq ($(OS_VENDOR), Darwin)
- CXX = g++
- CC  = gcc
- HOSTCXX = $(CXX)
- HOSTCC  = $(CC)
+# If a SPEC is not set already, then use these defaults.
+ifeq ($(SPEC),)
+  # When cross-compiling the ALT_COMPILER_PATH points
+  # to the cross-compilation toolset
+  ifdef CROSS_COMPILE_ARCH
+    CXX = $(ALT_COMPILER_PATH)/g++
+    CC  = $(ALT_COMPILER_PATH)/gcc
+    HOSTCXX = g++
+    HOSTCC  = gcc
+  else ifneq ($(OS_VENDOR), Darwin)
+    CXX = g++
+    CC  = gcc
+    HOSTCXX = $(CXX)
+    HOSTCC  = $(CC)
+  endif
+
+  # i486 hotspot requires -mstackrealign on Darwin.
+  # llvm-gcc supports this in Xcode 3.2.6 and 4.0.
+  # gcc-4.0 supports this on earlier versions.
+  # Prefer llvm-gcc where available.
+  ifeq ($(OS_VENDOR), Darwin)
+    ifeq ($(origin CXX), default)
+      CXX = llvm-g++
+    endif
+    ifeq ($(origin CC), default)
+      CC  = llvm-gcc
+    endif
+
+    ifeq ($(ARCH), i486)
+      LLVM_SUPPORTS_STACKREALIGN := $(shell \
+       [ "0"`llvm-gcc -v 2>&1 | grep LLVM | sed -E "s/.*LLVM build ([0-9]+).*/\1/"` -gt "2333" ] \
+       && echo true || echo false)
+
+      ifeq ($(LLVM_SUPPORTS_STACKREALIGN), true)
+        CXX32 ?= llvm-g++
+        CC32  ?= llvm-gcc
+      else
+        CXX32 ?= g++-4.0
+        CC32  ?= gcc-4.0
+      endif
+      CXX = $(CXX32)
+      CC  = $(CC32)
+    endif
+
+    HOSTCXX = $(CXX)
+    HOSTCC  = $(CC)
+  endif
+
+  AS   = $(CC) -c -x assembler-with-cpp
 endif
 
-# i486 hotspot requires -mstackrealign on Darwin.
-# llvm-gcc supports this in Xcode 3.2.6 and 4.0.
-# gcc-4.0 supports this on earlier versions.
-# Prefer llvm-gcc where available.
-ifeq ($(OS_VENDOR), Darwin)
-  ifeq ($(origin CXX), default)
-   CXX = llvm-g++
-  endif
-  ifeq ($(origin CC), default)
-   CC  = llvm-gcc
-  endif
-
-  ifeq ($(ARCH), i486)
-  LLVM_SUPPORTS_STACKREALIGN := $(shell \
-   [ "0"`llvm-gcc -v 2>&1 | grep LLVM | sed -E "s/.*LLVM build ([0-9]+).*/\1/"` -gt "2333" ] \
-   && echo true || echo false)
-
-  ifeq ($(LLVM_SUPPORTS_STACKREALIGN), true)
-    CXX32 ?= llvm-g++
-    CC32  ?= llvm-gcc
-  else
-    CXX32 ?= g++-4.0
-    CC32  ?= gcc-4.0
-  endif
-  CXX = $(CXX32)
-  CC  = $(CC32)
-  endif
-
-  HOSTCXX = $(CXX)
-  HOSTCC  = $(CC)
-endif
-
-AS   = $(CC) -c -x assembler-with-cpp
 
 # -dumpversion in gcc-2.91 shows "egcs-2.91.66". In later version, it only
 # prints the numbers (e.g. "2.95", "3.2.1")
@@ -101,11 +105,12 @@
 VM_PICFLAG/AOUT   =
 VM_PICFLAG        = $(VM_PICFLAG/$(LINK_INTO))
 
-ifeq ($(ZERO_BUILD), true)
-CFLAGS += $(LIBFFI_CFLAGS)
+ifeq ($(JVM_VARIANT_ZERO), true)
+  CFLAGS += $(LIBFFI_CFLAGS)
 endif
-ifeq ($(SHARK_BUILD), true)
-CFLAGS += $(LLVM_CFLAGS)
+ifeq ($(JVM_VARIANT_ZEROSHARK), true)
+  CFLAGS += $(LIBFFI_CFLAGS)
+  CFLAGS += $(LLVM_CFLAGS)
 endif
 CFLAGS += $(VM_PICFLAG)
 CFLAGS += -fno-rtti
@@ -209,7 +214,7 @@
 
 # Flags for generating make dependency flags.
 ifneq ("${CC_VER_MAJOR}", "2")
-DEPFLAGS = -MMD -MP -MF $(DEP_DIR)/$(@:%=%.d)
+DEPFLAGS = -fpch-deps -MMD -MP -MF $(DEP_DIR)/$(@:%=%.d)
 endif
 
 # -DDONT_USE_PRECOMPILED_HEADER will exclude all includes in precompiled.hpp.
--- a/make/bsd/makefiles/jvmg.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/bsd/makefiles/jvmg.make	Sat Oct 20 00:08:35 2012 -0700
@@ -27,7 +27,9 @@
 # Compiler specific DEBUG_CFLAGS are passed in from gcc.make, sparcWorks.make
 DEBUG_CFLAGS/DEFAULT= $(DEBUG_CFLAGS)
 DEBUG_CFLAGS/BYFILE = $(DEBUG_CFLAGS/$@)$(DEBUG_CFLAGS/DEFAULT$(DEBUG_CFLAGS/$@))
-CFLAGS += $(DEBUG_CFLAGS/BYFILE)
+
+# _NMT_NOINLINE_ informs NMT that no inlining is done by the Compiler
+CFLAGS += $(DEBUG_CFLAGS/BYFILE) -D_NMT_NOINLINE_
 
 # Set the environment variable HOTSPARC_GENERIC to "true"
 # to inhibit the effect of the previous line on CFLAGS.
--- a/make/bsd/makefiles/sparcWorks.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/bsd/makefiles/sparcWorks.make	Sat Oct 20 00:08:35 2012 -0700
@@ -25,12 +25,15 @@
 #------------------------------------------------------------------------
 # CC, CXX & AS
 
-CXX = CC
-CC  = cc
-AS  = $(CC) -c
+# If a SPEC is not set already, then use these defaults.
+ifeq ($(SPEC),)
+  CXX = CC
+  CC  = cc
+  AS  = $(CC) -c
 
-HOSTCXX = $(CXX)
-HOSTCC  = $(CC)
+  HOSTCXX = $(CXX)
+  HOSTCC  = $(CC)
+endif
 
 ARCHFLAG = $(ARCHFLAG/$(BUILDARCH))
 ARCHFLAG/i486    = -m32
--- a/make/bsd/makefiles/universal.gmk	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/bsd/makefiles/universal.gmk	Sat Oct 20 00:08:35 2012 -0700
@@ -110,4 +110,5 @@
 
 .PHONY:	universal_product universal_fastdebug universal_debug \
 	all_product_universal all_fastdebug_universal all_debug_universal \
-	universalize export_universal copy_universal
+	universalize export_universal copy_universal \
+	$(UNIVERSAL_LIPO_LIST) $(UNIVERSAL_COPY_LIST)
--- a/make/bsd/makefiles/vm.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/bsd/makefiles/vm.make	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -42,7 +42,7 @@
 -include $(DEP_DIR)/*.d
 
 # read machine-specific adjustments (%%% should do this via buildtree.make?)
-ifeq ($(ZERO_BUILD), true)
+ifeq ($(findstring true, $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
   include $(MAKEFILES_DIR)/zeroshark.make
 else
   include $(MAKEFILES_DIR)/$(BUILDARCH).make
@@ -271,12 +271,12 @@
 
   LIBS_VM                  += $(LIBS)
 endif
-ifeq ($(ZERO_BUILD), true)
+ifeq ($(JVM_VARIANT_ZERO), true)
   LIBS_VM += $(LIBFFI_LIBS)
 endif
-ifeq ($(SHARK_BUILD), true)
+ifeq ($(JVM_VARIANT_ZEROSHARK), true)
+  LIBS_VM   += $(LIBFFI_LIBS) $(LLVM_LIBS)
   LFLAGS_VM += $(LLVM_LDFLAGS)
-  LIBS_VM   += $(LLVM_LIBS)
 endif
 
 
@@ -335,6 +335,9 @@
 # Serviceability agent
 include $(MAKEFILES_DIR)/saproc.make
 
+# Whitebox testing API
+include $(MAKEFILES_DIR)/wb.make
+
 #----------------------------------------------------------------------
 
 ifeq ($(OS_VENDOR), Darwin)
@@ -342,10 +345,10 @@
 	dsymutil $(LIBJVM)
 
 # no libjvm_db for macosx
-build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(BUILDLIBSAPROC) dtraceCheck $(LIBJVM).dSYM
+build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(BUILDLIBSAPROC) dtraceCheck $(LIBJVM).dSYM $(WB_JAR)
 	echo "Doing vm.make build:"
 else
-build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(LIBJVM_DB) $(BUILDLIBSAPROC)
+build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(LIBJVM_DB) $(BUILDLIBSAPROC) $(WB_JAR)
 endif
 
 install: install_jvm install_jsig install_saproc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/bsd/makefiles/wb.make	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#  
+#
+
+# Rules to build whitebox testing library, used by vm.make
+WB = wb
+
+WBSRCDIR = $(GAMMADIR)/src/share/tools/whitebox
+
+WB_JAR = $(GENERATED)/$(WB).jar
+
+WB_JAVA_SRCS = $(shell find $(WBSRCDIR) -name '*.java')
+WB_JAVA_CLASSDIR = $(GENERATED)/wb/classes
+
+WB_JAVA_CLASSES  = $(patsubst $(WBSRCDIR)/%,$(WB_JAVA_CLASSDIR)/%, \
+	$(patsubst %.java,%.class,$(WB_JAVA_SRCS)))
+
+$(WB_JAVA_CLASSDIR)/%.class: $(WBSRCDIR)/%.java $(WB_JAVA_CLASSDIR)
+	$(REMOTE) $(COMPILE.JAVAC) -sourcepath $(WBSRCDIR) -nowarn -d $(WB_JAVA_CLASSDIR) $<
+
+$(WB_JAR): $(WB_JAVA_CLASSES)
+	$(QUIETLY) $(REMOTE) $(RUN.JAR) cf $@ -C $(WB_JAVA_CLASSDIR)/ .
+
+$(WB_JAVA_CLASSDIR):
+	$(QUIETLY) mkdir -p $@
+
--- a/make/defs.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/defs.make	Sat Oct 20 00:08:35 2012 -0700
@@ -22,8 +22,21 @@
 #  
 #
 
+ifeq ($(HS_ALT_MAKE),)
+  ifneq ($(OPENJDK),true)
+    HS_ALT_MAKE=$(GAMMADIR)/make/closed
+  else
+    HS_ALT_MAKE=NO_SUCH_PATH
+  endif
+endif
+
 # The common definitions for hotspot builds.
 
+# Optionally include SPEC file generated by configure.
+ifneq ($(SPEC),)
+  include $(SPEC)
+endif
+
 # Default to verbose build logs (show all compile lines):
 MAKE_VERBOSE=y
 
@@ -50,6 +63,27 @@
 @$(RM) $@
 endef
 
+# Default values for JVM_VARIANT* variables if configure hasn't set
+# them already.
+ifeq ($(JVM_VARIANTS),)
+  ifeq ($(ZERO_BUILD), true)
+    ifeq ($(SHARK_BUILD), true)
+      JVM_VARIANTS:=zeroshark
+      JVM_VARIANT_ZEROSHARK:=true
+    else
+      JVM_VARIANTS:=zero
+      JVM_VARIANT_ZERO:=true
+    endif
+  else
+    # A default is needed
+    ifeq ($(BUILD_CLIENT_ONLY), true)
+      JVM_VARIANTS:=client
+      JVM_VARIANT_CLIENT:=true
+    endif
+    # Further defaults are platform and arch specific
+  endif
+endif
+
 # Directory paths and user name
 # Unless GAMMADIR is set on the command line, search upward from
 # the current directory for a parent directory containing "src/share/vm".
@@ -301,3 +335,4 @@
 ifndef JAVASE_EMBEDDED
 EXPORT_LIST += $(EXPORT_INCLUDE_DIR)/jfr.h
 endif
+
--- a/make/hotspot_version	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/hotspot_version	Sat Oct 20 00:08:35 2012 -0700
@@ -33,9 +33,9 @@
 # Don't put quotes (fail windows build).
 HOTSPOT_VM_COPYRIGHT=Copyright 2012
 
-HS_MAJOR_VER=23
-HS_MINOR_VER=6
-HS_BUILD_NUMBER=03
+HS_MAJOR_VER=24
+HS_MINOR_VER=0
+HS_BUILD_NUMBER=23
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
--- a/make/jprt.properties	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/jprt.properties	Sat Oct 20 00:08:35 2012 -0700
@@ -160,16 +160,6 @@
 jprt.build.targets.jdk8=${jprt.build.targets.all}
 jprt.build.targets.jdk7=${jprt.build.targets.all}
 jprt.build.targets.jdk7u10=${jprt.build.targets.all}
-jprt.build.targets.jdk7temp=${jprt.build.targets.all}
-jprt.build.targets.jdk7b107=${jprt.build.targets.all}
-jprt.build.targets.jdk6=${jprt.build.targets.standard}
-jprt.build.targets.jdk6perf=${jprt.build.targets.standard}
-jprt.build.targets.jdk6u10=${jprt.build.targets.standard}
-jprt.build.targets.jdk6u14=${jprt.build.targets.standard}
-jprt.build.targets.jdk6u18=${jprt.build.targets.standard}
-jprt.build.targets.jdk6u20=${jprt.build.targets.standard}
-jprt.build.targets.ejdk6=${jprt.build.targets.all}
-jprt.build.targets.ejdk7=${jprt.build.targets.all}
 jprt.build.targets=${jprt.build.targets.${jprt.tools.default.release}}
 
 # Subset lists of test targets for this source tree
@@ -496,11 +486,26 @@
   ${jprt.my.macosx.x64}-fastdebug-c2-internalvmtests, \
   ${jprt.my.windows.i586}-fastdebug-c2-internalvmtests, \
   ${jprt.my.windows.x64}-fastdebug-c2-internalvmtests
-  
+
+jprt.make.rule.test.targets.standard.wbapi = \
+  ${jprt.my.solaris.sparc}-{product|fastdebug}-c2-wbapitest, \
+  ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-wbapitest, \
+  ${jprt.my.solaris.i586}-{product|fastdebug}-c2-wbapitest, \
+  ${jprt.my.solaris.x64}-{product|fastdebug}-c2-wbapitest, \
+  ${jprt.my.linux.i586}-{product|fastdebug}-c2-wbapitest, \
+  ${jprt.my.linux.x64}-{product|fastdebug}-c2-wbapitest, \
+  ${jprt.my.windows.i586}-{product|fastdebug}-c2-wbapitest, \
+  ${jprt.my.windows.x64}-{product|fastdebug}-c2-wbapitest, \
+  ${jprt.my.solaris.sparc}-{product|fastdebug}-c1-wbapitest, \
+  ${jprt.my.solaris.i586}-{product|fastdebug}-c1-wbapitest, \
+  ${jprt.my.linux.i586}-{product|fastdebug}-c1-wbapitest, \
+  ${jprt.my.windows.i586}-{product|fastdebug}-c1-wbapitest
+
 jprt.make.rule.test.targets.standard = \
   ${jprt.make.rule.test.targets.standard.client}, \
   ${jprt.make.rule.test.targets.standard.server}, \
-  ${jprt.make.rule.test.targets.standard.internalvmtests}
+  ${jprt.make.rule.test.targets.standard.internalvmtests}, \
+  ${jprt.make.rule.test.targets.standard.wbapi}
 
 jprt.make.rule.test.targets.embedded = \
   ${jprt.make.rule.test.targets.standard.client}
--- a/make/linux/Makefile	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/linux/Makefile	Sat Oct 20 00:08:35 2012 -0700
@@ -188,7 +188,7 @@
 # in the build.sh script:
 TARGETS           = debug jvmg fastdebug optimized profiled product
 
-ifeq ($(ZERO_BUILD), true)
+ifeq ($(findstring true, $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
   SUBDIR_DOCS     = $(OSNAME)_$(VARIANTARCH)_docs
 else
   SUBDIR_DOCS     = $(OSNAME)_$(BUILDARCH)_docs
--- a/make/linux/makefiles/buildtree.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/linux/makefiles/buildtree.make	Sat Oct 20 00:08:35 2012 -0700
@@ -55,6 +55,7 @@
 # The makefiles are split this way so that "make foo" will run faster by not
 # having to read the dependency files for the vm.
 
+-include $(SPEC)
 include $(GAMMADIR)/make/scm.make
 include $(GAMMADIR)/make/altsrc.make
 
@@ -65,7 +66,7 @@
 # For now, until the compiler is less wobbly:
 TESTFLAGS	= -Xbatch -showversion
 
-ifeq ($(ZERO_BUILD), true)
+ifeq ($(findstring true, $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
   PLATFORM_FILE = $(shell dirname $(shell dirname $(shell pwd)))/platform_zero
 else
   ifdef USE_SUNCC
@@ -250,6 +251,8 @@
 	    echo "HOTSPOT_EXTRA_SYSDEFS\$$(HOTSPOT_EXTRA_SYSDEFS) = $(HOTSPOT_EXTRA_SYSDEFS)" && \
 	    echo "SYSDEFS += \$$(HOTSPOT_EXTRA_SYSDEFS)"; \
 	echo; \
+	[ -n "$(SPEC)" ] && \
+	    echo "include $(SPEC)"; \
 	echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(VARIANT).make"; \
 	echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(COMPILER).make"; \
 	) > $@
--- a/make/linux/makefiles/defs.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/linux/makefiles/defs.make	Sat Oct 20 00:08:35 2012 -0700
@@ -38,7 +38,7 @@
 endif
 
 # zero
-ifeq ($(ZERO_BUILD), true)
+ifeq ($(findstring true, $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
   ifeq ($(ARCH_DATA_MODEL), 64)
     MAKE_ARGS      += LP64=1
   endif
@@ -114,6 +114,18 @@
   HS_ARCH          = ppc
 endif
 
+# On 32-bit linux we build server and client, on 64-bit just server.
+ifeq ($(JVM_VARIANTS),)
+  ifeq ($(ARCH_DATA_MODEL), 32)
+    JVM_VARIANTS:=client,server
+    JVM_VARIANT_CLIENT:=true
+    JVM_VARIANT_SERVER:=true
+  else
+    JVM_VARIANTS:=server
+    JVM_VARIANT_SERVER:=true
+  endif
+endif
+
 # determine if HotSpot is being built in JDK6 or earlier version
 JDK6_OR_EARLIER=0
 ifeq "$(shell expr \( '$(JDK_MAJOR_VERSION)' != '' \& '$(JDK_MINOR_VERSION)' != '' \& '$(JDK_MICRO_VERSION)' != '' \))" "1"
@@ -237,9 +249,11 @@
 EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server
 EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
 
-ifndef BUILD_CLIENT_ONLY
-EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
-EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.$(LIBRARY_SUFFIX)
+EXPORT_LIST += $(EXPORT_JRE_LIB_DIR)/wb.jar
+
+ifeq ($(findstring true, $(JVM_VARIANT_SERVER) $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
+  EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
+  EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.$(LIBRARY_SUFFIX)
   ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
     ifeq ($(ZIP_DEBUGINFO_FILES),1)
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.diz
@@ -249,18 +263,16 @@
   endif
 endif
 
-ifneq ($(ZERO_BUILD), true)
-  ifeq ($(ARCH_DATA_MODEL), 32)
-    EXPORT_LIST += $(EXPORT_CLIENT_DIR)/Xusage.txt
-    EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.$(LIBRARY_SUFFIX)
-    ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-      ifeq ($(ZIP_DEBUGINFO_FILES),1)
-        EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.diz
-      else
-        EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.debuginfo
-      endif
+ifeq ($(JVM_VARIANT_CLIENT),true)
+  EXPORT_LIST += $(EXPORT_CLIENT_DIR)/Xusage.txt
+  EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.$(LIBRARY_SUFFIX)
+  ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+    ifeq ($(ZIP_DEBUGINFO_FILES),1)
+      EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.diz
+    else
+      EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.debuginfo
     endif
-  endif
+  endif 
 endif
 
 # Serviceability Binaries
@@ -283,6 +295,8 @@
 ADD_SA_BINARIES/arm   = 
 ADD_SA_BINARIES/zero  = 
 
+-include $(HS_ALT_MAKE)/linux/makefiles/defs.make
+
 EXPORT_LIST += $(ADD_SA_BINARIES/$(HS_ARCH))
 
 
--- a/make/linux/makefiles/gcc.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/linux/makefiles/gcc.make	Sat Oct 20 00:08:35 2012 -0700
@@ -25,21 +25,26 @@
 #------------------------------------------------------------------------
 # CC, CXX & AS
 
-# When cross-compiling the ALT_COMPILER_PATH points
-# to the cross-compilation toolset
-ifdef CROSS_COMPILE_ARCH
-CXX = $(ALT_COMPILER_PATH)/g++
-CC  = $(ALT_COMPILER_PATH)/gcc
-HOSTCXX = g++
-HOSTCC  = gcc
-else
-CXX = g++
-CC  = gcc
-HOSTCXX = $(CXX)
-HOSTCC  = $(CC)
+# If a SPEC is not set already, then use these defaults.
+ifeq ($(SPEC),)
+  # When cross-compiling the ALT_COMPILER_PATH points
+  # to the cross-compilation toolset
+  ifdef CROSS_COMPILE_ARCH
+    CXX = $(ALT_COMPILER_PATH)/g++
+    CC  = $(ALT_COMPILER_PATH)/gcc
+    HOSTCXX = g++
+    HOSTCC  = gcc
+    STRIP = $(ALT_COMPILER_PATH)/strip
+  else
+    CXX = g++
+    CC  = gcc
+    HOSTCXX = $(CXX)
+    HOSTCC  = $(CC)
+    STRIP = strip
+  endif
+  AS  = $(CC) -c
 endif
 
-AS  = $(CC) -c
 
 # -dumpversion in gcc-2.91 shows "egcs-2.91.66". In later version, it only
 # prints the numbers (e.g. "2.95", "3.2.1")
@@ -67,10 +72,11 @@
 VM_PICFLAG/AOUT   =
 VM_PICFLAG        = $(VM_PICFLAG/$(LINK_INTO))
 
-ifeq ($(ZERO_BUILD), true)
+ifeq ($(JVM_VARIANT_ZERO), true)
 CFLAGS += $(LIBFFI_CFLAGS)
 endif
-ifeq ($(SHARK_BUILD), true)
+ifeq ($(JVM_VARIANT_ZEROSHARK), true)
+CFLAGS += $(LIBFFI_CFLAGS)
 CFLAGS += $(LLVM_CFLAGS)
 endif
 CFLAGS += $(VM_PICFLAG)
@@ -160,7 +166,7 @@
 
 # Flags for generating make dependency flags.
 ifneq ("${CC_VER_MAJOR}", "2")
-DEPFLAGS = -MMD -MP -MF $(DEP_DIR)/$(@:%=%.d)
+DEPFLAGS = -fpch-deps -MMD -MP -MF $(DEP_DIR)/$(@:%=%.d)
 endif
 
 # -DDONT_USE_PRECOMPILED_HEADER will exclude all includes in precompiled.hpp.
@@ -260,9 +266,3 @@
 ifdef MINIMIZE_RAM_USAGE
 CFLAGS += -DMINIMIZE_RAM_USAGE
 endif
-
-ifdef CROSS_COMPILE_ARCH
-  STRIP = $(ALT_COMPILER_PATH)/strip
-else
-  STRIP = strip
-endif
--- a/make/linux/makefiles/jvmg.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/linux/makefiles/jvmg.make	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -27,7 +27,9 @@
 # Compiler specific DEBUG_CFLAGS are passed in from gcc.make, sparcWorks.make
 DEBUG_CFLAGS/DEFAULT= $(DEBUG_CFLAGS)
 DEBUG_CFLAGS/BYFILE = $(DEBUG_CFLAGS/$@)$(DEBUG_CFLAGS/DEFAULT$(DEBUG_CFLAGS/$@))
-CFLAGS += $(DEBUG_CFLAGS/BYFILE)
+
+# _NMT_NOINLINE_ informs NMT that no inlining is done by the Compiler
+CFLAGS += $(DEBUG_CFLAGS/BYFILE) -D_NMT_NOINLINE_
 
 # Set the environment variable HOTSPARC_GENERIC to "true"
 # to inhibit the effect of the previous line on CFLAGS.
--- a/make/linux/makefiles/sa.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/linux/makefiles/sa.make	Sat Oct 20 00:08:35 2012 -0700
@@ -30,10 +30,16 @@
 
 include $(GAMMADIR)/make/linux/makefiles/rules.make
 
+include $(GAMMADIR)/make/defs.make
+include $(GAMMADIR)/make/altsrc.make
+
 AGENT_DIR = $(GAMMADIR)/agent
 
 include $(GAMMADIR)/make/sa.files
 
+-include $(HS_ALT_MAKE)/linux/makefiles/sa.make
+
+
 TOPDIR    = $(shell echo `pwd`)
 GENERATED = $(TOPDIR)/../generated
 
@@ -52,17 +58,15 @@
 SA_PROPERTIES = $(SA_CLASSDIR)/sa.properties
 
 # if $(AGENT_DIR) does not exist, we don't build SA
-# also, we don't build SA on Itanium, PowerPC, ARM or zero.
+# also, we don't build SA on Itanium or zero.
 
 all: 
 	if [ -d $(AGENT_DIR) -a "$(SRCARCH)" != "ia64" \
-             -a "$(SRCARCH)" != "arm" \
-             -a "$(SRCARCH)" != "ppc" \
              -a "$(SRCARCH)" != "zero" ] ; then \
 	   $(MAKE) -f sa.make $(GENERATED)/sa-jdi.jar; \
 	fi
 
-$(GENERATED)/sa-jdi.jar: $(AGENT_FILES)
+$(GENERATED)/sa-jdi.jar:: $(AGENT_FILES)
 	$(QUIETLY) echo "Making $@"
 	$(QUIETLY) if [ "$(BOOT_JAVA_HOME)" = "" ]; then \
 	  echo "ALT_BOOTDIR, BOOTDIR or JAVA_HOME needs to be defined to build SA"; \
@@ -111,3 +115,5 @@
 	rm -rf $(SA_CLASSDIR)
 	rm -rf $(GENERATED)/sa-jdi.jar
 	rm -rf $(AGENT_FILES_LIST)
+
+-include $(HS_ALT_MAKE)/linux/makefiles/sa-rules.make
--- a/make/linux/makefiles/saproc.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/linux/makefiles/saproc.make	Sat Oct 20 00:08:35 2012 -0700
@@ -21,6 +21,8 @@
 # questions.
 #  
 #
+include $(GAMMADIR)/make/defs.make
+include $(GAMMADIR)/make/altsrc.make
 
 # Rules to build serviceability agent library, used by vm.make
 
@@ -48,6 +50,8 @@
              $(SASRCDIR)/ps_core.c                    \
              $(SASRCDIR)/LinuxDebuggerLocal.c
 
+-include $(HS_ALT_MAKE)/linux/makefiles/saproc.make
+
 SAMAPFILE = $(SASRCDIR)/mapfile
 
 DEST_SAPROC           = $(JDK_LIBDIR)/$(LIBSAPROC)
@@ -60,15 +64,19 @@
 endif
 
 # if $(AGENT_DIR) does not exist, we don't build SA
-# also, we don't build SA on Itanium, PPC, ARM or zero.
+# also, we don't build SA on Itanium or zero.
 
 ifneq ($(wildcard $(AGENT_DIR)),)
-ifneq ($(filter-out ia64 arm ppc zero,$(SRCARCH)),)
+ifneq ($(filter-out ia64 zero,$(SRCARCH)),)
   BUILDLIBSAPROC = $(LIBSAPROC)
 endif
 endif
 
-
+ifneq ($(ALT_SASRCDIR),)
+ALT_SAINCDIR=-I$(ALT_SASRCDIR)
+else
+ALT_SAINCDIR=
+endif
 SA_LFLAGS = $(MAPFLAG:FILENAME=$(SAMAPFILE)) $(LDFLAGS_HASH_STYLE)
 
 $(LIBSAPROC): $(SASRCFILES) $(SAMAPFILE)
@@ -84,6 +92,7 @@
 	           -I$(GENERATED)                                       \
 	           -I$(BOOT_JAVA_HOME)/include                          \
 	           -I$(BOOT_JAVA_HOME)/include/$(Platform_os_family)    \
+	           $(ALT_SAINCDIR)                                      \
 	           $(SASRCFILES)                                        \
 	           $(SA_LFLAGS)                                         \
 	           $(SA_DEBUG_CFLAGS)                                   \
--- a/make/linux/makefiles/sparcWorks.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/linux/makefiles/sparcWorks.make	Sat Oct 20 00:08:35 2012 -0700
@@ -25,12 +25,15 @@
 #------------------------------------------------------------------------
 # CC, CXX & AS
 
-CXX = CC
-CC  = cc
-AS  = $(CC) -c
+# If a SPEC is not set already, then use these defaults.
+ifeq ($(SPEC),)
+  CXX = CC
+  CC  = cc
+  AS  = $(CC) -c
 
-HOSTCXX = $(CXX)
-HOSTCC  = $(CC)
+  HOSTCXX = $(CXX)
+  HOSTCC  = $(CC)
+endif
 
 ARCHFLAG = $(ARCHFLAG/$(BUILDARCH))
 ARCHFLAG/i486    = -m32
--- a/make/linux/makefiles/vm.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/linux/makefiles/vm.make	Sat Oct 20 00:08:35 2012 -0700
@@ -42,7 +42,7 @@
 -include $(DEP_DIR)/*.d
 
 # read machine-specific adjustments (%%% should do this via buildtree.make?)
-ifeq ($(ZERO_BUILD), true)
+ifeq ($(findstring true, $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
   include $(MAKEFILES_DIR)/zeroshark.make
 else
   include $(MAKEFILES_DIR)/$(BUILDARCH).make
@@ -242,7 +242,7 @@
 vm.def: $(Res_Files) $(Obj_Files)
 	sh $(GAMMADIR)/make/linux/makefiles/build_vm_def.sh *.o > $@
 
-ifeq ($(SHARK_BUILD), true)
+ifeq ($(JVM_VARIANT_ZEROSHARK), true)
   STATIC_CXX = false
 else
   ifeq ($(ZERO_LIBARCH), ppc64)
@@ -274,12 +274,12 @@
 
   LIBS_VM                  += $(LIBS)
 endif
-ifeq ($(ZERO_BUILD), true)
+ifeq ($(JVM_VARIANT_ZERO), true)
   LIBS_VM += $(LIBFFI_LIBS)
 endif
-ifeq ($(SHARK_BUILD), true)
+ifeq ($(JVM_VARIANT_ZEROSHARK), true)
+  LIBS_VM   += $(LIBFFI_LIBS) $(LLVM_LIBS)
   LFLAGS_VM += $(LLVM_LDFLAGS)
-  LIBS_VM   += $(LLVM_LIBS)
 endif
 
 LINK_VM = $(LINK_LIB.CC)
@@ -382,9 +382,12 @@
 # Serviceability agent
 include $(MAKEFILES_DIR)/saproc.make
 
+# Whitebox testing API
+include $(MAKEFILES_DIR)/wb.make
+
 #----------------------------------------------------------------------
 
-build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(LIBJVM_DB) $(BUILDLIBSAPROC)
+build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(LIBJVM_DB) $(BUILDLIBSAPROC) $(WB_JAR)
 
 install: install_jvm install_jsig install_saproc
 
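
The findstring test above is a make idiom for "either flag is true": both variables are expanded side by side and the condition matches if the literal word true appears anywhere in the result. In isolation, under invented flag values:

    JVM_VARIANT_ZERO      := false
    JVM_VARIANT_ZEROSHARK := true

    ifeq ($(findstring true, $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
      PLATFORM_MAKE := zeroshark.make
    else
      PLATFORM_MAKE := $(BUILDARCH).make
    endif

    show:
    	@echo including $(PLATFORM_MAKE)
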
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/linux/makefiles/wb.make	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#  
+#
+
+# Rules to build whitebox testing library, used by vm.make
+WB = wb
+
+WBSRCDIR = $(GAMMADIR)/src/share/tools/whitebox
+
+WB_JAR = $(GENERATED)/$(WB).jar
+
+WB_JAVA_SRCS = $(shell find $(WBSRCDIR) -name '*.java')
+WB_JAVA_CLASSDIR = $(GENERATED)/wb/classes
+
+WB_JAVA_CLASSES  = $(patsubst $(WBSRCDIR)/%,$(WB_JAVA_CLASSDIR)/%, \
+	$(patsubst %.java,%.class,$(WB_JAVA_SRCS)))
+
+$(WB_JAVA_CLASSDIR)/%.class: $(WBSRCDIR)/%.java $(WB_JAVA_CLASSDIR)
+	$(REMOTE) $(COMPILE.JAVAC) -sourcepath $(WBSRCDIR) -nowarn -d $(WB_JAVA_CLASSDIR) $<
+
+$(WB_JAR): $(WB_JAVA_CLASSES)
+	$(QUIETLY) $(REMOTE) $(RUN.JAR) cf $@ -C $(WB_JAVA_CLASSDIR)/ .
+
+$(WB_JAVA_CLASSDIR):
+	$(QUIETLY) mkdir -p $@
+
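
The WB_JAVA_CLASSES computation above derives each expected .class path from its .java source by rewriting first the suffix, then the directory prefix. The same two-step patsubst in isolation, with shortened paths and an assumed source file name:

    WBSRCDIR         := src/share/tools/whitebox
    WB_JAVA_CLASSDIR := generated/wb/classes
    WB_JAVA_SRCS     := $(WBSRCDIR)/sun/hotspot/WhiteBox.java

    WB_JAVA_CLASSES := $(patsubst $(WBSRCDIR)/%,$(WB_JAVA_CLASSDIR)/%, \
    	$(patsubst %.java,%.class,$(WB_JAVA_SRCS)))

    show:
    	@echo $(WB_JAVA_CLASSES)   # generated/wb/classes/sun/hotspot/WhiteBox.class
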
--- a/make/solaris/makefiles/buildtree.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/solaris/makefiles/buildtree.make	Sat Oct 20 00:08:35 2012 -0700
@@ -55,6 +55,7 @@
 # The makefiles are split this way so that "make foo" will run faster by not
 # having to read the dependency files for the vm.
 
+-include $(SPEC)
 include $(GAMMADIR)/make/scm.make
 include $(GAMMADIR)/make/altsrc.make
 
@@ -243,6 +244,8 @@
 	    echo "HOTSPOT_EXTRA_SYSDEFS\$$(HOTSPOT_EXTRA_SYSDEFS) = $(HOTSPOT_EXTRA_SYSDEFS)" && \
 	    echo "SYSDEFS += \$$(HOTSPOT_EXTRA_SYSDEFS)"; \
 	echo; \
+	[ -n "$(SPEC)" ] && \
+	    echo "include $(SPEC)"; \
 	echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(VARIANT).make"; \
 	echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(COMPILER).make"; \
 	) > $@
--- a/make/solaris/makefiles/defs.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/solaris/makefiles/defs.make	Sat Oct 20 00:08:35 2012 -0700
@@ -59,6 +59,18 @@
   endif
 endif
 
+# On 32-bit Solaris we build server and client; on 64-bit, just server.
+ifeq ($(JVM_VARIANTS),)
+  ifeq ($(ARCH_DATA_MODEL), 32)
+    JVM_VARIANTS:=client,server
+    JVM_VARIANT_CLIENT:=true
+    JVM_VARIANT_SERVER:=true
+  else
+    JVM_VARIANTS:=server
+    JVM_VARIANT_SERVER:=true
+  endif
+endif
+
 # determine if HotSpot is being built in JDK6 or earlier version
 JDK6_OR_EARLIER=0
 ifeq "$(shell expr \( '$(JDK_MAJOR_VERSION)' != '' \& '$(JDK_MINOR_VERSION)' != '' \& '$(JDK_MICRO_VERSION)' != '' \))" "1"
@@ -172,58 +184,65 @@
   endif
 endif
 
+EXPORT_LIST += $(EXPORT_JRE_LIB_DIR)/wb.jar
+
 EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server
 EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
 
-ifneq ($(BUILD_CLIENT_ONLY),true)
-EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
-EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.$(LIBRARY_SUFFIX)
-EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_db.$(LIBRARY_SUFFIX)
-EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_dtrace.$(LIBRARY_SUFFIX)
+ifeq ($(JVM_VARIANT_SERVER),true)
+  EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
+  EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.$(LIBRARY_SUFFIX)
+  EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_db.$(LIBRARY_SUFFIX)
+  EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_dtrace.$(LIBRARY_SUFFIX)
+  ifeq ($(ARCH_DATA_MODEL),32)
+    EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_db.$(LIBRARY_SUFFIX)
+    EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_dtrace.$(LIBRARY_SUFFIX)
+  endif
   ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
     ifeq ($(ZIP_DEBUGINFO_FILES),1)
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.diz
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_db.diz
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_dtrace.diz
+      ifeq ($(ARCH_DATA_MODEL),32)
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_db.diz
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_dtrace.diz
+      endif
     else
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.debuginfo
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_db.debuginfo
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_dtrace.debuginfo
+      ifeq ($(ARCH_DATA_MODEL),32)
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_db.debuginfo
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_dtrace.debuginfo
+      endif
     endif
   endif
 endif
-ifeq ($(ARCH_DATA_MODEL), 32)
+ifeq ($(JVM_VARIANT_CLIENT),true)
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/Xusage.txt
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.$(LIBRARY_SUFFIX) 
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm_db.$(LIBRARY_SUFFIX) 
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm_dtrace.$(LIBRARY_SUFFIX)
-  EXPORT_LIST += $(EXPORT_CLIENT_DIR)/64/libjvm_db.$(LIBRARY_SUFFIX)
-  EXPORT_LIST += $(EXPORT_CLIENT_DIR)/64/libjvm_dtrace.$(LIBRARY_SUFFIX)
+  ifeq ($(ARCH_DATA_MODEL),32)
+    EXPORT_LIST += $(EXPORT_CLIENT_DIR)/64/libjvm_db.$(LIBRARY_SUFFIX)
+    EXPORT_LIST += $(EXPORT_CLIENT_DIR)/64/libjvm_dtrace.$(LIBRARY_SUFFIX)
+  endif
   ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
     ifeq ($(ZIP_DEBUGINFO_FILES),1)
       EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.diz
       EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm_db.diz
       EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm_dtrace.diz
-      EXPORT_LIST += $(EXPORT_CLIENT_DIR)/64/libjvm_db.diz
-      EXPORT_LIST += $(EXPORT_CLIENT_DIR)/64/libjvm_dtrace.diz
+      ifeq ($(ARCH_DATA_MODEL),32)
+        EXPORT_LIST += $(EXPORT_CLIENT_DIR)/64/libjvm_db.diz
+        EXPORT_LIST += $(EXPORT_CLIENT_DIR)/64/libjvm_dtrace.diz
+      endif
     else
       EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.debuginfo
       EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm_db.debuginfo
       EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm_dtrace.debuginfo
-      EXPORT_LIST += $(EXPORT_CLIENT_DIR)/64/libjvm_db.debuginfo
-      EXPORT_LIST += $(EXPORT_CLIENT_DIR)/64/libjvm_dtrace.debuginfo
-    endif
-  endif
-  ifneq ($(BUILD_CLIENT_ONLY), true)
-    EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_db.$(LIBRARY_SUFFIX)
-    EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_dtrace.$(LIBRARY_SUFFIX)
-    ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-      ifeq ($(ZIP_DEBUGINFO_FILES),1)
-        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_db.diz
-        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_dtrace.diz
-      else
-        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_db.debuginfo
-        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_dtrace.debuginfo
+      ifeq ($(ARCH_DATA_MODEL),32)
+        EXPORT_LIST += $(EXPORT_CLIENT_DIR)/64/libjvm_db.debuginfo
+        EXPORT_LIST += $(EXPORT_CLIENT_DIR)/64/libjvm_dtrace.debuginfo
       endif
     endif
   endif
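
The net effect of the defs.make rework above: the exported server and client file lists are now gated on JVM_VARIANT_SERVER / JVM_VARIANT_CLIENT rather than on ARCH_DATA_MODEL directly, with the 32-bit-only 64/ subdirectory entries kept behind a nested data-model check. Boiled down to a sketch with abbreviated paths:

    ifeq ($(JVM_VARIANTS),)
      ifeq ($(ARCH_DATA_MODEL), 32)
        JVM_VARIANT_CLIENT := true
      endif
      JVM_VARIANT_SERVER := true
    endif

    ifeq ($(JVM_VARIANT_SERVER),true)
      EXPORT_LIST += server/libjvm.so
      ifeq ($(ARCH_DATA_MODEL),32)
        EXPORT_LIST += server/64/libjvm_db.so
      endif
    endif
    ifeq ($(JVM_VARIANT_CLIENT),true)
      EXPORT_LIST += client/libjvm.so
    endif
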
--- a/make/solaris/makefiles/fastdebug.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/solaris/makefiles/fastdebug.make	Sat Oct 20 00:08:35 2012 -0700
@@ -36,6 +36,11 @@
 ifeq ("${Platform_compiler}", "sparcWorks")
 OPT_CFLAGS/SLOWER = -xO2
 
+ifeq ($(COMPILER_REV_NUMERIC), 510)
+# CC 5.10 has bug XXXXX with -xO4
+OPT_CFLAGS/jvmtiClassFileReconstituter.o = $(OPT_CFLAGS/SLOWER)
+endif # COMPILER_REV_NUMERIC == 510
+
 ifeq ($(COMPILER_REV_NUMERIC), 509)
 # To avoid jvm98 crash
 OPT_CFLAGS/instanceKlass.o = $(OPT_CFLAGS/SLOWER)
--- a/make/solaris/makefiles/gcc.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/solaris/makefiles/gcc.make	Sat Oct 20 00:08:35 2012 -0700
@@ -25,9 +25,13 @@
 #------------------------------------------------------------------------
 # CC, CXX & AS
 
-CXX = g++
-CC  = gcc
-AS  = $(CC) -c
+# If a SPEC is not set already, then use these defaults.
+ifeq ($(SPEC),)
+  CXX = g++
+  CC  = gcc
+  AS  = $(CC) -c
+  MCS = /usr/ccs/bin/mcs
+endif
 
 Compiler = gcc
 
@@ -137,7 +141,7 @@
 
 # Flags for generating make dependency flags.
 ifneq ("${CC_VER_MAJOR}", "2")
-DEPFLAGS = -MMD -MP -MF $(DEP_DIR)/$(@:%=%.d)
+DEPFLAGS = -fpch-deps -MMD -MP -MF $(DEP_DIR)/$(@:%=%.d)
 endif
 
 # -DDONT_USE_PRECOMPILED_HEADER will exclude all includes in precompiled.hpp.
@@ -193,5 +197,3 @@
 ifeq ($(DEBUG_CFLAGS/$(BUILDARCH)),) 
 DEBUG_CFLAGS += -gstabs 
 endif 
-
-MCS = /usr/ccs/bin/mcs
--- a/make/solaris/makefiles/jvmg.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/solaris/makefiles/jvmg.make	Sat Oct 20 00:08:35 2012 -0700
@@ -37,7 +37,8 @@
 endif
 endif
 
-CFLAGS += $(DEBUG_CFLAGS/BYFILE)
+# _NMT_NOINLINE_ informs NMT (Native Memory Tracking) that no inlining is done by the compiler
+CFLAGS += $(DEBUG_CFLAGS/BYFILE) -D_NMT_NOINLINE_
 
 # Set the environment variable HOTSPARC_GENERIC to "true"
 # to inhibit the effect of the previous line on CFLAGS.
--- a/make/solaris/makefiles/optimized.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/solaris/makefiles/optimized.make	Sat Oct 20 00:08:35 2012 -0700
@@ -32,6 +32,11 @@
 # (OPT_CFLAGS/SLOWER is also available, to alter compilation of buggy files)
 ifeq ("${Platform_compiler}", "sparcWorks")
 
+ifeq ($(COMPILER_REV_NUMERIC), 510)
+# CC 5.10 has bug XXXXX with -xO4
+OPT_CFLAGS/jvmtiClassFileReconstituter.o = $(OPT_CFLAGS/O2)
+endif # COMPILER_REV_NUMERIC == 510
+
 ifeq ($(shell expr $(COMPILER_REV_NUMERIC) \>= 509), 1)
 # dtrace cannot handle tail call optimization (6672627, 6693876)
 OPT_CFLAGS/jni.o = $(OPT_CFLAGS/DEFAULT) $(OPT_CCFLAGS/NO_TAIL_CALL_OPT)
--- a/make/solaris/makefiles/product.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/solaris/makefiles/product.make	Sat Oct 20 00:08:35 2012 -0700
@@ -40,6 +40,11 @@
 # (OPT_CFLAGS/SLOWER is also available, to alter compilation of buggy files)
 ifeq ("${Platform_compiler}", "sparcWorks")
 
+ifeq ($(COMPILER_REV_NUMERIC), 510)
+# CC 5.10 has bug XXXXX with -xO4
+OPT_CFLAGS/jvmtiClassFileReconstituter.o = $(OPT_CFLAGS/O2)
+endif # COMPILER_REV_NUMERIC == 510
+
 ifeq ($(shell expr $(COMPILER_REV_NUMERIC) \>= 509), 1)
 # dtrace cannot handle tail call optimization (6672627, 6693876)
 OPT_CFLAGS/jni.o = $(OPT_CFLAGS/DEFAULT) $(OPT_CCFLAGS/NO_TAIL_CALL_OPT)
--- a/make/solaris/makefiles/sparcWorks.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/solaris/makefiles/sparcWorks.make	Sat Oct 20 00:08:35 2012 -0700
@@ -22,18 +22,22 @@
 #  
 #
 
-# Compiler-specific flags for sparcworks.
-
-# tell make which C and C++ compilers to use
-CC	= cc
-CXX	= CC
+# If a SPEC is not set already, then use these defaults.
+ifeq ($(SPEC),)
+  # Compiler-specific flags for sparcworks.
+  CC	= cc
+  CXX	= CC
 
-# Note that this 'as' is an older version of the Sun Studio 'fbe', and will
-#   use the older style options. The 'fbe' options will match 'cc' and 'CC'.
-AS	= /usr/ccs/bin/as
+  # Note that this 'as' is an older version of the Sun Studio 'fbe', and will
+  #   use the older style options. The 'fbe' options will match 'cc' and 'CC'.
+  AS	= /usr/ccs/bin/as
 
-NM	= /usr/ccs/bin/nm
-NAWK    = /bin/nawk
+  NM    = /usr/ccs/bin/nm
+  NAWK  = /bin/nawk
+
+  MCS	= /usr/ccs/bin/mcs
+  STRIP	= /usr/ccs/bin/strip
+endif
 
 REORDER_FLAG = -xF
 
@@ -557,9 +561,6 @@
 #LINK_INTO = LIBJVM
 endif
 
-MCS	= /usr/ccs/bin/mcs
-STRIP	= /usr/ccs/bin/strip
-
 # Solaris platforms collect lots of redundant file-ident lines,
 # to the point of wasting a significant percentage of file space.
 # (The text is stored in ELF .comment sections, contributed by
--- a/make/solaris/makefiles/vm.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/solaris/makefiles/vm.make	Sat Oct 20 00:08:35 2012 -0700
@@ -348,9 +348,12 @@
 # Serviceability agent
 include $(MAKEFILES_DIR)/saproc.make
 
+# Whitebox testing API
+include $(MAKEFILES_DIR)/wb.make
+
 #----------------------------------------------------------------------
 
-build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(LIBJVM_DB) $(LIBJVM_DTRACE) $(BUILDLIBSAPROC) dtraceCheck
+build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(LIBJVM_DB) $(LIBJVM_DTRACE) $(BUILDLIBSAPROC) dtraceCheck $(WB_JAR)
 
 install: install_jvm install_jsig install_saproc
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/solaris/makefiles/wb.make	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#
+
+# Rules to build whitebox testing library, used by vm.make
+
+WB = wb
+
+WBSRCDIR = $(GAMMADIR)/src/share/tools/whitebox
+
+WB_JAR = $(GENERATED)/$(WB).jar
+
+WB_JAVA_SRCS = $(shell find $(WBSRCDIR) -name '*.java')
+WB_JAVA_CLASSDIR = $(GENERATED)/wb/classes
+
+WB_JAVA_CLASSES  = $(patsubst $(WBSRCDIR)/%,$(WB_JAVA_CLASSDIR)/%, \
+	$(patsubst %.java,%.class,$(WB_JAVA_SRCS)))
+
+$(WB_JAVA_CLASSDIR)/%.class: $(WBSRCDIR)/%.java $(WB_JAVA_CLASSDIR)
+	$(REMOTE) $(COMPILE.JAVAC) -sourcepath $(WBSRCDIR) -nowarn -d $(WB_JAVA_CLASSDIR) $<
+
+$(WB_JAR): $(WB_JAVA_CLASSES)
+	$(QUIETLY) $(REMOTE) $(RUN.JAR) cf $@ -C $(WB_JAVA_CLASSDIR)/ .
+
+$(WB_JAVA_CLASSDIR):
+	$(QUIETLY) mkdir -p $@
+
--- a/make/windows/build.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/windows/build.make	Sat Oct 20 00:08:35 2012 -0700
@@ -297,6 +297,10 @@
 	@ echo BUILDARCH=$(BUILDARCH)         			>> $@
 	@ echo Platform_arch=$(Platform_arch)        		>> $@
 	@ echo Platform_arch_model=$(Platform_arch_model)	>> $@
+	@ echo CXX=$(CXX)					>> $@
+	@ echo LD=$(LD)						>> $@
+	@ echo MT=$(MT)						>> $@
+	@ echo RC=$(RC)						>> $@
 	@ sh $(WorkSpace)/make/windows/get_msc_ver.sh		>> $@
 	@ if "$(ENABLE_FULL_DEBUG_SYMBOLS)" NEQ "" echo ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS) >> $@
 	@ if "$(ZIP_DEBUGINFO_FILES)" NEQ "" echo ZIP_DEBUGINFO_FILES=$(ZIP_DEBUGINFO_FILES) >> $@
--- a/make/windows/create_obj_files.sh	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/windows/create_obj_files.sh	Sat Oct 20 00:08:35 2012 -0700
@@ -80,6 +80,8 @@
   BASE_PATHS="${BASE_PATHS} ${ALTSRC}/share/vm/jfr"
 fi
 
+BASE_PATHS="${BASE_PATHS} ${COMMONSRC}/share/vm/prims/wbtestmethods"
+
 CORE_PATHS="${BASE_PATHS}"
 # shared is already in BASE_PATHS. vm/memory would belong here too, but it is already covered by BASE_PATHS.
 if [ -d "${ALTSRC}/share/vm/gc_implementation" ]; then
--- a/make/windows/makefiles/compile.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/windows/makefiles/compile.make	Sat Oct 20 00:08:35 2012 -0700
@@ -23,7 +23,9 @@
 #
 
 # Generic compiler settings
+!if "x$(CXX)" == "x"
 CXX=cl.exe
+!endif
 
 # CXX Flags: (these vary slightly from VC6->VS2003->VS2005 compilers)
 #   /nologo   Suppress copyright message at every cl.exe startup
@@ -185,8 +187,10 @@
 LD_FLAGS = /manifest $(LD_FLAGS) $(BUFFEROVERFLOWLIB)
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
+!if "x$(MT)" == "x"
 MT=mt.exe
 !endif
+!endif
 
 !if "$(COMPILER_NAME)" == "VS2008"
 PRODUCT_OPT_OPTION   = /O2 /Oy-
@@ -196,8 +200,10 @@
 LD_FLAGS = /manifest $(LD_FLAGS)
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
+!if "x$(MT)" == "x"
 MT=mt.exe
 !endif
+!endif
 
 !if "$(COMPILER_NAME)" == "VS2010"
 PRODUCT_OPT_OPTION   = /O2 /Oy-
@@ -207,7 +213,9 @@
 LD_FLAGS = /manifest $(LD_FLAGS)
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
+!if "x$(MT)" == "x"
 MT=mt.exe
+!endif
 !if "$(BUILDARCH)" == "i486"
 LD_FLAGS = /SAFESEH $(LD_FLAGS)
 !endif
@@ -227,7 +235,9 @@
 !endif
 
 # Generic linker settings
+!if "x$(LD)" == "x"
 LD=link.exe
+!endif
 LD_FLAGS= $(LD_FLAGS) kernel32.lib user32.lib gdi32.lib winspool.lib \
  comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib \
  uuid.lib Wsock32.lib winmm.lib /nologo /machine:$(MACHINE) /opt:REF \
@@ -242,7 +252,9 @@
 !endif
 
 # Resource compiler settings
+!if "x$(RC)" == "x"
 RC=rc.exe
+!endif
 RC_FLAGS=/D "HS_VER=$(HS_VER)" \
 	 /D "HS_DOTVER=$(HS_DOTVER)" \
 	 /D "HS_BUILD_ID=$(HS_BUILD_ID)" \
--- a/make/windows/makefiles/debug.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/windows/makefiles/debug.make	Sat Oct 20 00:08:35 2012 -0700
@@ -33,12 +33,13 @@
 BUILD_PCH_FILE=_build_pch_file.obj
 !endif
 
-default:: $(BUILD_PCH_FILE) $(AOUT) launcher checkAndBuildSA
+default:: $(BUILD_PCH_FILE) $(AOUT) launcher checkAndBuildSA wb
 
 !include ../local.make
 !include compile.make
 
-CXX_FLAGS=$(CXX_FLAGS) $(DEBUG_OPT_OPTION)
+# _NMT_NOINLINE_ informs NMT (Native Memory Tracking) that no inlining is done by the compiler
+CXX_FLAGS=$(CXX_FLAGS) $(DEBUG_OPT_OPTION) /D "_NMT_NOINLINE_"
 
 !include $(WorkSpace)/make/windows/makefiles/vm.make
 !include local.make
@@ -71,3 +72,4 @@
 !include $(WorkSpace)/make/windows/makefiles/shared.make
 !include $(WorkSpace)/make/windows/makefiles/sa.make
 !include $(WorkSpace)/make/windows/makefiles/launcher.make
+!include $(WorkSpace)/make/windows/makefiles/wb.make
--- a/make/windows/makefiles/defs.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/windows/makefiles/defs.make	Sat Oct 20 00:08:35 2012 -0700
@@ -151,6 +151,19 @@
 MAKE_ARGS += RM="$(RM)"
 MAKE_ARGS += ZIPEXE=$(ZIPEXE)
 
+# On 32-bit Windows we build server, client and kernel; on 64-bit, just server.
+ifeq ($(JVM_VARIANTS),)
+  ifeq ($(ARCH_DATA_MODEL), 32)
+    JVM_VARIANTS:=client,server,kernel
+    JVM_VARIANT_CLIENT:=true
+    JVM_VARIANT_SERVER:=true
+    JVM_VARIANT_KERNEL:=true
+  else
+    JVM_VARIANTS:=server
+    JVM_VARIANT_SERVER:=true
+  endif
+endif
+
 JDK_INCLUDE_SUBDIR=win32
 
 # Library suffix
@@ -175,14 +188,22 @@
   MAKE_ARGS += JDK_BUILD_NUMBER=$(COOKED_BUILD_NUMBER)
 endif
 
-NMAKE= MAKEFLAGS= MFLAGS= nmake /NOLOGO
+NMAKE= MAKEFLAGS= MFLAGS= nmake -NOLOGO
+ifndef SYSTEM_UNAME
+  SYSTEM_UNAME := $(shell uname)
+  export SYSTEM_UNAME
+endif
 
 # Check for CYGWIN
-ifneq (,$(findstring CYGWIN,$(shell uname)))
+ifneq (,$(findstring CYGWIN,$(SYSTEM_UNAME)))
   USING_CYGWIN=true
 else
   USING_CYGWIN=false
 endif
+# Check for MinGW
+ifneq (,$(findstring MINGW,$(SYSTEM_UNAME)))
+  USING_MINGW=true
+endif
 # FIXUP: The subdirectory for a debug build is NOT the same on all platforms
 VM_DEBUG=debug
 
@@ -195,11 +216,16 @@
   ABS_BOOTDIR     := $(subst /,\\,$(shell /bin/cygpath -m -a "$(BOOTDIR)"))
   ABS_GAMMADIR    := $(subst /,\\,$(shell /bin/cygpath -m -a "$(GAMMADIR)"))
   ABS_OS_MAKEFILE := $(shell /bin/cygpath -m -a "$(HS_MAKE_DIR)/$(OSNAME)")/build.make
-else
-  ABS_OUTPUTDIR   := $(subst /,\\,$(shell $(CD) $(OUTPUTDIR);$(PWD)))
-  ABS_BOOTDIR     := $(subst /,\\,$(shell $(CD) $(BOOTDIR);$(PWD)))
-  ABS_GAMMADIR    := $(subst /,\\,$(shell $(CD) $(GAMMADIR);$(PWD)))
-  ABS_OS_MAKEFILE := $(subst /,\\,$(shell $(CD) $(HS_MAKE_DIR)/$(OSNAME);$(PWD))/build.make)
+else ifeq ($(USING_MINGW), true)
+    ABS_OUTPUTDIR   := $(shell $(CD) $(OUTPUTDIR);$(PWD))
+    ABS_BOOTDIR     := $(shell $(CD) $(BOOTDIR);$(PWD))
+    ABS_GAMMADIR    := $(shell $(CD) $(GAMMADIR);$(PWD))
+    ABS_OS_MAKEFILE := $(shell $(CD) $(HS_MAKE_DIR)/$(OSNAME);$(PWD))/build.make
+  else
+    ABS_OUTPUTDIR   := $(subst /,\\,$(shell $(CD) $(OUTPUTDIR);$(PWD)))
+    ABS_BOOTDIR     := $(subst /,\\,$(shell $(CD) $(BOOTDIR);$(PWD)))
+    ABS_GAMMADIR    := $(subst /,\\,$(shell $(CD) $(GAMMADIR);$(PWD)))
+    ABS_OS_MAKEFILE := $(subst /,\\,$(shell $(CD) $(HS_MAKE_DIR)/$(OSNAME);$(PWD))/build.make)
 endif
 
 # Disable building SA on windows until we are sure
@@ -221,18 +247,20 @@
 EXPORT_CLIENT_DIR = $(EXPORT_JRE_BIN_DIR)/client
 EXPORT_KERNEL_DIR = $(EXPORT_JRE_BIN_DIR)/kernel
 
-EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
-EXPORT_LIST += $(EXPORT_SERVER_DIR)/jvm.$(LIBRARY_SUFFIX)
-ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-  ifeq ($(ZIP_DEBUGINFO_FILES),1)
-    EXPORT_LIST += $(EXPORT_SERVER_DIR)/jvm.diz
-  else
-    EXPORT_LIST += $(EXPORT_SERVER_DIR)/jvm.pdb
-    EXPORT_LIST += $(EXPORT_SERVER_DIR)/jvm.map
+ifeq ($(JVM_VARIANT_SERVER),true)
+  EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
+  EXPORT_LIST += $(EXPORT_SERVER_DIR)/jvm.$(LIBRARY_SUFFIX)
+  ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+    ifeq ($(ZIP_DEBUGINFO_FILES),1)
+      EXPORT_LIST += $(EXPORT_SERVER_DIR)/jvm.diz
+    else
+      EXPORT_LIST += $(EXPORT_SERVER_DIR)/jvm.pdb
+      EXPORT_LIST += $(EXPORT_SERVER_DIR)/jvm.map
+    endif
   endif
+  EXPORT_LIST += $(EXPORT_LIB_DIR)/jvm.lib
 endif
-EXPORT_LIST += $(EXPORT_LIB_DIR)/jvm.lib
-ifeq ($(ARCH_DATA_MODEL), 32)
+ifeq ($(JVM_VARIANT_CLIENT),true)
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/Xusage.txt
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/jvm.$(LIBRARY_SUFFIX)
   ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
@@ -243,7 +271,8 @@
       EXPORT_LIST += $(EXPORT_CLIENT_DIR)/jvm.map
     endif
   endif
-  # kernel vm
+endif
+ifeq ($(JVM_VARIANT_KERNEL),true)
   EXPORT_LIST += $(EXPORT_KERNEL_DIR)/Xusage.txt
   EXPORT_LIST += $(EXPORT_KERNEL_DIR)/jvm.$(LIBRARY_SUFFIX)
   ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
@@ -256,6 +285,8 @@
   endif
 endif
 
+EXPORT_LIST += $(EXPORT_JRE_LIB_DIR)/wb.jar
+
 ifeq ($(BUILD_WIN_SA), 1)
   EXPORT_LIST += $(EXPORT_JRE_BIN_DIR)/sawindbg.$(LIBRARY_SUFFIX)
   ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
@@ -270,3 +301,19 @@
   # Must pass this down to nmake.
   MAKE_ARGS += BUILD_WIN_SA=1
 endif
+
+# Propagate compiler and tools paths from configure to nmake. 
+# Need to make sure they contain \\ and not /.
+ifneq ($(SPEC),)
+  ifeq ($(USING_CYGWIN), true)
+    MAKE_ARGS += CXX="$(subst /,\\,$(shell /bin/cygpath -s -m -a $(CXX)))"
+    MAKE_ARGS += LD="$(subst /,\\,$(shell /bin/cygpath -s -m -a $(LD)))"
+    MAKE_ARGS += RC="$(subst /,\\,$(shell /bin/cygpath -s -m -a $(RC)))"
+    MAKE_ARGS += MT="$(subst /,\\,$(shell /bin/cygpath -s -m -a $(MT)))"
+  else
+    MAKE_ARGS += CXX="$(subst /,\\,$(CXX))"
+    MAKE_ARGS += LD="$(subst /,\\,$(LD))"
+    MAKE_ARGS += RC="$(subst /,\\,$(RC))"
+    MAKE_ARGS += MT="$(subst /,\\,$(MT))"
+  endif
+endif
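
For the Cygwin branch above, cygpath -s -m -a produces a short (8.3), forward-slash, absolute Windows path, which the subst then flips to doubled backslashes for nmake's benefit. A one-variable sketch of the same pipeline (example path invented):

    # e.g. /cygdrive/c/Program Files/.../cl.exe  ->  C:/PROGRA~1/.../cl.exe
    #      and after the subst                   ->  C:\\PROGRA~1\\...\\cl.exe
    ifeq ($(USING_CYGWIN), true)
      CXX_FOR_NMAKE := $(subst /,\\,$(shell /bin/cygpath -s -m -a $(CXX)))
    else
      CXX_FOR_NMAKE := $(subst /,\\,$(CXX))
    endif

    show:
    	@echo CXX="$(CXX_FOR_NMAKE)"
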
--- a/make/windows/makefiles/fastdebug.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/windows/makefiles/fastdebug.make	Sat Oct 20 00:08:35 2012 -0700
@@ -33,7 +33,7 @@
 BUILD_PCH_FILE=_build_pch_file.obj
 !endif
 
-default:: $(BUILD_PCH_FILE) $(AOUT) launcher checkAndBuildSA
+default:: $(BUILD_PCH_FILE) $(AOUT) launcher checkAndBuildSA wb
 
 !include ../local.make
 !include compile.make
@@ -71,3 +71,4 @@
 !include $(WorkSpace)/make/windows/makefiles/shared.make
 !include $(WorkSpace)/make/windows/makefiles/sa.make
 !include $(WorkSpace)/make/windows/makefiles/launcher.make
+!include $(WorkSpace)/make/windows/makefiles/wb.make
--- a/make/windows/makefiles/product.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/windows/makefiles/product.make	Sat Oct 20 00:08:35 2012 -0700
@@ -32,7 +32,7 @@
 BUILD_PCH_FILE=_build_pch_file.obj
 !endif
 
-default:: $(BUILD_PCH_FILE) $(AOUT) launcher checkAndBuildSA
+default:: $(BUILD_PCH_FILE) $(AOUT) launcher checkAndBuildSA wb
 
 !include ../local.make
 !include compile.make
@@ -82,3 +82,4 @@
 !include $(WorkSpace)/make/windows/makefiles/shared.make
 !include $(WorkSpace)/make/windows/makefiles/sa.make
 !include $(WorkSpace)/make/windows/makefiles/launcher.make
+!include $(WorkSpace)/make/windows/makefiles/wb.make
--- a/make/windows/makefiles/projectcreator.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/windows/makefiles/projectcreator.make	Sat Oct 20 00:08:35 2012 -0700
@@ -51,6 +51,7 @@
         -relativeInclude src\closed\cpu\$(Platform_arch)\vm \
         -relativeInclude src\share\vm \
         -relativeInclude src\share\vm\precompiled \
+        -relativeInclude src\share\vm\prims\wbtestmethods \
         -relativeInclude src\share\vm\prims \
         -relativeInclude src\os\windows\vm \
         -relativeInclude src\os_cpu\windows_$(Platform_arch)\vm \
--- a/make/windows/makefiles/rules.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/windows/makefiles/rules.make	Sat Oct 20 00:08:35 2012 -0700
@@ -23,14 +23,15 @@
 #
 
 # These are the commands used externally to compile and run.
-
+# The \ separators are used here for traditional Windows apps, and the paths
+# are quoted with " to get past the Unix-like shell:
 !ifdef BootStrapDir
-RUN_JAVA=$(BootStrapDir)\bin\java
-RUN_JAVAP=$(BootStrapDir)\bin\javap
-RUN_JAVAH=$(BootStrapDir)\bin\javah
-RUN_JAR=$(BootStrapDir)\bin\jar
-COMPILE_JAVAC=$(BootStrapDir)\bin\javac $(BOOTSTRAP_JAVAC_FLAGS)
-COMPILE_RMIC=$(BootStrapDir)\bin\rmic
+RUN_JAVA="$(BootStrapDir)\bin\java"
+RUN_JAVAP="$(BootStrapDir)\bin\javap"
+RUN_JAVAH="$(BootStrapDir)\bin\javah"
+RUN_JAR="$(BootStrapDir)\bin\jar"
+COMPILE_JAVAC="$(BootStrapDir)\bin\javac" $(BOOTSTRAP_JAVAC_FLAGS)
+COMPILE_RMIC="$(BootStrapDir)\bin\rmic"
 BOOT_JAVA_HOME=$(BootStrapDir)
 !else
 RUN_JAVA=java
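
The quoting added above matters once BootStrapDir contains a space, say a JDK under Program Files: unquoted, the Unix-like shell running the recipe splits the path into two words. Sketched in GNU make terms with an invented path:

    BootStrapDir := C:\Program Files\Java\jdk7

    # Unquoted, the shell would see 'C:\Program' and 'Files\Java\jdk7\bin\java'.
    RUN_JAVA := "$(BootStrapDir)\bin\java"

    version:
    	$(RUN_JAVA) -version
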
--- a/make/windows/makefiles/sa.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/windows/makefiles/sa.make	Sat Oct 20 00:08:35 2012 -0700
@@ -36,37 +36,37 @@
 !include $(WorkSpace)/make/windows/makefiles/rules.make
 !include $(WorkSpace)/make/sa.files
 
-GENERATED = ..\generated
+GENERATED = ../generated
 
 # tools.jar is needed by the JDI - SA binding
-SA_CLASSPATH = $(BOOT_JAVA_HOME)\lib\tools.jar
+SA_CLASSPATH = $(BOOT_JAVA_HOME)/lib/tools.jar
 
-SA_CLASSDIR = $(GENERATED)\saclasses
+SA_CLASSDIR = $(GENERATED)/saclasses
 
 SA_BUILD_VERSION_PROP = sun.jvm.hotspot.runtime.VM.saBuildVersion=$(SA_BUILD_VERSION)
 
-SA_PROPERTIES = $(SA_CLASSDIR)\sa.properties
+SA_PROPERTIES = $(SA_CLASSDIR)/sa.properties
 
-default::  $(GENERATED)\sa-jdi.jar
+default::  $(GENERATED)/sa-jdi.jar
 
 # Do not put a space between $(SA_BUILD_VERSION_PROP) and > below; it would add trailing
 # whitespace to the SA version string and cause a version mismatch with the target VM version.
 
-$(GENERATED)\sa-jdi.jar: $(AGENT_FILES:/=\)
-	@if not exist $(SA_CLASSDIR) mkdir $(SA_CLASSDIR)
-	@echo ...Building sa-jdi.jar
+$(GENERATED)/sa-jdi.jar: $(AGENT_FILES)
+	$(QUIETLY) mkdir -p $(SA_CLASSDIR)
+	@echo ...Building sa-jdi.jar into $(SA_CLASSDIR)
 	@echo ...$(COMPILE_JAVAC) -classpath $(SA_CLASSPATH) -d $(SA_CLASSDIR) ....
-	@$(COMPILE_JAVAC) -classpath $(SA_CLASSPATH) -sourcepath $(AGENT_SRC_DIR) -d $(SA_CLASSDIR) $(AGENT_FILES:/=\)
+	@$(COMPILE_JAVAC) -classpath $(SA_CLASSPATH) -sourcepath $(AGENT_SRC_DIR) -d $(SA_CLASSDIR) $(AGENT_FILES)
 	$(COMPILE_RMIC) -classpath $(SA_CLASSDIR) -d $(SA_CLASSDIR) sun.jvm.hotspot.debugger.remote.RemoteDebuggerServer
 	$(QUIETLY) echo $(SA_BUILD_VERSION_PROP)> $(SA_PROPERTIES)
 	$(QUIETLY) rm -f $(SA_CLASSDIR)/sun/jvm/hotspot/utilities/soql/sa.js
 	$(QUIETLY) cp $(AGENT_SRC_DIR)/sun/jvm/hotspot/utilities/soql/sa.js $(SA_CLASSDIR)/sun/jvm/hotspot/utilities/soql
 	$(QUIETLY) rm -rf $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
-	$(QUIETLY) mkdir $(SA_CLASSDIR)\sun\jvm\hotspot\ui\resources
+	$(QUIETLY) mkdir $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
 	$(QUIETLY) cp $(AGENT_SRC_DIR)/sun/jvm/hotspot/ui/resources/*.png $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
 	$(QUIETLY) cp -r $(AGENT_SRC_DIR)/images/* $(SA_CLASSDIR)
 	$(RUN_JAR) cf $@ -C $(SA_CLASSDIR) .
-	$(RUN_JAR) uf $@ -C $(AGENT_SRC_DIR:/=\) META-INF\services\com.sun.jdi.connect.Connector
+	$(RUN_JAR) uf $@ -C $(AGENT_SRC_DIR) META-INF/services/com.sun.jdi.connect.Connector
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.windbg.WindbgDebuggerLocal
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.x86.X86ThreadContext 
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.ia64.IA64ThreadContext 
@@ -85,27 +85,27 @@
 # will be useful to have the assertion checks in place
 
 !if "$(BUILDARCH)" == "ia64"
-SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+SA_CFLAGS = -nologo $(MS_RUNTIME_OPTION) -W3 $(GX_OPTION) -Od -D "WIN32" -D "WIN64" -D "_WINDOWS" -D "_DEBUG" -D "_CONSOLE" -D "_MBCS" -YX -FD -c
 !elseif "$(BUILDARCH)" == "amd64"
-SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+SA_CFLAGS = -nologo $(MS_RUNTIME_OPTION) -W3 $(GX_OPTION) -Od -D "WIN32" -D "WIN64" -D "_WINDOWS" -D "_DEBUG" -D "_CONSOLE" -D "_MBCS" -YX -FD -c
 !if "$(COMPILER_NAME)" == "VS2005"
 # On amd64, the VS2005 compiler requires bufferoverflowU.lib on the link command line,
 # otherwise we get missing __security_check_cookie externals at link time. 
 SA_LD_FLAGS = bufferoverflowU.lib
 !endif
 !else
-SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 /Gm $(GX_OPTION) /Od /D "WIN32" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
+SA_CFLAGS = -nologo $(MS_RUNTIME_OPTION) -W3 -Gm $(GX_OPTION) -Od -D "WIN32" -D "_WINDOWS" -D "_DEBUG" -D "_CONSOLE" -D "_MBCS" -YX -FD -GZ -c
 !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
-SA_CFLAGS = $(SA_CFLAGS) /ZI
+SA_CFLAGS = $(SA_CFLAGS) -ZI
 !endif
 !endif
 !if "$(MT)" != ""
-SA_LD_FLAGS = /manifest $(SA_LD_FLAGS)
+SA_LD_FLAGS = -manifest $(SA_LD_FLAGS)
 !endif
 SASRCFILE = $(AGENT_DIR)/src/os/win32/windbg/sawindbg.cpp
-SA_LFLAGS = $(SA_LD_FLAGS) /nologo /subsystem:console /machine:$(MACHINE)
+SA_LFLAGS = $(SA_LD_FLAGS) -nologo -subsystem:console -machine:$(MACHINE)
 !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
-SA_LFLAGS = $(SA_LFLAGS) /map /debug
+SA_LFLAGS = $(SA_LFLAGS) -map -debug
 !endif
 
 # Note that we do not keep sawindbg.obj around as it would then
@@ -117,15 +117,15 @@
 $(SAWINDBG): $(SASRCFILE)
 	set INCLUDE=$(SA_INCLUDE)$(INCLUDE)
 	$(CXX) @<<
-	  /I"$(BootStrapDir)/include" /I"$(BootStrapDir)/include/win32" 
-	  /I"$(GENERATED)" $(SA_CFLAGS)
+	  -I"$(BootStrapDir)/include" -I"$(BootStrapDir)/include/win32" 
+	  -I"$(GENERATED)" $(SA_CFLAGS)
 	  $(SASRCFILE)
-	  /out:$*.obj
+	  -out:$*.obj
 <<
 	set LIB=$(SA_LIB)$(LIB)
-	$(LD) /out:$@ /DLL $*.obj dbgeng.lib $(SA_LFLAGS)
+	$(LD) -out:$@ -DLL $*.obj dbgeng.lib $(SA_LFLAGS)
 !if "$(MT)" != ""
-	$(MT) /manifest $(@F).manifest /outputresource:$(@F);#2
+	$(MT) -manifest $(@F).manifest -outputresource:$(@F);#2
 !endif
 !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
 !if "$(ZIP_DEBUGINFO_FILES)" == "1"
@@ -136,6 +136,6 @@
 	-@rm -f $*.obj
 
 cleanall :
-	rm -rf $(GENERATED:\=/)/saclasses
-	rm -rf $(GENERATED:\=/)/sa-jdi.jar
+	rm -rf $(GENERATED)/saclasses
+	rm -rf $(GENERATED)/sa-jdi.jar
 !endif
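
The mechanical /flag to -flag rewrite above works because the Microsoft tools accept either prefix for options, while the dash form cannot be mistaken for an absolute path by the Unix-like shells (Cygwin, MinGW) that now drive these rules. For instance, a rule along these lines is safe from either shell (flags abbreviated from the SA_CFLAGS/SA_LFLAGS above):

    sawindbg.obj: sawindbg.cpp
    	cl -nologo -W3 -c sawindbg.cpp -Fo$@
    	link -nologo -DLL -out:sawindbg.dll $@ dbgeng.lib
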
--- a/make/windows/makefiles/shared.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/windows/makefiles/shared.make	Sat Oct 20 00:08:35 2012 -0700
@@ -36,11 +36,12 @@
 
 
 !ifdef SUBDIRS
+# \ is used below because $(MAKE) is nmake here, which expects Windows paths
 $(SUBDIRS): FORCE
 	@if not exist $@ mkdir $@
-	@if not exist $@\local.make echo # Empty > $@\local.make
-	@echo nmake $(ACTION) in $(DIR)\$@
-	cd $@ && $(MAKE) /NOLOGO /f $(WorkSpace)\make\windows\makefiles\$@.make $(ACTION) DIR=$(DIR)\$@ BUILD_FLAVOR=$(BUILD_FLAVOR)
+	@if not exist $@/local.make echo # Empty > $@/local.make
+	@echo nmake $(ACTION) in $(DIR)/$@
+	cd $@ && $(MAKE) -NOLOGO -f $(WorkSpace)\make\windows\makefiles\$@.make $(ACTION) DIR=$(DIR)\$@ BUILD_FLAVOR=$(BUILD_FLAVOR)
 !endif
 
 # Creates the needed directory
--- a/make/windows/makefiles/vm.make	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/windows/makefiles/vm.make	Sat Oct 20 00:08:35 2012 -0700
@@ -172,6 +172,7 @@
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/memory
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/oops
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/prims
+VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/prims/wbtestmethods
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/runtime
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/services
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/trace
@@ -269,6 +270,9 @@
 {$(COMMONSRC)\share\vm\prims}.cpp.obj::
         $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
+{$(COMMONSRC)\share\vm\prims\wbtestmethods}.cpp.obj::
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
+
 {$(COMMONSRC)\share\vm\runtime}.cpp.obj::
         $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
@@ -349,6 +353,9 @@
 {$(ALTSRC)\share\vm\prims}.cpp.obj::
         $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
+{$(ALTSRC)\share\vm\prims\wbtestmethods}.cpp.obj::
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
+
 {$(ALTSRC)\share\vm\runtime}.cpp.obj::
         $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/windows/makefiles/wb.make	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,54 @@
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#  
+#
+
+# This makefile is used to build the whitebox testing lib
+# and compile the tests which use it
+
+!include $(WorkSpace)/make/windows/makefiles/rules.make
+
+WBSRCDIR = $(WorkSpace)/src/share/tools/whitebox
+
+# turn GENERATED into a windows path to get sane dependencies
+WB_CLASSES=$(GENERATED:/=\)\wb\classes
+WB_JAR=$(GENERATED:/=\)\wb.jar
+
+# call recursive make to do wildcard expansion
+.SUFFIXES : .java .class
+wb_java_srcs: $(WorkSpace)\src\share\tools\whitebox\sun\hotspot\*.java $(WB_CLASSES)
+	$(MAKE) -f $(WorkSpace)\make\windows\makefiles\$(BUILD_FLAVOR).make $(**:.java=.class)
+
+
+{$(WorkSpace)\src\share\tools\whitebox\sun\hotspot}.java.class::
+	$(COMPILE_JAVAC) -sourcepath $(WBSRCDIR) -d $(WB_CLASSES) $<
+
+$(WB_JAR): wb_java_srcs
+	$(RUN_JAR) cf $@ -C $(WB_CLASSES) .
+
+# turn $@ into a unix path because the mkdir on PATH is the cygwin/mks mkdir
+$(WB_CLASSES):
+	mkdir -p $(@:\=/)
+
+# main target to build wb
+wb: $(WB_JAR)
+
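
The wb_java_srcs indirection above leans on two nmake behaviors: wildcards are expanded in dependency lists (not in recipes), and $** names the full expanded dependent list, so the recursive invocation can rewrite it from .java to .class and let the batch inference rule compile everything. In GNU make the same net effect needs no recursion; for comparison, with shortened names:

    WB_SRCS    := $(wildcard sun/hotspot/*.java)
    WB_CLASSES := $(WB_SRCS:.java=.class)

    wb_classes: $(WB_CLASSES)

    %.class: %.java
    	javac -d . $<
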
--- a/make/windows/projectfiles/common/Makefile	Wed Sep 26 21:45:41 2012 -0700
+++ b/make/windows/projectfiles/common/Makefile	Sat Oct 20 00:08:35 2012 -0700
@@ -108,7 +108,7 @@
       -define              HOTSPOT_VM_DISTRO=\\\"$(HOTSPOT_VM_DISTRO)\\\"
 
 $(HOTSPOTBUILDSPACE)/$(ProjectFile): $(HOTSPOTBUILDSPACE)/classes/ProjectCreator.class
-	@$(RUN_JAVA) -Djava.class.path=$(HOTSPOTBUILDSPACE)/classes ProjectCreator WinGammaPlatform$(VcVersion) $(ProjectCreatorIDEOptions)
+	@$(RUN_JAVA) -Djava.class.path="$(HOTSPOTBUILDSPACE)/classes" ProjectCreator WinGammaPlatform$(VcVersion) $(ProjectCreatorIDEOptions)
 
 clean:
 	@rm -rf $(HOTSPOTBUILDSPACE)/classes
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -44,8 +44,10 @@
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) stop(error)
 #else
 #define BLOCK_COMMENT(str) block_comment(str)
+#define STOP(error) block_comment(error); stop(error)
 #endif
 
 // Convert the raw encoding form into the form expected by the
@@ -992,7 +994,7 @@
   save_frame(0);                // to avoid clobbering O0
   ld_ptr(pc_addr, L0);
   br_null_short(L0, Assembler::pt, PcOk);
-  stop("last_Java_pc not zeroed before leaving Java");
+  STOP("last_Java_pc not zeroed before leaving Java");
   bind(PcOk);
 
   // Verify that flags was zeroed on return to Java
@@ -1001,7 +1003,7 @@
   tst(L0);
   br(Assembler::zero, false, Assembler::pt, FlagsOk);
   delayed() -> restore();
-  stop("flags not zeroed before leaving Java");
+  STOP("flags not zeroed before leaving Java");
   bind(FlagsOk);
 #endif /* ASSERT */
   //
@@ -1021,7 +1023,7 @@
   andcc(last_java_sp, 0x01, G0);
   br(Assembler::notZero, false, Assembler::pt, StackOk);
   delayed()->nop();
-  stop("Stack Not Biased in set_last_Java_frame");
+  STOP("Stack Not Biased in set_last_Java_frame");
   bind(StackOk);
 #endif // ASSERT
   assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame");
@@ -1650,23 +1652,28 @@
 
 
 void RegistersForDebugging::print(outputStream* s) {
+  FlagSetting fs(Debugging, true);
   int j;
-  for ( j = 0;  j < 8;  ++j )
-    if ( j != 6 ) s->print_cr("i%d = 0x%.16lx", j, i[j]);
-    else          s->print_cr( "fp = 0x%.16lx",    i[j]);
-  s->cr();
-
-  for ( j = 0;  j < 8;  ++j )
-    s->print_cr("l%d = 0x%.16lx", j, l[j]);
+  for (j = 0; j < 8; ++j) {
+    if (j != 6) { s->print("i%d = ", j); os::print_location(s, i[j]); }
+    else        { s->print( "fp = "   ); os::print_location(s, i[j]); }
+  }
   s->cr();
 
-  for ( j = 0;  j < 8;  ++j )
-    if ( j != 6 ) s->print_cr("o%d = 0x%.16lx", j, o[j]);
-    else          s->print_cr( "sp = 0x%.16lx",    o[j]);
+  for (j = 0;  j < 8;  ++j) {
+    s->print("l%d = ", j); os::print_location(s, l[j]);
+  }
   s->cr();
 
-  for ( j = 0;  j < 8;  ++j )
-    s->print_cr("g%d = 0x%.16lx", j, g[j]);
+  for (j = 0; j < 8; ++j) {
+    if (j != 6) { s->print("o%d = ", j); os::print_location(s, o[j]); }
+    else        { s->print( "sp = "   ); os::print_location(s, o[j]); }
+  }
+  s->cr();
+
+  for (j = 0; j < 8; ++j) {
+    s->print("g%d = ", j); os::print_location(s, g[j]);
+  }
   s->cr();
 
   // print out floats with compression
@@ -2020,8 +2027,8 @@
   char* b = new char[1024];
   sprintf(b, "untested: %s", what);
 
-  if ( ShowMessageBoxOnError )   stop(b);
-  else                           warn(b);
+  if (ShowMessageBoxOnError) { STOP(b); }
+  else                       { warn(b); }
 }
 
 
@@ -2998,26 +3005,60 @@
 }
 
 
+// virtual method calling
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register method_result) {
+  assert_different_registers(recv_klass, method_result, vtable_index.register_or_noreg());
+  Register sethi_temp = method_result;
+  const int base = (instanceKlass::vtable_start_offset() * wordSize +
+                    // method pointer offset within the vtable entry:
+                    vtableEntry::method_offset_in_bytes());
+  RegisterOrConstant vtable_offset = vtable_index;
+  // Each of the following three lines potentially generates an instruction.
+  // But the total number of address formation instructions will always be
+  // at most two, and will often be zero.  In any case, it will be optimal.
+  // If vtable_index is a register, we will have (sll_ptr N,x; inc_ptr B,x; ld_ptr k,x).
+  // If vtable_index is a constant, we will have at most (set B+X<<N,t; ld_ptr k,t).
+  vtable_offset = regcon_sll_ptr(vtable_index, exact_log2(vtableEntry::size() * wordSize), vtable_offset);
+  vtable_offset = regcon_inc_ptr(vtable_offset, base, vtable_offset, sethi_temp);
+  Address vtable_entry_addr(recv_klass, ensure_simm13_or_reg(vtable_offset, sethi_temp));
+  ld_ptr(vtable_entry_addr, method_result);
+}
+
+
 void MacroAssembler::check_klass_subtype(Register sub_klass,
                                          Register super_klass,
                                          Register temp_reg,
                                          Register temp2_reg,
                                          Label& L_success) {
-  Label L_failure, L_pop_to_failure;
-  check_klass_subtype_fast_path(sub_klass, super_klass,
-                                temp_reg, temp2_reg,
-                                &L_success, &L_failure, NULL);
   Register sub_2 = sub_klass;
   Register sup_2 = super_klass;
   if (!sub_2->is_global())  sub_2 = L0;
   if (!sup_2->is_global())  sup_2 = L1;
-
-  save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
+  bool did_save = false;
+  if (temp_reg == noreg || temp2_reg == noreg) {
+    temp_reg = L2;
+    temp2_reg = L3;
+    save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
+    sub_klass = sub_2;
+    super_klass = sup_2;
+    did_save = true;
+  }
+  Label L_failure, L_pop_to_failure, L_pop_to_success;
+  check_klass_subtype_fast_path(sub_klass, super_klass,
+                                temp_reg, temp2_reg,
+                                (did_save ? &L_pop_to_success : &L_success),
+                                (did_save ? &L_pop_to_failure : &L_failure), NULL);
+
+  if (!did_save)
+    save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
   check_klass_subtype_slow_path(sub_2, sup_2,
                                 L2, L3, L4, L5,
                                 NULL, &L_pop_to_failure);
 
   // on success:
+  bind(L_pop_to_success);
   restore();
   ba_short(L_success);
 
@@ -3234,54 +3275,6 @@
 }
 
 
-void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
-                                              Register temp_reg,
-                                              Label& wrong_method_type) {
-  assert_different_registers(mtype_reg, mh_reg, temp_reg);
-  // compare method type against that of the receiver
-  RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg);
-  load_heap_oop(mh_reg, mhtype_offset, temp_reg);
-  cmp_and_brx_short(temp_reg, mtype_reg, Assembler::notEqual, Assembler::pn, wrong_method_type);
-}
-
-
-// A method handle has a "vmslots" field which gives the size of its
-// argument list in JVM stack slots.  This field is either located directly
-// in every method handle, or else is indirectly accessed through the
-// method handle's MethodType.  This macro hides the distinction.
-void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
-                                                Register temp_reg) {
-  assert_different_registers(vmslots_reg, mh_reg, temp_reg);
-  // load mh.type.form.vmslots
-  Register temp2_reg = vmslots_reg;
-  load_heap_oop(Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)),      temp2_reg);
-  load_heap_oop(Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)),        temp2_reg);
-  ld(           Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)), vmslots_reg);
-}
-
-
-void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg, bool emit_delayed_nop) {
-  assert(mh_reg == G3_method_handle, "caller must put MH object in G3");
-  assert_different_registers(mh_reg, temp_reg);
-
-  // pick out the interpreted side of the handler
-  // NOTE: vmentry is not an oop!
-  ld_ptr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg), temp_reg);
-
-  // off we go...
-  ld_ptr(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes(), temp_reg);
-  jmp(temp_reg, 0);
-
-  // for the various stubs which take control at this point,
-  // see MethodHandles::generate_method_handle_stub
-
-  // Some callers can fill the delay slot.
-  if (emit_delayed_nop) {
-    delayed()->nop();
-  }
-}
-
-
 RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot,
                                                    Register temp_reg,
                                                    int extra_slot_offset) {
@@ -3914,7 +3907,7 @@
     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
     or3(t1, t2, t3);
     cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next);
-    stop("assert(top >= start)");
+    STOP("assert(top >= start)");
     should_not_reach_here();
 
     bind(next);
@@ -3922,13 +3915,13 @@
     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
     or3(t3, t2, t3);
     cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2);
-    stop("assert(top <= end)");
+    STOP("assert(top <= end)");
     should_not_reach_here();
 
     bind(next2);
     and3(t3, MinObjAlignmentInBytesMask, t3);
     cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok);
-    stop("assert(aligned)");
+    STOP("assert(aligned)");
     should_not_reach_here();
 
     bind(ok);
@@ -3976,7 +3969,7 @@
       btst(MinObjAlignmentInBytesMask, obj);
       br(Assembler::zero, false, Assembler::pt, L);
       delayed()->nop();
-      stop("eden top is not properly aligned");
+      STOP("eden top is not properly aligned");
       bind(L);
     }
 #endif // ASSERT
@@ -4013,7 +4006,7 @@
       btst(MinObjAlignmentInBytesMask, top_addr);
       br(Assembler::zero, false, Assembler::pt, L);
       delayed()->nop();
-      stop("eden top is not properly aligned");
+      STOP("eden top is not properly aligned");
       bind(L);
     }
 #endif // ASSERT
@@ -4066,7 +4059,7 @@
     btst(MinObjAlignmentInBytesMask, free);
     br(Assembler::zero, false, Assembler::pt, L);
     delayed()->nop();
-    stop("updated TLAB free is not properly aligned");
+    STOP("updated TLAB free is not properly aligned");
     bind(L);
   }
 #endif // ASSERT
@@ -4164,7 +4157,7 @@
     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
     sll_ptr(t2, LogHeapWordSize, t2);
     cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok);
-    stop("assert(t1 == tlab_size)");
+    STOP("assert(t1 == tlab_size)");
     should_not_reach_here();
 
     bind(ok);
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -2221,7 +2221,7 @@
   // traps as per trap.h (SPARC ABI?)
 
   void breakpoint_trap();
-  void breakpoint_trap(Condition c, CC cc = icc);
+  void breakpoint_trap(Condition c, CC cc);
   void flush_windows_trap();
   void clean_windows_trap();
   void get_psr_trap();
@@ -2538,6 +2538,11 @@
                                Register temp_reg, Register temp2_reg,
                                Label& no_such_interface);
 
+  // virtual method calling
+  void lookup_virtual_method(Register recv_klass,
+                             RegisterOrConstant vtable_index,
+                             Register method_result);
+
   // Test sub_klass against super_klass, with fast and slow paths.
 
   // The fast path produces a tri-state answer: yes / no / maybe-slow.
@@ -2577,12 +2582,6 @@
                            Label& L_success);
 
   // method handles (JSR 292)
-  void check_method_handle_type(Register mtype_reg, Register mh_reg,
-                                Register temp_reg,
-                                Label& wrong_method_type);
-  void load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
-                                  Register temp_reg);
-  void jump_to_method_handle_entry(Register mh_reg, Register temp_reg, bool emit_delayed_nop = true);
   // offset relative to Gargs of argument at tos[arg_slot].
   // (arg_slot == 0 means the last argument, not the first).
   RegisterOrConstant argument_offset(RegisterOrConstant arg_slot,
@@ -2590,7 +2589,7 @@
                                      int extra_slot_offset = 0);
   // Address of Gargs and argument_offset.
   Address            argument_address(RegisterOrConstant arg_slot,
-                                      Register temp_reg,
+                                      Register temp_reg = noreg,
                                       int extra_slot_offset = 0);
 
   // Stack overflow checking
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -435,85 +435,6 @@
 
 }
 
-void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
-  // At this point we know that offset == referent_offset.
-  //
-  // So we might have to emit:
-  //   if (src == null) goto continuation.
-  //
-  // and we definitely have to emit:
-  //   if (klass(src).reference_type == REF_NONE) goto continuation
-  //   if (!marking_active) goto continuation
-  //   if (pre_val == null) goto continuation
-  //   call pre_barrier(pre_val)
-  //   goto continuation
-  //
-  __ bind(_entry);
-
-  assert(src()->is_register(), "sanity");
-  Register src_reg = src()->as_register();
-
-  if (gen_src_check()) {
-    // The original src operand was not a constant.
-    // Generate src == null?
-    if (__ is_in_wdisp16_range(_continuation)) {
-      __ br_null(src_reg, /*annul*/false, Assembler::pt, _continuation);
-    } else {
-      __ cmp(src_reg, G0);
-      __ brx(Assembler::equal, false, Assembler::pt, _continuation);
-    }
-    __ delayed()->nop();
-  }
-
-  // Generate src->_klass->_reference_type() == REF_NONE)?
-  assert(tmp()->is_register(), "sanity");
-  Register tmp_reg = tmp()->as_register();
-
-  __ load_klass(src_reg, tmp_reg);
-
-  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
-  __ ldub(ref_type_adr, tmp_reg);
-
-  // _reference_type field is of type ReferenceType (enum)
-  assert(REF_NONE == 0, "check this code");
-  __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
-  __ delayed()->nop();
-
-  // Is marking active?
-  assert(thread()->is_register(), "precondition");
-  Register thread_reg = thread()->as_pointer_register();
-
-  Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       PtrQueue::byte_offset_of_active()));
-
-  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
-    __ ld(in_progress, tmp_reg);
-  } else {
-    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
-    __ ldsb(in_progress, tmp_reg);
-  }
-
-  __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
-  __ delayed()->nop();
-
-  // val == null?
-  assert(val()->is_register(), "Precondition.");
-  Register val_reg = val()->as_register();
-
-  if (__ is_in_wdisp16_range(_continuation)) {
-    __ br_null(val_reg, /*annul*/false, Assembler::pt, _continuation);
-  } else {
-    __ cmp(val_reg, G0);
-    __ brx(Assembler::equal, false, Assembler::pt, _continuation);
-  }
-  __ delayed()->nop();
-
-  __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id));
-  __ delayed()->mov(val_reg, G4);
-  __ br(Assembler::always, false, Assembler::pt, _continuation);
-  __ delayed()->nop();
-}
-
 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
 
 jbyte* G1PostBarrierStub::byte_map_base_slow() {
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -2956,6 +2956,7 @@
 void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
   ciMethod* method = op->profiled_method();
   int bci          = op->profiled_bci();
+  ciMethod* callee = op->profiled_callee();
 
   // Update counter for all call types
   ciMethodData* md = method->method_data_or_null();
@@ -2984,9 +2985,11 @@
 
   Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
   Bytecodes::Code bc = method->java_code_at_bci(bci);
+  const bool callee_is_static = callee->is_loaded() && callee->is_static();
   // Perform additional virtual call profiling for invokevirtual and
   // invokeinterface bytecodes
   if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+      !callee_is_static &&  // required for optimized MH invokes
       C1ProfileVirtualCalls) {
     assert(op->recv()->is_single_cpu(), "recv must be allocated");
     Register recv = op->recv()->as_register();
--- a/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -644,30 +644,6 @@
 }
 
 
-void LIRGenerator::do_AttemptUpdate(Intrinsic* x) {
-  assert(x->number_of_arguments() == 3, "wrong type");
-  LIRItem obj       (x->argument_at(0), this);  // AtomicLong object
-  LIRItem cmp_value (x->argument_at(1), this);  // value to compare with field
-  LIRItem new_value (x->argument_at(2), this);  // replace field with new_value if it matches cmp_value
-
-  obj.load_item();
-  cmp_value.load_item();
-  new_value.load_item();
-
-  // generate compare-and-swap and produce zero condition if swap occurs
-  int value_offset = sun_misc_AtomicLongCSImpl::value_offset();
-  LIR_Opr addr = FrameMap::O7_opr;
-  __ add(obj.result(), LIR_OprFact::intConst(value_offset), addr);
-  LIR_Opr t1 = FrameMap::G1_opr;  // temp for 64-bit value
-  LIR_Opr t2 = FrameMap::G3_opr;  // temp for 64-bit value
-  __ cas_long(addr, cmp_value.result(), new_value.result(), t1, t2);
-
-  // generate conditional move of boolean result
-  LIR_Opr result = rlock_result(x);
-  __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), result, T_LONG);
-}
-
-
 void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
   assert(x->number_of_arguments() == 4, "wrong type");
   LIRItem obj   (x->argument_at(0), this);  // object
@@ -738,7 +714,8 @@
     case vmIntrinsics::_dlog: // fall through
     case vmIntrinsics::_dsin: // fall through
     case vmIntrinsics::_dtan: // fall through
-    case vmIntrinsics::_dcos: {
+    case vmIntrinsics::_dcos: // fall through
+    case vmIntrinsics::_dexp: {
       assert(x->number_of_arguments() == 1, "wrong type");
 
       address runtime_entry = NULL;
@@ -758,12 +735,23 @@
       case vmIntrinsics::_dlog10:
         runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
         break;
+      case vmIntrinsics::_dexp:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
+        break;
       default:
         ShouldNotReachHere();
       }
 
       LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL);
       set_result(x, result);
+      break;
+    }
+    case vmIntrinsics::_dpow: {
+      assert(x->number_of_arguments() == 2, "wrong type");
+      address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
+      LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL);
+      set_result(x, result);
+      break;
     }
   }
 }
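
The two hunks above add Math.exp and Math.pow to C1's SPARC intrinsic dispatch: _dexp slots into the shared one-argument transcendental path, while _dpow needs its own case because call_runtime takes two arguments there. Condensed from the hunks (same calls, sketch form):

    case vmIntrinsics::_dexp:    // 1-arg, shares the common call_runtime below
      runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
      break;
    case vmIntrinsics::_dpow: {  // 2-arg, needs the two-argument overload
      address entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
      set_result(x, call_runtime(x->argument_at(0), x->argument_at(1),
                                 entry, x->type(), NULL));
      break;
    }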
--- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -490,7 +490,8 @@
                       ConstantPoolCacheEntry::size()) * BytesPerWord), G1_scratch);
 
     // get constant pool cache
-    __ ld_ptr(G5_method, in_bytes(methodOopDesc::constants_offset()), G3_scratch);
+    __ ld_ptr(G5_method, in_bytes(methodOopDesc::const_offset()), G3_scratch);
+    __ ld_ptr(G3_scratch, in_bytes(constMethodOopDesc::constants_offset()), G3_scratch);
     __ ld_ptr(G3_scratch, constantPoolOopDesc::cache_offset_in_bytes(), G3_scratch);
 
     // get specific constant pool cache entry
@@ -514,9 +515,9 @@
     // Need to differentiate between igetfield, agetfield, bgetfield etc.
     // because they are different sizes.
     // Get the type from the constant pool cache
-    __ srl(G1_scratch, ConstantPoolCacheEntry::tosBits, G1_scratch);
-    // Make sure we don't need to mask G1_scratch for tosBits after the above shift
-    ConstantPoolCacheEntry::verify_tosBits();
+    __ srl(G1_scratch, ConstantPoolCacheEntry::tos_state_shift, G1_scratch);
+    // Make sure we don't need to mask G1_scratch after the above shift
+    ConstantPoolCacheEntry::verify_tos_state_shift();
     __ cmp(G1_scratch, atos );
     __ br(Assembler::equal, true, Assembler::pt, xreturn_path);
     __ delayed()->ld_ptr(Otos_i, G3_scratch, Otos_i);
@@ -768,7 +769,8 @@
     // for static methods insert the mirror argument
     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
 
-    __ ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc:: constants_offset())), O1);
+    __ ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc:: const_offset())), O1);
+    __ ld_ptr(Address(O1, 0, in_bytes(constMethodOopDesc::constants_offset())), O1);
     __ ld_ptr(Address(O1, 0, constantPoolOopDesc::pool_holder_offset_in_bytes()), O1);
     __ ld_ptr(O1, mirror_offset, O1);
     // where the mirror handle body is allocated:
@@ -1047,7 +1049,7 @@
   assert_different_registers(state, prev_state);
   assert_different_registers(prev_state, G3_scratch);
   const Register Gtmp = G3_scratch;
-  const Address constants         (G5_method, 0, in_bytes(methodOopDesc::constants_offset()));
+  const Address constMethod       (G5_method, 0, in_bytes(methodOopDesc::const_offset()));
   const Address access_flags      (G5_method, 0, in_bytes(methodOopDesc::access_flags_offset()));
   const Address size_of_parameters(G5_method, 0, in_bytes(methodOopDesc::size_of_parameters_offset()));
   const Address max_stack         (G5_method, 0, in_bytes(methodOopDesc::max_stack_offset()));
@@ -1155,7 +1157,8 @@
   __ set((int) BytecodeInterpreter::method_entry, O1);
   __ st(O1, XXX_STATE(_msg));
 
-  __ ld_ptr(constants, O3);
+  __ ld_ptr(constMethod, O3);
+  __ ld_ptr(O3, in_bytes(constMethodOopDesc::constants_offset()), O3);
   __ ld_ptr(O3, constantPoolOopDesc::cache_offset_in_bytes(), O2);
   __ st_ptr(O2, XXX_STATE(_constants));
 
@@ -1178,7 +1181,8 @@
     __ ld_ptr(XXX_STATE(_locals), O1);
     __ br( Assembler::zero, true, Assembler::pt, got_obj);
     __ delayed()->ld_ptr(O1, 0, O1);                  // get receiver for not-static case
-    __ ld_ptr(constants, O1);
+    __ ld_ptr(constMethod, O1);
+    __ ld_ptr( O1, in_bytes(constMethodOopDesc::constants_offset()), O1);
     __ ld_ptr( O1, constantPoolOopDesc::pool_holder_offset_in_bytes(), O1);
     // lock the mirror, not the klassOop
     __ ld_ptr( O1, mirror_offset, O1);
@@ -1187,7 +1191,7 @@
 
   #ifdef ASSERT
     __ tst(O1);
-    __ breakpoint_trap(Assembler::zero);
+    __ breakpoint_trap(Assembler::zero, Assembler::ptr_cc);
   #endif // ASSERT
 
     const int entry_size            = frame::interpreter_frame_monitor_size() * wordSize;
@@ -1536,7 +1540,7 @@
   const Register Gtmp1 = G3_scratch;
   // const Register Lmirror = L1;     // native mirror (native calls only)
 
-  const Address constants         (G5_method, 0, in_bytes(methodOopDesc::constants_offset()));
+  const Address constMethod       (G5_method, 0, in_bytes(methodOopDesc::const_offset()));
   const Address access_flags      (G5_method, 0, in_bytes(methodOopDesc::access_flags_offset()));
   const Address size_of_parameters(G5_method, 0, in_bytes(methodOopDesc::size_of_parameters_offset()));
   const Address max_stack         (G5_method, 0, in_bytes(methodOopDesc::max_stack_offset()));
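
One mechanical change runs through this whole file: the constant pool pointer has moved off the methodOop and behind the constMethodOop, so every former single load of constants_offset() becomes a two-step pointer chase. Each hunk above follows the same pattern:

    // Old: one load            cp = method->_constants
    //   __ ld_ptr(G5_method, in_bytes(methodOopDesc::constants_offset()), Rdst);
    // New: two dependent loads  cp = method->_constMethod->_constants
    __ ld_ptr(G5_method, in_bytes(methodOopDesc::const_offset()),          Rdst);
    __ ld_ptr(Rdst,      in_bytes(constMethodOopDesc::constants_offset()), Rdst);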
--- a/src/cpu/sparc/vm/frame_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/frame_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -514,7 +514,6 @@
   // interpreted but its pc is in the code cache (for c1 -> osr_frame_return_id stub), so it must be
   // explicitly recognized.
 
-  if (is_ricochet_frame())    return sender_for_ricochet_frame(map);
 
   bool frame_is_interpreted = is_interpreted_frame();
   if (frame_is_interpreted) {
@@ -821,9 +820,7 @@
     values.describe(frame_no, sp() + w, err_msg("register save area word %d", w), 1);
   }
 
-  if (is_ricochet_frame()) {
-    MethodHandles::RicochetFrame::describe(this, values, frame_no);
-  } else if (is_interpreted_frame()) {
+  if (is_interpreted_frame()) {
     DESCRIBE_FP_OFFSET(interpreter_frame_d_scratch_fp);
     DESCRIBE_FP_OFFSET(interpreter_frame_l_scratch_fp);
     DESCRIBE_FP_OFFSET(interpreter_frame_padding);
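
With the ricochet cases deleted, SPARC stack walking has no method-handle-specific frame type left; control falls straight to the tests already visible in these hunks:

    // Surviving dispatch (sketch of the structure shown in the hunks):
    //   sender():   is_interpreted_frame() -> interpreter sender handling
    //               otherwise              -> code-cache / native path below
    //   describe(): starts at the is_interpreted_frame() case directly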
--- a/src/cpu/sparc/vm/globals_sparc.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/globals_sparc.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -75,4 +75,43 @@
 
 // GC Ergo Flags
 define_pd_global(intx, CMSYoungGenPerWorker, 16*M);  // default max size of CMS young gen, per GC worker thread
+
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
+                                                                            \
+  product(intx, UseVIS, 99,                                                 \
+          "Highest supported VIS instructions set on Sparc")                \
+                                                                            \
+  product(bool, UseCBCond, false,                                           \
+          "Use compare and branch instruction on SPARC")                    \
+                                                                            \
+  product(bool, UseBlockZeroing, false,                                     \
+          "Use special cpu instructions for block zeroing")                 \
+                                                                            \
+  product(intx, BlockZeroingLowLimit, 2048,                                 \
+          "Minimum size in bytes when block zeroing will be used")          \
+                                                                            \
+  product(bool, UseBlockCopy, false,                                        \
+          "Use special cpu instructions for block copy")                    \
+                                                                            \
+  product(intx, BlockCopyLowLimit, 2048,                                    \
+          "Minimum size in bytes when block copy will be used")             \
+                                                                            \
+  develop(bool, UseV8InstrsOnly, false,                                     \
+          "Use SPARC-V8 Compliant instruction subset")                      \
+                                                                            \
+  product(bool, UseNiagaraInstrs, false,                                    \
+          "Use Niagara-efficient instruction subset")                       \
+                                                                            \
+  develop(bool, UseCASForSwap, false,                                       \
+          "Do not use swap instructions, but only CAS (in a loop) on SPARC")\
+                                                                            \
+  product(uintx,  ArraycopySrcPrefetchDistance, 0,                          \
+          "Distance to prefetch source array in arraycopy")                 \
+                                                                            \
+  product(uintx,  ArraycopyDstPrefetchDistance, 0,                          \
+          "Distance to prefetch destination array in arraycopy")            \
+                                                                            \
+  develop(intx, V8AtomicOperationUnderLockSpinCount,    50,                 \
+          "Number of times to spin wait on a v8 atomic operation lock")     \
+
 #endif // CPU_SPARC_VM_GLOBALS_SPARC_HPP
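
The new ARCH_FLAGS table follows HotSpot's globals-macro pattern: each product/develop row expands, via the shared flags machinery, into a typed global with a default value and help text, so the SPARC-only -XX options are declared in one place. A hypothetical consumer of one pair of these flags, purely for illustration (byte_count is a stand-in; the real consumers live in the SPARC stub code):

    if (UseBlockZeroing && byte_count >= (size_t) BlockZeroingLowLimit) {
      // emit the block-zeroing instruction sequence
    } else {
      // fall back to an ordinary store loop
    }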
--- a/src/cpu/sparc/vm/interp_masm_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -505,7 +505,7 @@
 void InterpreterMacroAssembler::load_receiver(Register param_count,
                                               Register recv) {
   sll(param_count, Interpreter::logStackElementSize, param_count);
-  ld_ptr(Lesp, param_count, recv);                      // gets receiver Oop
+  ld_ptr(Lesp, param_count, recv);  // gets receiver oop
 }
 
 void InterpreterMacroAssembler::empty_expression_stack() {
@@ -767,8 +767,12 @@
   get_cache_and_index_at_bcp(cache, temp, bcp_offset, index_size);
   ld_ptr(cache, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset(), bytecode);
   const int shift_count = (1 + byte_no) * BitsPerByte;
-  srl( bytecode, shift_count, bytecode);
-  and3(bytecode,        0xFF, bytecode);
+  assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) ||
+         (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift),
+         "correct shift count");
+  srl(bytecode, shift_count, bytecode);
+  assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask");
+  and3(bytecode, ConstantPoolCacheEntry::bytecode_1_mask, bytecode);
 }
 
 
@@ -934,8 +938,14 @@
 }
 
 
+void InterpreterMacroAssembler::get_const(Register Rdst) {
+  ld_ptr(Lmethod, in_bytes(methodOopDesc::const_offset()), Rdst);
+}
+
+
 void InterpreterMacroAssembler::get_constant_pool(Register Rdst) {
-  ld_ptr(Lmethod, in_bytes(methodOopDesc::constants_offset()), Rdst);
+  get_const(Rdst);
+  ld_ptr(Rdst, in_bytes(constMethodOopDesc::constants_offset()), Rdst);
 }
 
 
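
The asserts added around the srl/and3 pair above make the old magic numbers checkable: the shift (1 + byte_no) * BitsPerByte must equal bytecode_1_shift for f1_byte and bytecode_2_shift for f2_byte, and both resolved-bytecode fields share one mask (the old literal 0xFF). Restated as plain C++, with the concrete shifts inferred from that identity (assuming the usual f1_byte == 1, f2_byte == 2):

    // What the masm sequence computes (sketch).
    int resolved_bytecode(uintptr_t indices, int byte_no) {
      int shift = (1 + byte_no) * BitsPerByte;  // 16 for f1_byte, 24 for f2_byte
      return (int)((indices >> shift) & ConstantPoolCacheEntry::bytecode_1_mask);
    }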
--- a/src/cpu/sparc/vm/interp_masm_sparc.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/interp_masm_sparc.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -205,6 +205,7 @@
   void index_check(Register array, Register index, int index_shift, Register tmp, Register res);
   void index_check_without_pop(Register array, Register index, int index_shift, Register tmp, Register res);
 
+  void get_const(Register Rdst);
   void get_constant_pool(Register Rdst);
   void get_constant_pool_cache(Register Rdst);
   void get_cpool_and_tags(Register Rcpool, Register Rtags);
--- a/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -32,7 +32,6 @@
   address generate_normal_entry(bool synchronized);
   address generate_native_entry(bool synchronized);
   address generate_abstract_entry(void);
-  address generate_method_handle_entry(void);
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
   address generate_empty_entry(void);
   address generate_accessor_entry(void);
--- a/src/cpu/sparc/vm/interpreter_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/interpreter_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -255,17 +255,6 @@
 }
 
 
-// Method handle invoker
-// Dispatch a method of the form java.lang.invoke.MethodHandles::invoke(...)
-address InterpreterGenerator::generate_method_handle_entry(void) {
-  if (!EnableInvokeDynamic) {
-    return generate_abstract_entry();
-  }
-
-  return MethodHandles::generate_method_handle_interpreter_entry(_masm);
-}
-
-
 //----------------------------------------------------------------------------------------------------
 // Entry points & stack frame layout
 //
@@ -395,7 +384,7 @@
     case Interpreter::empty                  : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry();        break;
     case Interpreter::accessor               : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry();     break;
     case Interpreter::abstract               : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry();     break;
-    case Interpreter::method_handle          : entry_point = ((InterpreterGenerator*)this)->generate_method_handle_entry(); break;
+
     case Interpreter::java_lang_math_sin     :                                                                             break;
     case Interpreter::java_lang_math_cos     :                                                                             break;
     case Interpreter::java_lang_math_tan     :                                                                             break;
@@ -403,9 +392,13 @@
     case Interpreter::java_lang_math_abs     :                                                                             break;
     case Interpreter::java_lang_math_log     :                                                                             break;
     case Interpreter::java_lang_math_log10   :                                                                             break;
+    case Interpreter::java_lang_math_pow     :                                                                             break;
+    case Interpreter::java_lang_math_exp     :                                                                             break;
     case Interpreter::java_lang_ref_reference_get
                                              : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
-    default                                  : ShouldNotReachHere();                                                       break;
+    default:
+      fatal(err_msg("unexpected method kind: %d", kind));
+      break;
   }
 
   if (entry_point) return entry_point;
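
Two notes on this hunk: the method_handle entry is gone because signature-polymorphic dispatch now goes through the per-intrinsic entries generated in methodHandles_sparc.cpp below, and the default case trades ShouldNotReachHere() for fatal(err_msg(...)) so an unexpected kind value lands in the hs_err log instead of being lost:

    // Standard HotSpot idiom for a diagnosable "impossible" case:
    default:
      fatal(err_msg("unexpected method kind: %d", kind));
      break;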
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -31,452 +31,37 @@
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) stop(error)
 #else
 #define BLOCK_COMMENT(str) __ block_comment(str)
+#define STOP(error) block_comment(error); __ stop(error)
 #endif
 
 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
 
-address MethodHandleEntry::start_compiled_entry(MacroAssembler* _masm,
-                                                address interpreted_entry) {
-  // Just before the actual machine code entry point, allocate space
-  // for a MethodHandleEntry::Data record, so that we can manage everything
-  // from one base pointer.
-  __ align(wordSize);
-  address target = __ pc() + sizeof(Data);
-  while (__ pc() < target) {
-    __ nop();
-    __ align(wordSize);
-  }
-
-  MethodHandleEntry* me = (MethodHandleEntry*) __ pc();
-  me->set_end_address(__ pc());         // set a temporary end_address
-  me->set_from_interpreted_entry(interpreted_entry);
-  me->set_type_checking_entry(NULL);
-
-  return (address) me;
-}
-
-MethodHandleEntry* MethodHandleEntry::finish_compiled_entry(MacroAssembler* _masm,
-                                                address start_addr) {
-  MethodHandleEntry* me = (MethodHandleEntry*) start_addr;
-  assert(me->end_address() == start_addr, "valid ME");
-
-  // Fill in the real end_address:
-  __ align(wordSize);
-  me->set_end_address(__ pc());
-
-  return me;
-}
-
-// stack walking support
-
-frame MethodHandles::ricochet_frame_sender(const frame& fr, RegisterMap *map) {
-  //RicochetFrame* f = RicochetFrame::from_frame(fr);
-  // Cf. is_interpreted_frame path of frame::sender
-  intptr_t* younger_sp = fr.sp();
-  intptr_t* sp         = fr.sender_sp();
-  map->make_integer_regs_unsaved();
-  map->shift_window(sp, younger_sp);
-  bool this_frame_adjusted_stack = true;  // I5_savedSP is live in this RF
-  return frame(sp, younger_sp, this_frame_adjusted_stack);
-}
-
-void MethodHandles::ricochet_frame_oops_do(const frame& fr, OopClosure* blk, const RegisterMap* reg_map) {
-  ResourceMark rm;
-  RicochetFrame* f = RicochetFrame::from_frame(fr);
-
-  // pick up the argument type descriptor:
-  Thread* thread = Thread::current();
-  Handle cookie(thread, f->compute_saved_args_layout(true, true));
-
-  // process fixed part
-  blk->do_oop((oop*)f->saved_target_addr());
-  blk->do_oop((oop*)f->saved_args_layout_addr());
-
-  // process variable arguments:
-  if (cookie.is_null())  return;  // no arguments to describe
-
-  // the cookie is actually the invokeExact method for my target;
-  // its argument signature is what I'm interested in
-  assert(cookie->is_method(), "");
-  methodHandle invoker(thread, methodOop(cookie()));
-  assert(invoker->name() == vmSymbols::invokeExact_name(), "must be this kind of method");
-  assert(!invoker->is_static(), "must have MH argument");
-  int slot_count = invoker->size_of_parameters();
-  assert(slot_count >= 1, "must include 'this'");
-  intptr_t* base = f->saved_args_base();
-  intptr_t* retval = NULL;
-  if (f->has_return_value_slot())
-    retval = f->return_value_slot_addr();
-  int slot_num = slot_count - 1;
-  intptr_t* loc = &base[slot_num];
-  //blk->do_oop((oop*) loc);   // original target, which is irrelevant
-  int arg_num = 0;
-  for (SignatureStream ss(invoker->signature()); !ss.is_done(); ss.next()) {
-    if (ss.at_return_type())  continue;
-    BasicType ptype = ss.type();
-    if (ptype == T_ARRAY)  ptype = T_OBJECT; // fold all refs to T_OBJECT
-    assert(ptype >= T_BOOLEAN && ptype <= T_OBJECT, "not array or void");
-    slot_num -= type2size[ptype];
-    loc = &base[slot_num];
-    bool is_oop = (ptype == T_OBJECT && loc != retval);
-    if (is_oop)  blk->do_oop((oop*)loc);
-    arg_num += 1;
-  }
-  assert(slot_num == 0, "must have processed all the arguments");
-}
-
-// Ricochet Frames
-const Register MethodHandles::RicochetFrame::L1_continuation      = L1;
-const Register MethodHandles::RicochetFrame::L2_saved_target      = L2;
-const Register MethodHandles::RicochetFrame::L3_saved_args_layout = L3;
-const Register MethodHandles::RicochetFrame::L4_saved_args_base   = L4; // cf. Gargs = G4
-const Register MethodHandles::RicochetFrame::L5_conversion        = L5;
-#ifdef ASSERT
-const Register MethodHandles::RicochetFrame::L0_magic_number_1    = L0;
-#endif //ASSERT
-
-oop MethodHandles::RicochetFrame::compute_saved_args_layout(bool read_cache, bool write_cache) {
-  if (read_cache) {
-    oop cookie = saved_args_layout();
-    if (cookie != NULL)  return cookie;
-  }
-  oop target = saved_target();
-  oop mtype  = java_lang_invoke_MethodHandle::type(target);
-  oop mtform = java_lang_invoke_MethodType::form(mtype);
-  oop cookie = java_lang_invoke_MethodTypeForm::vmlayout(mtform);
-  if (write_cache)  {
-    (*saved_args_layout_addr()) = cookie;
-  }
-  return cookie;
+// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant.
+static RegisterOrConstant constant(int value) {
+  return RegisterOrConstant(value);
 }
 
-void MethodHandles::RicochetFrame::generate_ricochet_blob(MacroAssembler* _masm,
-                                                          // output params:
-                                                          int* bounce_offset,
-                                                          int* exception_offset,
-                                                          int* frame_size_in_words) {
-  (*frame_size_in_words) = RicochetFrame::frame_size_in_bytes() / wordSize;
-
-  address start = __ pc();
-
-#ifdef ASSERT
-  __ illtrap(0); __ illtrap(0); __ illtrap(0);
-  // here's a hint of something special:
-  __ set(MAGIC_NUMBER_1, G0);
-  __ set(MAGIC_NUMBER_2, G0);
-#endif //ASSERT
-  __ illtrap(0);  // not reached
-
-  // Return values are in registers.
-  // L1_continuation contains a cleanup continuation we must return
-  // to.
-
-  (*bounce_offset) = __ pc() - start;
-  BLOCK_COMMENT("ricochet_blob.bounce");
-
-  if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
-  trace_method_handle(_masm, "return/ricochet_blob.bounce");
-
-  __ JMP(L1_continuation, 0);
-  __ delayed()->nop();
-  __ illtrap(0);
-
-  DEBUG_ONLY(__ set(MAGIC_NUMBER_2, G0));
-
-  (*exception_offset) = __ pc() - start;
-  BLOCK_COMMENT("ricochet_blob.exception");
-
-  // compare this to Interpreter::rethrow_exception_entry, which is parallel code
-  // for example, see TemplateInterpreterGenerator::generate_throw_exception
-  // Live registers in:
-  //   Oexception  (O0): exception
-  //   Oissuing_pc (O1): return address/pc that threw exception (ignored, always equal to bounce addr)
-  __ verify_oop(Oexception);
-
-  // Take down the frame.
-
-  // Cf. InterpreterMacroAssembler::remove_activation.
-  leave_ricochet_frame(_masm, /*recv_reg=*/ noreg, I5_savedSP, I7);
-
-  // We are done with this activation frame; find out where to go next.
-  // The continuation point will be an exception handler, which expects
-  // the following registers set up:
-  //
-  // Oexception: exception
-  // Oissuing_pc: the local call that threw exception
-  // Other On: garbage
-  // In/Ln:  the contents of the caller's register window
-  //
-  // We do the required restore at the last possible moment, because we
-  // need to preserve some state across a runtime call.
-  // (Remember that the caller activation is unknown--it might not be
-  // interpreted, so things like Lscratch are useless in the caller.)
-  __ mov(Oexception,  Oexception ->after_save());  // get exception in I0 so it will be on O0 after restore
-  __ add(I7, frame::pc_return_offset, Oissuing_pc->after_save());  // likewise set I1 to a value local to the caller
-  __ call_VM_leaf(L7_thread_cache,
-                  CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
-                  G2_thread, Oissuing_pc->after_save());
-
-  // The caller's SP was adjusted upon method entry to accommodate
-  // the callee's non-argument locals. Undo that adjustment.
-  __ JMP(O0, 0);                         // return exception handler in caller
-  __ delayed()->restore(I5_savedSP, G0, SP);
-
-  // (same old exception object is already in Oexception; see above)
-  // Note that an "issuing PC" is actually the next PC after the call
-}
-
-void MethodHandles::RicochetFrame::enter_ricochet_frame(MacroAssembler* _masm,
-                                                        Register recv_reg,
-                                                        Register argv_reg,
-                                                        address return_handler) {
-  // does not include the __ save()
-  assert(argv_reg == Gargs, "");
-  Address G3_mh_vmtarget(   recv_reg, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes());
-  Address G3_amh_conversion(recv_reg, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes());
-
-  // Create the RicochetFrame.
-  // Unlike on x86 we can store all required information in local
-  // registers.
-  BLOCK_COMMENT("push RicochetFrame {");
-  __ set(ExternalAddress(return_handler),          L1_continuation);
-  __ load_heap_oop(G3_mh_vmtarget,                 L2_saved_target);
-  __ mov(G0,                                       L3_saved_args_layout);
-  __ mov(Gargs,                                    L4_saved_args_base);
-  __ lduw(G3_amh_conversion,                       L5_conversion);  // 32-bit field
-  // I5, I6, I7 are already set up
-  DEBUG_ONLY(__ set((int32_t) MAGIC_NUMBER_1,      L0_magic_number_1));
-  BLOCK_COMMENT("} RicochetFrame");
-}
-
-void MethodHandles::RicochetFrame::leave_ricochet_frame(MacroAssembler* _masm,
-                                                        Register recv_reg,
-                                                        Register new_sp_reg,
-                                                        Register sender_pc_reg) {
-  assert(new_sp_reg == I5_savedSP, "exact_sender_sp already in place");
-  assert(sender_pc_reg == I7, "in a fixed place");
-  // does not include the __ ret() & __ restore()
-  assert_different_registers(recv_reg, new_sp_reg, sender_pc_reg);
-  // Take down the frame.
-  // Cf. InterpreterMacroAssembler::remove_activation.
-  BLOCK_COMMENT("end_ricochet_frame {");
-  if (recv_reg->is_valid())
-    __ mov(L2_saved_target, recv_reg);
-  BLOCK_COMMENT("} end_ricochet_frame");
-}
-
-// Emit code to verify that FP is pointing at a valid ricochet frame.
-#ifndef PRODUCT
-enum {
-  ARG_LIMIT = 255, SLOP = 45,
-  // use this parameter for checking for garbage stack movements:
-  UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP)
-  // the slop defends against false alarms due to fencepost errors
-};
-#endif
-
-#ifdef ASSERT
-void MethodHandles::RicochetFrame::verify_clean(MacroAssembler* _masm) {
-  // The stack should look like this:
-  //    ... keep1 | dest=42 | keep2 | magic | handler | magic | recursive args | [RF]
-  // Check various invariants.
-
-  Register O7_temp = O7, O5_temp = O5;
-
-  Label L_ok_1, L_ok_2, L_ok_3, L_ok_4;
-  BLOCK_COMMENT("verify_clean {");
-  // Magic numbers must check out:
-  __ set((int32_t) MAGIC_NUMBER_1, O7_temp);
-  __ cmp_and_br_short(O7_temp, L0_magic_number_1, Assembler::equal, Assembler::pt, L_ok_1);
-  __ stop("damaged ricochet frame: MAGIC_NUMBER_1 not found");
-
-  __ BIND(L_ok_1);
-
-  // Arguments pointer must look reasonable:
-#ifdef _LP64
-  Register FP_temp = O5_temp;
-  __ add(FP, STACK_BIAS, FP_temp);
-#else
-  Register FP_temp = FP;
-#endif
-  __ cmp_and_brx_short(L4_saved_args_base, FP_temp, Assembler::greaterEqualUnsigned, Assembler::pt, L_ok_2);
-  __ stop("damaged ricochet frame: L4 < FP");
-
-  __ BIND(L_ok_2);
-  // Disabled until we decide on its fate
-  // __ sub(L4_saved_args_base, UNREASONABLE_STACK_MOVE * Interpreter::stackElementSize, O7_temp);
-  // __ cmp(O7_temp, FP_temp);
-  // __ br(Assembler::lessEqualUnsigned, false, Assembler::pt, L_ok_3);
-  // __ delayed()->nop();
-  // __ stop("damaged ricochet frame: (L4 - UNREASONABLE_STACK_MOVE) > FP");
-
-  __ BIND(L_ok_3);
-  extract_conversion_dest_type(_masm, L5_conversion, O7_temp);
-  __ cmp_and_br_short(O7_temp, T_VOID, Assembler::equal, Assembler::pt, L_ok_4);
-  extract_conversion_vminfo(_masm, L5_conversion, O5_temp);
-  __ ld_ptr(L4_saved_args_base, __ argument_offset(O5_temp, O5_temp), O7_temp);
-  assert(Assembler::is_simm13(RETURN_VALUE_PLACEHOLDER), "must be simm13");
-  __ cmp_and_brx_short(O7_temp, (int32_t) RETURN_VALUE_PLACEHOLDER, Assembler::equal, Assembler::pt, L_ok_4);
-  __ stop("damaged ricochet frame: RETURN_VALUE_PLACEHOLDER not found");
-  __ BIND(L_ok_4);
-  BLOCK_COMMENT("} verify_clean");
-}
-#endif //ASSERT
-
 void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg) {
   if (VerifyMethodHandles)
     verify_klass(_masm, klass_reg, SystemDictionaryHandles::Class_klass(), temp_reg, temp2_reg,
-                 "AMH argument is a Class");
+                 "MH argument is a Class");
   __ load_heap_oop(Address(klass_reg, java_lang_Class::klass_offset_in_bytes()), klass_reg);
 }
 
-void MethodHandles::load_conversion_vminfo(MacroAssembler* _masm, Address conversion_field_addr, Register reg) {
-  assert(CONV_VMINFO_SHIFT == 0, "preshifted");
-  assert(CONV_VMINFO_MASK == right_n_bits(BitsPerByte), "else change type of following load");
-  __ ldub(conversion_field_addr.plus_disp(BytesPerInt - 1), reg);
-}
-
-void MethodHandles::extract_conversion_vminfo(MacroAssembler* _masm, Register conversion_field_reg, Register reg) {
-  assert(CONV_VMINFO_SHIFT == 0, "preshifted");
-  __ and3(conversion_field_reg, CONV_VMINFO_MASK, reg);
-}
-
-void MethodHandles::extract_conversion_dest_type(MacroAssembler* _masm, Register conversion_field_reg, Register reg) {
-  __ srl(conversion_field_reg, CONV_DEST_TYPE_SHIFT, reg);
-  __ and3(reg, 0x0F, reg);
-}
-
-void MethodHandles::load_stack_move(MacroAssembler* _masm,
-                                    Address G3_amh_conversion,
-                                    Register stack_move_reg) {
-  BLOCK_COMMENT("load_stack_move {");
-  __ ldsw(G3_amh_conversion, stack_move_reg);
-  __ sra(stack_move_reg, CONV_STACK_MOVE_SHIFT, stack_move_reg);
-#ifdef ASSERT
-  if (VerifyMethodHandles) {
-    Label L_ok, L_bad;
-    int32_t stack_move_limit = 0x0800;  // extra-large
-    __ cmp_and_br_short(stack_move_reg, stack_move_limit, Assembler::greaterEqual, Assembler::pn, L_bad);
-    __ cmp(stack_move_reg, -stack_move_limit);
-    __ br(Assembler::greater, false, Assembler::pt, L_ok);
-    __ delayed()->nop();
-    __ BIND(L_bad);
-    __ stop("load_stack_move of garbage value");
-    __ BIND(L_ok);
-  }
-#endif
-  BLOCK_COMMENT("} load_stack_move");
-}
-
 #ifdef ASSERT
-void MethodHandles::RicochetFrame::verify() const {
-  assert(magic_number_1() == MAGIC_NUMBER_1, "");
-  if (!Universe::heap()->is_gc_active()) {
-    if (saved_args_layout() != NULL) {
-      assert(saved_args_layout()->is_method(), "must be valid oop");
-    }
-    if (saved_target() != NULL) {
-      assert(java_lang_invoke_MethodHandle::is_instance(saved_target()), "checking frame value");
-    }
-  }
-  int conv_op = adapter_conversion_op(conversion());
-  assert(conv_op == java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS ||
-         conv_op == java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS ||
-         conv_op == java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF,
-         "must be a sane conversion");
-  if (has_return_value_slot()) {
-    assert(*return_value_slot_addr() == RETURN_VALUE_PLACEHOLDER, "");
-  }
+static int check_nonzero(const char* xname, int x) {
+  assert(x != 0, err_msg("%s should be nonzero", xname));
+  return x;
 }
-
-void MethodHandles::verify_argslot(MacroAssembler* _masm, Register argslot_reg, Register temp_reg, const char* error_message) {
-  // Verify that argslot lies within (Gargs, FP].
-  Label L_ok, L_bad;
-  BLOCK_COMMENT("verify_argslot {");
-  __ cmp_and_brx_short(Gargs, argslot_reg, Assembler::greaterUnsigned, Assembler::pn, L_bad);
-  __ add(FP, STACK_BIAS, temp_reg);  // STACK_BIAS is zero on !_LP64
-  __ cmp_and_brx_short(argslot_reg, temp_reg, Assembler::lessEqualUnsigned, Assembler::pt, L_ok);
-  __ BIND(L_bad);
-  __ stop(error_message);
-  __ BIND(L_ok);
-  BLOCK_COMMENT("} verify_argslot");
-}
+#define NONZERO(x) check_nonzero(#x, x)
+#else //ASSERT
+#define NONZERO(x) (x)
+#endif //ASSERT
 
-void MethodHandles::verify_argslots(MacroAssembler* _masm,
-                                    RegisterOrConstant arg_slots,
-                                    Register arg_slot_base_reg,
-                                    Register temp_reg,
-                                    Register temp2_reg,
-                                    bool negate_argslots,
-                                    const char* error_message) {
-  // Verify that [argslot..argslot+size) lies within (Gargs, FP).
-  Label L_ok, L_bad;
-  BLOCK_COMMENT("verify_argslots {");
-  if (negate_argslots) {
-    if (arg_slots.is_constant()) {
-      arg_slots = -1 * arg_slots.as_constant();
-    } else {
-      __ neg(arg_slots.as_register(), temp_reg);
-      arg_slots = temp_reg;
-    }
-  }
-  __ add(arg_slot_base_reg, __ argument_offset(arg_slots, temp_reg), temp_reg);
-  __ add(FP, STACK_BIAS, temp2_reg);  // STACK_BIAS is zero on !_LP64
-  __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::greaterUnsigned, Assembler::pn, L_bad);
-  // Gargs points to the first word so adjust by BytesPerWord
-  __ add(arg_slot_base_reg, BytesPerWord, temp_reg);
-  __ cmp_and_brx_short(Gargs, temp_reg, Assembler::lessEqualUnsigned, Assembler::pt, L_ok);
-  __ BIND(L_bad);
-  __ stop(error_message);
-  __ BIND(L_ok);
-  BLOCK_COMMENT("} verify_argslots");
-}
-
-// Make sure that arg_slots has the same sign as the given direction.
-// If (and only if) arg_slots is an assembly-time constant, also allow it to be zero.
-void MethodHandles::verify_stack_move(MacroAssembler* _masm,
-                                      RegisterOrConstant arg_slots, int direction) {
-  enum { UNREASONABLE_STACK_MOVE = 256 * 4 };  // limit of 255 arguments
-  bool allow_zero = arg_slots.is_constant();
-  if (direction == 0) { direction = +1; allow_zero = true; }
-  assert(stack_move_unit() == -1, "else add extra checks here");
-  if (arg_slots.is_register()) {
-    Label L_ok, L_bad;
-    BLOCK_COMMENT("verify_stack_move {");
-    // __ btst(-stack_move_unit() - 1, arg_slots.as_register());  // no need
-    // __ br(Assembler::notZero, false, Assembler::pn, L_bad);
-    // __ delayed()->nop();
-    __ cmp(arg_slots.as_register(), (int32_t) NULL_WORD);
-    if (direction > 0) {
-      __ br(allow_zero ? Assembler::less : Assembler::lessEqual, false, Assembler::pn, L_bad);
-      __ delayed()->nop();
-      __ cmp(arg_slots.as_register(), (int32_t) UNREASONABLE_STACK_MOVE);
-      __ br(Assembler::less, false, Assembler::pn, L_ok);
-      __ delayed()->nop();
-    } else {
-      __ br(allow_zero ? Assembler::greater : Assembler::greaterEqual, false, Assembler::pn, L_bad);
-      __ delayed()->nop();
-      __ cmp(arg_slots.as_register(), (int32_t) -UNREASONABLE_STACK_MOVE);
-      __ br(Assembler::greater, false, Assembler::pn, L_ok);
-      __ delayed()->nop();
-    }
-    __ BIND(L_bad);
-    if (direction > 0)
-      __ stop("assert arg_slots > 0");
-    else
-      __ stop("assert arg_slots < 0");
-    __ BIND(L_ok);
-    BLOCK_COMMENT("} verify_stack_move");
-  } else {
-    intptr_t size = arg_slots.as_constant();
-    if (direction < 0)  size = -size;
-    assert(size >= 0, "correct direction of constant move");
-    assert(size < UNREASONABLE_STACK_MOVE, "reasonable size of constant move");
-  }
-}
-
+#ifdef ASSERT
 void MethodHandles::verify_klass(MacroAssembler* _masm,
                                  Register obj_reg, KlassHandle klass,
                                  Register temp_reg, Register temp2_reg,
@@ -485,6 +70,14 @@
   assert(klass_addr >= SystemDictionaryHandles::Object_klass().raw_value() &&
          klass_addr <= SystemDictionaryHandles::Long_klass().raw_value(),
          "must be one of the SystemDictionaryHandles");
+  bool did_save = false;
+  if (temp_reg == noreg || temp2_reg == noreg) {
+    temp_reg = L1;
+    temp2_reg = L2;
+    __ save_frame_and_mov(0, obj_reg, L0);
+    obj_reg = L0;
+    did_save = true;
+  }
   Label L_ok, L_bad;
   BLOCK_COMMENT("verify_klass {");
   __ verify_oop(obj_reg);
@@ -499,537 +92,415 @@
   __ ld_ptr(Address(temp2_reg, 0), temp2_reg);
   __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::equal, Assembler::pt, L_ok);
   __ BIND(L_bad);
-  __ stop(error_message);
+  if (did_save)  __ restore();
+  __ STOP(error_message);
   __ BIND(L_ok);
+  if (did_save)  __ restore();
   BLOCK_COMMENT("} verify_klass");
 }
+
+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {
+  Label L;
+  BLOCK_COMMENT("verify_ref_kind {");
+  __ lduw(Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes())), temp);
+  __ srl( temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT, temp);
+  __ and3(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK,  temp);
+  __ cmp_and_br_short(temp, ref_kind, Assembler::equal, Assembler::pt, L);
+  { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal);
+    jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind);
+    if (ref_kind == JVM_REF_invokeVirtual ||
+        ref_kind == JVM_REF_invokeSpecial)
+      // could do this for all ref_kinds, but would explode assembly code size
+      trace_method_handle(_masm, buf);
+    __ STOP(buf);
+  }
+  BLOCK_COMMENT("} verify_ref_kind");
+  __ bind(L);
+}
+
 #endif // ASSERT
 
-
-void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target, Register temp) {
+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target, Register temp,
+                                            bool for_compiler_entry) {
   assert(method == G5_method, "interpreter calling convention");
   __ verify_oop(method);
-  __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_interpreted_offset()), target);
-  if (JvmtiExport::can_post_interpreter_events()) {
+
+  if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) {
+    Label run_compiled_code;
     // JVMTI events, such as single-stepping, are implemented partly by avoiding running
     // compiled code in threads for which the event is enabled.  Check here for
     // interp_only_mode if these events CAN be enabled.
     __ verify_thread();
-    Label skip_compiled_code;
-
     const Address interp_only(G2_thread, JavaThread::interp_only_mode_offset());
     __ ld(interp_only, temp);
-    __ tst(temp);
-    __ br(Assembler::notZero, true, Assembler::pn, skip_compiled_code);
-    __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), target);
-    __ bind(skip_compiled_code);
+    __ cmp_and_br_short(temp, 0, Assembler::zero, Assembler::pt, run_compiled_code);
+    __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), target);
+    __ jmp(target, 0);
+    __ delayed()->nop();
+    __ BIND(run_compiled_code);
+    // Note: we could fill some delay slots here, but
+    // it doesn't matter, since this is interpreter code.
   }
+
+  const ByteSize entry_offset = for_compiler_entry ? methodOopDesc::from_compiled_offset() :
+                                                     methodOopDesc::from_interpreted_offset();
+  __ ld_ptr(G5_method, in_bytes(entry_offset), target);
   __ jmp(target, 0);
   __ delayed()->nop();
 }
 
+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
+                                        Register recv, Register method_temp,
+                                        Register temp2, Register temp3,
+                                        bool for_compiler_entry) {
+  BLOCK_COMMENT("jump_to_lambda_form {");
+  // This is the initial entry point of a lazy method handle.
+  // After type checking, it picks up the invoker from the LambdaForm.
+  assert_different_registers(recv, method_temp, temp2, temp3);
+  assert(method_temp == G5_method, "required register for loading method");
+
+  //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); });
+
+  // Load the invoker, as MH -> MH.form -> LF.vmentry
+  __ verify_oop(recv);
+  __ load_heap_oop(Address(recv,        NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())),       method_temp);
+  __ verify_oop(method_temp);
+  __ load_heap_oop(Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), method_temp);
+  __ verify_oop(method_temp);
+  // the following assumes that a methodOop is normally compressed in the vmtarget field:
+  __ load_heap_oop(Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())),     method_temp);
+  __ verify_oop(method_temp);
+
+  if (VerifyMethodHandles && !for_compiler_entry) {
+    // make sure recv is already on stack
+    __ load_sized_value(Address(method_temp, methodOopDesc::size_of_parameters_offset()),
+                        temp2,
+                        sizeof(u2), /*is_signed*/ false);
+    // assert(sizeof(u2) == sizeof(methodOopDesc::_size_of_parameters), "");
+    Label L;
+    __ ld_ptr(__ argument_address(temp2, temp2, -1), temp2);
+    __ cmp_and_br_short(temp2, recv, Assembler::equal, Assembler::pt, L);
+    __ STOP("receiver not on stack");
+    __ BIND(L);
+  }
+
+  jump_from_method_handle(_masm, method_temp, temp2, temp3, for_compiler_entry);
+  BLOCK_COMMENT("} jump_to_lambda_form");
+}
+
 
 // Code generation
-address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm) {
-  // I5_savedSP/O5_savedSP: sender SP (must preserve)
-  // G4 (Gargs): incoming argument list (must preserve)
-  // G5_method:  invoke methodOop
-  // G3_method_handle: receiver method handle (must load from sp[MethodTypeForm.vmslots])
-  // O0, O1, O2, O3, O4: garbage temps, blown away
-  Register O0_mtype   = O0;
-  Register O1_scratch = O1;
-  Register O2_scratch = O2;
-  Register O3_scratch = O3;
-  Register O4_argslot = O4;
-  Register O4_argbase = O4;
+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm,
+                                                                vmIntrinsics::ID iid) {
+  const bool not_for_compiler_entry = false;  // this is the interpreter entry
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+  if (iid == vmIntrinsics::_invokeGeneric ||
+      iid == vmIntrinsics::_compiledLambdaForm) {
+    // Perhaps surprisingly, the symbolic references visible to Java are not directly used.
+    // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod.
+    // They all allow an appendix argument.
+    __ should_not_reach_here();           // empty stubs make SG sick
+    return NULL;
+  }
 
-  // emit WrongMethodType path first, to enable back-branch from main path
-  Label wrong_method_type;
-  __ bind(wrong_method_type);
-  Label invoke_generic_slow_path;
-  assert(methodOopDesc::intrinsic_id_size_in_bytes() == sizeof(u1), "");
-  __ ldub(Address(G5_method, methodOopDesc::intrinsic_id_offset_in_bytes()), O1_scratch);
-  __ cmp(O1_scratch, (int) vmIntrinsics::_invokeExact);
-  __ brx(Assembler::notEqual, false, Assembler::pt, invoke_generic_slow_path);
-  __ delayed()->nop();
-  __ mov(O0_mtype, G5_method_type);  // required by throw_WrongMethodType
-  __ mov(G3_method_handle, G3_method_handle);  // already in this register
-  // O0 will be filled in with JavaThread in stub
-  __ jump_to(AddressLiteral(StubRoutines::throw_WrongMethodTypeException_entry()), O3_scratch);
-  __ delayed()->nop();
+  // I5_savedSP/O5_savedSP: sender SP (must preserve; see prepare_to_jump_from_interpreted)
+  // G5_method:  methodOop
+  // G4 (Gargs): incoming argument list (must preserve)
+  // O0: used as temp to hold mh or receiver
+  // O1, O4: garbage temps, blown away
+  Register O1_scratch    = O1;
+  Register O4_param_size = O4;   // size of parameters
+
+  address code_start = __ pc();
 
   // here's where control starts out:
   __ align(CodeEntryAlignment);
   address entry_point = __ pc();
 
-  // fetch the MethodType from the method handle
-  // FIXME: Interpreter should transmit pre-popped stack pointer, to locate base of arg list.
-  // This would simplify several touchy bits of code.
-  // See 6984712: JSR 292 method handle calls need a clean argument base pointer
-  {
-    Register tem = G5_method;
-    for (jint* pchase = methodOopDesc::method_type_offsets_chain(); (*pchase) != -1; pchase++) {
-      __ ld_ptr(Address(tem, *pchase), O0_mtype);
-      tem = O0_mtype;          // in case there is another indirection
+  if (VerifyMethodHandles) {
+    Label L;
+    BLOCK_COMMENT("verify_intrinsic_id {");
+    __ ldub(Address(G5_method, methodOopDesc::intrinsic_id_offset_in_bytes()), O1_scratch);
+    __ cmp_and_br_short(O1_scratch, (int) iid, Assembler::equal, Assembler::pt, L);
+    if (iid == vmIntrinsics::_linkToVirtual ||
+        iid == vmIntrinsics::_linkToSpecial) {
+      // could do this for all kinds, but would explode assembly code size
+      trace_method_handle(_masm, "bad methodOop::intrinsic_id");
     }
+    __ STOP("bad methodOop::intrinsic_id");
+    __ bind(L);
+    BLOCK_COMMENT("} verify_intrinsic_id");
+  }
+
+  // First task:  Find out how big the argument list is.
+  Address O4_first_arg_addr;
+  int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
+  assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
+  if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
+    __ load_sized_value(Address(G5_method, methodOopDesc::size_of_parameters_offset()),
+                        O4_param_size,
+                        sizeof(u2), /*is_signed*/ false);
+    // assert(sizeof(u2) == sizeof(methodOopDesc::_size_of_parameters), "");
+    O4_first_arg_addr = __ argument_address(O4_param_size, O4_param_size, -1);
+  } else {
+    DEBUG_ONLY(O4_param_size = noreg);
+  }
+
+  Register O0_mh = noreg;
+  if (!is_signature_polymorphic_static(iid)) {
+    __ ld_ptr(O4_first_arg_addr, O0_mh = O0);
+    DEBUG_ONLY(O4_param_size = noreg);
   }
 
-  // given the MethodType, find out where the MH argument is buried
-  __ load_heap_oop(Address(O0_mtype,   __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes,        O1_scratch)), O4_argslot);
-  __ ldsw(         Address(O4_argslot, __ delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, O1_scratch)), O4_argslot);
-  __ add(__ argument_address(O4_argslot, O4_argslot, 1), O4_argbase);
-  // Note: argument_address uses its input as a scratch register!
-  Address mh_receiver_slot_addr(O4_argbase, -Interpreter::stackElementSize);
-  __ ld_ptr(mh_receiver_slot_addr, G3_method_handle);
+  // O4_first_arg_addr is live!
 
-  trace_method_handle(_masm, "invokeExact");
-
-  __ check_method_handle_type(O0_mtype, G3_method_handle, O1_scratch, wrong_method_type);
-
-  // Nobody uses the MH receiver slot after this.  Make sure.
-  DEBUG_ONLY(__ set((int32_t) 0x999999, O1_scratch); __ st_ptr(O1_scratch, mh_receiver_slot_addr));
-
-  __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+  if (TraceMethodHandles) {
+    const char* name = vmIntrinsics::name_at(iid);
+    if (*name == '_')  name += 1;
+    const size_t len = strlen(name) + 50;
+    char* qname = NEW_C_HEAP_ARRAY(char, len, mtInternal);
+    const char* suffix = "";
+    if (vmIntrinsics::method_for(iid) == NULL ||
+        !vmIntrinsics::method_for(iid)->access_flags().is_public()) {
+      if (is_signature_polymorphic_static(iid))
+        suffix = "/static";
+      else
+        suffix = "/private";
+    }
+    jio_snprintf(qname, len, "MethodHandle::interpreter_entry::%s%s", name, suffix);
+    if (O0_mh != noreg)
+      __ mov(O0_mh, G3_method_handle);  // make stub happy
+    trace_method_handle(_masm, qname);
+  }
 
-  // for invokeGeneric (only), apply argument and result conversions on the fly
-  __ bind(invoke_generic_slow_path);
-#ifdef ASSERT
-  if (VerifyMethodHandles) {
-    Label L;
-    __ ldub(Address(G5_method, methodOopDesc::intrinsic_id_offset_in_bytes()), O1_scratch);
-    __ cmp(O1_scratch, (int) vmIntrinsics::_invokeGeneric);
-    __ brx(Assembler::equal, false, Assembler::pt, L);
-    __ delayed()->nop();
-    __ stop("bad methodOop::intrinsic_id");
-    __ bind(L);
-  }
-#endif //ASSERT
+  if (iid == vmIntrinsics::_invokeBasic) {
+    generate_method_handle_dispatch(_masm, iid, O0_mh, noreg, not_for_compiler_entry);
 
-  // make room on the stack for another pointer:
-  insert_arg_slots(_masm, 2 * stack_move_unit(), O4_argbase, O1_scratch, O2_scratch, O3_scratch);
-  // load up an adapter from the calling type (Java weaves this)
-  Register O2_form    = O2_scratch;
-  Register O3_adapter = O3_scratch;
-  __ load_heap_oop(Address(O0_mtype, __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes,               O1_scratch)), O2_form);
-  __ load_heap_oop(Address(O2_form,  __ delayed_value(java_lang_invoke_MethodTypeForm::genericInvoker_offset_in_bytes, O1_scratch)), O3_adapter);
-  __ verify_oop(O3_adapter);
-  __ st_ptr(O3_adapter, Address(O4_argbase, 1 * Interpreter::stackElementSize));
-  // As a trusted first argument, pass the type being called, so the adapter knows
-  // the actual types of the arguments and return values.
-  // (Generic invokers are shared among form-families of method-type.)
-  __ st_ptr(O0_mtype,   Address(O4_argbase, 0 * Interpreter::stackElementSize));
-  // FIXME: assert that O3_adapter is of the right method-type.
-  __ mov(O3_adapter, G3_method_handle);
-  trace_method_handle(_masm, "invokeGeneric");
-  __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+  } else {
+    // Adjust argument list by popping the trailing MemberName argument.
+    Register O0_recv = noreg;
+    if (MethodHandles::ref_kind_has_receiver(ref_kind)) {
+      // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack.
+      __ ld_ptr(O4_first_arg_addr, O0_recv = O0);
+      DEBUG_ONLY(O4_param_size = noreg);
+    }
+    Register G5_member = G5_method;  // MemberName ptr; incoming method ptr is dead now
+    __ ld_ptr(__ argument_address(constant(0)), G5_member);
+    __ add(Gargs, Interpreter::stackElementSize, Gargs);
+    generate_method_handle_dispatch(_masm, iid, O0_recv, G5_member, not_for_compiler_entry);
+  }
+
+  if (PrintMethodHandleStubs) {
+    address code_end = __ pc();
+    tty->print_cr("--------");
+    tty->print_cr("method handle interpreter entry for %s", vmIntrinsics::name_at(iid));
+    Disassembler::decode(code_start, code_end);
+    tty->cr();
+  }
 
   return entry_point;
 }
 
-// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant.
-static RegisterOrConstant constant(int value) {
-  return RegisterOrConstant(value);
-}
+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
+                                                    vmIntrinsics::ID iid,
+                                                    Register receiver_reg,
+                                                    Register member_reg,
+                                                    bool for_compiler_entry) {
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+  // temps used in this code are not used in *either* compiled or interpreted calling sequences
+  Register temp1 = (for_compiler_entry ? G1_scratch : O1);
+  Register temp2 = (for_compiler_entry ? G4_scratch : O4);
+  Register temp3 = G3_scratch;
+  Register temp4 = (for_compiler_entry ? noreg      : O2);
+  if (for_compiler_entry) {
+    assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : O0), "only valid assignment");
+    assert_different_registers(temp1,      O0, O1, O2, O3, O4, O5);
+    assert_different_registers(temp2,      O0, O1, O2, O3, O4, O5);
+    assert_different_registers(temp3,      O0, O1, O2, O3, O4, O5);
+    assert_different_registers(temp4,      O0, O1, O2, O3, O4, O5);
+  }
+  if (receiver_reg != noreg)  assert_different_registers(temp1, temp2, temp3, temp4, receiver_reg);
+  if (member_reg   != noreg)  assert_different_registers(temp1, temp2, temp3, temp4, member_reg);
+  if (!for_compiler_entry)    assert_different_registers(temp1, temp2, temp3, temp4, O5_savedSP);  // don't trash lastSP
 
-static void load_vmargslot(MacroAssembler* _masm, Address vmargslot_addr, Register result) {
-  __ ldsw(vmargslot_addr, result);
-}
+  if (iid == vmIntrinsics::_invokeBasic) {
+    // indirect through MH.form.vmentry.vmtarget
+    jump_to_lambda_form(_masm, receiver_reg, G5_method, temp2, temp3, for_compiler_entry);
+
+  } else {
+    // The method is a member invoker used by direct method handles.
+    if (VerifyMethodHandles) {
+      // make sure the trailing argument really is a MemberName (caller responsibility)
+      verify_klass(_masm, member_reg, SystemDictionaryHandles::MemberName_klass(),
+                   temp1, temp2,
+                   "MemberName required for invokeVirtual etc.");
+    }
+
+    Address member_clazz(    member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()));
+    Address member_vmindex(  member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()));
+    Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()));
 
-static RegisterOrConstant adjust_SP_and_Gargs_down_by_slots(MacroAssembler* _masm,
-                                                            RegisterOrConstant arg_slots,
-                                                            Register temp_reg, Register temp2_reg) {
-  // Keep the stack pointer 2*wordSize aligned.
-  const int TwoWordAlignmentMask = right_n_bits(LogBytesPerWord + 1);
-  if (arg_slots.is_constant()) {
-    const int        offset = arg_slots.as_constant() << LogBytesPerWord;
-    const int masked_offset = round_to(offset, 2 * BytesPerWord);
-    const int masked_offset2 = (offset + 1*BytesPerWord) & ~TwoWordAlignmentMask;
-    assert(masked_offset == masked_offset2, "must agree");
-    __ sub(Gargs,        offset, Gargs);
-    __ sub(SP,    masked_offset, SP   );
-    return offset;
-  } else {
-#ifdef ASSERT
+    Register temp1_recv_klass = temp1;
+    if (iid != vmIntrinsics::_linkToStatic) {
+      __ verify_oop(receiver_reg);
+      if (iid == vmIntrinsics::_linkToSpecial) {
+        // Don't actually load the klass; just null-check the receiver.
+        __ null_check(receiver_reg);
+      } else {
+        // load receiver klass itself
+        __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes());
+        __ load_klass(receiver_reg, temp1_recv_klass);
+        __ verify_oop(temp1_recv_klass);
+      }
+      BLOCK_COMMENT("check_receiver {");
+      // The receiver for the MemberName must be in receiver_reg.
+      // Check the receiver against the MemberName.clazz
+      if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) {
+        // Did not load it above...
+        __ load_klass(receiver_reg, temp1_recv_klass);
+        __ verify_oop(temp1_recv_klass);
+      }
+      if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
+        Label L_ok;
+        Register temp2_defc = temp2;
+        __ load_heap_oop(member_clazz, temp2_defc);
+        load_klass_from_Class(_masm, temp2_defc, temp3, temp4);
+        __ verify_oop(temp2_defc);
+        __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, L_ok);
+        // If we get here, the type check failed!
+        __ STOP("receiver class disagrees with MemberName.clazz");
+        __ bind(L_ok);
+      }
+      BLOCK_COMMENT("} check_receiver");
+    }
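+    // Roughly, the block above is this Java-level check (a hedged sketch;
+    // 'mn' and 'recv' are hypothetical names for the trailing MemberName and
+    // the stacked receiver, not variables in this file):
+    //   if (VerifyMethodHandles && !mn.clazz.isInstance(recv))
+    //     stop("receiver class disagrees with MemberName.clazz");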
+    if (iid == vmIntrinsics::_linkToSpecial ||
+        iid == vmIntrinsics::_linkToStatic) {
+      DEBUG_ONLY(temp1_recv_klass = noreg);  // these linkers did not load the recv_klass
+    }
+
+    // Live registers at this point:
+    //  member_reg - MemberName that was the trailing argument
+    //  temp1_recv_klass - klass of stacked receiver, if needed
+    //  O5_savedSP - interpreter linkage (if interpreted)
+    //  O0..O7,G1,G4 - compiler arguments (if compiled)
+
+    bool method_is_live = false;
+    switch (iid) {
+    case vmIntrinsics::_linkToSpecial:
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3);
+      }
+      __ load_heap_oop(member_vmtarget, G5_method);
+      method_is_live = true;
+      break;
+
+    case vmIntrinsics::_linkToStatic:
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3);
+      }
+      __ load_heap_oop(member_vmtarget, G5_method);
+      method_is_live = true;
+      break;
+
+    case vmIntrinsics::_linkToVirtual:
     {
-      Label L_ok;
-      __ cmp_and_br_short(arg_slots.as_register(), 0, Assembler::greaterEqual, Assembler::pt, L_ok);
-      __ stop("negative arg_slots");
-      __ bind(L_ok);
+      // same as TemplateTable::invokevirtual,
+      // minus the CP setup and profiling:
+
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3);
+      }
+
+      // pick out the vtable index from the MemberName, and then we can discard it:
+      Register temp2_index = temp2;
+      __ ld_ptr(member_vmindex, temp2_index);
+
+      if (VerifyMethodHandles) {
+        Label L_index_ok;
+        __ cmp_and_br_short(temp2_index, (int) 0, Assembler::greaterEqual, Assembler::pn, L_index_ok);
+        __ STOP("no virtual index");
+        __ BIND(L_index_ok);
+      }
+
+      // Note:  The verifier invariants allow us to ignore MemberName.clazz and vmtarget
+      // at this point.  And VerifyMethodHandles has already checked clazz, if needed.
+
+      // get target methodOop & entry point
+      __ lookup_virtual_method(temp1_recv_klass, temp2_index, G5_method);
+      method_is_live = true;
+      break;
     }
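+    // What lookup_virtual_method computes, sketched in C++ terms under the
+    // one-word-per-vtableEntry layout asserted elsewhere in this file (not
+    // the emitted code):
+    //   intptr_t* vtable = (intptr_t*) recv_klass + instanceKlass::vtable_start_offset();
+    //   G5_method = (methodOop) vtable[vtable_index];   // then jump through its entry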
-#endif
-    __ sll_ptr(arg_slots.as_register(), LogBytesPerWord, temp_reg);
-    __ add( temp_reg,  1*BytesPerWord,       temp2_reg);
-    __ andn(temp2_reg, TwoWordAlignmentMask, temp2_reg);
-    __ sub(Gargs, temp_reg,  Gargs);
-    __ sub(SP,    temp2_reg, SP   );
-    return temp_reg;
-  }
-}
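+// Worked example (editorial note): with 8-byte words, moving down by 3 slots
+// adjusts Gargs by exactly 24 bytes but SP by round_to(24, 16) = 32 bytes, so
+// SP stays 2*wordSize aligned while Gargs tracks the exact argument count.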
+
+    case vmIntrinsics::_linkToInterface:
+    {
+      // same as TemplateTable::invokeinterface
+      // (minus the CP setup and profiling, with different argument motion)
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3);
+      }
+
+      Register temp3_intf = temp3;
+      __ load_heap_oop(member_clazz, temp3_intf);
+      load_klass_from_Class(_masm, temp3_intf, temp2, temp4);
+      __ verify_oop(temp3_intf);
 
-static RegisterOrConstant adjust_SP_and_Gargs_up_by_slots(MacroAssembler* _masm,
-                                                          RegisterOrConstant arg_slots,
-                                                          Register temp_reg, Register temp2_reg) {
-  // Keep the stack pointer 2*wordSize aligned.
-  const int TwoWordAlignmentMask = right_n_bits(LogBytesPerWord + 1);
-  if (arg_slots.is_constant()) {
-    const int        offset = arg_slots.as_constant() << LogBytesPerWord;
-    const int masked_offset = offset & ~TwoWordAlignmentMask;
-    __ add(Gargs,        offset, Gargs);
-    __ add(SP,    masked_offset, SP   );
-    return offset;
-  } else {
-    __ sll_ptr(arg_slots.as_register(), LogBytesPerWord, temp_reg);
-    __ andn(temp_reg, TwoWordAlignmentMask, temp2_reg);
-    __ add(Gargs, temp_reg,  Gargs);
-    __ add(SP,    temp2_reg, SP   );
-    return temp_reg;
+      Register G5_index = G5_method;
+      __ ld_ptr(member_vmindex, G5_index);
+      if (VerifyMethodHandles) {
+        Label L;
+        __ cmp_and_br_short(G5_index, 0, Assembler::greaterEqual, Assembler::pt, L);
+        __ STOP("invalid vtable index for MH.invokeInterface");
+        __ bind(L);
+      }
+
+      // given intf, index, and recv klass, dispatch to the implementation method
+      Label L_no_such_interface;
+      Register no_sethi_temp = noreg;
+      __ lookup_interface_method(temp1_recv_klass, temp3_intf,
+                                 // note: next two args must be the same:
+                                 G5_index, G5_method,
+                                 temp2, no_sethi_temp,
+                                 L_no_such_interface);
+
+      __ verify_oop(G5_method);
+      jump_from_method_handle(_masm, G5_method, temp2, temp3, for_compiler_entry);
+
+      __ bind(L_no_such_interface);
+      AddressLiteral icce(StubRoutines::throw_IncompatibleClassChangeError_entry());
+      __ jump_to(icce, temp3);
+      __ delayed()->nop();
+      break;
+    }
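+    // The itable walk above, as hedged pseudo-code (helper names here are
+    // illustrative, not actual HotSpot API):
+    //   for (e = itable_start(recv_klass); e->interface() != NULL; e++)
+    //     if (e->interface() == intf) { G5_method = method_table(e)[G5_index]; goto found; }
+    //   goto L_no_such_interface;   // ends in IncompatibleClassChangeError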
+
+    default:
+      fatal(err_msg("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
+      break;
+    }
+
+    if (method_is_live) {
+      // live at this point:  G5_method, O5_savedSP (if interpreted)
+
+      // After figuring out which concrete method to call, jump into it.
+      // Note that this works in the interpreter with no data motion.
+      // But the compiled version will require that the receiver argument be shifted out.
+      __ verify_oop(G5_method);
+      jump_from_method_handle(_masm, G5_method, temp1, temp3, for_compiler_entry);
+    }
   }
 }
 
-// Helper to insert argument slots into the stack.
-// arg_slots must be a multiple of stack_move_unit() and < 0
-// argslot_reg is decremented to point to the new (shifted) location of the argslot
-// But, temp_reg ends up holding the original value of argslot_reg.
-void MethodHandles::insert_arg_slots(MacroAssembler* _masm,
-                                     RegisterOrConstant arg_slots,
-                                     Register argslot_reg,
-                                     Register temp_reg, Register temp2_reg, Register temp3_reg) {
-  // allow constant zero
-  if (arg_slots.is_constant() && arg_slots.as_constant() == 0)
-    return;
-
-  assert_different_registers(argslot_reg, temp_reg, temp2_reg, temp3_reg,
-                             (!arg_slots.is_register() ? Gargs : arg_slots.as_register()));
-
-  BLOCK_COMMENT("insert_arg_slots {");
-  if (VerifyMethodHandles)
-    verify_argslot(_masm, argslot_reg, temp_reg, "insertion point must fall within current frame");
-  if (VerifyMethodHandles)
-    verify_stack_move(_masm, arg_slots, -1);
-
-  // Make space on the stack for the inserted argument(s).
-  // Then pull down everything shallower than argslot_reg.
-  // The stacked return address gets pulled down with everything else.
-  // That is, copy [sp, argslot) downward by -size words.  In pseudo-code:
-  //   sp -= size;
-  //   for (temp = sp + size; temp < argslot; temp++)
-  //     temp[-size] = temp[0]
-  //   argslot -= size;
-
-  // offset is temp3_reg in case of arg_slots being a register.
-  RegisterOrConstant offset = adjust_SP_and_Gargs_up_by_slots(_masm, arg_slots, temp3_reg, temp_reg);
-  __ sub(Gargs, offset, temp_reg);  // source pointer for copy
-
-  {
-    Label loop;
-    __ BIND(loop);
-    // pull one word down each time through the loop
-    __ ld_ptr(           Address(temp_reg, 0     ), temp2_reg);
-    __ st_ptr(temp2_reg, Address(temp_reg, offset)           );
-    __ add(temp_reg, wordSize, temp_reg);
-    __ cmp_and_brx_short(temp_reg, argslot_reg, Assembler::lessUnsigned, Assembler::pt, loop);
-  }
-
-  // Now move the argslot down, to point to the opened-up space.
-  __ add(argslot_reg, offset, argslot_reg);
-  BLOCK_COMMENT("} insert_arg_slots");
-}
-
-
-// Helper to remove argument slots from the stack.
-// arg_slots must be a multiple of stack_move_unit() and > 0
-void MethodHandles::remove_arg_slots(MacroAssembler* _masm,
-                                     RegisterOrConstant arg_slots,
-                                     Register argslot_reg,
-                                     Register temp_reg, Register temp2_reg, Register temp3_reg) {
-  // allow constant zero
-  if (arg_slots.is_constant() && arg_slots.as_constant() == 0)
-    return;
-  assert_different_registers(argslot_reg, temp_reg, temp2_reg, temp3_reg,
-                             (!arg_slots.is_register() ? Gargs : arg_slots.as_register()));
-
-  BLOCK_COMMENT("remove_arg_slots {");
-  if (VerifyMethodHandles)
-    verify_argslots(_masm, arg_slots, argslot_reg, temp_reg, temp2_reg, false,
-                    "deleted argument(s) must fall within current frame");
-  if (VerifyMethodHandles)
-    verify_stack_move(_masm, arg_slots, +1);
-
-  // Pull up everything shallower than argslot.
-  // Then remove the excess space on the stack.
-  // The stacked return address gets pulled up with everything else.
-  // That is, copy [sp, argslot) upward by size words.  In pseudo-code:
-  //   for (temp = argslot-1; temp >= sp; --temp)
-  //     temp[size] = temp[0]
-  //   argslot += size;
-  //   sp += size;
-
-  RegisterOrConstant offset = __ regcon_sll_ptr(arg_slots, LogBytesPerWord, temp3_reg);
-  __ sub(argslot_reg, wordSize, temp_reg);  // source pointer for copy
-
-  {
-    Label L_loop;
-    __ BIND(L_loop);
-    // pull one word up each time through the loop
-    __ ld_ptr(           Address(temp_reg, 0     ), temp2_reg);
-    __ st_ptr(temp2_reg, Address(temp_reg, offset)           );
-    __ sub(temp_reg, wordSize, temp_reg);
-    __ cmp_and_brx_short(temp_reg, Gargs, Assembler::greaterEqualUnsigned, Assembler::pt, L_loop);
-  }
-
-  // And adjust the argslot address to point at the deletion point.
-  __ add(argslot_reg, offset, argslot_reg);
-
-  // We don't need the offset at this point anymore, just adjust SP and Gargs.
-  (void) adjust_SP_and_Gargs_up_by_slots(_masm, arg_slots, temp3_reg, temp_reg);
-
-  BLOCK_COMMENT("} remove_arg_slots");
-}
-
-// Helper to copy argument slots to the top of the stack.
-// The sequence starts with argslot_reg and is counted by slot_count
-// slot_count must be a multiple of stack_move_unit() and >= 0
-// This function blows the temps but does not change argslot_reg.
-void MethodHandles::push_arg_slots(MacroAssembler* _masm,
-                                   Register argslot_reg,
-                                   RegisterOrConstant slot_count,
-                                   Register temp_reg, Register temp2_reg) {
-  // allow constant zero
-  if (slot_count.is_constant() && slot_count.as_constant() == 0)
-    return;
-  assert_different_registers(argslot_reg, temp_reg, temp2_reg,
-                             (!slot_count.is_register() ? Gargs : slot_count.as_register()),
-                             SP);
-  assert(Interpreter::stackElementSize == wordSize, "else change this code");
-
-  BLOCK_COMMENT("push_arg_slots {");
-  if (VerifyMethodHandles)
-    verify_stack_move(_masm, slot_count, 0);
-
-  RegisterOrConstant offset = adjust_SP_and_Gargs_down_by_slots(_masm, slot_count, temp2_reg, temp_reg);
-
-  if (slot_count.is_constant()) {
-    for (int i = slot_count.as_constant() - 1; i >= 0; i--) {
-      __ ld_ptr(          Address(argslot_reg, i * wordSize), temp_reg);
-      __ st_ptr(temp_reg, Address(Gargs,       i * wordSize));
-    }
-  } else {
-    Label L_plural, L_loop, L_break;
-    // Emit code to dynamically check for the common cases, zero and one slot.
-    __ cmp(slot_count.as_register(), (int32_t) 1);
-    __ br(Assembler::greater, false, Assembler::pn, L_plural);
-    __ delayed()->nop();
-    __ br(Assembler::less, false, Assembler::pn, L_break);
-    __ delayed()->nop();
-    __ ld_ptr(          Address(argslot_reg, 0), temp_reg);
-    __ st_ptr(temp_reg, Address(Gargs,       0));
-    __ ba_short(L_break);
-    __ BIND(L_plural);
-
-    // Loop for 2 or more:
-    //   top = &argslot[slot_count]
-    //   while (top > argslot)  *(--Gargs) = *(--top)
-    Register top_reg = temp_reg;
-    __ add(argslot_reg, offset, top_reg);
-    __ add(Gargs,       offset, Gargs  );  // move back up again so we can go down
-    __ BIND(L_loop);
-    __ sub(top_reg, wordSize, top_reg);
-    __ sub(Gargs,   wordSize, Gargs  );
-    __ ld_ptr(           Address(top_reg, 0), temp2_reg);
-    __ st_ptr(temp2_reg, Address(Gargs,   0));
-    __ cmp_and_brx_short(top_reg, argslot_reg, Assembler::greaterUnsigned, Assembler::pt, L_loop);
-    __ BIND(L_break);
-  }
-  BLOCK_COMMENT("} push_arg_slots");
-}
-
-// in-place movement; no change to Gargs
-// blows temp_reg, temp2_reg
-void MethodHandles::move_arg_slots_up(MacroAssembler* _masm,
-                                      Register bottom_reg,  // invariant
-                                      Address  top_addr,    // can use temp_reg
-                                      RegisterOrConstant positive_distance_in_slots,  // destroyed if register
-                                      Register temp_reg, Register temp2_reg) {
-  assert_different_registers(bottom_reg,
-                             temp_reg, temp2_reg,
-                             positive_distance_in_slots.register_or_noreg());
-  BLOCK_COMMENT("move_arg_slots_up {");
-  Label L_loop, L_break;
-  Register top_reg = temp_reg;
-  if (!top_addr.is_same_address(Address(top_reg, 0))) {
-    __ add(top_addr, top_reg);
-  }
-  // Detect empty (or broken) loop:
-#ifdef ASSERT
-  if (VerifyMethodHandles) {
-    // Verify that &bottom < &top (non-empty interval)
-    Label L_ok, L_bad;
-    if (positive_distance_in_slots.is_register()) {
-      __ cmp(positive_distance_in_slots.as_register(), (int32_t) 0);
-      __ br(Assembler::lessEqual, false, Assembler::pn, L_bad);
-      __ delayed()->nop();
-    }
-    __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
-    __ BIND(L_bad);
-    __ stop("valid bounds (copy up)");
-    __ BIND(L_ok);
-  }
-#endif
-  __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::greaterEqualUnsigned, Assembler::pn, L_break);
-  // work top down to bottom, copying contiguous data upwards
-  // In pseudo-code:
-  //   while (--top >= bottom) *(top + distance) = *(top + 0);
-  RegisterOrConstant offset = __ argument_offset(positive_distance_in_slots, positive_distance_in_slots.register_or_noreg());
-  __ BIND(L_loop);
-  __ sub(top_reg, wordSize, top_reg);
-  __ ld_ptr(           Address(top_reg, 0     ), temp2_reg);
-  __ st_ptr(temp2_reg, Address(top_reg, offset)           );
-  __ cmp_and_brx_short(top_reg, bottom_reg, Assembler::greaterUnsigned, Assembler::pt, L_loop);
-  assert(Interpreter::stackElementSize == wordSize, "else change loop");
-  __ BIND(L_break);
-  BLOCK_COMMENT("} move_arg_slots_up");
-}
-
-// in-place movement; no change to rsp
-// blows temp_reg, temp2_reg
-void MethodHandles::move_arg_slots_down(MacroAssembler* _masm,
-                                        Address  bottom_addr,  // can use temp_reg
-                                        Register top_reg,      // invariant
-                                        RegisterOrConstant negative_distance_in_slots,  // destroyed if register
-                                        Register temp_reg, Register temp2_reg) {
-  assert_different_registers(top_reg,
-                             negative_distance_in_slots.register_or_noreg(),
-                             temp_reg, temp2_reg);
-  BLOCK_COMMENT("move_arg_slots_down {");
-  Label L_loop, L_break;
-  Register bottom_reg = temp_reg;
-  if (!bottom_addr.is_same_address(Address(bottom_reg, 0))) {
-    __ add(bottom_addr, bottom_reg);
-  }
-  // Detect empty (or broken) loop:
-#ifdef ASSERT
-  assert(!negative_distance_in_slots.is_constant() || negative_distance_in_slots.as_constant() < 0, "");
-  if (VerifyMethodHandles) {
-    // Verify that &bottom < &top (non-empty interval)
-    Label L_ok, L_bad;
-    if (negative_distance_in_slots.is_register()) {
-      __ cmp(negative_distance_in_slots.as_register(), (int32_t) 0);
-      __ br(Assembler::greaterEqual, false, Assembler::pn, L_bad);
-      __ delayed()->nop();
-    }
-    __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
-    __ BIND(L_bad);
-    __ stop("valid bounds (copy down)");
-    __ BIND(L_ok);
-  }
-#endif
-  __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::greaterEqualUnsigned, Assembler::pn, L_break);
-  // work bottom up to top, copying contiguous data downwards
-  // In pseudo-code:
-  //   while (bottom < top) *(bottom - distance) = *(bottom + 0), bottom++;
-  RegisterOrConstant offset = __ argument_offset(negative_distance_in_slots, negative_distance_in_slots.register_or_noreg());
-  __ BIND(L_loop);
-  __ ld_ptr(           Address(bottom_reg, 0     ), temp2_reg);
-  __ st_ptr(temp2_reg, Address(bottom_reg, offset)           );
-  __ add(bottom_reg, wordSize, bottom_reg);
-  __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_loop);
-  assert(Interpreter::stackElementSize == wordSize, "else change loop");
-  __ BIND(L_break);
-  BLOCK_COMMENT("} move_arg_slots_down");
-}
-
-// Copy from a field or array element to a stacked argument slot.
-// is_element (ignored) says whether caller is loading an array element instead of an instance field.
-void MethodHandles::move_typed_arg(MacroAssembler* _masm,
-                                   BasicType type, bool is_element,
-                                   Address value_src, Address slot_dest,
-                                   Register temp_reg) {
-  assert(!slot_dest.uses(temp_reg), "must be different register");
-  BLOCK_COMMENT(!is_element ? "move_typed_arg {" : "move_typed_arg { (array element)");
-  if (type == T_OBJECT || type == T_ARRAY) {
-    __ load_heap_oop(value_src, temp_reg);
-    __ verify_oop(temp_reg);
-    __ st_ptr(temp_reg, slot_dest);
-  } else if (type != T_VOID) {
-    int  arg_size      = type2aelembytes(type);
-    bool arg_is_signed = is_signed_subword_type(type);
-    int  slot_size     = is_subword_type(type) ? type2aelembytes(T_INT) : arg_size;  // store int sub-words as int
-    __ load_sized_value( value_src, temp_reg, arg_size, arg_is_signed);
-    __ store_sized_value(temp_reg, slot_dest, slot_size              );
-  }
-  BLOCK_COMMENT("} move_typed_arg");
-}
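+// Worked example (editorial note): a T_SHORT value occupies 2 bytes at
+// value_src but a full 4-byte int slot at slot_dest, so it is loaded
+// sign-extended and stored with slot_size = 4; T_CHAR widens the same way
+// but zero-extended, since it is not a signed subword type.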
-
-// Cf. TemplateInterpreterGenerator::generate_return_entry_for and
-// InterpreterMacroAssembler::save_return_value
-void MethodHandles::move_return_value(MacroAssembler* _masm, BasicType type,
-                                      Address return_slot) {
-  BLOCK_COMMENT("move_return_value {");
-  // Look at the type and pull the value out of the corresponding register.
-  if (type == T_VOID) {
-    // nothing to do
-  } else if (type == T_OBJECT) {
-    __ verify_oop(O0);
-    __ st_ptr(O0, return_slot);
-  } else if (type == T_INT || is_subword_type(type)) {
-    int type_size = type2aelembytes(T_INT);
-    __ store_sized_value(O0, return_slot, type_size);
-  } else if (type == T_LONG) {
-    // store the value by parts
-    // Note: We assume longs are contiguous (if misaligned) on the interpreter stack.
-#if !defined(_LP64) && defined(COMPILER2)
-    __ stx(G1, return_slot);
-#else
-  #ifdef _LP64
-    __ stx(O0, return_slot);
-  #else
-    if (return_slot.has_disp()) {
-      // The displacement is a constant
-      __ st(O0, return_slot);
-      __ st(O1, return_slot.plus_disp(Interpreter::stackElementSize));
-    } else {
-      __ std(O0, return_slot);
-    }
-  #endif
-#endif
-  } else if (type == T_FLOAT) {
-    __ stf(FloatRegisterImpl::S, Ftos_f, return_slot);
-  } else if (type == T_DOUBLE) {
-    __ stf(FloatRegisterImpl::D, Ftos_f, return_slot);
-  } else {
-    ShouldNotReachHere();
-  }
-  BLOCK_COMMENT("} move_return_value");
-}
-
 #ifndef PRODUCT
-void MethodHandles::RicochetFrame::describe(const frame* fr, FrameValues& values, int frame_no)  {
-    RicochetFrame* rf = new RicochetFrame(*fr);
-
-    // ricochet slots (kept in registers for sparc)
-    values.describe(frame_no, rf->register_addr(I5_savedSP), err_msg("exact_sender_sp reg for #%d", frame_no));
-    values.describe(frame_no, rf->register_addr(L5_conversion), err_msg("conversion reg for #%d", frame_no));
-    values.describe(frame_no, rf->register_addr(L4_saved_args_base), err_msg("saved_args_base reg for #%d", frame_no));
-    values.describe(frame_no, rf->register_addr(L3_saved_args_layout), err_msg("saved_args_layout reg for #%d", frame_no));
-    values.describe(frame_no, rf->register_addr(L2_saved_target), err_msg("saved_target reg for #%d", frame_no));
-    values.describe(frame_no, rf->register_addr(L1_continuation), err_msg("continuation reg for #%d", frame_no));
-
-    // relevant ricochet targets (in caller frame)
-    values.describe(-1, rf->saved_args_base(),  err_msg("*saved_args_base for #%d", frame_no));
-    values.describe(-1, (intptr_t *)(STACK_BIAS+(uintptr_t)rf->exact_sender_sp()),  err_msg("*exact_sender_sp+STACK_BIAS for #%d", frame_no));
-}
-#endif // PRODUCT
-
-#ifndef PRODUCT
-extern "C" void print_method_handle(oop mh);
 void trace_method_handle_stub(const char* adaptername,
                               oopDesc* mh,
                               intptr_t* saved_sp,
                               intptr_t* args,
                               intptr_t* tracing_fp) {
-  bool has_mh = (strstr(adaptername, "return/") == NULL);  // return adapters don't have mh
-
-  tty->print_cr("MH %s mh="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT " args=" INTPTR_FORMAT, adaptername, (intptr_t) mh, saved_sp, args);
+  bool has_mh = (strstr(adaptername, "/static") == NULL &&
+                 strstr(adaptername, "linkTo") == NULL);    // static linkers don't have MH
+  const char* mh_reg_name = has_mh ? "G3_mh" : "G3";
+  tty->print_cr("MH %s %s="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT " args=" INTPTR_FORMAT,
+                adaptername, mh_reg_name,
+                (intptr_t) mh, saved_sp, args);
 
   if (Verbose) {
     // dumping last frame with frame::describe
@@ -1090,6 +561,7 @@
 
     // mark saved_sp, if seems valid (may not be valid for some adapters)
     intptr_t *unbiased_sp = (intptr_t *)(STACK_BIAS+(uintptr_t)saved_sp);
+    const int ARG_LIMIT = 255, SLOP = 45, UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP);
     if ((unbiased_sp >= dump_sp - UNREASONABLE_STACK_MOVE) && (unbiased_sp < dump_fp)) {
       values.describe(-1, unbiased_sp, "*saved_sp+STACK_BIAS");
     }
@@ -1097,10 +569,13 @@
     // Note: the unextended_sp may not be correct
     tty->print_cr("  stack layout:");
     values.print(p);
-  }
-
-  if (has_mh) {
-    print_method_handle(mh);
+    if (has_mh && mh->is_oop()) {
+      mh->print();
+      if (java_lang_invoke_MethodHandle::is_instance(mh)) {
+        if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0)
+          java_lang_invoke_MethodHandle::form(mh)->print();
+      }
+    }
   }
 }
 
@@ -1143,1260 +618,3 @@
   BLOCK_COMMENT("} trace_method_handle");
 }
 #endif // PRODUCT
-
-// which conversion op types are implemented here?
-int MethodHandles::adapter_conversion_ops_supported_mask() {
-  return ((1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_ONLY)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_RAW)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_CHECK_CAST)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_PRIM)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_REF_TO_PRIM)
-          // OP_PRIM_TO_REF is below...
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SWAP_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_ROT_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DUP_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS)
-          // OP_COLLECT_ARGS is below...
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS)
-         |(
-           java_lang_invoke_MethodTypeForm::vmlayout_offset_in_bytes() <= 0 ? 0 :
-           ((1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF)
-           |(1<<java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS)
-           |(1<<java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS)
-           )
-          )
-         );
-}
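+// Usage sketch (hedged): the shared MethodHandle runtime treats a conversion
+// op OP as implemented on this platform iff
+//   (MethodHandles::adapter_conversion_ops_supported_mask() & (1 << OP)) != 0,
+// so OP_PRIM_TO_REF, OP_COLLECT_ARGS and OP_FOLD_ARGS are advertised only
+// when the MethodTypeForm.vmlayout field is present.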
-
-//------------------------------------------------------------------------------
-// MethodHandles::generate_method_handle_stub
-//
-// Generate an "entry" field for a method handle.
-// This determines how the method handle will respond to calls.
-void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHandles::EntryKind ek) {
-  MethodHandles::EntryKind ek_orig = ek_original_kind(ek);
-
-  // Here is the register state during an interpreted call,
-  // as set up by generate_method_handle_interpreter_entry():
-  // - G5: garbage temp (was MethodHandle.invoke methodOop, unused)
-  // - G3: receiver method handle
-  // - O5_savedSP: sender SP (must preserve)
-
-  const Register O0_scratch = O0;
-  const Register O1_scratch = O1;
-  const Register O2_scratch = O2;
-  const Register O3_scratch = O3;
-  const Register O4_scratch = O4;
-  const Register G5_scratch = G5;
-
-  // Often used names:
-  const Register O0_argslot = O0;
-
-  // Argument registers for _raise_exception:
-  const Register O0_code     = O0;
-  const Register O1_actual   = O1;
-  const Register O2_required = O2;
-
-  guarantee(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes() != 0, "must have offsets");
-
-  // Some handy addresses:
-  Address G3_mh_vmtarget(   G3_method_handle, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes());
-
-  Address G3_dmh_vmindex(   G3_method_handle, java_lang_invoke_DirectMethodHandle::vmindex_offset_in_bytes());
-
-  Address G3_bmh_vmargslot( G3_method_handle, java_lang_invoke_BoundMethodHandle::vmargslot_offset_in_bytes());
-  Address G3_bmh_argument(  G3_method_handle, java_lang_invoke_BoundMethodHandle::argument_offset_in_bytes());
-
-  Address G3_amh_vmargslot( G3_method_handle, java_lang_invoke_AdapterMethodHandle::vmargslot_offset_in_bytes());
-  Address G3_amh_argument ( G3_method_handle, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes());
-  Address G3_amh_conversion(G3_method_handle, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes());
-
-  const int java_mirror_offset = in_bytes(Klass::java_mirror_offset());
-
-  if (have_entry(ek)) {
-    __ nop();  // empty stubs make SG sick
-    return;
-  }
-
-  address interp_entry = __ pc();
-
-  trace_method_handle(_masm, entry_name(ek));
-
-  BLOCK_COMMENT(err_msg("Entry %s {", entry_name(ek)));
-
-  switch ((int) ek) {
-  case _raise_exception:
-    {
-      // Not a real MH entry, but rather shared code for raising an
-      // exception.  For sharing purposes the arguments are passed into registers
-      // and then placed here according to the interpreter calling convention.
-      assert(raise_exception_method(), "must be set");
-      assert(raise_exception_method()->from_compiled_entry(), "method must be linked");
-
-      __ set(AddressLiteral((address) &_raise_exception_method), G5_method);
-      __ ld_ptr(Address(G5_method, 0), G5_method);
-
-      const int jobject_oop_offset = 0;
-      __ ld_ptr(Address(G5_method, jobject_oop_offset), G5_method);
-
-      adjust_SP_and_Gargs_down_by_slots(_masm, 3, noreg, noreg);
-
-      __ st    (O0_code,     __ argument_address(constant(2), noreg, 0));
-      __ st_ptr(O1_actual,   __ argument_address(constant(1), noreg, 0));
-      __ st_ptr(O2_required, __ argument_address(constant(0), noreg, 0));
-      jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
-    }
-    break;
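+    // Layout note (editorial): the three stores above spill the register
+    // arguments into the freshly opened interpreter slots, with slot 2 = error
+    // code, slot 1 = actual value, and slot 0 = required class, before jumping
+    // to the shared raise-exception method.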
-
-  case _invokestatic_mh:
-  case _invokespecial_mh:
-    {
-      __ load_heap_oop(G3_mh_vmtarget, G5_method);  // target is a methodOop
-      // Same as TemplateTable::invokestatic or invokespecial,
-      // minus the CP setup and profiling:
-      if (ek == _invokespecial_mh) {
-        // Must load & check the first argument before entering the target method.
-        __ load_method_handle_vmslots(O0_argslot, G3_method_handle, O1_scratch);
-        __ ld_ptr(__ argument_address(O0_argslot, O0_argslot, -1), G3_method_handle);
-        __ null_check(G3_method_handle);
-        __ verify_oop(G3_method_handle);
-      }
-      jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
-    }
-    break;
-
-  case _invokevirtual_mh:
-    {
-      // Same as TemplateTable::invokevirtual,
-      // minus the CP setup and profiling:
-
-      // Pick out the vtable index and receiver offset from the MH,
-      // and then we can discard it:
-      Register O2_index = O2_scratch;
-      __ load_method_handle_vmslots(O0_argslot, G3_method_handle, O1_scratch);
-      __ ldsw(G3_dmh_vmindex, O2_index);
-      // Note:  The verifier allows us to ignore G3_mh_vmtarget.
-      __ ld_ptr(__ argument_address(O0_argslot, O0_argslot, -1), G3_method_handle);
-      __ null_check(G3_method_handle, oopDesc::klass_offset_in_bytes());
-
-      // Get receiver klass:
-      Register O0_klass = O0_argslot;
-      __ load_klass(G3_method_handle, O0_klass);
-      __ verify_oop(O0_klass);
-
-      // Get target methodOop & entry point:
-      const int base = instanceKlass::vtable_start_offset() * wordSize;
-      assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
-
-      __ sll_ptr(O2_index, LogBytesPerWord, O2_index);
-      __ add(O0_klass, O2_index, O0_klass);
-      Address vtable_entry_addr(O0_klass, base + vtableEntry::method_offset_in_bytes());
-      __ ld_ptr(vtable_entry_addr, G5_method);
-
-      jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
-    }
-    break;
-
-  case _invokeinterface_mh:
-    {
-      // Same as TemplateTable::invokeinterface,
-      // minus the CP setup and profiling:
-      __ load_method_handle_vmslots(O0_argslot, G3_method_handle, O1_scratch);
-      Register O1_intf  = O1_scratch;
-      Register G5_index = G5_scratch;
-      __ load_heap_oop(G3_mh_vmtarget, O1_intf);
-      __ ldsw(G3_dmh_vmindex, G5_index);
-      __ ld_ptr(__ argument_address(O0_argslot, O0_argslot, -1), G3_method_handle);
-      __ null_check(G3_method_handle, oopDesc::klass_offset_in_bytes());
-
-      // Get receiver klass:
-      Register O0_klass = O0_argslot;
-      __ load_klass(G3_method_handle, O0_klass);
-      __ verify_oop(O0_klass);
-
-      // Get interface:
-      Label no_such_interface;
-      __ verify_oop(O1_intf);
-      __ lookup_interface_method(O0_klass, O1_intf,
-                                 // Note: next two args must be the same:
-                                 G5_index, G5_method,
-                                 O2_scratch,
-                                 O3_scratch,
-                                 no_such_interface);
-
-      jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
-
-      __ bind(no_such_interface);
-      // Throw an exception.
-      // For historical reasons, it will be IncompatibleClassChangeError.
-      __ unimplemented("not tested yet");
-      __ ld_ptr(Address(O1_intf, java_mirror_offset), O2_required);  // required interface
-      __ mov(   O0_klass,                             O1_actual);    // bad receiver
-      __ jump_to(AddressLiteral(from_interpreted_entry(_raise_exception)), O3_scratch);
-      __ delayed()->mov(Bytecodes::_invokeinterface,  O0_code);      // who is complaining?
-    }
-    break;
-
-  case _bound_ref_mh:
-  case _bound_int_mh:
-  case _bound_long_mh:
-  case _bound_ref_direct_mh:
-  case _bound_int_direct_mh:
-  case _bound_long_direct_mh:
-    {
-      const bool direct_to_method = (ek >= _bound_ref_direct_mh);
-      BasicType arg_type  = ek_bound_mh_arg_type(ek);
-      int       arg_slots = type2size[arg_type];
-
-      // Make room for the new argument:
-      load_vmargslot(_masm, G3_bmh_vmargslot, O0_argslot);
-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
-
-      insert_arg_slots(_masm, arg_slots * stack_move_unit(), O0_argslot, O1_scratch, O2_scratch, O3_scratch);
-
-      // Store bound argument into the new stack slot:
-      __ load_heap_oop(G3_bmh_argument, O1_scratch);
-      if (arg_type == T_OBJECT) {
-        __ st_ptr(O1_scratch, Address(O0_argslot, 0));
-      } else {
-        Address prim_value_addr(O1_scratch, java_lang_boxing_object::value_offset_in_bytes(arg_type));
-        move_typed_arg(_masm, arg_type, false,
-                       prim_value_addr,
-                       Address(O0_argslot, 0),
-                      O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
-      }
-
-      if (direct_to_method) {
-        __ load_heap_oop(G3_mh_vmtarget, G5_method);  // target is a methodOop
-        jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
-      } else {
-        __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);  // target is a methodOop
-        __ verify_oop(G3_method_handle);
-        __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-      }
-    }
-    break;
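+    // Unboxing sketch (editorial; 'box' stands for the heap oop loaded into
+    // O1_scratch above): for a bound int the stored value is, in effect,
+    //   jint v = *(jint*) ((address) box + java_lang_boxing_object::value_offset_in_bytes(T_INT));
+    // written into the newly inserted slot at Address(O0_argslot, 0).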
-
-  case _adapter_opt_profiling:
-    if (java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes() != 0) {
-      Address G3_mh_vmcount(G3_method_handle, java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes());
-      __ ld(G3_mh_vmcount, O1_scratch);
-      __ add(O1_scratch, 1, O1_scratch);
-      __ st(O1_scratch, G3_mh_vmcount);
-    }
-    // fall through
-
-  case _adapter_retype_only:
-  case _adapter_retype_raw:
-    // Immediately jump to the next MH layer:
-    __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-    __ verify_oop(G3_method_handle);
-    __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    // This is OK when all parameter types widen.
-    // It is also OK when a return type narrows.
-    break;
-
-  case _adapter_check_cast:
-    {
-      // Check a reference argument before jumping to the next layer of MH:
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-      Address vmarg = __ argument_address(O0_argslot, O0_argslot);
-
-      // What class are we casting to?
-      Register O1_klass = O1_scratch;  // Interesting AMH data.
-      __ load_heap_oop(G3_amh_argument, O1_klass);  // This is a Class object!
-      load_klass_from_Class(_masm, O1_klass, O2_scratch, O3_scratch);
-
-      Label L_done;
-      __ ld_ptr(vmarg, O2_scratch);
-      __ br_null_short(O2_scratch, Assembler::pn, L_done);  // No cast if null.
-      __ load_klass(O2_scratch, O2_scratch);
-
-      // Live at this point:
-      // - O0_argslot      :  argslot index in vmarg; may be required in the failing path
-      // - O1_klass        :  klass required by the target method
-      // - O2_scratch      :  argument klass to test
-      // - G3_method_handle:  adapter method handle
-      __ check_klass_subtype(O2_scratch, O1_klass, O3_scratch, O4_scratch, L_done);
-
-      // If we get here, the type check failed!
-      __ load_heap_oop(G3_amh_argument,        O2_required);  // required class
-      __ ld_ptr(       vmarg,                  O1_actual);    // bad object
-      __ jump_to(AddressLiteral(from_interpreted_entry(_raise_exception)), O3_scratch);
-      __ delayed()->mov(Bytecodes::_checkcast, O0_code);      // who is complaining?
-
-      __ BIND(L_done);
-      // Get the new MH:
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
-
-  case _adapter_prim_to_prim:
-  case _adapter_ref_to_prim:
-    // Handled completely by optimized cases.
-    __ stop("init_AdapterMethodHandle should not issue this");
-    break;
-
-  case _adapter_opt_i2i:        // optimized subcase of adapt_prim_to_prim
-//case _adapter_opt_f2i:        // optimized subcase of adapt_prim_to_prim
-  case _adapter_opt_l2i:        // optimized subcase of adapt_prim_to_prim
-  case _adapter_opt_unboxi:     // optimized subcase of adapt_ref_to_prim
-    {
-      // Perform an in-place conversion to int or an int subword.
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-      Address value;
-      Address vmarg;
-      bool value_left_justified = false;
-
-      switch (ek) {
-      case _adapter_opt_i2i:
-        value = vmarg = __ argument_address(O0_argslot, O0_argslot);
-        break;
-      case _adapter_opt_l2i:
-        {
-          // just delete the extra slot
-#ifdef _LP64
-          // In V9, longs are given 2 64-bit slots in the interpreter, but the
-          // data is passed in only 1 slot.
-          // Keep the second slot.
-          __ add(__ argument_address(O0_argslot, O0_argslot, -1), O0_argslot);
-          remove_arg_slots(_masm, -stack_move_unit(), O0_argslot, O1_scratch, O2_scratch, O3_scratch);
-          value = Address(O0_argslot, 4);  // Get least-significant 32-bit of 64-bit value.
-          vmarg = Address(O0_argslot, Interpreter::stackElementSize);
-#else
-          // Keep the first slot.
-          __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
-          remove_arg_slots(_masm, -stack_move_unit(), O0_argslot, O1_scratch, O2_scratch, O3_scratch);
-          value = Address(O0_argslot, 0);
-          vmarg = value;
-#endif
-        }
-        break;
-      case _adapter_opt_unboxi:
-        {
-          vmarg = __ argument_address(O0_argslot, O0_argslot);
-          // Load the value up from the heap.
-          __ ld_ptr(vmarg, O1_scratch);
-          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_INT);
-#ifdef ASSERT
-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
-            if (is_subword_type(BasicType(bt)))
-              assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(BasicType(bt)), "");
-          }
-#endif
-          __ null_check(O1_scratch, value_offset);
-          value = Address(O1_scratch, value_offset);
-#ifdef _BIG_ENDIAN
-          // Values stored in objects are packed.
-          value_left_justified = true;
-#endif
-        }
-        break;
-      default:
-        ShouldNotReachHere();
-      }
-
-      // This check is required on _BIG_ENDIAN
-      Register G5_vminfo = G5_scratch;
-      __ ldsw(G3_amh_conversion, G5_vminfo);
-      assert(CONV_VMINFO_SHIFT == 0, "preshifted");
-
-      // Original 32-bit vmdata word must be of this form:
-      // | MBZ:6 | signBitCount:8 | srcDstTypes:8 | conversionOp:8 |
-      __ lduw(value, O1_scratch);
-      if (!value_left_justified)
-        __ sll(O1_scratch, G5_vminfo, O1_scratch);
-      Label zero_extend, done;
-      __ btst(CONV_VMINFO_SIGN_FLAG, G5_vminfo);
-      __ br(Assembler::zero, false, Assembler::pn, zero_extend);
-      __ delayed()->nop();
-
-      // this path is taken for int->byte, int->short
-      __ sra(O1_scratch, G5_vminfo, O1_scratch);
-      __ ba_short(done);
-
-      __ bind(zero_extend);
-      // this is taken for int->char
-      __ srl(O1_scratch, G5_vminfo, O1_scratch);
-
-      __ bind(done);
-      __ st(O1_scratch, vmarg);
-
-      // Get the new MH:
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
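+    // The sll/sra (or srl) pair above is the usual narrow-then-extend trick;
+    // in C terms (illustrative only): int->byte is (x << 24) >> 24 with an
+    // arithmetic right shift, int->char is ((unsigned) x << 16) >> 16, the
+    // shift count in each case coming from the conversion's vminfo field.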
-
-  case _adapter_opt_i2l:        // optimized subcase of adapt_prim_to_prim
-  case _adapter_opt_unboxl:     // optimized subcase of adapt_ref_to_prim
-    {
-      // Perform an in-place int-to-long or ref-to-long conversion.
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-
-      // On big-endian machines we duplicate the slot and store the MSW
-      // in the first slot.
-      __ add(__ argument_address(O0_argslot, O0_argslot, 1), O0_argslot);
-
-      insert_arg_slots(_masm, stack_move_unit(), O0_argslot, O1_scratch, O2_scratch, O3_scratch);
-
-      Address arg_lsw(O0_argslot, 0);
-      Address arg_msw(O0_argslot, -Interpreter::stackElementSize);
-
-      switch (ek) {
-      case _adapter_opt_i2l:
-        {
-#ifdef _LP64
-          __ ldsw(arg_lsw, O2_scratch);                 // Load LSW sign-extended
-#else
-          __ ldsw(arg_lsw, O3_scratch);                 // Load LSW sign-extended
-          __ srlx(O3_scratch, BitsPerInt, O2_scratch);  // Move MSW value to lower 32-bits for std
-#endif
-          __ st_long(O2_scratch, arg_msw);              // Uses O2/O3 on !_LP64
-        }
-        break;
-      case _adapter_opt_unboxl:
-        {
-          // Load the value up from the heap.
-          __ ld_ptr(arg_lsw, O1_scratch);
-          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_LONG);
-          assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(T_DOUBLE), "");
-          __ null_check(O1_scratch, value_offset);
-          __ ld_long(Address(O1_scratch, value_offset), O2_scratch);  // Uses O2/O3 on !_LP64
-          __ st_long(O2_scratch, arg_msw);
-        }
-        break;
-      default:
-        ShouldNotReachHere();
-      }
-
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
-
-  case _adapter_opt_f2d:        // optimized subcase of adapt_prim_to_prim
-  case _adapter_opt_d2f:        // optimized subcase of adapt_prim_to_prim
-    {
-      // perform an in-place floating primitive conversion
-      __ unimplemented(entry_name(ek));
-    }
-    break;
-
-  case _adapter_prim_to_ref:
-    __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
-    break;
-
-  case _adapter_swap_args:
-  case _adapter_rot_args:
-    // handled completely by optimized cases
-    __ stop("init_AdapterMethodHandle should not issue this");
-    break;
-
-  case _adapter_opt_swap_1:
-  case _adapter_opt_swap_2:
-  case _adapter_opt_rot_1_up:
-  case _adapter_opt_rot_1_down:
-  case _adapter_opt_rot_2_up:
-  case _adapter_opt_rot_2_down:
-    {
-      int swap_slots = ek_adapter_opt_swap_slots(ek);
-      int rotate     = ek_adapter_opt_swap_mode(ek);
-
-      // 'argslot' is the position of the first argument to swap.
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
-      if (VerifyMethodHandles)
-        verify_argslot(_masm, O0_argslot, O2_scratch, "swap point must fall within current frame");
-
-      // 'vminfo' is the second.
-      Register O1_destslot = O1_scratch;
-      load_conversion_vminfo(_masm, G3_amh_conversion, O1_destslot);
-      __ add(__ argument_address(O1_destslot, O1_destslot), O1_destslot);
-      if (VerifyMethodHandles)
-        verify_argslot(_masm, O1_destslot, O2_scratch, "swap point must fall within current frame");
-
-      assert(Interpreter::stackElementSize == wordSize, "else rethink use of wordSize here");
-      if (!rotate) {
-        // simple swap
-        for (int i = 0; i < swap_slots; i++) {
-          __ ld_ptr(            Address(O0_argslot,  i * wordSize), O2_scratch);
-          __ ld_ptr(            Address(O1_destslot, i * wordSize), O3_scratch);
-          __ st_ptr(O3_scratch, Address(O0_argslot,  i * wordSize));
-          __ st_ptr(O2_scratch, Address(O1_destslot, i * wordSize));
-        }
-      } else {
-        // A rotate is actually a pair of moves, with an "odd slot" (or pair)
-        // changing place with a series of other slots.
-        // First, push the "odd slot", which is going to get overwritten
-        switch (swap_slots) {
-        case 2 :  __ ld_ptr(Address(O0_argslot, 1 * wordSize), O4_scratch); // fall-thru
-        case 1 :  __ ld_ptr(Address(O0_argslot, 0 * wordSize), O3_scratch); break;
-        default:  ShouldNotReachHere();
-        }
-        if (rotate > 0) {
-          // Here is rotate > 0:
-          // (low mem)                                          (high mem)
-          //     | dest:     more_slots...     | arg: odd_slot :arg+1 |
-          // =>
-          //     | dest: odd_slot | dest+1: more_slots...      :arg+1 |
-          // work argslot down to destslot, copying contiguous data upwards
-          // pseudo-code:
-          //   argslot  = src_addr - swap_bytes
-          //   destslot = dest_addr
-          //   while (argslot >= destslot) *(argslot + swap_bytes) = *(argslot + 0), argslot--;
-          move_arg_slots_up(_masm,
-                            O1_destslot,
-                            Address(O0_argslot, 0),
-                            swap_slots,
-                            O0_argslot, O2_scratch);
-        } else {
-          // Here is the other direction, rotate < 0:
-          // (low mem)                                          (high mem)
-          //     | arg: odd_slot | arg+1: more_slots...       :dest+1 |
-          // =>
-          //     | arg:    more_slots...     | dest: odd_slot :dest+1 |
-          // work argslot up to destslot, copying contiguous data downwards
-          // pseudo-code:
-          //   argslot  = src_addr + swap_bytes
-          //   destslot = dest_addr
-          //   while (argslot <= destslot) *(argslot - swap_bytes) = *(argslot + 0), argslot++;
-          // dest_slot denotes an exclusive upper limit
-          int limit_bias = OP_ROT_ARGS_DOWN_LIMIT_BIAS;
-          if (limit_bias != 0)
-            __ add(O1_destslot, - limit_bias * wordSize, O1_destslot);
-          move_arg_slots_down(_masm,
-                              Address(O0_argslot, swap_slots * wordSize),
-                              O1_destslot,
-                              -swap_slots,
-                              O0_argslot, O2_scratch);
-
-          __ sub(O1_destslot, swap_slots * wordSize, O1_destslot);
-        }
-        // pop the original first chunk into the destination slot, now free
-        switch (swap_slots) {
-        case 2 :  __ st_ptr(O4_scratch, Address(O1_destslot, 1 * wordSize)); // fall-thru
-        case 1 :  __ st_ptr(O3_scratch, Address(O1_destslot, 0 * wordSize)); break;
-        default:  ShouldNotReachHere();
-        }
-      }
-
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
-
-  case _adapter_dup_args:
-    {
-      // 'argslot' is the position of the first argument to duplicate.
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
-
-      // 'stack_move' is the negative of the number of words to duplicate.
-      Register O1_stack_move = O1_scratch;
-      load_stack_move(_masm, G3_amh_conversion, O1_stack_move);
-
-      if (VerifyMethodHandles) {
-        verify_argslots(_masm, O1_stack_move, O0_argslot, O2_scratch, O3_scratch, true,
-                        "copied argument(s) must fall within current frame");
-      }
-
-      // insert location is always the bottom of the argument list:
-      __ neg(O1_stack_move);
-      push_arg_slots(_masm, O0_argslot, O1_stack_move, O2_scratch, O3_scratch);
-
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
-
-  case _adapter_drop_args:
-    {
-      // 'argslot' is the position of the first argument to nuke.
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
-
-      // 'stack_move' is the number of words to drop.
-      Register O1_stack_move = O1_scratch;
-      load_stack_move(_masm, G3_amh_conversion, O1_stack_move);
-
-      remove_arg_slots(_masm, O1_stack_move, O0_argslot, O2_scratch, O3_scratch, O4_scratch);
-
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
-
-  case _adapter_collect_args:
-  case _adapter_fold_args:
-  case _adapter_spread_args:
-    // Handled completely by optimized cases.
-    __ stop("init_AdapterMethodHandle should not issue this");
-    break;
-
-  case _adapter_opt_collect_ref:
-  case _adapter_opt_collect_int:
-  case _adapter_opt_collect_long:
-  case _adapter_opt_collect_float:
-  case _adapter_opt_collect_double:
-  case _adapter_opt_collect_void:
-  case _adapter_opt_collect_0_ref:
-  case _adapter_opt_collect_1_ref:
-  case _adapter_opt_collect_2_ref:
-  case _adapter_opt_collect_3_ref:
-  case _adapter_opt_collect_4_ref:
-  case _adapter_opt_collect_5_ref:
-  case _adapter_opt_filter_S0_ref:
-  case _adapter_opt_filter_S1_ref:
-  case _adapter_opt_filter_S2_ref:
-  case _adapter_opt_filter_S3_ref:
-  case _adapter_opt_filter_S4_ref:
-  case _adapter_opt_filter_S5_ref:
-  case _adapter_opt_collect_2_S0_ref:
-  case _adapter_opt_collect_2_S1_ref:
-  case _adapter_opt_collect_2_S2_ref:
-  case _adapter_opt_collect_2_S3_ref:
-  case _adapter_opt_collect_2_S4_ref:
-  case _adapter_opt_collect_2_S5_ref:
-  case _adapter_opt_fold_ref:
-  case _adapter_opt_fold_int:
-  case _adapter_opt_fold_long:
-  case _adapter_opt_fold_float:
-  case _adapter_opt_fold_double:
-  case _adapter_opt_fold_void:
-  case _adapter_opt_fold_1_ref:
-  case _adapter_opt_fold_2_ref:
-  case _adapter_opt_fold_3_ref:
-  case _adapter_opt_fold_4_ref:
-  case _adapter_opt_fold_5_ref:
-    {
-      // Given a fresh incoming stack frame, build a new ricochet frame.
-      // On entry, TOS points at a return PC, and FP is the caller's frame ptr.
-      // O5_savedSP has the caller's exact stack pointer, which we must also preserve.
-      // G3_method_handle contains an AdapterMethodHandle of the indicated kind.
-
-      // Relevant AMH fields:
-      // amh.vmargslot:
-      //   points to the trailing edge of the arguments
-      //   to filter, collect, or fold.  For a boxing operation,
-      //   it points just after the single primitive value.
-      // amh.argument:
-      //   recursively called MH, on |collect| arguments
-      // amh.vmtarget:
-      //   final destination MH, on return value, etc.
-      // amh.conversion.dest:
-      //   tells what is the type of the return value
-      //   (not needed here, since dest is also derived from ek)
-      // amh.conversion.vminfo:
-      //   points to the trailing edge of the return value
-      //   when the vmtarget is to be called; this is
-      //   equal to vmargslot + (retained ? |collect| : 0)
-
-      // Pass 0 or more argument slots to the recursive target.
-      int collect_count_constant = ek_adapter_opt_collect_count(ek);
-
-      // The collected arguments are copied from the saved argument list:
-      int collect_slot_constant = ek_adapter_opt_collect_slot(ek);
-
-      assert(ek_orig == _adapter_collect_args ||
-             ek_orig == _adapter_fold_args, "");
-      bool retain_original_args = (ek_orig == _adapter_fold_args);
-
-      // The return value is replaced (or inserted) at the 'vminfo' argslot.
-      // Sometimes we can compute this statically.
-      int dest_slot_constant = -1;
-      if (!retain_original_args)
-        dest_slot_constant = collect_slot_constant;
-      else if (collect_slot_constant >= 0 && collect_count_constant >= 0)
-        // We are preserving all the arguments, and the return value is prepended,
-        // so the return slot is to the left (above) the |collect| sequence.
-        dest_slot_constant = collect_slot_constant + collect_count_constant;
-
-      // Replace all those slots by the result of the recursive call.
-      // The result type can be one of ref, int, long, float, double, void.
-      // In the case of void, nothing is pushed on the stack after return.
-      BasicType dest = ek_adapter_opt_collect_type(ek);
-      assert(dest == type2wfield[dest], "dest is a stack slot type");
-      int dest_count = type2size[dest];
-      assert(dest_count == 1 || dest_count == 2 || (dest_count == 0 && dest == T_VOID), "dest has a size");
-
-      // Choose a return continuation.
-      EntryKind ek_ret = _adapter_opt_return_any;
-      if (dest != T_CONFLICT && OptimizeMethodHandles) {
-        switch (dest) {
-        case T_INT    : ek_ret = _adapter_opt_return_int;     break;
-        case T_LONG   : ek_ret = _adapter_opt_return_long;    break;
-        case T_FLOAT  : ek_ret = _adapter_opt_return_float;   break;
-        case T_DOUBLE : ek_ret = _adapter_opt_return_double;  break;
-        case T_OBJECT : ek_ret = _adapter_opt_return_ref;     break;
-        case T_VOID   : ek_ret = _adapter_opt_return_void;    break;
-        default       : ShouldNotReachHere();
-        }
-        if (dest == T_OBJECT && dest_slot_constant >= 0) {
-          EntryKind ek_try = EntryKind(_adapter_opt_return_S0_ref + dest_slot_constant);
-          if (ek_try <= _adapter_opt_return_LAST &&
-              ek_adapter_opt_return_slot(ek_try) == dest_slot_constant) {
-            ek_ret = ek_try;
-          }
-        }
-        assert(ek_adapter_opt_return_type(ek_ret) == dest, "");
-      }
-
-      // Already pushed:  ... keep1 | collect | keep2 |
-
-      // Push a few extra argument words, if we need them to store the return value.
-      {
-        int extra_slots = 0;
-        if (retain_original_args) {
-          extra_slots = dest_count;
-        } else if (collect_count_constant == -1) {
-          extra_slots = dest_count;  // collect_count might be zero; be generous
-        } else if (dest_count > collect_count_constant) {
-          extra_slots = (dest_count - collect_count_constant);
-        } else {
-          // else we know we have enough dead space in |collect| to repurpose for return values
-        }
-        if (extra_slots != 0) {
-          __ sub(SP, round_to(extra_slots, 2) * Interpreter::stackElementSize, SP);
-        }
-      }
-
-      // Set up Ricochet Frame.
-      __ mov(SP, O5_savedSP);  // record SP for the callee
-
-      // One extra (empty) slot for outgoing target MH (see Gargs computation below).
-      __ save_frame(2);  // Note: we need to add 2 slots since frame::memory_parameter_word_sp_offset is 23.
-
-      // Note: Gargs is live throughout the following, until we make our recursive call.
-      // And the RF saves a copy in L4_saved_args_base.
-
-      RicochetFrame::enter_ricochet_frame(_masm, G3_method_handle, Gargs,
-                                          entry(ek_ret)->from_interpreted_entry());
-
-      // Compute argument base:
-      // Set up Gargs for current frame, extra (empty) slot is for outgoing target MH (space reserved by save_frame above).
-      __ add(FP, STACK_BIAS - (1 * Interpreter::stackElementSize), Gargs);
-
-      // Now pushed:  ... keep1 | collect | keep2 | extra | [RF]
-
-#ifdef ASSERT
-      if (VerifyMethodHandles && dest != T_CONFLICT) {
-        BLOCK_COMMENT("verify AMH.conv.dest {");
-        extract_conversion_dest_type(_masm, RicochetFrame::L5_conversion, O1_scratch);
-        Label L_dest_ok;
-        __ cmp(O1_scratch, (int) dest);
-        __ br(Assembler::equal, false, Assembler::pt, L_dest_ok);
-        __ delayed()->nop();
-        if (dest == T_INT) {
-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
-            if (is_subword_type(BasicType(bt))) {
-              __ cmp(O1_scratch, (int) bt);
-              __ br(Assembler::equal, false, Assembler::pt, L_dest_ok);
-              __ delayed()->nop();
-            }
-          }
-        }
-        __ stop("bad dest in AMH.conv");
-        __ BIND(L_dest_ok);
-        BLOCK_COMMENT("} verify AMH.conv.dest");
-      }
-#endif //ASSERT
-
-      // Find out where the original copy of the recursive argument sequence begins.
-      Register O0_coll = O0_scratch;
-      {
-        RegisterOrConstant collect_slot = collect_slot_constant;
-        if (collect_slot_constant == -1) {
-          load_vmargslot(_masm, G3_amh_vmargslot, O1_scratch);
-          collect_slot = O1_scratch;
-        }
-        // collect_slot might be 0, but we need the move anyway.
-        __ add(RicochetFrame::L4_saved_args_base, __ argument_offset(collect_slot, collect_slot.register_or_noreg()), O0_coll);
-        // O0_coll now points at the trailing edge of |collect| and leading edge of |keep2|
-      }
-
-      // Replace the old AMH with the recursive MH.  (No going back now.)
-      // In the case of a boxing call, the recursive call is to a 'boxer' method,
-      // such as Integer.valueOf or Long.valueOf.  In the case of a filter
-      // or collect call, it will take one or more arguments, transform them,
-      // and return some result, to store back into argument_base[vminfo].
-      __ load_heap_oop(G3_amh_argument, G3_method_handle);
-      if (VerifyMethodHandles)  verify_method_handle(_masm, G3_method_handle, O1_scratch, O2_scratch);
-
-      // Calculate |collect|, the number of arguments we are collecting.
-      Register O1_collect_count = O1_scratch;
-      RegisterOrConstant collect_count;
-      if (collect_count_constant < 0) {
-        __ load_method_handle_vmslots(O1_collect_count, G3_method_handle, O2_scratch);
-        collect_count = O1_collect_count;
-      } else {
-        collect_count = collect_count_constant;
-#ifdef ASSERT
-        if (VerifyMethodHandles) {
-          BLOCK_COMMENT("verify collect_count_constant {");
-          __ load_method_handle_vmslots(O3_scratch, G3_method_handle, O2_scratch);
-          Label L_count_ok;
-          __ cmp_and_br_short(O3_scratch, collect_count_constant, Assembler::equal, Assembler::pt, L_count_ok);
-          __ stop("bad vminfo in AMH.conv");
-          __ BIND(L_count_ok);
-          BLOCK_COMMENT("} verify collect_count_constant");
-        }
-#endif //ASSERT
-      }
-
-      // copy |collect| slots directly to TOS:
-      push_arg_slots(_masm, O0_coll, collect_count, O2_scratch, O3_scratch);
-      // Now pushed:  ... keep1 | collect | keep2 | RF... | collect |
-      // O0_coll still points at the trailing edge of |collect| and leading edge of |keep2|
-
-      // If necessary, adjust the saved arguments to make room for the eventual return value.
-      // Normal adjustment:  ... keep1 | +dest+ | -collect- | keep2 | RF... | collect |
-      // If retaining args:  ... keep1 | +dest+ |  collect  | keep2 | RF... | collect |
-      // In the non-retaining case, this might move keep2 either up or down.
-      // We don't have to copy the whole | RF... collect | complex,
-      // but we must adjust RF.saved_args_base.
-      // Also, from now on, we will forget about the original copy of |collect|.
-      // If we are retaining it, we will treat it as part of |keep2|.
-      // For clarity we will define |keep3| = |collect|keep2| or |keep2|.
-
-      BLOCK_COMMENT("adjust trailing arguments {");
-      // Compare the sizes of |+dest+| and |-collect-|, which are opposed opening and closing movements.
-      int                open_count  = dest_count;
-      RegisterOrConstant close_count = collect_count_constant;
-      Register O1_close_count = O1_collect_count;
-      if (retain_original_args) {
-        close_count = constant(0);
-      } else if (collect_count_constant == -1) {
-        close_count = O1_collect_count;
-      }
-
-      // How many slots need moving?  This is simply dest_slot (0 => no |keep3|).
-      RegisterOrConstant keep3_count;
-      Register O2_keep3_count = O2_scratch;
-      if (dest_slot_constant < 0) {
-        extract_conversion_vminfo(_masm, RicochetFrame::L5_conversion, O2_keep3_count);
-        keep3_count = O2_keep3_count;
-      } else  {
-        keep3_count = dest_slot_constant;
-#ifdef ASSERT
-        if (VerifyMethodHandles && dest_slot_constant < 0) {
-          BLOCK_COMMENT("verify dest_slot_constant {");
-          extract_conversion_vminfo(_masm, RicochetFrame::L5_conversion, O3_scratch);
-          Label L_vminfo_ok;
-          __ cmp_and_br_short(O3_scratch, dest_slot_constant, Assembler::equal, Assembler::pt, L_vminfo_ok);
-          __ stop("bad vminfo in AMH.conv");
-          __ BIND(L_vminfo_ok);
-          BLOCK_COMMENT("} verify dest_slot_constant");
-        }
-#endif //ASSERT
-      }
-
-      // tasks remaining:
-      bool move_keep3 = (!keep3_count.is_constant() || keep3_count.as_constant() != 0);
-      bool stomp_dest = (NOT_DEBUG(dest == T_OBJECT) DEBUG_ONLY(dest_count != 0));
-      bool fix_arg_base = (!close_count.is_constant() || open_count != close_count.as_constant());
-
-      // Old and new argument locations (based at slot 0).
-      // Net shift (&new_argv - &old_argv) is (close_count - open_count).
-      bool zero_open_count = (open_count == 0);  // remember this bit of info
-      if (move_keep3 && fix_arg_base) {
-        // It will be easier to have everything in one register:
-        if (close_count.is_register()) {
-          // Deduct open_count from close_count register to get a clean +/- value.
-          __ sub(close_count.as_register(), open_count, close_count.as_register());
-        } else {
-          close_count = close_count.as_constant() - open_count;
-        }
-        open_count = 0;
-      }
-      Register L4_old_argv = RicochetFrame::L4_saved_args_base;
-      Register O3_new_argv = O3_scratch;
-      if (fix_arg_base) {
-        __ add(L4_old_argv, __ argument_offset(close_count, O4_scratch), O3_new_argv,
-               -(open_count * Interpreter::stackElementSize));
-      }
-
-      // First decide if any actual data are to be moved.
-      // We can skip if (a) |keep3| is empty, or (b) the argument list size didn't change.
-      // (As it happens, all movements involve an argument list size change.)
-
-      // If there are variable parameters, use dynamic checks to skip around the whole mess.
-      Label L_done;
-      if (keep3_count.is_register()) {
-        __ cmp_and_br_short(keep3_count.as_register(), 0, Assembler::equal, Assembler::pn, L_done);
-      }
-      if (close_count.is_register()) {
-        __ cmp_and_br_short(close_count.as_register(), open_count, Assembler::equal, Assembler::pn, L_done);
-      }
-
-      if (move_keep3 && fix_arg_base) {
-        bool emit_move_down = false, emit_move_up = false, emit_guard = false;
-        if (!close_count.is_constant()) {
-          emit_move_down = emit_guard = !zero_open_count;
-          emit_move_up   = true;
-        } else if (open_count != close_count.as_constant()) {
-          emit_move_down = (open_count > close_count.as_constant());
-          emit_move_up   = !emit_move_down;
-        }
-        Label L_move_up;
-        if (emit_guard) {
-          __ cmp(close_count.as_register(), open_count);
-          __ br(Assembler::greater, false, Assembler::pn, L_move_up);
-          __ delayed()->nop();
-        }
-
-        if (emit_move_down) {
-          // Move arguments down if |+dest+| > |-collect-|
-          // (This is rare, except when arguments are retained.)
-          // This opens space for the return value.
-          if (keep3_count.is_constant()) {
-            for (int i = 0; i < keep3_count.as_constant(); i++) {
-              __ ld_ptr(            Address(L4_old_argv, i * Interpreter::stackElementSize), O4_scratch);
-              __ st_ptr(O4_scratch, Address(O3_new_argv, i * Interpreter::stackElementSize)            );
-            }
-          } else {
-            // Live: O1_close_count, O2_keep3_count, O3_new_argv
-            Register argv_top = O0_scratch;
-            __ add(L4_old_argv, __ argument_offset(keep3_count, O4_scratch), argv_top);
-            move_arg_slots_down(_masm,
-                                Address(L4_old_argv, 0),  // beginning of old argv
-                                argv_top,                 // end of old argv
-                                close_count,              // distance to move down (must be negative)
-                                O4_scratch, G5_scratch);
-          }
-        }
-
-        if (emit_guard) {
-          __ ba_short(L_done);  // assumes emit_move_up is true also
-          __ BIND(L_move_up);
-        }
-
-        if (emit_move_up) {
-          // Move arguments up if |+dest+| < |-collect-|
-          // (This is usual, except when |keep3| is empty.)
-          // This closes up the space occupied by the now-deleted collect values.
-          if (keep3_count.is_constant()) {
-            for (int i = keep3_count.as_constant() - 1; i >= 0; i--) {
-              __ ld_ptr(            Address(L4_old_argv, i * Interpreter::stackElementSize), O4_scratch);
-              __ st_ptr(O4_scratch, Address(O3_new_argv, i * Interpreter::stackElementSize)            );
-            }
-          } else {
-            Address argv_top(L4_old_argv, __ argument_offset(keep3_count, O4_scratch));
-            // Live: O1_close_count, O2_keep3_count, O3_new_argv
-            move_arg_slots_up(_masm,
-                              L4_old_argv,  // beginning of old argv
-                              argv_top,     // end of old argv
-                              close_count,  // distance to move up (must be positive)
-                              O4_scratch, G5_scratch);
-          }
-        }
-      }
-      __ BIND(L_done);
-
-      if (fix_arg_base) {
-        // adjust RF.saved_args_base
-        __ mov(O3_new_argv, RicochetFrame::L4_saved_args_base);
-      }
-
-      if (stomp_dest) {
-        // Stomp the return slot, so it doesn't hold garbage.
-        // This isn't strictly necessary, but it may help detect bugs.
-        __ set(RicochetFrame::RETURN_VALUE_PLACEHOLDER, O4_scratch);
-        __ st_ptr(O4_scratch, Address(RicochetFrame::L4_saved_args_base,
-                                      __ argument_offset(keep3_count, keep3_count.register_or_noreg())));  // uses O2_keep3_count
-      }
-      BLOCK_COMMENT("} adjust trailing arguments");
-
-      BLOCK_COMMENT("do_recursive_call");
-      __ mov(SP, O5_savedSP);  // record SP for the callee
-      __ set(ExternalAddress(SharedRuntime::ricochet_blob()->bounce_addr() - frame::pc_return_offset), O7);
-      // The globally unique bounce address has two purposes:
-      // 1. It helps the JVM recognize this frame (frame::is_ricochet_frame).
-      // 2. When returned to, it cuts back the stack and redirects control flow
-      //    to the return handler.
-      // The return handler will further cut back the stack when it takes
-      // down the RF.  Perhaps there is a way to streamline this further.
-
-      // State during recursive call:
-      // ... keep1 | dest | dest=42 | keep3 | RF... | collect | bounce_pc |
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
-
-  case _adapter_opt_return_ref:
-  case _adapter_opt_return_int:
-  case _adapter_opt_return_long:
-  case _adapter_opt_return_float:
-  case _adapter_opt_return_double:
-  case _adapter_opt_return_void:
-  case _adapter_opt_return_S0_ref:
-  case _adapter_opt_return_S1_ref:
-  case _adapter_opt_return_S2_ref:
-  case _adapter_opt_return_S3_ref:
-  case _adapter_opt_return_S4_ref:
-  case _adapter_opt_return_S5_ref:
-    {
-      BasicType dest_type_constant = ek_adapter_opt_return_type(ek);
-      int       dest_slot_constant = ek_adapter_opt_return_slot(ek);
-
-      if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
-
-      if (dest_slot_constant == -1) {
-        // The current stub is a general handler for this dest_type.
-        // It can be called from _adapter_opt_return_any below.
-        // Stash the address in a little table.
-        assert((dest_type_constant & CONV_TYPE_MASK) == dest_type_constant, "oob");
-        address return_handler = __ pc();
-        _adapter_return_handlers[dest_type_constant] = return_handler;
-        if (dest_type_constant == T_INT) {
-          // do the subword types too
-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
-            if (is_subword_type(BasicType(bt)) &&
-                _adapter_return_handlers[bt] == NULL) {
-              _adapter_return_handlers[bt] = return_handler;
-            }
-          }
-        }
-      }
-
-      // On entry to this continuation handler, make Gargs live again.
-      __ mov(RicochetFrame::L4_saved_args_base, Gargs);
-
-      Register O7_temp   = O7;
-      Register O5_vminfo = O5;
-
-      RegisterOrConstant dest_slot = dest_slot_constant;
-      if (dest_slot_constant == -1) {
-        extract_conversion_vminfo(_masm, RicochetFrame::L5_conversion, O5_vminfo);
-        dest_slot = O5_vminfo;
-      }
-      // Store the result back into the argslot.
-      // This code uses the interpreter calling sequence, in which the return value
-      // is usually left in the TOS register, as defined by InterpreterMacroAssembler::pop.
-      // There are certain irregularities with floating point values, which can be seen
-      // in TemplateInterpreterGenerator::generate_return_entry_for.
-      move_return_value(_masm, dest_type_constant, __ argument_address(dest_slot, O7_temp));
-
-      RicochetFrame::leave_ricochet_frame(_masm, G3_method_handle, I5_savedSP, I7);
-
-      // Load the final target and go.
-      if (VerifyMethodHandles)  verify_method_handle(_masm, G3_method_handle, O0_scratch, O1_scratch);
-      __ restore(I5_savedSP, G0, SP);
-      __ jump_to_method_handle_entry(G3_method_handle, O0_scratch);
-      __ illtrap(0);
-    }
-    break;
-
-  case _adapter_opt_return_any:
-    {
-      Register O7_temp      = O7;
-      Register O5_dest_type = O5;
-
-      if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
-      extract_conversion_dest_type(_masm, RicochetFrame::L5_conversion, O5_dest_type);
-      __ set(ExternalAddress((address) &_adapter_return_handlers[0]), O7_temp);
-      __ sll_ptr(O5_dest_type, LogBytesPerWord, O5_dest_type);
-      __ ld_ptr(O7_temp, O5_dest_type, O7_temp);
-
-#ifdef ASSERT
-      { Label L_ok;
-        __ br_notnull_short(O7_temp, Assembler::pt, L_ok);
-        __ stop("bad method handle return");
-        __ BIND(L_ok);
-      }
-#endif //ASSERT
-      __ JMP(O7_temp, 0);
-      __ delayed()->nop();
-    }
-    break;
-
-  case _adapter_opt_spread_0:
-  case _adapter_opt_spread_1_ref:
-  case _adapter_opt_spread_2_ref:
-  case _adapter_opt_spread_3_ref:
-  case _adapter_opt_spread_4_ref:
-  case _adapter_opt_spread_5_ref:
-  case _adapter_opt_spread_ref:
-  case _adapter_opt_spread_byte:
-  case _adapter_opt_spread_char:
-  case _adapter_opt_spread_short:
-  case _adapter_opt_spread_int:
-  case _adapter_opt_spread_long:
-  case _adapter_opt_spread_float:
-  case _adapter_opt_spread_double:
-    {
-      // spread an array out into a group of arguments
-      int  length_constant    = ek_adapter_opt_spread_count(ek);
-      bool length_can_be_zero = (length_constant == 0);
-      if (length_constant < 0) {
-        // some adapters with variable length must handle the zero case
-        if (!OptimizeMethodHandles ||
-            ek_adapter_opt_spread_type(ek) != T_OBJECT)
-          length_can_be_zero = true;
-      }
-
-      // find the address of the array argument
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
-
-      // O0_argslot points both to the array and to the first output arg
-      Address vmarg = Address(O0_argslot, 0);
-
-      // Get the array value.
-      Register  O1_array       = O1_scratch;
-      Register  O2_array_klass = O2_scratch;
-      BasicType elem_type      = ek_adapter_opt_spread_type(ek);
-      int       elem_slots     = type2size[elem_type];  // 1 or 2
-      int       array_slots    = 1;  // array is always a T_OBJECT
-      int       length_offset  = arrayOopDesc::length_offset_in_bytes();
-      int       elem0_offset   = arrayOopDesc::base_offset_in_bytes(elem_type);
-      __ ld_ptr(vmarg, O1_array);
-
-      Label L_array_is_empty, L_insert_arg_space, L_copy_args, L_args_done;
-      if (length_can_be_zero) {
-        // handle the null pointer case, if zero is allowed
-        Label L_skip;
-        if (length_constant < 0) {
-          load_conversion_vminfo(_masm, G3_amh_conversion, O3_scratch);
-          __ cmp_zero_and_br(Assembler::notZero, O3_scratch, L_skip);
-          __ delayed()->nop(); // to avoid back-to-back cbcond instructions
-        }
-        __ br_null_short(O1_array, Assembler::pn, L_array_is_empty);
-        __ BIND(L_skip);
-      }
-      __ null_check(O1_array, oopDesc::klass_offset_in_bytes());
-      __ load_klass(O1_array, O2_array_klass);
-
-      // Check the array type.
-      Register O3_klass = O3_scratch;
-      __ load_heap_oop(G3_amh_argument, O3_klass);  // this is a Class object!
-      load_klass_from_Class(_masm, O3_klass, O4_scratch, G5_scratch);
-
-      Label L_ok_array_klass, L_bad_array_klass, L_bad_array_length;
-      __ check_klass_subtype(O2_array_klass, O3_klass, O4_scratch, G5_scratch, L_ok_array_klass);
-      // If we get here, the type check failed!
-      __ ba_short(L_bad_array_klass);
-      __ BIND(L_ok_array_klass);
-
-      // Check length.
-      if (length_constant >= 0) {
-        __ ldsw(Address(O1_array, length_offset), O4_scratch);
-        __ cmp(O4_scratch, length_constant);
-      } else {
-        Register O3_vminfo = O3_scratch;
-        load_conversion_vminfo(_masm, G3_amh_conversion, O3_vminfo);
-        __ ldsw(Address(O1_array, length_offset), O4_scratch);
-        __ cmp(O3_vminfo, O4_scratch);
-      }
-      __ br(Assembler::notEqual, false, Assembler::pn, L_bad_array_length);
-      __ delayed()->nop();
-
-      Register O2_argslot_limit = O2_scratch;
-
-      // Array length checks out.  Now insert any required stack slots.
-      if (length_constant == -1) {
-        // Form a pointer to the end of the affected region.
-        __ add(O0_argslot, Interpreter::stackElementSize, O2_argslot_limit);
-        // 'stack_move' is negative number of words to insert
-        // This number already accounts for elem_slots.
-        Register O3_stack_move = O3_scratch;
-        load_stack_move(_masm, G3_amh_conversion, O3_stack_move);
-        __ cmp(O3_stack_move, 0);
-        assert(stack_move_unit() < 0, "else change this comparison");
-        __ br(Assembler::less, false, Assembler::pn, L_insert_arg_space);
-        __ delayed()->nop();
-        __ br(Assembler::equal, false, Assembler::pn, L_copy_args);
-        __ delayed()->nop();
-        // single argument case, with no array movement
-        __ BIND(L_array_is_empty);
-        remove_arg_slots(_masm, -stack_move_unit() * array_slots,
-                         O0_argslot, O1_scratch, O2_scratch, O3_scratch);
-        __ ba_short(L_args_done);  // no spreading to do
-        __ BIND(L_insert_arg_space);
-        // come here in the usual case, stack_move < 0 (2 or more spread arguments)
-        // Live: O1_array, O2_argslot_limit, O3_stack_move
-        insert_arg_slots(_masm, O3_stack_move,
-                         O0_argslot, O4_scratch, G5_scratch, O1_scratch);
-        // reload from rdx_argslot_limit since rax_argslot is now decremented
-        __ ld_ptr(Address(O2_argslot_limit, -Interpreter::stackElementSize), O1_array);
-      } else if (length_constant >= 1) {
-        int new_slots = (length_constant * elem_slots) - array_slots;
-        insert_arg_slots(_masm, new_slots * stack_move_unit(),
-                         O0_argslot, O2_scratch, O3_scratch, O4_scratch);
-      } else if (length_constant == 0) {
-        __ BIND(L_array_is_empty);
-        remove_arg_slots(_masm, -stack_move_unit() * array_slots,
-                         O0_argslot, O1_scratch, O2_scratch, O3_scratch);
-      } else {
-        ShouldNotReachHere();
-      }
-
-      // Copy from the array to the new slots.
-      // Note: Stack change code preserves integrity of O0_argslot pointer.
-      // So even after slot insertions, O0_argslot still points to first argument.
-      // Beware:  Arguments that are shallow on the stack are deep in the array,
-      // and vice versa.  So a downward-growing stack (the usual) has to be copied
-      // elementwise in reverse order from the source array.
-      __ BIND(L_copy_args);
-      if (length_constant == -1) {
-        // [O0_argslot, O2_argslot_limit) is the area we are inserting into.
-        // Array element [0] goes at O0_argslot_limit[-wordSize].
-        Register O1_source = O1_array;
-        __ add(Address(O1_array, elem0_offset), O1_source);
-        Register O4_fill_ptr = O4_scratch;
-        __ mov(O2_argslot_limit, O4_fill_ptr);
-        Label L_loop;
-        __ BIND(L_loop);
-        __ add(O4_fill_ptr, -Interpreter::stackElementSize * elem_slots, O4_fill_ptr);
-        move_typed_arg(_masm, elem_type, true,
-                       Address(O1_source, 0), Address(O4_fill_ptr, 0),
-                       O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
-        __ add(O1_source, type2aelembytes(elem_type), O1_source);
-        __ cmp_and_brx_short(O4_fill_ptr, O0_argslot, Assembler::greaterUnsigned, Assembler::pt, L_loop);
-      } else if (length_constant == 0) {
-        // nothing to copy
-      } else {
-        int elem_offset = elem0_offset;
-        int slot_offset = length_constant * Interpreter::stackElementSize;
-        for (int index = 0; index < length_constant; index++) {
-          slot_offset -= Interpreter::stackElementSize * elem_slots;  // fill backward
-          move_typed_arg(_masm, elem_type, true,
-                         Address(O1_array, elem_offset), Address(O0_argslot, slot_offset),
-                         O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
-          elem_offset += type2aelembytes(elem_type);
-        }
-      }
-      __ BIND(L_args_done);
-
-      // Arguments are spread.  Move to next method handle.
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-
-      __ BIND(L_bad_array_klass);
-      assert(!vmarg.uses(O2_required), "must be different registers");
-      __ load_heap_oop(Address(O2_array_klass, java_mirror_offset), O2_required);  // required class
-      __ ld_ptr(       vmarg,                                       O1_actual);    // bad object
-      __ jump_to(AddressLiteral(from_interpreted_entry(_raise_exception)), O3_scratch);
-      __ delayed()->mov(Bytecodes::_aaload,                         O0_code);      // who is complaining?
-
-      __ bind(L_bad_array_length);
-      assert(!vmarg.uses(O2_required), "must be different registers");
-      __ mov(   G3_method_handle,                O2_required);  // required class
-      __ ld_ptr(vmarg,                           O1_actual);    // bad object
-      __ jump_to(AddressLiteral(from_interpreted_entry(_raise_exception)), O3_scratch);
-      __ delayed()->mov(Bytecodes::_arraylength, O0_code);      // who is complaining?
-    }
-    break;
-
-  default:
-    DEBUG_ONLY(tty->print_cr("bad ek=%d (%s)", (int)ek, entry_name(ek)));
-    ShouldNotReachHere();
-  }
-  BLOCK_COMMENT(err_msg("} Entry %s", entry_name(ek)));
-
-  address me_cookie = MethodHandleEntry::start_compiled_entry(_masm, interp_entry);
-  __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
-
-  init_entry(ek, MethodHandleEntry::finish_compiled_entry(_masm, me_cookie));
-}
--- a/src/cpu/sparc/vm/methodHandles_sparc.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/methodHandles_sparc.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -30,186 +30,9 @@
   adapter_code_size = NOT_LP64(23000 DEBUG_ONLY(+ 40000)) LP64_ONLY(35000 DEBUG_ONLY(+ 50000))
 };
 
-public:
-
-class RicochetFrame : public ResourceObj {
-  friend class MethodHandles;
-
- private:
-  /*
-    RF field            x86                 SPARC
-    sender_pc           *(rsp+0)            I7-0x8
-    sender_link         rbp                 I6+BIAS
-    exact_sender_sp     rsi/r13             I5_savedSP
-    conversion          *(rcx+&amh_conv)    L5_conv
-    saved_args_base     rax                 L4_sab (cf. Gargs = G4)
-    saved_args_layout   #NULL               L3_sal
-    saved_target        *(rcx+&mh_vmtgt)    L2_stgt
-    continuation        #STUB_CON           L1_cont
-   */
-  static const Register L1_continuation     ;  // what to do when control gets back here
-  static const Register L2_saved_target     ;  // target method handle to invoke on saved_args
-  static const Register L3_saved_args_layout;  // caching point for MethodTypeForm.vmlayout cookie
-  static const Register L4_saved_args_base  ;  // base of pushed arguments (slot 0, arg N) (-3)
-  static const Register L5_conversion       ;  // misc. information from original AdapterMethodHandle (-2)
-
-  frame _fr;
-
-  RicochetFrame(const frame& fr) : _fr(fr) { }
-
-  intptr_t* register_addr(Register reg) const  {
-    assert((_fr.sp() + reg->sp_offset_in_saved_window()) == _fr.register_addr(reg), "must agree");
-    return _fr.register_addr(reg);
-  }
-  intptr_t  register_value(Register reg) const { return *register_addr(reg); }
-
- public:
-  intptr_t* continuation() const        { return (intptr_t*) register_value(L1_continuation); }
-  oop       saved_target() const        { return (oop)       register_value(L2_saved_target); }
-  oop       saved_args_layout() const   { return (oop)       register_value(L3_saved_args_layout); }
-  intptr_t* saved_args_base() const     { return (intptr_t*) register_value(L4_saved_args_base); }
-  intptr_t  conversion() const          { return             register_value(L5_conversion); }
-  intptr_t* exact_sender_sp() const     { return (intptr_t*) register_value(I5_savedSP); }
-  intptr_t* sender_link() const         { return _fr.sender_sp(); }  // XXX
-  address   sender_pc() const           { return _fr.sender_pc(); }
-
-  // This value is not used for much, but it apparently must be nonzero.
-  static int frame_size_in_bytes()              { return wordSize * 4; }
-
-  intptr_t* extended_sender_sp() const  { return saved_args_base(); }
-
-  intptr_t  return_value_slot_number() const {
-    return adapter_conversion_vminfo(conversion());
-  }
-  BasicType return_value_type() const {
-    return adapter_conversion_dest_type(conversion());
-  }
-  bool has_return_value_slot() const {
-    return return_value_type() != T_VOID;
-  }
-  intptr_t* return_value_slot_addr() const {
-    assert(has_return_value_slot(), "");
-    return saved_arg_slot_addr(return_value_slot_number());
-  }
-  intptr_t* saved_target_slot_addr() const {
-    return saved_arg_slot_addr(saved_args_length());
-  }
-  intptr_t* saved_arg_slot_addr(int slot) const {
-    assert(slot >= 0, "");
-    return (intptr_t*)( (address)saved_args_base() + (slot * Interpreter::stackElementSize) );
-  }
-
-  jint      saved_args_length() const;
-  jint      saved_arg_offset(int arg) const;
-
-  // GC interface
-  oop*  saved_target_addr()                     { return (oop*)register_addr(L2_saved_target); }
-  oop*  saved_args_layout_addr()                { return (oop*)register_addr(L3_saved_args_layout); }
-
-  oop  compute_saved_args_layout(bool read_cache, bool write_cache);
-
-#ifdef ASSERT
-  // The magic number is supposed to help find ricochet frames within the bytes of stack dumps.
-  enum { MAGIC_NUMBER_1 = 0xFEED03E, MAGIC_NUMBER_2 = 0xBEEF03E };
-  static const Register L0_magic_number_1   ;  // cookie for debugging, at start of RSA
-  static Address magic_number_2_addr()  { return Address(L4_saved_args_base, -wordSize); }
-  intptr_t magic_number_1() const       { return register_value(L0_magic_number_1); }
-  intptr_t magic_number_2() const       { return saved_args_base()[-1]; }
-#endif //ASSERT
-
- public:
-  enum { RETURN_VALUE_PLACEHOLDER = (NOT_DEBUG(0) DEBUG_ONLY(42)) };
-
-  void verify() const NOT_DEBUG_RETURN; // check for MAGIC_NUMBER, etc.
-
-  static void generate_ricochet_blob(MacroAssembler* _masm,
-                                     // output params:
-                                     int* bounce_offset,
-                                     int* exception_offset,
-                                     int* frame_size_in_words);
-
-  static void enter_ricochet_frame(MacroAssembler* _masm,
-                                   Register recv_reg,
-                                   Register argv_reg,
-                                   address return_handler);
-
-  static void leave_ricochet_frame(MacroAssembler* _masm,
-                                   Register recv_reg,
-                                   Register new_sp_reg,
-                                   Register sender_pc_reg);
-
-  static RicochetFrame* from_frame(const frame& fr) {
-    RicochetFrame* rf = new RicochetFrame(fr);
-    rf->verify();
-    return rf;
-  }
-
-  static void verify_clean(MacroAssembler* _masm) NOT_DEBUG_RETURN;
-
-  static void describe(const frame* fr, FrameValues& values, int frame_no) PRODUCT_RETURN;
-};
-
 // Additional helper methods for MethodHandles code generation:
 public:
   static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg);
-  static void load_conversion_vminfo(MacroAssembler* _masm, Address conversion_field_addr, Register reg);
-  static void extract_conversion_vminfo(MacroAssembler* _masm, Register conversion_field_reg, Register reg);
-  static void extract_conversion_dest_type(MacroAssembler* _masm, Register conversion_field_reg, Register reg);
-
-  static void load_stack_move(MacroAssembler* _masm,
-                              Address G3_amh_conversion,
-                              Register G5_stack_move);
-
-  static void insert_arg_slots(MacroAssembler* _masm,
-                               RegisterOrConstant arg_slots,
-                               Register argslot_reg,
-                               Register temp_reg, Register temp2_reg, Register temp3_reg);
-
-  static void remove_arg_slots(MacroAssembler* _masm,
-                               RegisterOrConstant arg_slots,
-                               Register argslot_reg,
-                               Register temp_reg, Register temp2_reg, Register temp3_reg);
-
-  static void push_arg_slots(MacroAssembler* _masm,
-                             Register argslot_reg,
-                             RegisterOrConstant slot_count,
-                             Register temp_reg, Register temp2_reg);
-
-  static void move_arg_slots_up(MacroAssembler* _masm,
-                                Register bottom_reg,  // invariant
-                                Address  top_addr,    // can use temp_reg
-                                RegisterOrConstant positive_distance_in_slots,
-                                Register temp_reg, Register temp2_reg);
-
-  static void move_arg_slots_down(MacroAssembler* _masm,
-                                  Address  bottom_addr,  // can use temp_reg
-                                  Register top_reg,      // invariant
-                                  RegisterOrConstant negative_distance_in_slots,
-                                  Register temp_reg, Register temp2_reg);
-
-  static void move_typed_arg(MacroAssembler* _masm,
-                             BasicType type, bool is_element,
-                             Address value_src, Address slot_dest,
-                             Register temp_reg);
-
-  static void move_return_value(MacroAssembler* _masm, BasicType type,
-                                Address return_slot);
-
-  static void verify_argslot(MacroAssembler* _masm, Register argslot_reg,
-                             Register temp_reg,
-                             const char* error_message) NOT_DEBUG_RETURN;
-
-  static void verify_argslots(MacroAssembler* _masm,
-                              RegisterOrConstant argslot_count,
-                              Register argslot_reg,
-                              Register temp_reg,
-                              Register temp2_reg,
-                              bool negate_argslot,
-                              const char* error_message) NOT_DEBUG_RETURN;
-
-  static void verify_stack_move(MacroAssembler* _masm,
-                                RegisterOrConstant arg_slots,
-                                int direction) NOT_DEBUG_RETURN;
 
   static void verify_klass(MacroAssembler* _masm,
                            Register obj_reg, KlassHandle klass,
@@ -223,8 +46,17 @@
                  "reference is a MH");
   }
 
+  static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
+
   // Similar to InterpreterMacroAssembler::jump_from_interpreted.
   // Takes care of special dispatch from single stepping too.
-  static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, Register temp2);
+  static void jump_from_method_handle(MacroAssembler* _masm, Register method,
+                                      Register temp, Register temp2,
+                                      bool for_compiler_entry);
+
+  static void jump_to_lambda_form(MacroAssembler* _masm,
+                                  Register recv, Register method_temp,
+                                  Register temp2, Register temp3,
+                                  bool for_compiler_entry);
 
   static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN;
--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -400,13 +400,13 @@
     case T_LONG:                // LP64, longs compete with int args
       assert(sig_bt[i+1] == T_VOID, "");
 #ifdef _LP64
-      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
+      if (int_reg_cnt < int_reg_max)  int_reg_cnt++;
 #endif
       break;
     case T_OBJECT:
     case T_ARRAY:
     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
-      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
+      if (int_reg_cnt < int_reg_max)  int_reg_cnt++;
 #ifndef _LP64
       else                            stk_reg_pairs++;
 #endif
@@ -416,11 +416,11 @@
     case T_CHAR:
     case T_BYTE:
     case T_BOOLEAN:
-      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
+      if (int_reg_cnt < int_reg_max)  int_reg_cnt++;
       else                            stk_reg_pairs++;
       break;
     case T_FLOAT:
-      if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++;
+      if (flt_reg_cnt < flt_reg_max)  flt_reg_cnt++;
       else                            stk_reg_pairs++;
       break;
     case T_DOUBLE:
@@ -436,7 +436,6 @@
   // This is where the longs/doubles start on the stack.
   stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round
 
-  int int_reg_pairs = (int_reg_cnt+1) & ~1; // 32-bit 2-reg longs only
   int flt_reg_pairs = (flt_reg_cnt+1) & ~1;
 
   // int stk_reg = frame::register_save_words*(wordSize>>2);
@@ -517,24 +516,15 @@
           stk_reg_pairs += 2;
         }
 #else // COMPILER2
-        if (int_reg_pairs + 1 < int_reg_max) {
-          if (is_outgoing) {
-            regs[i].set_pair(as_oRegister(int_reg_pairs + 1)->as_VMReg(), as_oRegister(int_reg_pairs)->as_VMReg());
-          } else {
-            regs[i].set_pair(as_iRegister(int_reg_pairs + 1)->as_VMReg(), as_iRegister(int_reg_pairs)->as_VMReg());
-          }
-          int_reg_pairs += 2;
-        } else {
           regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
           stk_reg_pairs += 2;
-        }
 #endif // COMPILER2
 #endif // _LP64
       break;
 
     case T_FLOAT:
       if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg());
-      else                       regs[i].set1(    VMRegImpl::stack2reg(stk_reg++));
+      else                       regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
       break;
     case T_DOUBLE:
       assert(sig_bt[i+1] == T_VOID, "expecting half");
@@ -886,6 +876,20 @@
   __ delayed()->add(SP, G1, Gargs);
 }
 
+static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, Register temp2_reg,
+                        address code_start, address code_end,
+                        Label& L_ok) {
+  Label L_fail;
+  __ set(ExternalAddress(code_start), temp_reg);
+  __ set(pointer_delta(code_end, code_start, 1), temp2_reg);
+  __ cmp(pc_reg, temp_reg);
+  __ brx(Assembler::lessEqualUnsigned, false, Assembler::pn, L_fail);
+  __ delayed()->add(temp_reg, temp2_reg, temp_reg);
+  __ cmp_and_brx_short(pc_reg, temp_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
+  __ bind(L_fail);
+}
+
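
Editorial aside: the unsigned comparisons emitted by range_check above amount
to the following plain-C++ predicate (a sketch only; return_pc_in_blob is a
hypothetical name, and note that the accepted interval excludes code_start
itself, since a valid return address must lie strictly inside the region):

    #include <cstdint>

    // Mirrors the emitted SPARC sequence: fail when pc <= code_start
    // (brx lessEqualUnsigned), compute code_end in the branch delay slot
    // (delayed()->add), then succeed only when pc < code_end
    // (cmp_and_brx_short lessUnsigned).
    static bool return_pc_in_blob(uintptr_t pc, uintptr_t code_start, uintptr_t size) {
      if (pc <= code_start) return false;       // -> L_fail
      uintptr_t code_end = code_start + size;   // delay-slot add
      return pc < code_end;                     // -> L_ok
    }
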
 void AdapterGenerator::gen_i2c_adapter(
                             int total_args_passed,
                             // VMReg max_arg,
@@ -907,6 +911,51 @@
   // This removes all sorts of headaches on the x86 side and also eliminates
   // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
 
+  // More detail:
+  // Adapters can be frameless because they do not require the caller
+  // to perform additional cleanup work, such as correcting the stack pointer.
+  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
+  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
+  // even if a callee has modified the stack pointer.
+  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
+  // routinely repairs its caller's stack pointer (from sender_sp, which is set
+  // up via the senderSP register).
+  // In other words, if *either* the caller or callee is interpreted, we can
+  // get the stack pointer repaired after a call.
+  // This is why c2i and i2c adapters cannot be indefinitely composed.
+  // In particular, if a c2i adapter were to somehow call an i2c adapter,
+  // both caller and callee would be compiled methods, and neither would
+  // clean up the stack pointer changes performed by the two adapters.
+  // If this happens, control eventually transfers back to the compiled
+  // caller, but with an uncorrected stack, causing delayed havoc.
+
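
To make the rule above concrete, a tiny C++ sketch (editorial illustration
only; Kind and sp_repaired are hypothetical names) of the invariant the
comment describes:

    enum Kind { INTERPRETED, COMPILED };

    // The stack pointer is repaired after a call iff at least one side is
    // interpreted: an interpreted caller restores its own SP from
    // interpreter_frame_last_sp, and an interpreted callee restores its
    // caller's SP from sender_sp.
    static bool sp_repaired(Kind caller, Kind callee) {
      return caller == INTERPRETED || callee == INTERPRETED;
    }

    // Composing c2i -> i2c would make both ends COMPILED at the boundary:
    // sp_repaired(COMPILED, COMPILED) is false, which is exactly the
    // "uncorrected stack" failure mode described above.
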
+  if (VerifyAdapterCalls &&
+      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
+    // So, let's test for cascading c2i/i2c adapters right now.
+    //  assert(Interpreter::contains($return_addr) ||
+    //         StubRoutines::contains($return_addr),
+    //         "i2c adapter must return to an interpreter frame");
+    __ block_comment("verify_i2c { ");
+    Label L_ok;
+    if (Interpreter::code() != NULL)
+      range_check(masm, O7, O0, O1,
+                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
+                  L_ok);
+    if (StubRoutines::code1() != NULL)
+      range_check(masm, O7, O0, O1,
+                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
+                  L_ok);
+    if (StubRoutines::code2() != NULL)
+      range_check(masm, O7, O0, O1,
+                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
+                  L_ok);
+    const char* msg = "i2c adapter must return to an interpreter frame";
+    __ block_comment(msg);
+    __ stop(msg);
+    __ bind(L_ok);
+    __ block_comment("} verify_i2ce ");
+  }
+
   // As you can see from the list of inputs & outputs there are not a lot
   // of temp registers to work with: mostly G1, G3 & G4.
 
@@ -1937,20 +1986,156 @@
   __ bind(done);
 }
 
+static void verify_oop_args(MacroAssembler* masm,
+                            int total_args_passed,
+                            const BasicType* sig_bt,
+                            const VMRegPair* regs) {
+  Register temp_reg = G5_method;  // not part of any compiled calling seq
+  if (VerifyOops) {
+    for (int i = 0; i < total_args_passed; i++) {
+      if (sig_bt[i] == T_OBJECT ||
+          sig_bt[i] == T_ARRAY) {
+        VMReg r = regs[i].first();
+        assert(r->is_valid(), "bad oop arg");
+        if (r->is_stack()) {
+          RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS;
+          ld_off = __ ensure_simm13_or_reg(ld_off, temp_reg);
+          __ ld_ptr(SP, ld_off, temp_reg);
+          __ verify_oop(temp_reg);
+        } else {
+          __ verify_oop(r->as_Register());
+        }
+      }
+    }
+  }
+}
+
+static void gen_special_dispatch(MacroAssembler* masm,
+                                 int total_args_passed,
+                                 int comp_args_on_stack,
+                                 vmIntrinsics::ID special_dispatch,
+                                 const BasicType* sig_bt,
+                                 const VMRegPair* regs) {
+  verify_oop_args(masm, total_args_passed, sig_bt, regs);
+
+  // Now write the args into the outgoing interpreter space
+  bool     has_receiver   = false;
+  Register receiver_reg   = noreg;
+  int      member_arg_pos = -1;
+  Register member_reg     = noreg;
+  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
+  if (ref_kind != 0) {
+    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
+    member_reg = G5_method;  // known to be free at this point
+    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
+  } else if (special_dispatch == vmIntrinsics::_invokeBasic) {
+    has_receiver = true;
+  } else {
+    fatal(err_msg("special_dispatch=%d", special_dispatch));
+  }
+
+  if (member_reg != noreg) {
+    // Load the member_arg into register, if necessary.
+    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
+    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
+    VMReg r = regs[member_arg_pos].first();
+    assert(r->is_valid(), "bad member arg");
+    if (r->is_stack()) {
+      RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS;
+      ld_off = __ ensure_simm13_or_reg(ld_off, member_reg);
+      __ ld_ptr(SP, ld_off, member_reg);
+    } else {
+      // no data motion is needed
+      member_reg = r->as_Register();
+    }
+  }
+
+  if (has_receiver) {
+    // Make sure the receiver is loaded into a register.
+    assert(total_args_passed > 0, "oob");
+    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
+    VMReg r = regs[0].first();
+    assert(r->is_valid(), "bad receiver arg");
+    if (r->is_stack()) {
+      // Porting note:  This assumes that compiled calling conventions always
+      // pass the receiver oop in a register.  If this is not true on some
+      // platform, pick a temp and load the receiver from stack.
+      assert(false, "receiver always in a register");
+      receiver_reg = G3_scratch;  // known to be free at this point
+      RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS;
+      ld_off = __ ensure_simm13_or_reg(ld_off, member_reg);
+      __ ld_ptr(SP, ld_off, receiver_reg);
+    } else {
+      // no data motion is needed
+      receiver_reg = r->as_Register();
+    }
+  }
+
+  // Figure out which address we are really jumping to:
+  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
+                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
+}
+
 // ---------------------------------------------------------------------------
 // Generate a native wrapper for a given method.  The method takes arguments
 // in the Java compiled code convention, marshals them to the native
 // convention (handlizes oops, etc), transitions to native, makes the call,
 // returns to java state (possibly blocking), unhandlizes any result and
 // returns.
+//
+// Critical native functions are a shorthand for the use of
+// GetPrimitiveArrayCritical and disallow the use of any other JNI
+// functions.  The wrapper is expected to unpack the arguments before
+// passing them to the callee and to perform checks before and after the
+// native call to ensure that the GC_locker
+// lock_critical/unlock_critical semantics are followed.  Some other
+// parts of JNI setup are skipped, like the tear-down of the JNI handle
+// block and the check for pending exceptions, since it is impossible
+// for them to be thrown.
+//
+// They are roughly structured like this:
+//    if (GC_locker::needs_gc())
+//      SharedRuntime::block_for_jni_critical();
+//    transition to thread_in_native
+//    unpack array arguments and call native entry point
+//    check for safepoint in progress
+//    check if any thread suspend flags are set
+//      call into JVM and possibly unlock the JNI critical
+//      if a GC was suppressed while in the critical native.
+//    transition back to thread_in_Java
+//    return to caller
+//
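
For readers following the comment, the same control flow written out as
ordinary C++ (every helper below is a hypothetical stand-in; the real wrapper
emits this sequence as SPARC assembly):

    // Hypothetical stubs standing in for the real VM operations.
    static bool gc_locker_needs_gc()         { return false; }
    static void block_for_jni_critical()     {}
    static void transition_to_native()       {}
    static void unpack_array_args_and_call() {}
    static bool safepoint_in_progress()      { return false; }
    static bool thread_suspend_flags_set()   { return false; }
    static void call_into_vm()               {}
    static void transition_to_java()         {}

    static void critical_native_call_sketch() {
      if (gc_locker_needs_gc())
        block_for_jni_critical();      // let a pending GC finish first
      transition_to_native();          // thread_in_Java -> thread_in_native
      unpack_array_args_and_call();    // raw array pointers, no JNI handles
      if (safepoint_in_progress() || thread_suspend_flags_set())
        call_into_vm();                // may also unlock the JNI critical
                                       // region if a GC was suppressed
      transition_to_java();            // thread_in_native -> thread_in_Java
    }
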
 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
                                                 methodHandle method,
                                                 int compile_id,
                                                 int total_in_args,
                                                 int comp_args_on_stack, // in VMRegStackSlots
-                                                BasicType *in_sig_bt,
-                                                VMRegPair *in_regs,
+                                                BasicType* in_sig_bt,
+                                                VMRegPair* in_regs,
                                                 BasicType ret_type) {
+  if (method->is_method_handle_intrinsic()) {
+    vmIntrinsics::ID iid = method->intrinsic_id();
+    intptr_t start = (intptr_t)__ pc();
+    int vep_offset = ((intptr_t)__ pc()) - start;
+    gen_special_dispatch(masm,
+                         total_in_args,
+                         comp_args_on_stack,
+                         iid,
+                         in_sig_bt,
+                         in_regs);
+    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
+    __ flush();
+    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
+    return nmethod::new_native_nmethod(method,
+                                       compile_id,
+                                       masm->code(),
+                                       vep_offset,
+                                       frame_complete,
+                                       stack_slots / VMRegImpl::slots_per_word,
+                                       in_ByteSize(-1),
+                                       in_ByteSize(-1),
+                                       (OopMapSet*)NULL);
+  }
   bool is_critical_native = true;
   address native_func = method->critical_native_function();
   if (native_func == NULL) {
@@ -3325,7 +3510,7 @@
   // make sure that the frames are aligned properly
 #ifndef _LP64
   __ btst(wordSize*2-1, SP);
-  __ breakpoint_trap(Assembler::notZero);
+  __ breakpoint_trap(Assembler::notZero, Assembler::ptr_cc);
 #endif
   #endif
 
@@ -3407,7 +3592,7 @@
 #ifdef ASSERT
   // make sure that there is at least one entry in the array
   __ tst(O4array_size);
-  __ breakpoint_trap(Assembler::zero);
+  __ breakpoint_trap(Assembler::zero, Assembler::icc);
 #endif
 
   // Now push the new interpreter frames
--- a/src/cpu/sparc/vm/sparc.ad	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -678,18 +678,26 @@
 
 static inline jdouble replicate_immI(int con, int count, int width) {
   // Load a constant replicated "count" times with width "width"
+  assert(count*width == 8 && width <= 4, "sanity");
   int bit_width = width * 8;
-  jlong elt_val = con;
-  elt_val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
-  jlong val = elt_val;
+  jlong val = con;
+  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
   for (int i = 0; i < count - 1; i++) {
-    val <<= bit_width;
-    val |= elt_val;
+    val |= (val << bit_width);
   }
   jdouble dval = *((jdouble*) &val);  // coerce to double type
   return dval;
 }
 
+static inline jdouble replicate_immF(float con) {
+  // Replicate float con 2 times and pack into vector.
+  int val = *((int*)&con);
+  jlong lval = val;
+  lval = (lval << 32) | (lval & 0xFFFFFFFFL);
+  jdouble dval = *((jdouble*) &lval);  // coerce to double type
+  return dval;
+}
+
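
A standalone restatement of the replication logic above, handy as a sanity
check (editorial sketch with plain integer types; replicate and the sample
constants are illustrative only):

    #include <cassert>
    #include <cstdint>

    // Mask the constant to `width` bytes, then OR in shifted copies until
    // `count` elements fill the 64-bit vector (count*width == 8, width <= 4).
    static uint64_t replicate(int con, int count, int width) {
      int bit_width = width * 8;
      uint64_t val = (uint64_t)con & ((UINT64_C(1) << bit_width) - 1);
      for (int i = 0; i < count - 1; i++)
        val |= (val << bit_width);
      return val;
    }

    int main() {
      assert(replicate(0xAB,   8, 1) == UINT64_C(0xABABABABABABABAB));
      assert(replicate(0x1234, 4, 2) == UINT64_C(0x1234123412341234));
      return 0;
    }
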
 // Standard Sparc opcode form2 field breakdown
 static inline void emit2_19(CodeBuffer &cbuf, int f30, int f29, int f25, int f22, int f20, int f19, int f0 ) {
   f0 &= (1<<19)-1;     // Mask displacement to 19 bits
@@ -791,6 +799,7 @@
     case Assembler::stdf_op3: st_op = Op_StoreD; break;
 
     case Assembler::ldsb_op3: ld_op = Op_LoadB; break;
+    case Assembler::ldub_op3: ld_op = Op_LoadUB; break;
     case Assembler::lduh_op3: ld_op = Op_LoadUS; break;
     case Assembler::ldsh_op3: ld_op = Op_LoadS; break;
     case Assembler::ldx_op3:  // may become LoadP or stay LoadI
@@ -799,7 +808,6 @@
     case Assembler::ldd_op3:  ld_op = Op_LoadL; break;
     case Assembler::ldf_op3:  ld_op = Op_LoadF; break;
     case Assembler::lddf_op3: ld_op = Op_LoadD; break;
-    case Assembler::ldub_op3: ld_op = Op_LoadB; break;
     case Assembler::prefetch_op3: ld_op = Op_LoadI; break;
 
     default: ShouldNotReachHere();
@@ -827,7 +835,6 @@
       // a Load
       // inputs are (0:control, 1:memory, 2:address)
       if (!(n->ideal_Opcode()==ld_op)       && // Following are special cases
-          !(n->ideal_Opcode()==Op_LoadLLocked && ld_op==Op_LoadI) &&
           !(n->ideal_Opcode()==Op_LoadPLocked && ld_op==Op_LoadP) &&
           !(n->ideal_Opcode()==Op_LoadI     && ld_op==Op_LoadF) &&
           !(n->ideal_Opcode()==Op_LoadF     && ld_op==Op_LoadI) &&
@@ -841,10 +848,7 @@
           !(n->ideal_Opcode()==Op_PrefetchRead  && ld_op==Op_LoadI) &&
           !(n->ideal_Opcode()==Op_PrefetchWrite && ld_op==Op_LoadI) &&
           !(n->ideal_Opcode()==Op_PrefetchAllocation && ld_op==Op_LoadI) &&
-          !(n->ideal_Opcode()==Op_Load2I    && ld_op==Op_LoadD) &&
-          !(n->ideal_Opcode()==Op_Load4C    && ld_op==Op_LoadD) &&
-          !(n->ideal_Opcode()==Op_Load4S    && ld_op==Op_LoadD) &&
-          !(n->ideal_Opcode()==Op_Load8B    && ld_op==Op_LoadD) &&
+          !(n->ideal_Opcode()==Op_LoadVector && ld_op==Op_LoadD) &&
           !(n->rule() == loadUB_rule)) {
         verify_oops_warning(n, n->ideal_Opcode(), ld_op);
       }
@@ -856,9 +860,7 @@
           !(n->ideal_Opcode()==Op_StoreI && st_op==Op_StoreF) &&
           !(n->ideal_Opcode()==Op_StoreF && st_op==Op_StoreI) &&
           !(n->ideal_Opcode()==Op_StoreL && st_op==Op_StoreI) &&
-          !(n->ideal_Opcode()==Op_Store2I && st_op==Op_StoreD) &&
-          !(n->ideal_Opcode()==Op_Store4C && st_op==Op_StoreD) &&
-          !(n->ideal_Opcode()==Op_Store8B && st_op==Op_StoreD) &&
+          !(n->ideal_Opcode()==Op_StoreVector && st_op==Op_StoreD) &&
           !(n->ideal_Opcode()==Op_StoreD && st_op==Op_StoreI && n->rule() == storeD0_rule)) {
         verify_oops_warning(n, n->ideal_Opcode(), st_op);
       }
@@ -1832,6 +1834,8 @@
   case Op_CountLeadingZerosL:
   case Op_CountTrailingZerosI:
   case Op_CountTrailingZerosL:
+  case Op_PopCountI:
+  case Op_PopCountL:
     if (!UsePopCountInstruction)
       return false;
     break;
@@ -1848,16 +1852,45 @@
 address last_rethrow = NULL;  // debugging aid for Rethrow encoding
 #endif
 
+// Map Types to machine register types
+const int Matcher::base2reg[Type::lastype] = {
+  Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
+  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
+  0, Op_RegD, 0, 0, /* Vectors */
+  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
+  0, 0/*abio*/,
+  Op_RegP /* Return address */, 0, /* the memories */
+  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
+  0  /*bottom*/
+};
+
 // Vector width in bytes
-const uint Matcher::vector_width_in_bytes(void) {
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+  assert(MaxVectorSize == 8, "");
   return 8;
 }
 
 // Vector ideal reg
-const uint Matcher::vector_ideal_reg(void) {
+const int Matcher::vector_ideal_reg(int size) {
+  assert(MaxVectorSize == 8, "");
   return Op_RegD;
 }
 
+// Limits on vector size (number of elements) loaded into vector.
+const int Matcher::max_vector_size(const BasicType bt) {
+  assert(is_java_primitive(bt), "only primitive type vectors");
+  return vector_width_in_bytes(bt)/type2aelembytes(bt);
+}
+
+const int Matcher::min_vector_size(const BasicType bt) {
+  return max_vector_size(bt); // Same as max.
+}
+
+// SPARC doesn't support misaligned vectors store/load.
+const bool Matcher::misaligned_vectors_ok() {
+  return false;
+}
+
 // USII supports fxtof through the whole range of number, USIII doesn't
 const bool Matcher::convL2FSupported(void) {
   return VM_Version::has_fast_fxtof();
@@ -3124,50 +3157,6 @@
     __ membar( Assembler::Membar_mask_bits(Assembler::StoreLoad) );
   %}
 
-  enc_class enc_repl8b( iRegI src, iRegL dst ) %{
-    MacroAssembler _masm(&cbuf);
-    Register src_reg = reg_to_register_object($src$$reg);
-    Register dst_reg = reg_to_register_object($dst$$reg);
-    __ sllx(src_reg, 56, dst_reg);
-    __ srlx(dst_reg,  8, O7);
-    __ or3 (dst_reg, O7, dst_reg);
-    __ srlx(dst_reg, 16, O7);
-    __ or3 (dst_reg, O7, dst_reg);
-    __ srlx(dst_reg, 32, O7);
-    __ or3 (dst_reg, O7, dst_reg);
-  %}
-
-  enc_class enc_repl4b( iRegI src, iRegL dst ) %{
-    MacroAssembler _masm(&cbuf);
-    Register src_reg = reg_to_register_object($src$$reg);
-    Register dst_reg = reg_to_register_object($dst$$reg);
-    __ sll(src_reg, 24, dst_reg);
-    __ srl(dst_reg,  8, O7);
-    __ or3(dst_reg, O7, dst_reg);
-    __ srl(dst_reg, 16, O7);
-    __ or3(dst_reg, O7, dst_reg);
-  %}
-
-  enc_class enc_repl4s( iRegI src, iRegL dst ) %{
-    MacroAssembler _masm(&cbuf);
-    Register src_reg = reg_to_register_object($src$$reg);
-    Register dst_reg = reg_to_register_object($dst$$reg);
-    __ sllx(src_reg, 48, dst_reg);
-    __ srlx(dst_reg, 16, O7);
-    __ or3 (dst_reg, O7, dst_reg);
-    __ srlx(dst_reg, 32, O7);
-    __ or3 (dst_reg, O7, dst_reg);
-  %}
-
-  enc_class enc_repl2i( iRegI src, iRegL dst ) %{
-    MacroAssembler _masm(&cbuf);
-    Register src_reg = reg_to_register_object($src$$reg);
-    Register dst_reg = reg_to_register_object($dst$$reg);
-    __ sllx(src_reg, 32, dst_reg);
-    __ srlx(dst_reg, 32, O7);
-    __ or3 (dst_reg, O7, dst_reg);
-  %}
-
 %}
 
 //----------FRAME--------------------------------------------------------------
@@ -5931,50 +5920,6 @@
   ins_pipe(iload_mem);
 %}
 
-// Load Aligned Packed Byte into a Double Register
-instruct loadA8B(regD dst, memory mem) %{
-  match(Set dst (Load8B mem));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "LDDF   $mem,$dst\t! packed8B" %}
-  opcode(Assembler::lddf_op3);
-  ins_encode(simple_form3_mem_reg( mem, dst ) );
-  ins_pipe(floadD_mem);
-%}
-
-// Load Aligned Packed Char into a Double Register
-instruct loadA4C(regD dst, memory mem) %{
-  match(Set dst (Load4C mem));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "LDDF   $mem,$dst\t! packed4C" %}
-  opcode(Assembler::lddf_op3);
-  ins_encode(simple_form3_mem_reg( mem, dst ) );
-  ins_pipe(floadD_mem);
-%}
-
-// Load Aligned Packed Short into a Double Register
-instruct loadA4S(regD dst, memory mem) %{
-  match(Set dst (Load4S mem));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "LDDF   $mem,$dst\t! packed4S" %}
-  opcode(Assembler::lddf_op3);
-  ins_encode(simple_form3_mem_reg( mem, dst ) );
-  ins_pipe(floadD_mem);
-%}
-
-// Load Aligned Packed Int into a Double Register
-instruct loadA2I(regD dst, memory mem) %{
-  match(Set dst (Load2I mem));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "LDDF   $mem,$dst\t! packed2I" %}
-  opcode(Assembler::lddf_op3);
-  ins_encode(simple_form3_mem_reg( mem, dst ) );
-  ins_pipe(floadD_mem);
-%}
-
 // Load Range
 instruct loadRange(iRegI dst, memory mem) %{
   match(Set dst (LoadRange mem));
@@ -6598,17 +6543,6 @@
   ins_pipe(fstoreF_mem_zero);
 %}
 
-// Store Aligned Packed Bytes in Double register to memory
-instruct storeA8B(memory mem, regD src) %{
-  match(Set mem (Store8B mem src));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "STDF   $src,$mem\t! packed8B" %}
-  opcode(Assembler::stdf_op3);
-  ins_encode(simple_form3_mem_reg( mem, src ) );
-  ins_pipe(fstoreD_mem_reg);
-%}
-
 // Convert oop pointer into compressed form
 instruct encodeHeapOop(iRegN dst, iRegP src) %{
   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
@@ -6653,62 +6587,6 @@
 %}
 
 
-// Store Zero into Aligned Packed Bytes
-instruct storeA8B0(memory mem, immI0 zero) %{
-  match(Set mem (Store8B mem zero));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "STX    $zero,$mem\t! packed8B" %}
-  opcode(Assembler::stx_op3);
-  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
-  ins_pipe(fstoreD_mem_zero);
-%}
-
-// Store Aligned Packed Chars/Shorts in Double register to memory
-instruct storeA4C(memory mem, regD src) %{
-  match(Set mem (Store4C mem src));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "STDF   $src,$mem\t! packed4C" %}
-  opcode(Assembler::stdf_op3);
-  ins_encode(simple_form3_mem_reg( mem, src ) );
-  ins_pipe(fstoreD_mem_reg);
-%}
-
-// Store Zero into Aligned Packed Chars/Shorts
-instruct storeA4C0(memory mem, immI0 zero) %{
-  match(Set mem (Store4C mem (Replicate4C zero)));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "STX    $zero,$mem\t! packed4C" %}
-  opcode(Assembler::stx_op3);
-  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
-  ins_pipe(fstoreD_mem_zero);
-%}
-
-// Store Aligned Packed Ints in Double register to memory
-instruct storeA2I(memory mem, regD src) %{
-  match(Set mem (Store2I mem src));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "STDF   $src,$mem\t! packed2I" %}
-  opcode(Assembler::stdf_op3);
-  ins_encode(simple_form3_mem_reg( mem, src ) );
-  ins_pipe(fstoreD_mem_reg);
-%}
-
-// Store Zero into Aligned Packed Ints
-instruct storeA2I0(memory mem, immI0 zero) %{
-  match(Set mem (Store2I mem zero));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "STX    $zero,$mem\t! packed2I" %}
-  opcode(Assembler::stx_op3);
-  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
-  ins_pipe(fstoreD_mem_zero);
-%}
-
-
 //----------MemBar Instructions-----------------------------------------------
 // Memory barrier flavors
 
@@ -7304,17 +7182,6 @@
   ins_pipe(iload_mem);
 %}
 
-// LoadL-locked.  Same as a regular long load when used with a compare-swap
-instruct loadLLocked(iRegL dst, memory mem) %{
-  match(Set dst (LoadLLocked mem));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "LDX    $mem,$dst\t! long" %}
-  opcode(Assembler::ldx_op3);
-  ins_encode(simple_form3_mem_reg( mem, dst ) );
-  ins_pipe(iload_mem);
-%}
-
 instruct storePConditional( iRegP heap_top_ptr, iRegP oldval, g3RegP newval, flagsRegP pcc ) %{
   match(Set pcc (StorePConditional heap_top_ptr (Binary oldval newval)));
   effect( KILL newval );
@@ -8890,150 +8757,6 @@
   ins_pipe(ialu_reg_imm);
 %}
 
-// Replicate scalar to packed byte values in Double register
-instruct Repl8B_reg_helper(iRegL dst, iRegI src) %{
-  effect(DEF dst, USE src);
-  format %{ "SLLX  $src,56,$dst\n\t"
-            "SRLX  $dst, 8,O7\n\t"
-            "OR    $dst,O7,$dst\n\t"
-            "SRLX  $dst,16,O7\n\t"
-            "OR    $dst,O7,$dst\n\t"
-            "SRLX  $dst,32,O7\n\t"
-            "OR    $dst,O7,$dst\t! replicate8B" %}
-  ins_encode( enc_repl8b(src, dst));
-  ins_pipe(ialu_reg);
-%}
-
-// Replicate scalar to packed byte values in Double register
-instruct Repl8B_reg(stackSlotD dst, iRegI src) %{
-  match(Set dst (Replicate8B src));
-  expand %{
-    iRegL tmp;
-    Repl8B_reg_helper(tmp, src);
-    regL_to_stkD(dst, tmp);
-  %}
-%}
-
-// Replicate scalar constant to packed byte values in Double register
-instruct Repl8B_immI(regD dst, immI13 con, o7RegI tmp) %{
-  match(Set dst (Replicate8B con));
-  effect(KILL tmp);
-  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl8B($con)" %}
-  ins_encode %{
-    // XXX This is a quick fix for 6833573.
-    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 8, 1)), $dst$$FloatRegister);
-    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 8, 1)), $tmp$$Register);
-    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
-  %}
-  ins_pipe(loadConFD);
-%}
-
-// Replicate scalar to packed char values into stack slot
-instruct Repl4C_reg_helper(iRegL dst, iRegI src) %{
-  effect(DEF dst, USE src);
-  format %{ "SLLX  $src,48,$dst\n\t"
-            "SRLX  $dst,16,O7\n\t"
-            "OR    $dst,O7,$dst\n\t"
-            "SRLX  $dst,32,O7\n\t"
-            "OR    $dst,O7,$dst\t! replicate4C" %}
-  ins_encode( enc_repl4s(src, dst) );
-  ins_pipe(ialu_reg);
-%}
-
-// Replicate scalar to packed char values into stack slot
-instruct Repl4C_reg(stackSlotD dst, iRegI src) %{
-  match(Set dst (Replicate4C src));
-  expand %{
-    iRegL tmp;
-    Repl4C_reg_helper(tmp, src);
-    regL_to_stkD(dst, tmp);
-  %}
-%}
-
-// Replicate scalar constant to packed char values in Double register
-instruct Repl4C_immI(regD dst, immI con, o7RegI tmp) %{
-  match(Set dst (Replicate4C con));
-  effect(KILL tmp);
-  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4C($con)" %}
-  ins_encode %{
-    // XXX This is a quick fix for 6833573.
-    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister);
-    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register);
-    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
-  %}
-  ins_pipe(loadConFD);
-%}
-
-// Replicate scalar to packed short values into stack slot
-instruct Repl4S_reg_helper(iRegL dst, iRegI src) %{
-  effect(DEF dst, USE src);
-  format %{ "SLLX  $src,48,$dst\n\t"
-            "SRLX  $dst,16,O7\n\t"
-            "OR    $dst,O7,$dst\n\t"
-            "SRLX  $dst,32,O7\n\t"
-            "OR    $dst,O7,$dst\t! replicate4S" %}
-  ins_encode( enc_repl4s(src, dst) );
-  ins_pipe(ialu_reg);
-%}
-
-// Replicate scalar to packed short values into stack slot
-instruct Repl4S_reg(stackSlotD dst, iRegI src) %{
-  match(Set dst (Replicate4S src));
-  expand %{
-    iRegL tmp;
-    Repl4S_reg_helper(tmp, src);
-    regL_to_stkD(dst, tmp);
-  %}
-%}
-
-// Replicate scalar constant to packed short values in Double register
-instruct Repl4S_immI(regD dst, immI con, o7RegI tmp) %{
-  match(Set dst (Replicate4S con));
-  effect(KILL tmp);
-  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4S($con)" %}
-  ins_encode %{
-    // XXX This is a quick fix for 6833573.
-    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister);
-    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register);
-    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
-  %}
-  ins_pipe(loadConFD);
-%}
-
-// Replicate scalar to packed int values in Double register
-instruct Repl2I_reg_helper(iRegL dst, iRegI src) %{
-  effect(DEF dst, USE src);
-  format %{ "SLLX  $src,32,$dst\n\t"
-            "SRLX  $dst,32,O7\n\t"
-            "OR    $dst,O7,$dst\t! replicate2I" %}
-  ins_encode( enc_repl2i(src, dst));
-  ins_pipe(ialu_reg);
-%}
-
-// Replicate scalar to packed int values in Double register
-instruct Repl2I_reg(stackSlotD dst, iRegI src) %{
-  match(Set dst (Replicate2I src));
-  expand %{
-    iRegL tmp;
-    Repl2I_reg_helper(tmp, src);
-    regL_to_stkD(dst, tmp);
-  %}
-%}
-
-// Replicate scalar zero constant to packed int values in Double register
-instruct Repl2I_immI(regD dst, immI con, o7RegI tmp) %{
-  match(Set dst (Replicate2I con));
-  effect(KILL tmp);
-  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2I($con)" %}
-  ins_encode %{
-    // XXX This is a quick fix for 6833573.
-    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 2, 4)), $dst$$FloatRegister);
-    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 2, 4)), $tmp$$Register);
-    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
-  %}
-  ins_pipe(loadConFD);
-%}
-
 //----------Control Flow Instructions------------------------------------------
 // Compare Instructions
 // Compare Integers
@@ -10752,6 +10475,308 @@
   ins_pipe(istore_mem_reg);
 %}
 
+// ====================VECTOR INSTRUCTIONS=====================================
+
+// Load Aligned Packed values into a Double Register
+instruct loadV8(regD dst, memory mem) %{
+  predicate(n->as_LoadVector()->memory_size() == 8);
+  match(Set dst (LoadVector mem));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "LDDF   $mem,$dst\t! load vector (8 bytes)" %}
+  ins_encode %{
+    __ ldf(FloatRegisterImpl::D, $mem$$Address, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(floadD_mem);
+%}
+
+// Store Vector in Double register to memory
+instruct storeV8(memory mem, regD src) %{
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STDF   $src,$mem\t! store vector (8 bytes)" %}
+  ins_encode %{
+    __ stf(FloatRegisterImpl::D, as_DoubleFloatRegister($src$$reg), $mem$$Address);
+  %}
+  ins_pipe(fstoreD_mem_reg);
+%}
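
Editor's note: these two rules are the whole 8-byte vector memory story on SPARC, and they exist to serve C2's superword autovectorizer. A hedged illustration (not code from this changeset) of the kind of loop that, given suitable alignment, reduces to one LDDF/STDF pair per 8 bytes:

    #include <cstdint>
    // sketch: superword can turn this byte copy into LoadVector/StoreVector
    // nodes, which the rules above match as LDDF/STDF of a double register
    void copy_bytes(const int8_t* src, int8_t* dst, int n) {
      for (int i = 0; i < n; i++) {
        dst[i] = src[i];
      }
    }
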
+
+// Store Zero into vector in memory
+instruct storeV8B_zero(memory mem, immI0 zero) %{
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem (ReplicateB zero)));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STX    $zero,$mem\t! store zero vector (8 bytes)" %}
+  ins_encode %{
+    __ stx(G0, $mem$$Address);
+  %}
+  ins_pipe(fstoreD_mem_zero);
+%}
+
+instruct storeV4S_zero(memory mem, immI0 zero) %{
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem (ReplicateS zero)));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STX    $zero,$mem\t! store zero vector (4 shorts)" %}
+  ins_encode %{
+    __ stx(G0, $mem$$Address);
+  %}
+  ins_pipe(fstoreD_mem_zero);
+%}
+
+instruct storeV2I_zero(memory mem, immI0 zero) %{
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem (ReplicateI zero)));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STX    $zero,$mem\t! store zero vector (2 ints)" %}
+  ins_encode %{
+    __ stx(G0, $mem$$Address);
+  %}
+  ins_pipe(fstoreD_mem_zero);
+%}
+
+instruct storeV2F_zero(memory mem, immF0 zero) %{
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem (ReplicateF zero)));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STX    $zero,$mem\t! store zero vector (2 floats)" %}
+  ins_encode %{
+    __ stx(G0, $mem$$Address);
+  %}
+  ins_pipe(fstoreD_mem_zero);
+%}
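
Editor's note: all four zero-store rules above emit the identical instruction. One STX of G0, the hardwired zero register, clears the full 8-byte vector regardless of element type, so no constant is materialized and no FP register is touched. In effect (assuming the 8-byte alignment of an aligned vector store):

    // one STX %g0,[addr] per 8-byte vector
    *(uint64_t*)addr = 0;
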
+
+// Replicate scalar to packed byte values into Double register
+instruct Repl8B_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+  predicate(n->as_Vector()->length() == 8 && UseVIS >= 3);
+  match(Set dst (ReplicateB src));
+  effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+  format %{ "SLLX  $src,56,$tmp\n\t"
+            "SRLX  $tmp, 8,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\n\t"
+            "SRLX  $tmp,16,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\n\t"
+            "SRLX  $tmp,32,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\t! replicate8B\n\t"
+            "MOVXTOD $tmp,$dst\t! MoveL2D" %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+    Register Rtmp2 = $tmp2$$Register;
+    __ sllx(Rsrc,    56, Rtmp);
+    __ srlx(Rtmp,     8, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ srlx(Rtmp,    16, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ srlx(Rtmp,    32, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(ialu_reg);
+%}
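
Editor's note: the SLLX/SRLX/OR chain is a log-time broadcast; each shift-and-OR round doubles the number of copies in the 64-bit register. A minimal C++ sketch of the same computation (it mirrors the encoding above; not code from the tree):

    #include <cstdint>
    uint64_t repl8b(uint8_t v) {
      uint64_t x = (uint64_t)v << 56;  // 1 copy, in the top byte
      x |= x >> 8;                     // 2 copies
      x |= x >> 16;                    // 4 copies
      x |= x >> 32;                    // 8 copies
      return x;
    }
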
+
+// Replicate scalar to packed byte values into Double stack
+instruct Repl8B_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+  predicate(n->as_Vector()->length() == 8 && UseVIS < 3);
+  match(Set dst (ReplicateB src));
+  effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+  format %{ "SLLX  $src,56,$tmp\n\t"
+            "SRLX  $tmp, 8,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\n\t"
+            "SRLX  $tmp,16,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\n\t"
+            "SRLX  $tmp,32,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\t! replicate8B\n\t"
+            "STX   $tmp,$dst\t! regL to stkD" %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+    Register Rtmp2 = $tmp2$$Register;
+    __ sllx(Rsrc,    56, Rtmp);
+    __ srlx(Rtmp,     8, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ srlx(Rtmp,    16, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ srlx(Rtmp,    32, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ set ($dst$$disp + STACK_BIAS, Rtmp2);
+    __ stx (Rtmp, Rtmp2, $dst$$base$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
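
Editor's note: the _reg/_stk pair differs only in the last step. With VIS3 (UseVIS >= 3), MOVXTOD moves the integer result straight into a double FP register; pre-VIS3 hardware has no direct integer-to-FP move, so the value is parked in a stack slot (the STX above), from which it can presumably be picked up with an LDDF when a register form is needed — hence the stackSlotD destination.
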
+
+// Replicate scalar constant to packed byte values in Double register
+instruct Repl8B_immI(regD dst, immI13 con, o7RegI tmp) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateB con));
+  effect(KILL tmp);
+  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl8B($con)" %}
+  ins_encode %{
+    // XXX This is a quick fix for 6833573.
+    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 8, 1)), $dst$$FloatRegister);
+    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 8, 1)), $tmp$$Register);
+    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(loadConFD);
+%}
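
Editor's note: for constant operands the replication happens at compile time. replicate_immI widens the immediate into the full 64-bit pattern and plants it in the constant table, leaving a single LDDF at run time; ensure_simm13_or_reg then covers the case where the table offset overflows SPARC's 13-bit signed displacement (the 6833573 note above). A hedged sketch of the value being tabulated (the real replicate_immI lives elsewhere in this file; this signature is assumed):

    #include <cstdint>
    // replicate the low width_bytes of con, cnt times (sketch)
    uint64_t replicated(uint32_t con, int cnt, int width_bytes) {
      uint64_t elem = con & ((1ULL << (8 * width_bytes)) - 1);
      uint64_t x = 0;
      for (int i = 0; i < cnt; i++) {
        x = (x << (8 * width_bytes)) | elem;
      }
      return x;
    }
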
+
+// Replicate scalar to packed char/short values into Double register
+instruct Repl4S_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+  predicate(n->as_Vector()->length() == 4 && UseVIS >= 3);
+  match(Set dst (ReplicateS src));
+  effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+  format %{ "SLLX  $src,48,$tmp\n\t"
+            "SRLX  $tmp,16,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\n\t"
+            "SRLX  $tmp,32,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\t! replicate4S\n\t"
+            "MOVXTOD $tmp,$dst\t! MoveL2D" %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+    Register Rtmp2 = $tmp2$$Register;
+    __ sllx(Rsrc,    48, Rtmp);
+    __ srlx(Rtmp,    16, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ srlx(Rtmp,    32, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(ialu_reg);
+%}
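
Editor's note: this is the same broadcast trick at a 16-bit stride, needing only two rounds; the Repl2I rules below are the degenerate single-round case. In C++ terms (a sketch):

    #include <cstdint>
    uint64_t repl4s(uint16_t v) {
      uint64_t x = (uint64_t)v << 48;  // 1 copy, in the top 16 bits
      x |= x >> 16;                    // 2 copies
      x |= x >> 32;                    // 4 copies
      return x;
    }
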
+
+// Replicate scalar to packed char/short values into Double stack
+instruct Repl4S_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+  predicate(n->as_Vector()->length() == 4 && UseVIS < 3);
+  match(Set dst (ReplicateS src));
+  effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+  format %{ "SLLX  $src,48,$tmp\n\t"
+            "SRLX  $tmp,16,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\n\t"
+            "SRLX  $tmp,32,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\t! replicate4S\n\t"
+            "STX   $tmp,$dst\t! regL to stkD" %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+    Register Rtmp2 = $tmp2$$Register;
+    __ sllx(Rsrc,    48, Rtmp);
+    __ srlx(Rtmp,    16, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ srlx(Rtmp,    32, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ set ($dst$$disp + STACK_BIAS, Rtmp2);
+    __ stx (Rtmp, Rtmp2, $dst$$base$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar constant to packed char/short values in Double register
+instruct Repl4S_immI(regD dst, immI con, o7RegI tmp) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateS con));
+  effect(KILL tmp);
+  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4S($con)" %}
+  ins_encode %{
+    // XXX This is a quick fix for 6833573.
+    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister);
+    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register);
+    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(loadConFD);
+%}
+
+// Replicate scalar to packed int values into Double register
+instruct Repl2I_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+  predicate(n->as_Vector()->length() == 2 && UseVIS >= 3);
+  match(Set dst (ReplicateI src));
+  effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+  format %{ "SLLX  $src,32,$tmp\n\t"
+            "SRLX  $tmp,32,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\t! replicate2I\n\t"
+            "MOVXTOD $tmp,$dst\t! MoveL2D" %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+    Register Rtmp2 = $tmp2$$Register;
+    __ sllx(Rsrc,    32, Rtmp);
+    __ srlx(Rtmp,    32, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar to packed int values into Double stack
+instruct Repl2I_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+  predicate(n->as_Vector()->length() == 2 && UseVIS < 3);
+  match(Set dst (ReplicateI src));
+  effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+  format %{ "SLLX  $src,32,$tmp\n\t"
+            "SRLX  $tmp,32,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\t! replicate2I\n\t"
+            "STX   $tmp,$dst\t! regL to stkD" %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+    Register Rtmp2 = $tmp2$$Register;
+    __ sllx(Rsrc,    32, Rtmp);
+    __ srlx(Rtmp,    32, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ set ($dst$$disp + STACK_BIAS, Rtmp2);
+    __ stx (Rtmp, Rtmp2, $dst$$base$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar constant to packed int values in Double register
+instruct Repl2I_immI(regD dst, immI con, o7RegI tmp) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI con));
+  effect(KILL tmp);
+  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2I($con)" %}
+  ins_encode %{
+    // XXX This is a quick fix for 6833573.
+    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 2, 4)), $dst$$FloatRegister);
+    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 2, 4)), $tmp$$Register);
+    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(loadConFD);
+%}
+
+// Replicate scalar to packed float values into Double stack
+instruct Repl2F_stk(stackSlotD dst, regF src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateF src));
+  ins_cost(MEMORY_REF_COST*2);
+  format %{ "STF    $src,$dst.hi\t! packed2F\n\t"
+            "STF    $src,$dst.lo" %}
+  opcode(Assembler::stf_op3);
+  ins_encode(simple_form3_mem_reg(dst, src), form3_mem_plus_4_reg(dst, src));
+  ins_pipe(fstoreF_stk_reg);
+%}
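
Editor's note: floats skip the integer bit-twiddling entirely. The same single-precision register is stored to both 4-byte halves of the double-sized stack slot (two STF instructions, matching the MEMORY_REF_COST*2 above), and the slot can then be treated as a packed pair.
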
+
+// Replicate scalar constant to packed float values in Double register
+instruct Repl2F_immF(regD dst, immF con, o7RegI tmp) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateF con));
+  effect(KILL tmp);
+  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2F($con)" %}
+  ins_encode %{
+    // XXX This is a quick fix for 6833573.
+    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immF($con$$constant)), $dst$$FloatRegister);
+    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immF($con$$constant)), $tmp$$Register);
+    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(loadConFD);
+%}
+
 //----------PEEPHOLE RULES-----------------------------------------------------
 // These must follow all instruction definitions as they use the names
 // defined in the instructions definitions.
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -3404,14 +3404,6 @@
     StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
 #endif  // COMPILER2 !=> _LP64
 
-    // Build this early so it's available for the interpreter.  The
-    // stub expects the required and actual type to already be in O1
-    // and O2 respectively.
-    StubRoutines::_throw_WrongMethodTypeException_entry =
-      generate_throw_exception("WrongMethodTypeException throw_exception",
-                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
-                               G5_method_type, G3_method_handle);
-
     // Build this early so it's available for the interpreter.
     StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
   }
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -371,7 +371,8 @@
     __ br( Assembler::zero, true, Assembler::pt, done);
     __ delayed()->ld_ptr(Llocals, Interpreter::local_offset_in_bytes(0), O0); // get receiver for not-static case
 
-    __ ld_ptr( Lmethod, in_bytes(methodOopDesc::constants_offset()), O0);
+    __ ld_ptr( Lmethod, in_bytes(methodOopDesc::const_offset()), O0);
+    __ ld_ptr( O0, in_bytes(constMethodOopDesc::constants_offset()), O0);
     __ ld_ptr( O0, constantPoolOopDesc::pool_holder_offset_in_bytes(), O0);
 
     // lock the mirror, not the klassOop
@@ -379,7 +380,7 @@
 
 #ifdef ASSERT
     __ tst(O0);
-    __ breakpoint_trap(Assembler::zero);
+    __ breakpoint_trap(Assembler::zero, Assembler::ptr_cc);
 #endif // ASSERT
 
     __ bind(done);
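
Editor's note: the two-load pattern introduced here (const_offset followed by constants_offset) recurs throughout this file and reflects a layout change — the constant pool pointer now hangs off constMethodOop instead of methodOopDesc. In effect (accessor names illustrative):

    // before: constantPoolOop cp = method->constants();
    // after:  constantPoolOop cp = method->constMethod()->constants();  // one extra dependent load

The breakpoint_trap calls likewise gain an explicit condition-code argument (ptr_cc for the pointer test above, icc for 32-bit compares later) instead of relying on a default.
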
@@ -670,7 +671,8 @@
                       ConstantPoolCacheEntry::size()) * BytesPerWord), G1_scratch);
 
     // get constant pool cache
-    __ ld_ptr(G5_method, methodOopDesc::constants_offset(), G3_scratch);
+    __ ld_ptr(G5_method, methodOopDesc::const_offset(), G3_scratch);
+    __ ld_ptr(G3_scratch, constMethodOopDesc::constants_offset(), G3_scratch);
     __ ld_ptr(G3_scratch, constantPoolOopDesc::cache_offset_in_bytes(), G3_scratch);
 
     // get specific constant pool cache entry
@@ -692,9 +694,9 @@
     // Need to differentiate between igetfield, agetfield, bgetfield etc.
     // because they are different sizes.
     // Get the type from the constant pool cache
-    __ srl(G1_scratch, ConstantPoolCacheEntry::tosBits, G1_scratch);
-    // Make sure we don't need to mask G1_scratch for tosBits after the above shift
-    ConstantPoolCacheEntry::verify_tosBits();
+    __ srl(G1_scratch, ConstantPoolCacheEntry::tos_state_shift, G1_scratch);
+    // Make sure we don't need to mask G1_scratch after the above shift
+    ConstantPoolCacheEntry::verify_tos_state_shift();
     __ cmp(G1_scratch, atos );
     __ br(Assembler::equal, true, Assembler::pt, xreturn_path);
     __ delayed()->ld_ptr(Otos_i, G3_scratch, Otos_i);
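
Editor's note: the tosBits-to-tos_state_shift rename comes with an invariant, asserted by verify_tos_state_shift — the TOS state occupies the topmost bits of the cache-entry flags word, so a logical right shift extracts it with no masking. Equivalently:

    // no '& mask' needed: tos_state is the top field of flags
    TosState state = (TosState)(flags >> ConstantPoolCacheEntry::tos_state_shift);
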
@@ -993,7 +995,8 @@
     // for static methods insert the mirror argument
     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
 
-    __ ld_ptr(Lmethod, methodOopDesc:: constants_offset(), O1);
+    __ ld_ptr(Lmethod, methodOopDesc:: const_offset(), O1);
+    __ ld_ptr(O1, constMethodOopDesc::constants_offset(), O1);
     __ ld_ptr(O1, constantPoolOopDesc::pool_holder_offset_in_bytes(), O1);
     __ ld_ptr(O1, mirror_offset, O1);
 #ifdef ASSERT
@@ -1659,7 +1662,7 @@
       int computed_sp_adjustment = (delta > 0) ? round_to(delta, WordsPerLong) : 0;
       *interpreter_frame->register_addr(I5_savedSP)    = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS;
     } else {
-      assert(caller->is_compiled_frame() || caller->is_entry_frame() || caller->is_ricochet_frame(), "only possible cases");
+      assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases");
       // Don't have Lesp available; lay out locals block in the caller
       // adjacent to the register window save area.
       //
@@ -2050,7 +2053,7 @@
   AddressLiteral stop_at(&StopInterpreterAt);
   __ load_ptr_contents(stop_at, G4_scratch);
   __ cmp(G3_scratch, G4_scratch);
-  __ breakpoint_trap(Assembler::equal);
+  __ breakpoint_trap(Assembler::equal, Assembler::icc);
 }
 #endif // not PRODUCT
 #endif // !CC_INTERP
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -378,7 +378,7 @@
   Register Rcache = G3_scratch;
   Register Rscratch = G4_scratch;
 
-  resolve_cache_and_index(f1_oop, Otos_i, Rcache, Rscratch, wide ? sizeof(u2) : sizeof(u1));
+  resolve_cache_and_index(f12_oop, Otos_i, Rcache, Rscratch, wide ? sizeof(u2) : sizeof(u1));
 
   __ verify_oop(Otos_i);
 
@@ -2093,10 +2093,12 @@
   // Depends on cpCacheOop layout!
   Label resolved;
 
-  if (byte_no == f1_oop) {
-    // We are resolved if the f1 field contains a non-null object (CallSite, etc.)
-    // This kind of CP cache entry does not need to match the flags byte, because
+  if (byte_no == f12_oop) {
+    // We are resolved if the f1 field contains a non-null object (CallSite, MethodType, etc.)
+    // This kind of CP cache entry does not need to match bytecode_1 or bytecode_2, because
     // there is a 1-1 relation between bytecode type and CP entry type.
+    // The caller will also load a methodOop from f2.
+    assert(result != noreg, "");
     assert_different_registers(result, Rcache);
     __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
     __ ld_ptr(Rcache, constantPoolCacheOopDesc::base_offset() +
@@ -2123,10 +2125,13 @@
     case Bytecodes::_invokespecial  : // fall through
     case Bytecodes::_invokestatic   : // fall through
     case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);  break;
+    case Bytecodes::_invokehandle   : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);  break;
     case Bytecodes::_invokedynamic  : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);  break;
     case Bytecodes::_fast_aldc      : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);     break;
     case Bytecodes::_fast_aldc_w    : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);     break;
-    default                         : ShouldNotReachHere();                                 break;
+    default:
+      fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
+      break;
   }
   // first time invocation - must resolve first
   __ call_VM(noreg, entry, O1);
@@ -2139,48 +2144,54 @@
 }
 
 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
-                                               Register Rmethod,
-                                               Register Ritable_index,
-                                               Register Rflags,
+                                               Register method,
+                                               Register itable_index,
+                                               Register flags,
                                                bool is_invokevirtual,
                                                bool is_invokevfinal,
                                                bool is_invokedynamic) {
   // Uses both G3_scratch and G4_scratch
-  Register Rcache = G3_scratch;
-  Register Rscratch = G4_scratch;
-  assert_different_registers(Rcache, Rmethod, Ritable_index);
-
-  ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
+  Register cache = G3_scratch;
+  Register index = G4_scratch;
+  assert_different_registers(cache, method, itable_index);
 
   // determine constant pool cache field offsets
+  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
   const int method_offset = in_bytes(
-    cp_base_offset +
-      (is_invokevirtual
+      constantPoolCacheOopDesc::base_offset() +
+      ((byte_no == f2_byte)
        ? ConstantPoolCacheEntry::f2_offset()
        : ConstantPoolCacheEntry::f1_offset()
       )
     );
-  const int flags_offset = in_bytes(cp_base_offset +
+  const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
                                     ConstantPoolCacheEntry::flags_offset());
   // access constant pool cache fields
-  const int index_offset = in_bytes(cp_base_offset +
+  const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
                                     ConstantPoolCacheEntry::f2_offset());
 
   if (is_invokevfinal) {
-    __ get_cache_and_index_at_bcp(Rcache, Rscratch, 1);
-    __ ld_ptr(Rcache, method_offset, Rmethod);
-  } else if (byte_no == f1_oop) {
-    // Resolved f1_oop goes directly into 'method' register.
-    resolve_cache_and_index(byte_no, Rmethod, Rcache, Rscratch, sizeof(u4));
+    __ get_cache_and_index_at_bcp(cache, index, 1);
+    __ ld_ptr(Address(cache, method_offset), method);
+  } else if (byte_no == f12_oop) {
+    // Resolved f1_oop (CallSite, MethodType, etc.) goes into 'itable_index'.
+    // Resolved f2_oop (methodOop invoker) will go into 'method' (at index_offset).
+    // See ConstantPoolCacheEntry::set_dynamic_call and set_method_handle.
+    size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
+    resolve_cache_and_index(byte_no, itable_index, cache, index, index_size);
+    __ ld_ptr(Address(cache, index_offset), method);
+    itable_index = noreg;  // hack to disable load below
   } else {
-    resolve_cache_and_index(byte_no, noreg, Rcache, Rscratch, sizeof(u2));
-    __ ld_ptr(Rcache, method_offset, Rmethod);
+    resolve_cache_and_index(byte_no, noreg, cache, index, sizeof(u2));
+    __ ld_ptr(Address(cache, method_offset), method);
   }
 
-  if (Ritable_index != noreg) {
-    __ ld_ptr(Rcache, index_offset, Ritable_index);
+  if (itable_index != noreg) {
+    // pick up itable index from f2 also:
+    assert(byte_no == f1_byte, "already picked up f1");
+    __ ld_ptr(Address(cache, index_offset), itable_index);
   }
-  __ ld_ptr(Rcache, flags_offset, Rflags);
+  __ ld_ptr(Address(cache, flags_offset), flags);
 }
 
 // The Rcache register must be set before call
@@ -2272,7 +2283,7 @@
 
   if (__ membar_has_effect(membar_bits)) {
     // Get volatile flag
-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
+    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
     __ and3(Rflags, Lscratch, Lscratch);
   }
 
@@ -2280,9 +2291,9 @@
 
   // compute field type
   Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj;
-  __ srl(Rflags, ConstantPoolCacheEntry::tosBits, Rflags);
-  // Make sure we don't need to mask Rflags for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
+  __ srl(Rflags, ConstantPoolCacheEntry::tos_state_shift, Rflags);
+  // Make sure we don't need to mask Rflags after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
 
   // Check atos before itos for getstatic, more likely (in Queens at least)
   __ cmp(Rflags, atos);
@@ -2445,7 +2456,7 @@
   if (__ membar_has_effect(membar_bits)) {
     // Get volatile flag
     __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::f2_offset(), Rflags);
-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
+    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
   }
 
   switch (bytecode()) {
@@ -2569,9 +2580,9 @@
       Label two_word, valsizeknown;
       __ ld_ptr(G1_scratch, cp_base_offset + ConstantPoolCacheEntry::flags_offset(), Rflags);
       __ mov(Lesp, G4_scratch);
-      __ srl(Rflags, ConstantPoolCacheEntry::tosBits, Rflags);
-      // Make sure we don't need to mask Rflags for tosBits after the above shift
-      ConstantPoolCacheEntry::verify_tosBits();
+      __ srl(Rflags, ConstantPoolCacheEntry::tos_state_shift, Rflags);
+      // Make sure we don't need to mask Rflags after the above shift
+      ConstantPoolCacheEntry::verify_tos_state_shift();
       __ cmp(Rflags, ltos);
       __ br(Assembler::equal, false, Assembler::pt, two_word);
       __ delayed()->cmp(Rflags, dtos);
@@ -2625,7 +2636,7 @@
 
   Label notVolatile, checkVolatile, exit;
   if (__ membar_has_effect(read_bits) || __ membar_has_effect(write_bits)) {
-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
+    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
     __ and3(Rflags, Lscratch, Lscratch);
 
     if (__ membar_has_effect(read_bits)) {
@@ -2635,9 +2646,9 @@
     }
   }
 
-  __ srl(Rflags, ConstantPoolCacheEntry::tosBits, Rflags);
-  // Make sure we don't need to mask Rflags for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
+  __ srl(Rflags, ConstantPoolCacheEntry::tos_state_shift, Rflags);
+  // Make sure we don't need to mask Rflags after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
 
   // compute field type
   Label notInt, notShort, notChar, notObj, notByte, notLong, notFloat;
@@ -2833,7 +2844,7 @@
   Label notVolatile, checkVolatile, exit;
   if (__ membar_has_effect(read_bits) || __ membar_has_effect(write_bits)) {
     __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::flags_offset(), Rflags);
-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
+    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
     __ and3(Rflags, Lscratch, Lscratch);
     if (__ membar_has_effect(read_bits)) {
       __ cmp_and_br_short(Lscratch, 0, Assembler::equal, Assembler::pt, notVolatile);
@@ -2916,7 +2927,7 @@
 
     // Test volatile
     Label notVolatile;
-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
+    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
     __ btst(Rflags, Lscratch);
     __ br(Assembler::zero, false, Assembler::pt, notVolatile);
     __ delayed()->nop();
@@ -2936,27 +2947,82 @@
   ShouldNotReachHere();
 }
 
+
+void TemplateTable::prepare_invoke(int byte_no,
+                                   Register method,  // linked method (or i-klass)
+                                   Register ra,      // return address
+                                   Register index,   // itable index, MethodType, etc.
+                                   Register recv,    // if caller wants to see it
+                                   Register flags    // if caller wants to test it
+                                   ) {
+  // determine flags
+  const Bytecodes::Code code = bytecode();
+  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
+  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
+  const bool is_invokehandle     = code == Bytecodes::_invokehandle;
+  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
+  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
+  const bool load_receiver       = (recv != noreg);
+  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
+  assert(recv  == noreg || recv  == O0, "");
+  assert(flags == noreg || flags == O1, "");
+
+  // setup registers & access constant pool cache
+  if (recv  == noreg)  recv  = O0;
+  if (flags == noreg)  flags = O1;
+  const Register temp = O2;
+  assert_different_registers(method, ra, index, recv, flags, temp);
+
+  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
+
+  __ mov(SP, O5_savedSP);  // record SP that we wanted the callee to restore
+
+  // maybe push appendix to arguments
+  if (is_invokedynamic || is_invokehandle) {
+    Label L_no_push;
+    __ verify_oop(index);
+    __ set((1 << ConstantPoolCacheEntry::has_appendix_shift), temp);
+    __ btst(flags, temp);
+    __ br(Assembler::zero, false, Assembler::pt, L_no_push);
+    __ delayed()->nop();
+    // Push the appendix as a trailing parameter.
+    // This must be done before we get the receiver,
+    // since the parameter_size includes it.
+    __ push_ptr(index);  // push appendix (MethodType, CallSite, etc.)
+    __ bind(L_no_push);
+  }
+
+  // load receiver if needed (after appendix is pushed so parameter size is correct)
+  if (load_receiver) {
+    __ and3(flags, ConstantPoolCacheEntry::parameter_size_mask, temp);  // get parameter size
+    __ load_receiver(temp, recv);  //  __ argument_address uses Gargs but we need Lesp
+    __ verify_oop(recv);
+  }
+
+  // compute return type
+  __ srl(flags, ConstantPoolCacheEntry::tos_state_shift, ra);
+  // Make sure we don't need to mask flags after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
+  // load return address
+  {
+    const address table_addr = (is_invokeinterface || is_invokedynamic) ?
+        (address)Interpreter::return_5_addrs_by_index_table() :
+        (address)Interpreter::return_3_addrs_by_index_table();
+    AddressLiteral table(table_addr);
+    __ set(table, temp);
+    __ sll(ra, LogBytesPerWord, ra);
+    __ ld_ptr(Address(temp, ra), ra);
+  }
+}
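
Editor's note: ordering is the subtle point in prepare_invoke. For invokedynamic and invokehandle the resolved appendix (CallSite or MethodType from f1) is pushed as a hidden trailing argument before the receiver is located, because the parameter_size encoded in the flags already counts the appendix. The resulting expression-stack layout, sketched as comments:

    // Lesp (top) -> appendix (MethodType/CallSite), pushed by prepare_invoke
    //               explicit arguments
    //               receiver  <- found parameter_size slots down, appendix included
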
+
+
 void TemplateTable::generate_vtable_call(Register Rrecv, Register Rindex, Register Rret) {
   Register Rtemp = G4_scratch;
   Register Rcall = Rindex;
   assert_different_registers(Rcall, G5_method, Gargs, Rret);
 
   // get target methodOop & entry point
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  if (vtableEntry::size() % 3 == 0) {
-    // scale the vtable index by 12:
-    int one_third = vtableEntry::size() / 3;
-    __ sll(Rindex, exact_log2(one_third * 1 * wordSize), Rtemp);
-    __ sll(Rindex, exact_log2(one_third * 2 * wordSize), Rindex);
-    __ add(Rindex, Rtemp, Rindex);
-  } else {
-    // scale the vtable index by 8:
-    __ sll(Rindex, exact_log2(vtableEntry::size() * wordSize), Rindex);
-  }
-
-  __ add(Rrecv, Rindex, Rrecv);
-  __ ld_ptr(Rrecv, base + vtableEntry::method_offset_in_bytes(), G5_method);
-
+  __ lookup_virtual_method(Rrecv, Rindex, G5_method);
   __ call_from_interpreter(Rcall, Gargs, Rret);
 }
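
Editor's note: the deleted shift-and-add scaling now lives behind MacroAssembler::lookup_virtual_method (shared with vtableStubs_sparc.cpp below). The trick it encapsulated is worth recording: when vtableEntry::size() is 3 words, the index is scaled without a multiply by splitting the factor into two powers of two:

    // 4-byte words: idx * 12 == (idx << 2) + (idx << 3)
    // 8-byte words: idx * 24 == (idx << 3) + (idx << 4)
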
 
@@ -2965,16 +3031,16 @@
   assert(byte_no == f2_byte, "use this argument");
 
   Register Rscratch = G3_scratch;
-  Register Rtemp = G4_scratch;
-  Register Rret = Lscratch;
-  Register Rrecv = G5_method;
+  Register Rtemp    = G4_scratch;
+  Register Rret     = Lscratch;
+  Register O0_recv  = O0;
   Label notFinal;
 
   load_invoke_cp_cache_entry(byte_no, G5_method, noreg, Rret, true, false, false);
   __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore
 
   // Check for vfinal
-  __ set((1 << ConstantPoolCacheEntry::vfinalMethod), G4_scratch);
+  __ set((1 << ConstantPoolCacheEntry::is_vfinal_shift), G4_scratch);
   __ btst(Rret, G4_scratch);
   __ br(Assembler::zero, false, Assembler::pt, notFinal);
   __ delayed()->and3(Rret, 0xFF, G4_scratch);      // gets number of parameters
@@ -2986,27 +3052,27 @@
   __ bind(notFinal);
 
   __ mov(G5_method, Rscratch);  // better scratch register
-  __ load_receiver(G4_scratch, O0);  // gets receiverOop
-  // receiver is in O0
-  __ verify_oop(O0);
+  __ load_receiver(G4_scratch, O0_recv);  // gets receiverOop
+  // receiver is in O0_recv
+  __ verify_oop(O0_recv);
 
   // get return address
   AddressLiteral table(Interpreter::return_3_addrs_by_index_table());
   __ set(table, Rtemp);
-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
-  // Make sure we don't need to mask Rret for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
+  __ srl(Rret, ConstantPoolCacheEntry::tos_state_shift, Rret);          // get return type
+  // Make sure we don't need to mask Rret after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
   __ sll(Rret,  LogBytesPerWord, Rret);
   __ ld_ptr(Rtemp, Rret, Rret);         // get return address
 
   // get receiver klass
-  __ null_check(O0, oopDesc::klass_offset_in_bytes());
-  __ load_klass(O0, Rrecv);
-  __ verify_oop(Rrecv);
-
-  __ profile_virtual_call(Rrecv, O4);
-
-  generate_vtable_call(Rrecv, Rscratch, Rret);
+  __ null_check(O0_recv, oopDesc::klass_offset_in_bytes());
+  __ load_klass(O0_recv, O0_recv);
+  __ verify_oop(O0_recv);
+
+  __ profile_virtual_call(O0_recv, O4);
+
+  generate_vtable_call(O0_recv, Rscratch, Rret);
 }
 
 void TemplateTable::fast_invokevfinal(int byte_no) {
@@ -3036,9 +3102,9 @@
   // get return address
   AddressLiteral table(Interpreter::return_3_addrs_by_index_table());
   __ set(table, Rtemp);
-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
-  // Make sure we don't need to mask Rret for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
+  __ srl(Rret, ConstantPoolCacheEntry::tos_state_shift, Rret);          // get return type
+  // Make sure we don't need to mask Rret after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
   __ sll(Rret,  LogBytesPerWord, Rret);
   __ ld_ptr(Rtemp, Rret, Rret);         // get return address
 
@@ -3047,65 +3113,37 @@
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
+
 void TemplateTable::invokespecial(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
 
-  Register Rscratch = G3_scratch;
-  Register Rtemp = G4_scratch;
-  Register Rret = Lscratch;
-
-  load_invoke_cp_cache_entry(byte_no, G5_method, noreg, Rret, /*virtual*/ false, false, false);
-  __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore
-
-  __ verify_oop(G5_method);
-
-  __ lduh(G5_method, in_bytes(methodOopDesc::size_of_parameters_offset()), G4_scratch);
-  __ load_receiver(G4_scratch, O0);
-
-  // receiver NULL check
-  __ null_check(O0);
-
-  __ profile_call(O4);
-
-  // get return address
-  AddressLiteral table(Interpreter::return_3_addrs_by_index_table());
-  __ set(table, Rtemp);
-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
-  // Make sure we don't need to mask Rret for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
-  __ sll(Rret,  LogBytesPerWord, Rret);
-  __ ld_ptr(Rtemp, Rret, Rret);         // get return address
+  const Register Rret     = Lscratch;
+  const Register O0_recv  = O0;
+  const Register Rscratch = G3_scratch;
+
+  prepare_invoke(byte_no, G5_method, Rret, noreg, O0_recv);  // get receiver also for null check
+  __ null_check(O0_recv);
 
   // do the call
+  __ verify_oop(G5_method);
+  __ profile_call(O4);
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
+
 void TemplateTable::invokestatic(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
 
-  Register Rscratch = G3_scratch;
-  Register Rtemp = G4_scratch;
-  Register Rret = Lscratch;
-
-  load_invoke_cp_cache_entry(byte_no, G5_method, noreg, Rret, /*virtual*/ false, false, false);
-  __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore
-
-  __ verify_oop(G5_method);
-
-  __ profile_call(O4);
-
-  // get return address
-  AddressLiteral table(Interpreter::return_3_addrs_by_index_table());
-  __ set(table, Rtemp);
-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
-  // Make sure we don't need to mask Rret for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
-  __ sll(Rret,  LogBytesPerWord, Rret);
-  __ ld_ptr(Rtemp, Rret, Rret);         // get return address
+  const Register Rret     = Lscratch;
+  const Register Rscratch = G3_scratch;
+
+  prepare_invoke(byte_no, G5_method, Rret);  // get f1 methodOop
 
   // do the call
+  __ verify_oop(G5_method);
+  __ profile_call(O4);
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
@@ -3122,7 +3160,7 @@
   Label notFinal;
 
   // Check for vfinal
-  __ set((1 << ConstantPoolCacheEntry::vfinalMethod), Rscratch);
+  __ set((1 << ConstantPoolCacheEntry::is_vfinal_shift), Rscratch);
   __ btst(Rflags, Rscratch);
   __ br(Assembler::zero, false, Assembler::pt, notFinal);
   __ delayed()->nop();
@@ -3144,53 +3182,37 @@
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
 
-  Register Rscratch = G4_scratch;
-  Register Rret = G3_scratch;
-  Register Rindex = Lscratch;
-  Register Rinterface = G1_scratch;
-  Register RklassOop = G5_method;
-  Register Rflags = O1;
+  const Register Rinterface  = G1_scratch;
+  const Register Rret        = G3_scratch;
+  const Register Rindex      = Lscratch;
+  const Register O0_recv     = O0;
+  const Register O1_flags    = O1;
+  const Register O2_klassOop = O2;
+  const Register Rscratch    = G4_scratch;
   assert_different_registers(Rscratch, G5_method);
 
-  load_invoke_cp_cache_entry(byte_no, Rinterface, Rindex, Rflags, /*virtual*/ false, false, false);
-  __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore
-
-  // get receiver
-  __ and3(Rflags, 0xFF, Rscratch);       // gets number of parameters
-  __ load_receiver(Rscratch, O0);
-  __ verify_oop(O0);
-
-  __ mov(Rflags, Rret);
-
-  // get return address
-  AddressLiteral table(Interpreter::return_5_addrs_by_index_table());
-  __ set(table, Rscratch);
-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
-  // Make sure we don't need to mask Rret for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
-  __ sll(Rret,  LogBytesPerWord, Rret);
-  __ ld_ptr(Rscratch, Rret, Rret);      // get return address
+  prepare_invoke(byte_no, Rinterface, Rret, Rindex, O0_recv, O1_flags);
 
   // get receiver klass
-  __ null_check(O0, oopDesc::klass_offset_in_bytes());
-  __ load_klass(O0, RklassOop);
-  __ verify_oop(RklassOop);
+  __ null_check(O0_recv, oopDesc::klass_offset_in_bytes());
+  __ load_klass(O0_recv, O2_klassOop);
+  __ verify_oop(O2_klassOop);
 
   // Special case of invokeinterface called for virtual method of
   // java.lang.Object.  See cpCacheOop.cpp for details.
   // This code isn't produced by javac, but could be produced by
   // another compliant java compiler.
   Label notMethod;
-  __ set((1 << ConstantPoolCacheEntry::methodInterface), Rscratch);
-  __ btst(Rflags, Rscratch);
+  __ set((1 << ConstantPoolCacheEntry::is_forced_virtual_shift), Rscratch);
+  __ btst(O1_flags, Rscratch);
   __ br(Assembler::zero, false, Assembler::pt, notMethod);
   __ delayed()->nop();
 
-  invokeinterface_object_method(RklassOop, Rinterface, Rret, Rflags);
+  invokeinterface_object_method(O2_klassOop, Rinterface, Rret, O1_flags);
 
   __ bind(notMethod);
 
-  __ profile_virtual_call(RklassOop, O4);
+  __ profile_virtual_call(O2_klassOop, O4);
 
   //
   // find entry point to call
@@ -3199,9 +3221,9 @@
   // compute start of first itableOffsetEntry (which is at end of vtable)
   const int base = instanceKlass::vtable_start_offset() * wordSize;
   Label search;
-  Register Rtemp = Rflags;
-
-  __ ld(RklassOop, instanceKlass::vtable_length_offset() * wordSize, Rtemp);
+  Register Rtemp = O1_flags;
+
+  __ ld(O2_klassOop, instanceKlass::vtable_length_offset() * wordSize, Rtemp);
   if (align_object_offset(1) > 1) {
     __ round_to(Rtemp, align_object_offset(1));
   }
@@ -3212,7 +3234,7 @@
     __ set(base, Rscratch);
     __ add(Rscratch, Rtemp, Rtemp);
   }
-  __ add(RklassOop, Rtemp, Rscratch);
+  __ add(O2_klassOop, Rtemp, Rscratch);
 
   __ bind(search);
 
@@ -3244,7 +3266,7 @@
   assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust instruction below");
   __ sll(Rindex, exact_log2(itableMethodEntry::size() * wordSize), Rindex);       // Rindex *= 8;
   __ add(Rscratch, Rindex, Rscratch);
-  __ ld_ptr(RklassOop, Rscratch, G5_method);
+  __ ld_ptr(O2_klassOop, Rscratch, G5_method);
 
   // Check for abstract method error.
   {
@@ -3260,13 +3282,42 @@
 
   __ verify_oop(G5_method);
   __ call_from_interpreter(Rcall, Gargs, Rret);
-
+}
+
+
+void TemplateTable::invokehandle(int byte_no) {
+  transition(vtos, vtos);
+  assert(byte_no == f12_oop, "use this argument");
+
+  if (!EnableInvokeDynamic) {
+    // rewriter does not generate this bytecode
+    __ should_not_reach_here();
+    return;
+  }
+
+  const Register Rret       = Lscratch;
+  const Register G4_mtype   = G4_scratch;  // f1
+  const Register O0_recv    = O0;
+  const Register Rscratch   = G3_scratch;
+
+  prepare_invoke(byte_no, G5_method, Rret, G4_mtype, O0_recv);
+  __ null_check(O0_recv);
+
+  // G4: MethodType object (from f1)
+  // G5: MH.linkToCallSite method (from f2)
+
+  // Note:  G4_mtype is already pushed (if necessary) by prepare_invoke
+
+  // do the call
+  __ verify_oop(G5_method);
+  __ profile_final_call(O4);  // FIXME: profile the LambdaForm also
+  __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
 
 void TemplateTable::invokedynamic(int byte_no) {
   transition(vtos, vtos);
-  assert(byte_no == f1_oop, "use this argument");
+  assert(byte_no == f12_oop, "use this argument");
 
   if (!EnableInvokeDynamic) {
     // We should not encounter this bytecode if !EnableInvokeDynamic.
@@ -3279,42 +3330,24 @@
     return;
   }
 
-  // G5: CallSite object (f1)
-  // XX: unused (f2)
-  // XX: flags (unused)
-
-  Register G5_callsite = G5_method;
-  Register Rscratch    = G3_scratch;
-  Register Rtemp       = G1_scratch;
-  Register Rret        = Lscratch;
-
-  load_invoke_cp_cache_entry(byte_no, G5_callsite, noreg, Rret,
-                             /*virtual*/ false, /*vfinal*/ false, /*indy*/ true);
-  __ mov(SP, O5_savedSP);  // record SP that we wanted the callee to restore
-
+  const Register Rret        = Lscratch;
+  const Register G4_callsite = G4_scratch;
+  const Register Rscratch    = G3_scratch;
+
+  prepare_invoke(byte_no, G5_method, Rret, G4_callsite);
+
+  // G4: CallSite object (from f1)
+  // G5: MH.linkToCallSite method (from f2)
+
+  // Note:  G4_callsite is already pushed by prepare_invoke
+
+  // %%% should make a type profile for any invokedynamic that takes a ref argument
   // profile this call
   __ profile_call(O4);
 
-  // get return address
-  AddressLiteral table(Interpreter::return_5_addrs_by_index_table());
-  __ set(table, Rtemp);
-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);  // get return type
-  // Make sure we don't need to mask Rret for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
-  __ sll(Rret, LogBytesPerWord, Rret);
-  __ ld_ptr(Rtemp, Rret, Rret);  // get return address
-
-  __ verify_oop(G5_callsite);
-  __ load_heap_oop(G5_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, Rscratch), G3_method_handle);
-  __ null_check(G3_method_handle);
-  __ verify_oop(G3_method_handle);
-
-  // Adjust Rret first so Llast_SP can be same as Rret
-  __ add(Rret, -frame::pc_return_offset, O7);
-  __ add(Lesp, BytesPerWord, Gargs);  // setup parameter pointer
-  __ jump_to_method_handle_entry(G3_method_handle, Rtemp, /* emit_delayed_nop */ false);
-  // Record SP so we can remove any stack space allocated by adapter transition
-  __ delayed()->mov(SP, Llast_SP);
+  // do the call
+  __ verify_oop(G5_method);
+  __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
 
--- a/src/cpu/sparc/vm/templateTable_sparc.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/templateTable_sparc.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -25,6 +25,13 @@
 #ifndef CPU_SPARC_VM_TEMPLATETABLE_SPARC_HPP
 #define CPU_SPARC_VM_TEMPLATETABLE_SPARC_HPP
 
+  static void prepare_invoke(int byte_no,
+                             Register method,         // linked method (or i-klass)
+                             Register ra,             // return address
+                             Register index = noreg,  // itable index, MethodType, etc.
+                             Register recv  = noreg,  // if caller wants to see it
+                             Register flags = noreg   // if caller wants to test it
+                             );
   // helper function
   static void invokevfinal_helper(Register Rcache, Register Rret);
   static void invokeinterface_object_method(Register RklassOop, Register Rcall,
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -106,10 +106,10 @@
     if (FLAG_IS_DEFAULT(OptoLoopAlignment)) {
       FLAG_SET_DEFAULT(OptoLoopAlignment, 4);
     }
-    // When using CMS, we cannot use memset() in BOT updates because
-    // the sun4v/CMT version in libc_psr uses BIS which exposes
-    // "phantom zeros" to concurrent readers. See 6948537.
-    if (FLAG_IS_DEFAULT(UseMemSetInBOT) && UseConcMarkSweepGC) {
+    // When using CMS or G1, we cannot use memset() in BOT updates
+    // because the sun4v/CMT version in libc_psr uses BIS which
+    // exposes "phantom zeros" to concurrent readers. See 6948537.
+    if (FLAG_IS_DEFAULT(UseMemSetInBOT) && (UseConcMarkSweepGC || UseG1GC)) {
       FLAG_SET_DEFAULT(UseMemSetInBOT, false);
     }
 #ifdef _LP64
@@ -217,6 +217,8 @@
   // Currently not supported anywhere.
   FLAG_SET_DEFAULT(UseFPUForSpilling, false);
 
+  MaxVectorSize = 8;
+
   assert((InteriorEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
 #endif
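
Editor's note: pinning MaxVectorSize to 8 keeps every C2 vector within a single double FP register, which is exactly the shape the new LDDF/STDF vector rules in sparc.ad can handle.
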
 
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -44,10 +44,11 @@
     fmaf_instructions    = 10,
     fmau_instructions    = 11,
     vis3_instructions    = 12,
-    sparc64_family       = 13,
-    T_family             = 14,
-    T1_model             = 15,
-    cbcond_instructions  = 16
+    cbcond_instructions  = 13,
+    sparc64_family       = 14,
+    M_family             = 15,
+    T_family             = 16,
+    T1_model             = 17
   };
 
   enum Feature_Flag_Set {
@@ -67,10 +68,11 @@
     fmaf_instructions_m     = 1 << fmaf_instructions,
     fmau_instructions_m     = 1 << fmau_instructions,
     vis3_instructions_m     = 1 << vis3_instructions,
+    cbcond_instructions_m   = 1 << cbcond_instructions,
     sparc64_family_m        = 1 << sparc64_family,
+    M_family_m              = 1 << M_family,
     T_family_m              = 1 << T_family,
     T1_model_m              = 1 << T1_model,
-    cbcond_instructions_m   = 1 << cbcond_instructions,
 
     generic_v8_m        = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
     generic_v9_m        = generic_v8_m | v9_instructions_m,
@@ -89,6 +91,7 @@
   static int  platform_features(int features);
 
   // Family predicates: the M line, and the niagara line (T series)
+  static bool is_M_family(int features) { return (features & M_family_m) != 0; }
   static bool is_T_family(int features) { return (features & T_family_m) != 0; }
   static bool is_niagara() { return is_T_family(_features); }
   DEBUG_ONLY( static bool is_niagara(int features)  { return (features & sun4v_m) != 0; } )
--- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -70,7 +70,6 @@
   __ load_klass(O0, G3_scratch);
 
   // set methodOop (in case of interpreted method), and destination address
-  int entry_offset = instanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L;
@@ -82,13 +81,8 @@
     __ bind(L);
   }
 #endif
-  int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();
-  if (Assembler::is_simm13(v_off)) {
-    __ ld_ptr(G3, v_off, G5_method);
-  } else {
-    __ set(v_off,G5);
-    __ ld_ptr(G3, G5, G5_method);
-  }
+
+  __ lookup_virtual_method(G3_scratch, vtable_index, G5_method);
 
 #ifndef PRODUCT
   if (DebugVtables) {
--- a/src/cpu/x86/vm/assembler_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -41,6 +41,15 @@
 #include "gc_implementation/g1/heapRegion.hpp"
 #endif
 
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) stop(error)
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#define STOP(error) block_comment(error); stop(error)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
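
These macros follow the usual PRODUCT/debug split: annotations survive only in debug builds and compile away in product builds. A tiny compilable illustration of the pattern, with std::puts standing in for block_comment():

    #include <cstdio>

    #ifdef PRODUCT
    #define BLOCK_COMMENT(str) /* nothing */
    #else
    #define BLOCK_COMMENT(str) std::puts(str)  // stand-in for block_comment()
    #endif

    int main() {
      BLOCK_COMMENT("entry:");  // printed only in non-PRODUCT builds
      return 0;
    }
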
 // Implementation of AddressLiteral
 
 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
@@ -990,32 +999,22 @@
 
 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::addsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_operand(dst, src);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::addss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::addss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_operand(dst, src);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::andl(Address dst, int32_t imm32) {
@@ -1043,36 +1042,6 @@
   emit_arith(0x23, 0xC0, dst, src);
 }
 
-void Assembler::andpd(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::andpd(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x54);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::andps(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::andps(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x54);
-  emit_byte(0xC0 | encode);
-}
-
 void Assembler::bsfl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   emit_byte(0x0F);
@@ -1237,61 +1206,42 @@
   // NOTE: dbx seems to decode this as comiss even though the
   // 0x66 is there. Strangely, ucomisd comes out correct
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x2F);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x2F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::comiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2F);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
-  emit_byte(0xE6);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x5B);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5A);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
@@ -1303,10 +1253,7 @@
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x2A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
@@ -1318,25 +1265,17 @@
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x2A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5A);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
 }
 
 
@@ -1364,32 +1303,22 @@
 
 void Assembler::divsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::divss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::divss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::emms() {
@@ -1625,15 +1554,18 @@
 
 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x28);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x28);
+  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
+  emit_byte(0x16);
   emit_byte(0xC0 | encode);
 }
 
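The new movlhps (register form, opcode 0F 16 /r) copies the low 64 bits of src into the high 64 bits of dst and leaves dst's low half intact. A plain C++ model of those semantics:

    #include <cstdint>
    #include <cstdio>

    struct Xmm { uint64_t lo, hi; };

    // MOVLHPS dst, src: dst[127:64] = src[63:0]; dst[63:0] unchanged.
    static void movlhps(Xmm& dst, const Xmm& src) { dst.hi = src.lo; }

    int main() {
      Xmm a = {0x1111, 0x2222}, b = {0x3333, 0x4444};
      movlhps(a, b);
      std::printf("lo=%llx hi=%llx\n",
                  (unsigned long long)a.lo, (unsigned long long)a.hi);  // lo=1111 hi=3333
      return 0;
    }
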
@@ -1686,26 +1618,27 @@
   emit_operand(dst, src);
 }
 
+void Assembler::movdl(Address dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66);
+  emit_byte(0x7E);
+  emit_operand(src, dst);
+}
+
 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x6F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::movdqu(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x6F);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
-  emit_byte(0x6F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movdqu(Address dst, XMMRegister src) {
@@ -1716,6 +1649,35 @@
   emit_operand(src, dst);
 }
 
+// Move Unaligned 256bit Vector
+void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
+  assert(UseAVX, "");
+  bool vector256 = true;
+  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
+  emit_byte(0x6F);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vmovdqu(XMMRegister dst, Address src) {
+  assert(UseAVX, "");
+  InstructionMark im(this);
+  bool vector256 = true;
+  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
+  emit_byte(0x6F);
+  emit_operand(dst, src);
+}
+
+void Assembler::vmovdqu(Address dst, XMMRegister src) {
+  assert(UseAVX, "");
+  InstructionMark im(this);
+  bool vector256 = true;
+  // swap src<->dst for encoding
+  assert(src != xnoreg, "sanity");
+  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
+  emit_byte(0x7F);
+  emit_operand(src, dst);
+}
+
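The swap comment in the store form above reflects the encoding: for both the 6F (load) and 7F (store) opcodes the XMM register lives in ModRM.reg and the memory operand in ModRM.rm, so one prefix helper taking (register, memory) serves both directions and only the opcode flips. A rough model of that call shape (illustrative types only):

    #include <cstdio>

    struct Mem { long disp; };

    // The encoder always receives (modrm_reg, modrm_rm), so the store
    // form passes its source register first, just like the load form.
    static void encode(unsigned char opcode, int xmm_reg, Mem rm) {
      std::printf("op=%02X reg=xmm%d rm=[disp %ld]\n", opcode, xmm_reg, rm.disp);
    }

    int main() {
      Mem m = { 16 };
      encode(0x6F, 1, m);  // load:  vmovdqu ymm1, [mem]
      encode(0x7F, 1, m);  // store: vmovdqu [mem], ymm1, same (reg, rm) order
      return 0;
    }
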
 // Uses zero extension on 64bit
 
 void Assembler::movl(Register dst, int32_t imm32) {
@@ -1757,10 +1719,7 @@
 // The selection is done in MacroAssembler::movdbl() and movflt().
 void Assembler::movlpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x12);
-  emit_operand(dst, src);
+  emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::movq( MMXRegister dst, Address src ) {
@@ -1817,17 +1776,12 @@
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x10);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::movsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F2);
-  emit_byte(0x10);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::movsd(Address dst, XMMRegister src) {
@@ -1840,17 +1794,12 @@
 
 void Assembler::movss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x10);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x10);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movss(Address dst, XMMRegister src) {
@@ -1948,32 +1897,22 @@
 
 void Assembler::mulsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_operand(dst, src);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::mulss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_operand(dst, src);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::negl(Register dst) {
@@ -2262,17 +2201,12 @@
 void Assembler::packuswb(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x67);
-  emit_operand(dst, src);
+  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x67);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
@@ -2286,7 +2220,7 @@
 
 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
   emit_byte(0x61);
   emit_byte(0xC0 | encode);
   emit_byte(imm8);
@@ -2302,7 +2236,7 @@
 
 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
   emit_byte(0x30);
   emit_byte(0xC0 | encode);
 }
@@ -2403,28 +2337,10 @@
   a_byte(p);
 }
 
-void Assembler::por(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEB);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::por(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEB);
-  emit_operand(dst, src);
-}
-
 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x70);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
   emit_byte(mode & 0xFF);
 
 }
@@ -2443,9 +2359,7 @@
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
-  emit_byte(0x70);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
   emit_byte(mode & 0xFF);
 }
 
@@ -2460,18 +2374,6 @@
   emit_byte(mode & 0xFF);
 }
 
-void Assembler::psrlq(XMMRegister dst, int shift) {
-  // Shift 64 bit value logically right by specified number of bits.
-  // HMM Table D-1 says sse2 or mmx.
-  // Do not confuse it with psrldq SSE2 instruction which
-  // shifts 128 bit value in xmm register by number of bytes.
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
-  emit_byte(0x73);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift);
-}
-
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -2492,7 +2394,7 @@
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
   emit_byte(0x17);
   emit_byte(0xC0 | encode);
 }
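
The pcmpestri/pmovzxbw/ptest changes thread xnoreg through because these instructions have no second source: VEX stores the extra source register ones-complemented in its vvvv field, and "no register" must encode as 1111b. A sketch of that field packing (illustrative helper, not HotSpot's):

    #include <cstdio>

    // vvvv holds the ones-complement of the extra source register's
    // number; passing "no register" therefore yields 1111b.
    static unsigned vex_vvvv(int nds_encoding /* -1 for xnoreg */) {
      unsigned v = (nds_encoding < 0) ? 0 : (unsigned)nds_encoding;
      return (~v) & 0xF;
    }

    int main() {
      std::printf("xnoreg -> %X, xmm3 -> %X\n", vex_vvvv(-1), vex_vvvv(3));  // F, C
      return 0;
    }
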
@@ -2500,33 +2402,28 @@
 void Assembler::punpcklbw(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x60);
-  emit_operand(dst, src);
+  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x60);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpckldq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x62);
-  emit_operand(dst, src);
+  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x62);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::push(int32_t imm32) {
@@ -2556,22 +2453,6 @@
 }
 #endif
 
-void Assembler::pxor(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEF);
-  emit_operand(dst, src);
-}
-
-void Assembler::pxor(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEF);
-  emit_byte(0xC0 | encode);
-}
-
 void Assembler::rcll(Register dst, int imm8) {
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
@@ -2730,32 +2611,22 @@
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x51);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::sqrtsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x51);
-  emit_operand(dst, src);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x51);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::sqrtss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x51);
-  emit_operand(dst, src);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::stmxcsr( Address dst) {
@@ -2805,32 +2676,22 @@
 
 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::subsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::subss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::subss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::testb(Register dst, int imm8) {
@@ -2868,32 +2729,22 @@
 
 void Assembler::ucomisd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x2E);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x2E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::ucomiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2E);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
 }
 
 
@@ -2935,189 +2786,741 @@
   emit_arith(0x33, 0xC0, dst, src);
 }
 
-void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x57);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::xorpd(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x57);
-  emit_operand(dst, src);
-}
-
-
-void Assembler::xorps(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x57);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::xorps(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x57);
-  emit_operand(dst, src);
-}
-
-// AVX 3-operands non destructive source instructions (encoded with VEX prefix)
+
+// AVX 3-operand scalar floating-point arithmetic instructions
 
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_operand(dst, src);
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_operand(dst, src);
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
 }
 
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
-  emit_byte(0x54);
-  emit_operand(dst, src);
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
 }
 
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
 }
 
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
 }
 
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_operand(dst, src);
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_operand(dst, src);
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
 }
 
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
-}
-
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
 
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
 }
 
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) {
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+//====================VECTOR ARITHMETIC=====================================
+
+// Floating-point vector arithmetic
+
+void Assembler::addpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::addps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::subpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::subps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::mulps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::divpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::divps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::andpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::andps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::andps(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::andpd(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::xorps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::xorpd(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::xorps(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
-  emit_byte(0x57);
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+
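The floating-point block above leans on one property of the SSE/AVX encodings: a single opcode covers an operation, and the SIMD prefix picks the type (no prefix = packed single, 66 = packed double, F3 = scalar single, F2 = scalar double). A small lookup of the opcodes used here, per the Intel SDM:

    #include <cstdio>

    static const char* fp_op_name(int opcode) {
      switch (opcode) {
        case 0x54: return "and";
        case 0x57: return "xor";
        case 0x58: return "add";
        case 0x59: return "mul";
        case 0x5C: return "sub";
        case 0x5E: return "div";
        default:   return "?";
      }
    }

    int main() {
      std::printf("0x58 -> %s, 0x5E -> %s\n", fp_op_name(0x58), fp_op_name(0x5E));
      return 0;
    }
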
+// Integer vector arithmetic
+void Assembler::paddb(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::psubb(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sse4_1(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0x40);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+  emit_byte(0x40);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  InstructionMark im(this);
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+  emit_byte(0x40);
   emit_operand(dst, src);
 }
 
-void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) {
+// Shift packed integers left by the specified number of bits.
+void Assembler::psllw(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  emit_byte(0x71);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::pslld(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  emit_byte(0x72);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psllq(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  emit_byte(0x73);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+  emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+  emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
+  emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+// Shift packed integers logically right by the specified number of bits.
+void Assembler::psrlw(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  emit_byte(0x71);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrld(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  emit_byte(0x72);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrlq(XMMRegister dst, int shift) {
+  // Do not confuse it with the psrldq SSE2 instruction, which
+  // shifts a 128-bit value in an xmm register by a number of bytes.
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  emit_byte(0x73);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
+  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
+  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
+  emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+// Shift packed integers arithmetically right by the specified number of bits.
+void Assembler::psraw(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+  emit_byte(0x71);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrad(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+  emit_byte(0x72);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
+  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
+  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+
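All of the immediate-count shifts above pass a fixed xmm2/xmm4/xmm6 as the first register argument: for opcodes 71/72/73 the ModRM reg field is an opcode extension (/2 logical right, /4 arithmetic right, /6 left), not a real operand. A sketch of the resulting ModRM byte:

    #include <cstdio>

    // mod=11 (register direct), reg=/ext (opcode extension), rm=target xmm
    static unsigned shift_modrm(unsigned ext, unsigned xmm) {
      return 0xC0 | ((ext & 7) << 3) | (xmm & 7);
    }

    int main() {
      // psrlq xmm3, imm8 encodes as 66 0F 73 D3 ib; D3 = ModRM(/2, xmm3)
      std::printf("0x%02X\n", shift_modrm(2, 3));
      return 0;
    }
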
+// AND packed integers
+void Assembler::pand(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::por(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::pxor(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+
+void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
-  emit_byte(0x57);
-  emit_operand(dst, src);
+  bool vector256 = true;
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  emit_byte(0x18);
+  emit_byte(0xC0 | encode);
+  // 0x00 - insert into lower 128 bits
+  // 0x01 - insert into upper 128 bits
+  emit_byte(0x01);
+}
+
+void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx2(), "");
+  bool vector256 = true;
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  emit_byte(0x38);
+  emit_byte(0xC0 | encode);
+  // 0x00 - insert into lower 128 bits
+  // 0x01 - insert into upper 128 bits
+  emit_byte(0x01);
+}
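// Editorial note (semantics per the Intel SDM; not part of this changeset):
// with imm8 = 0x01, VINSERTF128/VINSERTI128 ymm1, ymm2, xmm3 compute
//   dst[127:0]   = nds[127:0]
//   dst[255:128] = src[127:0]
// which is why these helpers are described as copying into the high 128 bits.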
+
+void Assembler::vzeroupper() {
+  assert(VM_Version::supports_avx(), "");
+  (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
+  emit_byte(0x77);
 }
 
 
@@ -3578,6 +3981,21 @@
   emit_byte(0xF1);
 }
 
+void Assembler::frndint() {
+  emit_byte(0xD9);
+  emit_byte(0xFC);
+}
+
+void Assembler::f2xm1() {
+  emit_byte(0xD9);
+  emit_byte(0xF0);
+}
+
+void Assembler::fldl2e() {
+  emit_byte(0xD9);
+  emit_byte(0xEA);
+}
+
 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
@@ -3681,6 +4099,49 @@
   }
 }
 
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, pre);
+  emit_byte(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
+  int encode = simd_prefix_and_encode(dst, dst, src, pre);
+  emit_byte(opcode);
+  emit_byte(0xC0 | encode);
+}
+
+// Versions with no second source register (non-destructive source).
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
+  InstructionMark im(this);
+  simd_prefix(dst, xnoreg, src, pre);
+  emit_byte(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
+  emit_byte(opcode);
+  emit_byte(0xC0 | encode);
+}
+
+// 3-operands AVX instructions
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                               Address src, VexSimdPrefix pre, bool vector256) {
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, pre, vector256);
+  emit_byte(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                               XMMRegister src, VexSimdPrefix pre, bool vector256) {
+  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
+  emit_byte(opcode);
+  emit_byte(0xC0 | encode);
+}
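// Illustration (standalone sketch, not VM code): the register-register forms
// above encode their operands in a ModRM byte emitted as (0xC0 | encode),
// where mod = 11b selects register-direct addressing and the reg and r/m
// fields are packed below it.
#include <cstdint>
static inline uint8_t modrm_reg_reg(int reg_enc, int rm_enc) {
  return (uint8_t)(0xC0 | ((reg_enc & 7) << 3) | (rm_enc & 7)); // 11 reg r/m
}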
+
 #ifndef _LP64
 
 void Assembler::incl(Register dst) {
@@ -5393,23 +5854,7 @@
     // To see where a verify_oop failed, get $ebx+40/X for this frame.
     // This is the value of eip which points to where verify_oop will return.
     if (os::message_box(msg, "Execution stopped, print registers?")) {
-      ttyLocker ttyl;
-      tty->print_cr("eip = 0x%08x", eip);
-#ifndef PRODUCT
-      if ((WizardMode || Verbose) && PrintMiscellaneous) {
-        tty->cr();
-        findpc(eip);
-        tty->cr();
-      }
-#endif
-      tty->print_cr("rax = 0x%08x", rax);
-      tty->print_cr("rbx = 0x%08x", rbx);
-      tty->print_cr("rcx = 0x%08x", rcx);
-      tty->print_cr("rdx = 0x%08x", rdx);
-      tty->print_cr("rdi = 0x%08x", rdi);
-      tty->print_cr("rsi = 0x%08x", rsi);
-      tty->print_cr("rbp = 0x%08x", rbp);
-      tty->print_cr("rsp = 0x%08x", rsp);
+      print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
       BREAKPOINT;
       assert(false, "start up GDB");
     }
@@ -5421,12 +5866,53 @@
   ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
 }
 
+void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
+  ttyLocker ttyl;
+  FlagSetting fs(Debugging, true);
+  tty->print_cr("eip = 0x%08x", eip);
+#ifndef PRODUCT
+  if ((WizardMode || Verbose) && PrintMiscellaneous) {
+    tty->cr();
+    findpc(eip);
+    tty->cr();
+  }
+#endif
+#define PRINT_REG(rax) \
+  { tty->print("%s = ", #rax); os::print_location(tty, rax); }
+  PRINT_REG(rax);
+  PRINT_REG(rbx);
+  PRINT_REG(rcx);
+  PRINT_REG(rdx);
+  PRINT_REG(rdi);
+  PRINT_REG(rsi);
+  PRINT_REG(rbp);
+  PRINT_REG(rsp);
+#undef PRINT_REG
+  // Print some words near top of stack.
+  int* dump_sp = (int*) rsp;
+  for (int col1 = 0; col1 < 8; col1++) {
+    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
+    os::print_location(tty, *dump_sp++);
+  }
+  for (int row = 0; row < 16; row++) {
+    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
+    for (int col = 0; col < 8; col++) {
+      tty->print(" 0x%08x", *dump_sp++);
+    }
+    tty->cr();
+  }
+  // Print some instructions around pc:
+  Disassembler::decode((address)eip-64, (address)eip);
+  tty->print_cr("--------");
+  Disassembler::decode((address)eip, (address)eip+32);
+}
+
 void MacroAssembler::stop(const char* msg) {
   ExternalAddress message((address)msg);
   // push address of message
   pushptr(message.addr());
   { Label L; call(L, relocInfo::none); bind(L); }     // push eip
-  pusha();                                           // push registers
+  pusha();                                            // push registers
   call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
   hlt();
 }
@@ -5443,6 +5929,18 @@
   pop_CPU_state();
 }
 
+void MacroAssembler::print_state() {
+  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
+  pusha();                                            // push registers
+
+  push_CPU_state();
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)));
+  pop_CPU_state();
+
+  popa();
+  addl(rsp, wordSize);
+}
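// Editorial note: the "{ Label L; call(L, relocInfo::none); bind(L); }"
// idiom above calls the immediately following instruction, so the CALL
// pushes its own return address, leaving the current eip on the stack
// without needing a scratch register; the trailing addl(rsp, wordSize)
// in print_state() pops it again.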
+
 #else // _LP64
 
 // 64 bit versions
@@ -5908,14 +6406,33 @@
 }
 
 void MacroAssembler::warn(const char* msg) {
-  push(rsp);
+  push(rbp);
+  movq(rbp, rsp);
   andq(rsp, -16);     // align stack as required by push_CPU_state and call
-
   push_CPU_state();   // keeps alignment at 16 bytes
   lea(c_rarg0, ExternalAddress((address) msg));
   call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
   pop_CPU_state();
-  pop(rsp);
+  mov(rsp, rbp);
+  pop(rbp);
+}
+
+void MacroAssembler::print_state() {
+  address rip = pc();
+  pusha();            // get regs on stack
+  push(rbp);
+  movq(rbp, rsp);
+  andq(rsp, -16);     // align stack as required by push_CPU_state and call
+  push_CPU_state();   // keeps alignment at 16 bytes
+
+  lea(c_rarg0, InternalAddress(rip));
+  lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
+  call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);
+
+  pop_CPU_state();
+  mov(rsp, rbp);
+  pop(rbp);
+  popa();
 }
 
 #ifndef PRODUCT
@@ -5924,7 +6441,7 @@
 
 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
   // In order to get locks to work, we need to fake a in_VM state
-  if (ShowMessageBoxOnError ) {
+  if (ShowMessageBoxOnError) {
     JavaThread* thread = JavaThread::current();
     JavaThreadState saved_state = thread->thread_state();
     thread->set_thread_state(_thread_in_vm);
@@ -5938,30 +6455,9 @@
     // XXX correct this offset for amd64
     // This is the value of eip which points to where verify_oop will return.
     if (os::message_box(msg, "Execution stopped, print registers?")) {
-      ttyLocker ttyl;
-      tty->print_cr("rip = 0x%016lx", pc);
-#ifndef PRODUCT
-      tty->cr();
-      findpc(pc);
-      tty->cr();
-#endif
-      tty->print_cr("rax = 0x%016lx", regs[15]);
-      tty->print_cr("rbx = 0x%016lx", regs[12]);
-      tty->print_cr("rcx = 0x%016lx", regs[14]);
-      tty->print_cr("rdx = 0x%016lx", regs[13]);
-      tty->print_cr("rdi = 0x%016lx", regs[8]);
-      tty->print_cr("rsi = 0x%016lx", regs[9]);
-      tty->print_cr("rbp = 0x%016lx", regs[10]);
-      tty->print_cr("rsp = 0x%016lx", regs[11]);
-      tty->print_cr("r8  = 0x%016lx", regs[7]);
-      tty->print_cr("r9  = 0x%016lx", regs[6]);
-      tty->print_cr("r10 = 0x%016lx", regs[5]);
-      tty->print_cr("r11 = 0x%016lx", regs[4]);
-      tty->print_cr("r12 = 0x%016lx", regs[3]);
-      tty->print_cr("r13 = 0x%016lx", regs[2]);
-      tty->print_cr("r14 = 0x%016lx", regs[1]);
-      tty->print_cr("r15 = 0x%016lx", regs[0]);
+      print_state64(pc, regs);
       BREAKPOINT;
+      assert(false, "start up GDB");
     }
     ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
   } else {
@@ -5972,6 +6468,54 @@
   }
 }
 
+void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
+  ttyLocker ttyl;
+  FlagSetting fs(Debugging, true);
+  tty->print_cr("rip = 0x%016lx", pc);
+#ifndef PRODUCT
+  tty->cr();
+  findpc(pc);
+  tty->cr();
+#endif
+#define PRINT_REG(rax, value) \
+  { tty->print("%s = ", #rax); os::print_location(tty, value); }
+  PRINT_REG(rax, regs[15]);
+  PRINT_REG(rbx, regs[12]);
+  PRINT_REG(rcx, regs[14]);
+  PRINT_REG(rdx, regs[13]);
+  PRINT_REG(rdi, regs[8]);
+  PRINT_REG(rsi, regs[9]);
+  PRINT_REG(rbp, regs[10]);
+  PRINT_REG(rsp, regs[11]);
+  PRINT_REG(r8 , regs[7]);
+  PRINT_REG(r9 , regs[6]);
+  PRINT_REG(r10, regs[5]);
+  PRINT_REG(r11, regs[4]);
+  PRINT_REG(r12, regs[3]);
+  PRINT_REG(r13, regs[2]);
+  PRINT_REG(r14, regs[1]);
+  PRINT_REG(r15, regs[0]);
+#undef PRINT_REG
+  // Print some words near top of stack.
+  int64_t* rsp = (int64_t*) regs[11];
+  int64_t* dump_sp = rsp;
+  for (int col1 = 0; col1 < 8; col1++) {
+    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
+    os::print_location(tty, *dump_sp++);
+  }
+  for (int row = 0; row < 25; row++) {
+    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
+    for (int col = 0; col < 4; col++) {
+      tty->print(" 0x%016lx", *dump_sp++);
+    }
+    tty->cr();
+  }
+  // Print some instructions around pc:
+  Disassembler::decode((address)pc-64, (address)pc);
+  tty->print_cr("--------");
+  Disassembler::decode((address)pc, (address)pc+32);
+}
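// Editorial note: the regs[] indices above mirror the pusha frame as seen
// from rsp: the last register pushed (r15) ends up at regs[0] and the first
// (rax) at regs[15], which is why rax reads regs[15] and r15 reads regs[0].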
+
 #endif // _LP64
 
 // Now versions that are common to 32/64 bit
@@ -6341,7 +6885,7 @@
       get_thread(rax);
       cmpptr(java_thread, rax);
       jcc(Assembler::equal, L);
-      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
+      STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
       bind(L);
     }
     pop(rax);
@@ -6868,6 +7412,264 @@
   Assembler::fldcw(as_Address(src));
 }
 
+void MacroAssembler::pow_exp_core_encoding() {
+  // kills rax, rcx, rdx
+  subptr(rsp,sizeof(jdouble));
+  // computes 2^X. Stack: X ...
+  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
+  // keep it on the thread's stack to compute 2^int(X) later
+  // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1
+  // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
+  fld_s(0);                 // Stack: X X ...
+  frndint();                // Stack: int(X) X ...
+  fsuba(1);                 // Stack: int(X) X-int(X) ...
+  fistp_s(Address(rsp,0));  // move int(X) as integer to thread's stack. Stack: X-int(X) ...
+  f2xm1();                  // Stack: 2^(X-int(X))-1 ...
+  fld1();                   // Stack: 1 2^(X-int(X))-1 ...
+  faddp(1);                 // Stack: 2^(X-int(X))
+  // computes 2^(int(X)): add exponent bias (1023) to int(X), then
+  // shift int(X)+1023 to exponent position.
+  // The exponent is limited to 11 bits: if int(X)+1023 does not fit in
+  // 11 bits, set the result to NaN. 0x000 and 0x7FF are reserved exponent
+  // values, so detect them and set the result to NaN.
+  movl(rax,Address(rsp,0));
+  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
+  addl(rax, 1023);
+  movl(rdx,rax);
+  shll(rax,20);
+  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
+  addl(rdx,1);
+  // Check that 1 < int(X)+1023+1 < 2048
+  // in 3 steps:
+  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
+  // 2- int(X)+1023+1 != 0
+  // 3- int(X)+1023+1 != 1
+  // Do 2- first because addl just updated the flags.
+  cmov32(Assembler::equal,rax,rcx);
+  cmpl(rdx,1);
+  cmov32(Assembler::equal,rax,rcx);
+  testl(rdx,rcx);
+  cmov32(Assembler::notEqual,rax,rcx);
+  movl(Address(rsp,4),rax);
+  movl(Address(rsp,0),0);
+  fmul_d(Address(rsp,0));   // Stack: 2^X ...
+  addptr(rsp,sizeof(jdouble));
+}
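// Illustration (a plain C++ model of the decomposition above; not VM code):
// 2^X = 2^int(X) * 2^(X-int(X)), with 2^int(X) built by placing the biased
// exponent int(X)+1023 directly into the IEEE-754 exponent field.
#include <cmath>
#include <cstdint>
#include <cstring>
static double pow2_model(double x) {
  double xi   = std::nearbyint(x);           // frndint (current rounding mode)
  double frac = std::exp2(x - xi);           // f2xm1; fld1; faddp
  int64_t biased = (int64_t)xi + 1023;
  if (biased <= 0 || biased >= 2047) return NAN; // reserved exponents 0x000/0x7FF
  uint64_t bits = (uint64_t)biased << 52;    // exponent position, zero mantissa
  double scale;
  std::memcpy(&scale, &bits, sizeof(scale)); // 2^int(X)
  return scale * frac;                       // fmul_d
}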
+
+void MacroAssembler::increase_precision() {
+  subptr(rsp, BytesPerWord);
+  fnstcw(Address(rsp, 0));
+  movl(rax, Address(rsp, 0));
+  orl(rax, 0x300);
+  push(rax);
+  fldcw(Address(rsp, 0));
+  pop(rax);
+}
+
+void MacroAssembler::restore_precision() {
+  fldcw(Address(rsp, 0));
+  addptr(rsp, BytesPerWord);
+}
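// Editorial note: or-ing 0x300 into the x87 control word sets the
// precision-control field (bits 8-9) to 11b, i.e. 64-bit extended
// precision, so the intermediate results below keep extra bits;
// restore_precision() reloads the control word saved on the stack.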
+
+void MacroAssembler::fast_pow() {
+  // computes X^Y = 2^(Y * log2(X))
+  // if fast computation is not possible, the result is NaN and the
+  // caller of this macro must provide a fallback.
+  // increase precision for intermediate steps of the computation
+  increase_precision();
+  fyl2x();                 // Stack: (Y*log2(X)) ...
+  pow_exp_core_encoding(); // Stack: X^Y ...
+  restore_precision();
+}
+
+void MacroAssembler::fast_exp() {
+  // computes exp(X) = 2^(X * log2(e))
+  // if fast computation is not possible, the result is NaN and the
+  // caller of this macro must provide a fallback.
+  // increase precision for intermediate steps of the computation
+  increase_precision();
+  fldl2e();                // Stack: log2(e) X ...
+  fmulp(1);                // Stack: (X*log2(e)) ...
+  pow_exp_core_encoding(); // Stack: exp(X) ...
+  restore_precision();
+}
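// Illustration (plain C++ restatement of the identities used above; not VM
// code): both macros reduce X^Y and exp(X) to the shared 2^() core.
#include <cmath>
static double fast_pow_model(double x, double y) {
  return std::exp2(y * std::log2(x));        // fyl2x, then pow_exp_core_encoding
}
static double fast_exp_model(double x) {
  return std::exp2(x * 1.4426950408889634);  // x * log2(e): fldl2e; fmulp
}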
+
+void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
+  // kills rax, rcx, rdx
+  // pow and exp needs 2 extra registers on the fpu stack.
+  Label slow_case, done;
+  Register tmp = noreg;
+  if (!VM_Version::supports_cmov()) {
+    // fcmp needs a temporary, so preserve rdx.
+    tmp = rdx;
+  }
+  Register tmp2 = rax;
+  Register tmp3 = rcx;
+
+  if (is_exp) {
+    // Stack: X
+    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
+    fast_exp();                 // Stack: exp(X) X
+    fcmp(tmp, 0, false, false); // Stack: exp(X) X
+    // exp(X) not equal to itself: exp(X) is NaN, go to the slow case.
+    jcc(Assembler::parity, slow_case);
+    // get rid of duplicate argument. Stack: exp(X)
+    if (num_fpu_regs_in_use > 0) {
+      fxch();
+      fpop();
+    } else {
+      ffree(1);
+    }
+    jmp(done);
+  } else {
+    // Stack: X Y
+    Label x_negative, y_odd;
+
+    fldz();                     // Stack: 0 X Y
+    fcmp(tmp, 1, true, false);  // Stack: X Y
+    jcc(Assembler::above, x_negative);
+
+    // X >= 0
+
+    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
+    fld_s(1);                   // Stack: X Y X Y
+    fast_pow();                 // Stack: X^Y X Y
+    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
+    // X^Y not equal to itself: X^Y is NaN, go to the slow case.
+    jcc(Assembler::parity, slow_case);
+    // get rid of duplicate arguments. Stack: X^Y
+    if (num_fpu_regs_in_use > 0) {
+      fxch(); fpop();
+      fxch(); fpop();
+    } else {
+      ffree(2);
+      ffree(1);
+    }
+    jmp(done);
+
+    // X <= 0
+    bind(x_negative);
+
+    fld_s(1);                   // Stack: Y X Y
+    frndint();                  // Stack: int(Y) X Y
+    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
+    jcc(Assembler::notEqual, slow_case);
+
+    subptr(rsp, 8);
+
+    // For X^Y, when X < 0, Y has to be an integer and the final
+    // result depends on whether it's odd or even. We just checked
+    // that int(Y) == Y.  We move int(Y) to gp registers as a 64 bit
+    // integer to test its parity. If int(Y) is huge and doesn't fit
+    // in the 64 bit integer range, the integer indefinite value will
+    // end up in the gp registers. Huge numbers are all even, and the
+    // integer indefinite value is even, so this is fine.
+
+#ifdef ASSERT
+    // Let's check we don't end up with an integer indefinite number
+    // when not expected. First test for huge numbers: check whether
+    // int(Y)+1 == int(Y) which is true for very large numbers and
+    // those are all even. A 64 bit integer is guaranteed not to
+    // overflow for numbers where y+1 != y (when precision is set to
+    // double precision).
+    Label y_not_huge;
+
+    fld1();                     // Stack: 1 int(Y) X Y
+    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y
+
+#ifdef _LP64
+    // trip to memory to force the precision down from double extended
+    // precision
+    fstp_d(Address(rsp, 0));
+    fld_d(Address(rsp, 0));
+#endif
+
+    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
+#endif
+
+    // move int(Y) as 64 bit integer to thread's stack
+    fistp_d(Address(rsp,0));    // Stack: X Y
+
+#ifdef ASSERT
+    jcc(Assembler::notEqual, y_not_huge);
+
+    // Y is huge, so we know it's even. It may not fit in a 64 bit
+    // integer, and we don't want the debug code below to see the
+    // integer indefinite value, so overwrite int(Y) on the thread's
+    // stack with 0.
+    movl(Address(rsp, 0), 0);
+    movl(Address(rsp, 4), 0);
+
+    bind(y_not_huge);
+#endif
+
+    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
+    fld_s(1);                   // Stack: X Y X Y
+    fabs();                     // Stack: abs(X) Y X Y
+    fast_pow();                 // Stack: abs(X)^Y X Y
+    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
+    // abs(X)^Y not equal to itself: abs(X)^Y is NaN, go to the slow case.
+
+    pop(tmp2);
+    NOT_LP64(pop(tmp3));
+    jcc(Assembler::parity, slow_case);
+
+#ifdef ASSERT
+    // Check that int(Y) is not integer indefinite value (int
+    // overflow). Shouldn't happen because for values that would
+    // overflow, 1+int(Y)==Y which was tested earlier.
+#ifndef _LP64
+    {
+      Label integer;
+      testl(tmp2, tmp2);
+      jcc(Assembler::notZero, integer);
+      cmpl(tmp3, 0x80000000);
+      jcc(Assembler::notZero, integer);
+      STOP("integer indefinite value shouldn't be seen here");
+      bind(integer);
+    }
+#else
+    {
+      Label integer;
+      mov(tmp3, tmp2); // preserve tmp2 for parity check below
+      shlq(tmp3, 1);
+      jcc(Assembler::carryClear, integer);
+      jcc(Assembler::notZero, integer);
+      STOP("integer indefinite value shouldn't be seen here");
+      bind(integer);
+    }
+#endif
+#endif
+
+    // get rid of duplicate arguments. Stack: X^Y
+    if (num_fpu_regs_in_use > 0) {
+      fxch(); fpop();
+      fxch(); fpop();
+    } else {
+      ffree(2);
+      ffree(1);
+    }
+
+    testl(tmp2, 1);
+    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
+    // X <= 0, Y odd: X^Y = -abs(X)^Y
+
+    fchs();                     // Stack: -abs(X)^Y
+    jmp(done);
+  }
+
+  // slow case: runtime call
+  bind(slow_case);
+
+  fpop();                       // pop incorrect result or int(Y)
+
+  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
+                      is_exp ? 1 : 2, num_fpu_regs_in_use);
+
+  // Come here with result in F-TOS
+  bind(done);
+}
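// Illustration (a plain C++ model of the negative-base rule above; not VM
// code): for X < 0, Y must be an integer and the result's sign follows the
// parity of int(Y); huge |Y| counts as even, matching the integer
// indefinite reasoning in the comments.
#include <cmath>
#include <cstdint>
static double pow_model(double x, double y) {
  if (x >= 0.0) return std::exp2(y * std::log2(x)); // fast_pow path
  if (std::nearbyint(y) != y) return NAN;           // slow_case in the VM
  double r = std::exp2(y * std::log2(-x));          // abs(X)^Y
  bool huge = (y + 1.0 == y);                       // huge => even
  return (!huge && ((int64_t)y & 1)) ? -r : r;      // fchs when int(Y) is odd
}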
+
 void MacroAssembler::fpop() {
   ffree();
   fincstp();
@@ -7132,6 +7934,24 @@
   movb(as_Address(dst), src);
 }
 
+void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    movdl(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    movdl(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    movq(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    movq(dst, Address(rscratch1, 0));
+  }
+}
+
 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
     if (UseXmmLoadAndClearUpper) {
@@ -7485,21 +8305,21 @@
   }
 }
 
-void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vandpd(dst, nds, as_Address(src));
+    vandpd(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vandpd(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+    vandpd(dst, nds, Address(rscratch1, 0), vector256);
+  }
+}
+
+void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vandps(dst, nds, as_Address(src));
+    vandps(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vandps(dst, nds, Address(rscratch1, 0));
+    vandps(dst, nds, Address(rscratch1, 0), vector256);
   }
 }
 
@@ -7557,21 +8377,21 @@
   }
 }
 
-void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vxorpd(dst, nds, as_Address(src));
+    vxorpd(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vxorpd(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+    vxorpd(dst, nds, Address(rscratch1, 0), vector256);
+  }
+}
+
+void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vxorps(dst, nds, as_Address(src));
+    vxorps(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vxorps(dst, nds, Address(rscratch1, 0));
+    vxorps(dst, nds, Address(rscratch1, 0), vector256);
   }
 }
 
@@ -7997,7 +8817,7 @@
     shlptr(tsize, LogHeapWordSize);
     cmpptr(t1, tsize);
     jcc(Assembler::equal, ok);
-    stop("assert(t1 != tlab size)");
+    STOP("assert(t1 != tlab size)");
     should_not_reach_here();
 
     bind(ok);
@@ -8045,6 +8865,144 @@
 #endif
 }
 
+void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
+  pusha();
+
+  // if we are coming from c1, xmm registers may be live
+  if (UseSSE >= 1) {
+    subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
+  }
+  int off = 0;
+  if (UseSSE == 1)  {
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
+  } else if (UseSSE >= 2)  {
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7);
+#ifdef _LP64
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15);
+#endif
+  }
+
+  // Preserve registers across runtime call
+  int incoming_argument_and_return_value_offset = -1;
+  if (num_fpu_regs_in_use > 1) {
+    // Must preserve all other FPU regs (could alternatively convert
+    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
+    // FPU state, but we cannot trust the C compiler)
+    NEEDS_CLEANUP;
+    // NOTE that in this case we also push the incoming argument(s) to
+    // the stack and restore them later; we also use this stack slot to
+    // hold the return value from dsin, dcos etc.
+    for (int i = 0; i < num_fpu_regs_in_use; i++) {
+      subptr(rsp, sizeof(jdouble));
+      fstp_d(Address(rsp, 0));
+    }
+    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
+    for (int i = nb_args-1; i >= 0; i--) {
+      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
+    }
+  }
+
+  subptr(rsp, nb_args*sizeof(jdouble));
+  for (int i = 0; i < nb_args; i++) {
+    fstp_d(Address(rsp, i*sizeof(jdouble)));
+  }
+
+#ifdef _LP64
+  if (nb_args > 0) {
+    movdbl(xmm0, Address(rsp, 0));
+  }
+  if (nb_args > 1) {
+    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
+  }
+  assert(nb_args <= 2, "unsupported number of args");
+#endif // _LP64
+
+  // NOTE: we must not use call_VM_leaf here because that requires a
+  // complete interpreter frame in debug mode -- same bug as 4387334
+  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
+  // follow the proper 64-bit ABI
+
+  NEEDS_CLEANUP;
+  // Need to add stack banging before this runtime call if it needs to
+  // be taken; however, there is no generic stack banging routine at
+  // the MacroAssembler level
+
+  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
+
+#ifdef _LP64
+  movsd(Address(rsp, 0), xmm0);
+  fld_d(Address(rsp, 0));
+#endif // _LP64
+  addptr(rsp, sizeof(jdouble) * nb_args);
+  if (num_fpu_regs_in_use > 1) {
+    // Must save return value to stack and then restore entire FPU
+    // stack except incoming arguments
+    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
+    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
+      fld_d(Address(rsp, 0));
+      addptr(rsp, sizeof(jdouble));
+    }
+    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
+    addptr(rsp, sizeof(jdouble) * nb_args);
+  }
+
+  off = 0;
+  if (UseSSE == 1)  {
+    movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
+  } else if (UseSSE >= 2)  {
+    movdbl(xmm0, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm1, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm2, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm3, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm4, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm5, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm6, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm7, Address(rsp,off++*sizeof(jdouble)));
+#ifdef _LP64
+    movdbl(xmm8, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm9, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm10, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm11, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm12, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm13, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm14, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm15, Address(rsp,off++*sizeof(jdouble)));
+#endif
+  }
+  if (UseSSE >= 1) {
+    addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
+  }
+  popa();
+}
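// Editorial note: callers pass the runtime entry point, the number of
// jdouble arguments already on the x87 stack (nb_args, at most 2 on
// 64-bit), and the number of live x87 registers; the trigfunc and
// pow_or_exp slow cases funnel into this helper.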
+
 static const double     pi_4 =  0.7853981633974483;
 
 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
@@ -8092,73 +9050,27 @@
 
   // slow case: runtime call
   bind(slow_case);
-  // Preserve registers across runtime call
-  pusha();
-  int incoming_argument_and_return_value_offset = -1;
-  if (num_fpu_regs_in_use > 1) {
-    // Must preserve all other FPU regs (could alternatively convert
-    // SharedRuntime::dsin and dcos into assembly routines known not to trash
-    // FPU state, but can not trust C compiler)
-    NEEDS_CLEANUP;
-    // NOTE that in this case we also push the incoming argument to
-    // the stack and restore it later; we also use this stack slot to
-    // hold the return value from dsin or dcos.
-    for (int i = 0; i < num_fpu_regs_in_use; i++) {
-      subptr(rsp, sizeof(jdouble));
-      fstp_d(Address(rsp, 0));
-    }
-    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
-    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
-  }
-  subptr(rsp, sizeof(jdouble));
-  fstp_d(Address(rsp, 0));
-#ifdef _LP64
-  movdbl(xmm0, Address(rsp, 0));
-#endif // _LP64
-
-  // NOTE: we must not use call_VM_leaf here because that requires a
-  // complete interpreter frame in debug mode -- same bug as 4387334
-  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
-  // do proper 64bit abi
-
-  NEEDS_CLEANUP;
-  // Need to add stack banging before this runtime call if it needs to
-  // be taken; however, there is no generic stack banging routine at
-  // the MacroAssembler level
+
   switch(trig) {
   case 's':
     {
-      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
+      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
     }
     break;
   case 'c':
     {
-      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
+      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
     }
     break;
   case 't':
     {
-      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
+      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
     }
     break;
   default:
     assert(false, "bad intrinsic");
     break;
   }
-#ifdef _LP64
-    movsd(Address(rsp, 0), xmm0);
-    fld_d(Address(rsp, 0));
-#endif // _LP64
-  addptr(rsp, sizeof(jdouble));
-  if (num_fpu_regs_in_use > 1) {
-    // Must save return value to stack and then restore entire FPU stack
-    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
-    for (int i = 0; i < num_fpu_regs_in_use; i++) {
-      fld_d(Address(rsp, 0));
-      addptr(rsp, sizeof(jdouble));
-    }
-  }
-  popa();
 
   // Come here with result in F-TOS
   bind(done);
@@ -8244,6 +9156,19 @@
 }
 
 
+// virtual method calling
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register method_result) {
+  const int base = instanceKlass::vtable_start_offset() * wordSize;
+  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
+  Address vtable_entry_addr(recv_klass,
+                            vtable_index, Address::times_ptr,
+                            base + vtableEntry::method_offset_in_bytes());
+  movptr(method_result, vtable_entry_addr);
+}
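// Illustration (equivalent pointer arithmetic; not VM code, and the offset
// parameters stand in for the helpers used above):
static inline void* vtable_method_model(char* recv_klass, intptr_t index,
                                        int word_size, int vtable_start,
                                        int method_off) {
  // vtableEntry::size() * wordSize == wordSize (asserted above), so each
  // vtable entry advances by exactly one machine word.
  char* entry = recv_klass + vtable_start * word_size
                           + index * word_size + method_off;
  return *(void**)entry;
}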
+
+
 void MacroAssembler::check_klass_subtype(Register sub_klass,
                            Register super_klass,
                            Register temp_reg,
@@ -8493,6 +9418,7 @@
   // Pass register number to verify_oop_subroutine
   char* b = new char[strlen(s) + 50];
   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
+  BLOCK_COMMENT("verify_oop {");
 #ifdef _LP64
   push(rscratch1);                    // save r10, trashed by movptr()
 #endif
@@ -8507,6 +9433,7 @@
   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
   call(rax);
   // Caller pops the arguments (oop, message) and restores rax, r10
+  BLOCK_COMMENT("} verify_oop");
 }
 
 
@@ -8527,7 +9454,7 @@
       jcc(Assembler::notZero, L);
       char* buf = new char[40];
       sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
-      stop(buf);
+      STOP(buf);
     } else {
       jccb(Assembler::notZero, L);
       hlt();
@@ -8543,60 +9470,6 @@
 }
 
 
-// registers on entry:
-//  - rax ('check' register): required MethodType
-//  - rcx: method handle
-//  - rdx, rsi, or ?: killable temp
-void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
-                                              Register temp_reg,
-                                              Label& wrong_method_type) {
-  Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg));
-  // compare method type against that of the receiver
-  if (UseCompressedOops) {
-    load_heap_oop(temp_reg, type_addr);
-    cmpptr(mtype_reg, temp_reg);
-  } else {
-    cmpptr(mtype_reg, type_addr);
-  }
-  jcc(Assembler::notEqual, wrong_method_type);
-}
-
-
-// A method handle has a "vmslots" field which gives the size of its
-// argument list in JVM stack slots.  This field is either located directly
-// in every method handle, or else is indirectly accessed through the
-// method handle's MethodType.  This macro hides the distinction.
-void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
-                                                Register temp_reg) {
-  assert_different_registers(vmslots_reg, mh_reg, temp_reg);
-  // load mh.type.form.vmslots
-  Register temp2_reg = vmslots_reg;
-  load_heap_oop(temp2_reg, Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)));
-  load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)));
-  movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
-}
-
-
-// registers on entry:
-//  - rcx: method handle
-//  - rdx: killable temp (interpreted only)
-//  - rax: killable temp (compiled only)
-void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
-  assert(mh_reg == rcx, "caller must put MH object in rcx");
-  assert_different_registers(mh_reg, temp_reg);
-
-  // pick out the interpreted side of the handler
-  // NOTE: vmentry is not an oop!
-  movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg)));
-
-  // off we go...
-  jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));
-
-  // for the various stubs which take control at this point,
-  // see MethodHandles::generate_method_handle_stub
-}
-
-
 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                          int extra_slot_offset) {
   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
@@ -8669,14 +9542,14 @@
     movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
     cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
     jcc(Assembler::aboveEqual, next);
-    stop("assert(top >= start)");
+    STOP("assert(top >= start)");
     should_not_reach_here();
 
     bind(next);
     movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
     cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
     jcc(Assembler::aboveEqual, ok);
-    stop("assert(top <= end)");
+    STOP("assert(top <= end)");
     should_not_reach_here();
 
     bind(ok);
@@ -9109,6 +9982,25 @@
     movptr(dst, src);
 }
 
+void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
+  assert_different_registers(src1, tmp);
+#ifdef _LP64
+  if (UseCompressedOops) {
+    bool did_push = false;
+    if (tmp == noreg) {
+      tmp = rax;
+      push(tmp);
+      did_push = true;
+      assert(!src2.uses(rsp), "can't push");
+    }
+    load_heap_oop(tmp, src2);
+    cmpptr(src1, tmp);
+    if (did_push)  pop(tmp);
+  } else
+#endif
+    cmpptr(src1, src2);
+}
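// Editorial note: with compressed oops the memory operand is a 32-bit
// narrow oop, so it is first decoded into a temp via load_heap_oop and
// then compared full-width; rax is borrowed (pushed and popped) when the
// caller supplies no temp, which is why src2 must not be rsp-relative.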
+
 // Used for storing NULLs.
 void MacroAssembler::store_heap_oop_null(Address dst) {
 #ifdef _LP64
@@ -9139,7 +10031,7 @@
     push(rscratch1); // cmpptr trashes rscratch1
     cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
     jcc(Assembler::equal, ok);
-    stop(msg);
+    STOP(msg);
     bind(ok);
     pop(rscratch1);
   }
@@ -9172,7 +10064,7 @@
     Label ok;
     testq(r, r);
     jcc(Assembler::notEqual, ok);
-    stop("null oop passed to encode_heap_oop_not_null");
+    STOP("null oop passed to encode_heap_oop_not_null");
     bind(ok);
   }
 #endif
@@ -9193,7 +10085,7 @@
     Label ok;
     testq(src, src);
     jcc(Assembler::notEqual, ok);
-    stop("null oop passed to encode_heap_oop_not_null2");
+    STOP("null oop passed to encode_heap_oop_not_null2");
     bind(ok);
   }
 #endif
@@ -9384,7 +10276,7 @@
     cmpptr(rax, StackAlignmentInBytes-wordSize);
     pop(rax);
     jcc(Assembler::equal, L);
-    stop("Stack is not properly aligned!");
+    STOP("Stack is not properly aligned!");
     bind(L);
   }
 #endif
@@ -10058,13 +10950,6 @@
   bind(DONE);
 }
 
-#ifdef PRODUCT
-#define BLOCK_COMMENT(str) /* nothing */
-#else
-#define BLOCK_COMMENT(str) block_comment(str)
-#endif
-
-#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
 void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                    Register to, Register value, Register count,
                                    Register rtmp, XMMRegister xtmp) {
--- a/src/cpu/x86/vm/assembler_x86.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -591,8 +591,9 @@
 
   void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
                   VexSimdPrefix pre, bool vector256 = false) {
-     vex_prefix(src, nds->encoding(), dst->encoding(),
-                pre, VEX_OPCODE_0F, false, vector256);
+    int dst_enc = dst->encoding();
+    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
   }
 
   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
@@ -600,9 +601,12 @@
                              bool vex_w, bool vector256);
 
   int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
-                             VexSimdPrefix pre, bool vector256 = false) {
-     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                  pre, VEX_OPCODE_0F, false, vector256);
+                             VexSimdPrefix pre, bool vector256 = false,
+                             VexOpcode opc = VEX_OPCODE_0F) {
+    int src_enc = src->encoding();
+    int dst_enc = dst->encoding();
+    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256);
   }
 
   void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
@@ -613,6 +617,7 @@
                    VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
     simd_prefix(dst, xnoreg, src, pre, opc);
   }
+
   void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
     simd_prefix(src, dst, pre);
   }
@@ -622,16 +627,10 @@
     simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
   }
 
-
   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
                              VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
                              bool rex_w = false, bool vector256 = false);
 
-  int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
-                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
-    return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
-  }
-
   // Move/convert 32-bit integer value.
   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
                              VexSimdPrefix pre) {
@@ -673,6 +672,15 @@
   void emit_arith(int op1, int op2, Register dst, jobject obj);
   void emit_arith(int op1, int op2, Register dst, Register src);
 
+  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
+  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
+  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
+  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
+  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                      Address src, VexSimdPrefix pre, bool vector256);
+  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                      XMMRegister src, VexSimdPrefix pre, bool vector256);
+
   void emit_operand(Register reg,
                     Register base, Register index, Address::ScaleFactor scale,
                     int disp,
@@ -887,12 +895,6 @@
   void andq(Register dst, Address src);
   void andq(Register dst, Register src);
 
-  // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
-  void andpd(XMMRegister dst, XMMRegister src);
-
-  // Bitwise Logical AND of Packed Single-Precision Floating-Point Values
-  void andps(XMMRegister dst, XMMRegister src);
-
   void bsfl(Register dst, Register src);
   void bsrl(Register dst, Register src);
 
@@ -1148,6 +1150,9 @@
   void fxsave(Address dst);
 
   void fyl2x();
+  void frndint();
+  void f2xm1();
+  void fldl2e();
 
   void hlt();
 
@@ -1258,6 +1263,7 @@
   void movdl(XMMRegister dst, Register src);
   void movdl(Register dst, XMMRegister src);
   void movdl(XMMRegister dst, Address src);
+  void movdl(Address dst, XMMRegister src);
 
   // Move Double Quadword
   void movdq(XMMRegister dst, Register src);
@@ -1271,6 +1277,14 @@
   void movdqu(XMMRegister dst, Address src);
   void movdqu(XMMRegister dst, XMMRegister src);
 
+  // Move Unaligned 256bit Vector
+  void vmovdqu(Address dst, XMMRegister src);
+  void vmovdqu(XMMRegister dst, Address src);
+  void vmovdqu(XMMRegister dst, XMMRegister src);
+
+  // Move the lower 64 bits to the high 64 bits of a 128-bit register
+  void movlhps(XMMRegister dst, XMMRegister src);
+
   void movl(Register dst, int32_t imm32);
   void movl(Address dst, int32_t imm32);
   void movl(Register dst, Register src);
@@ -1420,10 +1434,6 @@
   void prefetcht2(Address src);
   void prefetchw(Address src);
 
-  // POR - Bitwise logical OR
-  void por(XMMRegister dst, XMMRegister src);
-  void por(XMMRegister dst, Address src);
-
   // Shuffle Packed Doublewords
   void pshufd(XMMRegister dst, XMMRegister src, int mode);
   void pshufd(XMMRegister dst, Address src,     int mode);
@@ -1432,9 +1442,6 @@
   void pshuflw(XMMRegister dst, XMMRegister src, int mode);
   void pshuflw(XMMRegister dst, Address src,     int mode);
 
-  // Shift Right by bits Logical Quadword Immediate
-  void psrlq(XMMRegister dst, int shift);
-
   // Shift Right by bytes Logical DoubleQuadword Immediate
   void psrldq(XMMRegister dst, int shift);
 
@@ -1450,16 +1457,15 @@
   void punpckldq(XMMRegister dst, XMMRegister src);
   void punpckldq(XMMRegister dst, Address src);
 
+  // Interleave Low Quadwords
+  void punpcklqdq(XMMRegister dst, XMMRegister src);
+
 #ifndef _LP64 // no 32bit push/pop on amd64
   void pushl(Address src);
 #endif
 
   void pushq(Address src);
 
-  // Xor Packed Byte Integer Values
-  void pxor(XMMRegister dst, Address src);
-  void pxor(XMMRegister dst, XMMRegister src);
-
   void rcll(Register dst, int imm8);
 
   void rclq(Register dst, int imm8);
@@ -1582,21 +1588,14 @@
   void xorq(Register dst, Address src);
   void xorq(Register dst, Register src);
 
-  // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
-  void xorpd(XMMRegister dst, XMMRegister src);
-
-  // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
-  void xorps(XMMRegister dst, XMMRegister src);
-
   void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
 
-  // AVX 3-operands instructions (encoded with VEX prefix)
+  // AVX 3-operands scalar instructions (encoded with VEX prefix)
+
   void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
   void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vaddss(XMMRegister dst, XMMRegister nds, Address src);
   void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vandps(XMMRegister dst, XMMRegister nds, Address src);
   void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
   void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vdivss(XMMRegister dst, XMMRegister nds, Address src);
@@ -1609,9 +1608,157 @@
   void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vsubss(XMMRegister dst, XMMRegister nds, Address src);
   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
-
+
+
+  //====================VECTOR ARITHMETIC=====================================
+
+  // Add Packed Floating-Point Values
+  void addpd(XMMRegister dst, XMMRegister src);
+  void addps(XMMRegister dst, XMMRegister src);
+  void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Subtract Packed Floating-Point Values
+  void subpd(XMMRegister dst, XMMRegister src);
+  void subps(XMMRegister dst, XMMRegister src);
+  void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Multiply Packed Floating-Point Values
+  void mulpd(XMMRegister dst, XMMRegister src);
+  void mulps(XMMRegister dst, XMMRegister src);
+  void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Divide Packed Floating-Point Values
+  void divpd(XMMRegister dst, XMMRegister src);
+  void divps(XMMRegister dst, XMMRegister src);
+  void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Bitwise Logical AND of Packed Floating-Point Values
+  void andpd(XMMRegister dst, XMMRegister src);
+  void andps(XMMRegister dst, XMMRegister src);
+  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Bitwise Logical XOR of Packed Floating-Point Values
+  void xorpd(XMMRegister dst, XMMRegister src);
+  void xorps(XMMRegister dst, XMMRegister src);
+  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Add packed integers
+  void paddb(XMMRegister dst, XMMRegister src);
+  void paddw(XMMRegister dst, XMMRegister src);
+  void paddd(XMMRegister dst, XMMRegister src);
+  void paddq(XMMRegister dst, XMMRegister src);
+  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Sub packed integers
+  void psubb(XMMRegister dst, XMMRegister src);
+  void psubw(XMMRegister dst, XMMRegister src);
+  void psubd(XMMRegister dst, XMMRegister src);
+  void psubq(XMMRegister dst, XMMRegister src);
+  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Multiply packed integers (only shorts and ints)
+  void pmullw(XMMRegister dst, XMMRegister src);
+  void pmulld(XMMRegister dst, XMMRegister src);
+  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Shift left packed integers
+  void psllw(XMMRegister dst, int shift);
+  void pslld(XMMRegister dst, int shift);
+  void psllq(XMMRegister dst, int shift);
+  void psllw(XMMRegister dst, XMMRegister shift);
+  void pslld(XMMRegister dst, XMMRegister shift);
+  void psllq(XMMRegister dst, XMMRegister shift);
+  void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+  // Logical shift right packed integers
+  void psrlw(XMMRegister dst, int shift);
+  void psrld(XMMRegister dst, int shift);
+  void psrlq(XMMRegister dst, int shift);
+  void psrlw(XMMRegister dst, XMMRegister shift);
+  void psrld(XMMRegister dst, XMMRegister shift);
+  void psrlq(XMMRegister dst, XMMRegister shift);
+  void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+  // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
+  void psraw(XMMRegister dst, int shift);
+  void psrad(XMMRegister dst, int shift);
+  void psraw(XMMRegister dst, XMMRegister shift);
+  void psrad(XMMRegister dst, XMMRegister shift);
+  void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+  // And packed integers
+  void pand(XMMRegister dst, XMMRegister src);
+  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Or packed integers
+  void por(XMMRegister dst, XMMRegister src);
+  void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Xor packed integers
+  void pxor(XMMRegister dst, XMMRegister src);
+  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Copy the low 128 bits into the high 128 bits of YMM registers.
+  void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
+
+  // AVX instruction which is used to clear upper 128 bits of YMM registers and
+  // to avoid transition penalty between AVX and SSE states. There is no
+  // penalty if legacy SSE instructions are encoded using VEX prefix because
+  // they always clear upper 128 bits. It should be used before calling
+  // runtime code and native libraries.
+  void vzeroupper();
 
  protected:
   // Next instructions require address alignment 16 bytes SSE mode.
@@ -1908,6 +2055,7 @@
   void load_heap_oop(Register dst, Address src);
   void load_heap_oop_not_null(Register dst, Address src);
   void store_heap_oop(Address dst, Register src);
+  void cmp_heap_oop(Register src1, Address src2, Register tmp = noreg);
 
   // Used for storing NULL. All other oop constants should be
   // stored using routines that take a jobject.
@@ -2085,6 +2233,11 @@
                                Register scan_temp,
                                Label& no_such_interface);
 
+  // virtual method calling
+  void lookup_virtual_method(Register recv_klass,
+                             RegisterOrConstant vtable_index,
+                             Register method_result);
+
   // Test sub_klass against super_klass, with fast and slow paths.
 
   // The fast path produces a tri-state answer: yes / no / maybe-slow.
@@ -2120,15 +2273,8 @@
                            Label& L_success);
 
   // method handles (JSR 292)
-  void check_method_handle_type(Register mtype_reg, Register mh_reg,
-                                Register temp_reg,
-                                Label& wrong_method_type);
-  void load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
-                                  Register temp_reg);
-  void jump_to_method_handle_entry(Register mh_reg, Register temp_reg);
   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
 
-
   //----
   void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
 
@@ -2147,8 +2293,13 @@
   // prints msg and continues
   void warn(const char* msg);
 
+  // dumps registers and other state
+  void print_state();
+
   static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
   static void debug64(char* msg, int64_t pc, int64_t regs[]);
+  static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip);
+  static void print_state64(int64_t pc, int64_t regs[]);
 
   void os_breakpoint();
 
@@ -2387,7 +2538,30 @@
   void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
   void ldmxcsr(AddressLiteral src);
 
+  // Compute pow(x,y) and exp(x) with x86 instructions. These do not cover
+  // all corner cases, may produce NaN, and may require a fallback to a
+  // runtime call.
+  void fast_pow();
+  void fast_exp();
+  void increase_precision();
+  void restore_precision();
+
+  // computes exp(x). Fallback to runtime call included.
+  void exp_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(true, num_fpu_regs_in_use); }
+  // computes pow(x,y). Fallback to runtime call included.
+  void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(false, num_fpu_regs_in_use); }
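
For orientation, a sketch of how these fallback entries are used (compare the
interpreter math entries further down in this changeset); __ is the usual
_masm-> shorthand, and both arguments are assumed to be on the x87 stack
already:

    __ pow_with_fallback(0);   // result in ST(0), or dispatched to the runtime
    __ push_fTOS();            // store/reload to round 80-bit extended to 64 bits
    __ pop_fTOS();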
+
 private:
+
+  // call runtime as a fallback for trig functions and pow/exp.
+  void fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use);
+
+  // computes 2^(Ylog2X); Ylog2X (i.e. y*log2(x)) is expected in ST(0)
+  void pow_exp_core_encoding();
+
+  // computes pow(x,y) or exp(x). Fallback to runtime call included.
+  void pow_or_exp(bool is_exp, int num_fpu_regs_in_use);
+
   // these are private because users should be doing movflt/movdbl
 
   void movss(Address dst, XMMRegister src)     { Assembler::movss(dst, src); }
@@ -2473,11 +2647,13 @@
   void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
   void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
 
-  void vandpd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandpd(dst, nds, src); }
-  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
-  void vandps(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandps(dst, nds, src); }
-  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandpd(dst, nds, src, vector256); }
+  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
+  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandps(dst, nds, src, vector256); }
+  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
 
   void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
   void vdivsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivsd(dst, nds, src); }
@@ -2503,12 +2679,36 @@
   void vsubss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vsubss(dst, nds, src); }
   void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
 
-  void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); }
-  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
-  void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
-  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
+  // AVX Vector instructions
+
+  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
+  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
+  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+    if (UseAVX > 1 || !vector256) // 256-bit vpxor is available only with AVX2
+      Assembler::vpxor(dst, nds, src, vector256);
+    else
+      Assembler::vxorpd(dst, nds, src, vector256);
+  }
+  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+    if (UseAVX > 1 || !vector256) // 256-bit vpxor is available only with AVX2
+      Assembler::vpxor(dst, nds, src, vector256);
+    else
+      Assembler::vxorpd(dst, nds, src, vector256);
+  }
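
The fallback in this wrapper is safe because XOR is purely bitwise, so vxorpd
on integer data yields the same bits as vpxor; only the 256-bit form requires
AVX2. A one-line usage sketch (assuming a MacroAssembler* masm):

    masm->vpxor(xmm0, xmm0, xmm0, /*vector256=*/ true);  // ymm0 := 0 on AVX1 or AVX2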
+
+  // Move packed integer values from the low 128 bits to the high 128 bits of a 256-bit vector.
+  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+    if (UseAVX > 1) // vinserti128h is available only in AVX2
+      Assembler::vinserti128h(dst, nds, src);
+    else
+      Assembler::vinsertf128h(dst, nds, src);
+  }
 
   // Data
 
@@ -2561,6 +2761,13 @@
   // to avoid hiding movb
   void movbyte(ArrayAddress dst, int src);
 
+  // Import other mov() methods from the parent class or else
+  // they will be hidden by the following overriding declaration.
+  using Assembler::movdl;
+  using Assembler::movq;
+  void movdl(XMMRegister dst, AddressLiteral src);
+  void movq(XMMRegister dst, AddressLiteral src);
+
   // Can push value or effective address
   void pushptr(AddressLiteral src);
 
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -488,68 +488,6 @@
 
 }
 
-void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
-  // At this point we know that offset == referent_offset.
-  //
-  // So we might have to emit:
-  //   if (src == null) goto continuation.
-  //
-  // and we definitely have to emit:
-  //   if (klass(src).reference_type == REF_NONE) goto continuation
-  //   if (!marking_active) goto continuation
-  //   if (pre_val == null) goto continuation
-  //   call pre_barrier(pre_val)
-  //   goto continuation
-  //
-  __ bind(_entry);
-
-  assert(src()->is_register(), "sanity");
-  Register src_reg = src()->as_register();
-
-  if (gen_src_check()) {
-    // The original src operand was not a constant.
-    // Generate src == null?
-    __ cmpptr(src_reg, (int32_t) NULL_WORD);
-    __ jcc(Assembler::equal, _continuation);
-  }
-
-  // Generate src->_klass->_reference_type == REF_NONE)?
-  assert(tmp()->is_register(), "sanity");
-  Register tmp_reg = tmp()->as_register();
-
-  __ load_klass(tmp_reg, src_reg);
-
-  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
-  __ cmpb(ref_type_adr, REF_NONE);
-  __ jcc(Assembler::equal, _continuation);
-
-  // Is marking active?
-  assert(thread()->is_register(), "precondition");
-  Register thread_reg = thread()->as_pointer_register();
-
-  Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       PtrQueue::byte_offset_of_active()));
-
-  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
-    __ cmpl(in_progress, 0);
-  } else {
-    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
-    __ cmpb(in_progress, 0);
-  }
-  __ jcc(Assembler::equal, _continuation);
-
-  // val == null?
-  assert(val()->is_register(), "Precondition.");
-  Register val_reg = val()->as_register();
-
-  __ cmpptr(val_reg, (int32_t) NULL_WORD);
-  __ jcc(Assembler::equal, _continuation);
-
-  ce->store_parameter(val()->as_register(), 0);
-  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
-  __ jmp(_continuation);
-}
-
 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
 
 jbyte* G1PostBarrierStub::byte_map_base_slow() {
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -2446,6 +2446,12 @@
         // Should consider not saving rbx, if not necessary
         __ trigfunc('t', op->as_Op2()->fpu_stack_size());
         break;
+      case lir_exp :
+        __ exp_with_fallback(op->as_Op2()->fpu_stack_size());
+        break;
+      case lir_pow :
+        __ pow_with_fallback(op->as_Op2()->fpu_stack_size());
+        break;
       default      : ShouldNotReachHere();
     }
   } else {
@@ -3502,6 +3508,7 @@
 void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
   ciMethod* method = op->profiled_method();
   int bci          = op->profiled_bci();
+  ciMethod* callee = op->profiled_callee();
 
   // Update counter for all call types
   ciMethodData* md = method->method_data_or_null();
@@ -3513,9 +3520,11 @@
   __ movoop(mdo, md->constant_encoding());
   Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
   Bytecodes::Code bc = method->java_code_at_bci(bci);
+  const bool callee_is_static = callee->is_loaded() && callee->is_static();
   // Perform additional virtual call profiling for invokevirtual and
   // invokeinterface bytecodes
   if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+      !callee_is_static &&  // required for optimized MH invokes
       C1ProfileVirtualCalls) {
     assert(op->recv()->is_single_cpu(), "recv must be allocated");
     Register recv = op->recv()->as_register();
--- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -718,35 +718,6 @@
 }
 
 
-void LIRGenerator::do_AttemptUpdate(Intrinsic* x) {
-  assert(x->number_of_arguments() == 3, "wrong type");
-  LIRItem obj       (x->argument_at(0), this);  // AtomicLong object
-  LIRItem cmp_value (x->argument_at(1), this);  // value to compare with field
-  LIRItem new_value (x->argument_at(2), this);  // replace field with new_value if it matches cmp_value
-
-  // compare value must be in rdx,eax (hi,lo); may be destroyed by cmpxchg8 instruction
-  cmp_value.load_item_force(FrameMap::long0_opr);
-
-  // new value must be in rcx,ebx (hi,lo)
-  new_value.load_item_force(FrameMap::long1_opr);
-
-  // object pointer register is overwritten with field address
-  obj.load_item();
-
-  // generate compare-and-swap; produces zero condition if swap occurs
-  int value_offset = sun_misc_AtomicLongCSImpl::value_offset();
-  LIR_Opr addr = new_pointer_register();
-  __ leal(LIR_OprFact::address(new LIR_Address(obj.result(), value_offset, T_LONG)), addr);
-  LIR_Opr t1 = LIR_OprFact::illegalOpr;  // no temp needed
-  LIR_Opr t2 = LIR_OprFact::illegalOpr;  // no temp needed
-  __ cas_long(addr, cmp_value.result(), new_value.result(), t1, t2);
-
-  // generate conditional move of boolean result
-  LIR_Opr result = rlock_result(x);
-  __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), result, T_LONG);
-}
-
-
 void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
   assert(x->number_of_arguments() == 4, "wrong type");
   LIRItem obj   (x->argument_at(0), this);  // object
@@ -823,7 +794,7 @@
 
 
 void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
-  assert(x->number_of_arguments() == 1, "wrong type");
+  assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type");
   LIRItem value(x->argument_at(0), this);
 
   bool use_fpu = false;
@@ -834,6 +805,8 @@
       case vmIntrinsics::_dtan:
       case vmIntrinsics::_dlog:
       case vmIntrinsics::_dlog10:
+      case vmIntrinsics::_dexp:
+      case vmIntrinsics::_dpow:
         use_fpu = true;
     }
   } else {
@@ -843,20 +816,37 @@
   value.load_item();
 
   LIR_Opr calc_input = value.result();
+  LIR_Opr calc_input2 = NULL;
+  if (x->id() == vmIntrinsics::_dpow) {
+    LIRItem extra_arg(x->argument_at(1), this);
+    if (UseSSE < 2) {
+      extra_arg.set_destroys_register();
+    }
+    extra_arg.load_item();
+    calc_input2 = extra_arg.result();
+  }
   LIR_Opr calc_result = rlock_result(x);
 
-  // sin and cos need two free fpu stack slots, so register two temporary operands
+  // sin, cos, pow and exp need two free fpu stack slots, so register
+  // two temporary operands
   LIR_Opr tmp1 = FrameMap::caller_save_fpu_reg_at(0);
   LIR_Opr tmp2 = FrameMap::caller_save_fpu_reg_at(1);
 
   if (use_fpu) {
     LIR_Opr tmp = FrameMap::fpu0_double_opr;
+    int tmp_start = 1;
+    if (calc_input2 != NULL) {
+      __ move(calc_input2, tmp);
+      tmp_start = 2;
+      calc_input2 = tmp;
+    }
     __ move(calc_input, tmp);
 
     calc_input = tmp;
     calc_result = tmp;
-    tmp1 = FrameMap::caller_save_fpu_reg_at(1);
-    tmp2 = FrameMap::caller_save_fpu_reg_at(2);
+
+    tmp1 = FrameMap::caller_save_fpu_reg_at(tmp_start);
+    tmp2 = FrameMap::caller_save_fpu_reg_at(tmp_start + 1);
   }
 
   switch(x->id()) {
@@ -867,6 +857,8 @@
     case vmIntrinsics::_dtan:   __ tan  (calc_input, calc_result, tmp1, tmp2);              break;
     case vmIntrinsics::_dlog:   __ log  (calc_input, calc_result, tmp1);                    break;
     case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1);                    break;
+    case vmIntrinsics::_dexp:   __ exp  (calc_input, calc_result,              tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
+    case vmIntrinsics::_dpow:   __ pow  (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
     default:                    ShouldNotReachHere();
   }
 
--- a/src/cpu/x86/vm/c1_LinearScan_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/c1_LinearScan_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -690,8 +690,8 @@
 
     case lir_mul_strictfp:
     case lir_div_strictfp: {
-      assert(op2->tmp_opr()->is_fpu_register(), "strict operations need temporary fpu stack slot");
-      insert_free_if_dead(op2->tmp_opr());
+      assert(op2->tmp1_opr()->is_fpu_register(), "strict operations need temporary fpu stack slot");
+      insert_free_if_dead(op2->tmp1_opr());
       assert(sim()->stack_size() <= 7, "at least one stack slot must be free");
       // fall-through: continue with the normal handling of lir_mul and lir_div
     }
@@ -787,16 +787,17 @@
 
     case lir_log:
     case lir_log10: {
-      // log and log10 needs one temporary fpu stack slot, so there is ontemporary
-      // registers stored in temp of the operation.
-      // the stack allocator must guarantee that the stack slots are really free,
-      // otherwise there might be a stack overflow.
+      // log and log10 need one temporary fpu stack slot, so
+      // there is one temporary register stored in temp of the
+      // operation. the stack allocator must guarantee that the stack
+      // slots are really free, otherwise there might be a stack
+      // overflow.
       assert(right->is_illegal(), "must be");
       assert(left->is_fpu_register(), "must be");
       assert(res->is_fpu_register(), "must be");
-      assert(op2->tmp_opr()->is_fpu_register(), "must be");
+      assert(op2->tmp1_opr()->is_fpu_register(), "must be");
 
-      insert_free_if_dead(op2->tmp_opr());
+      insert_free_if_dead(op2->tmp1_opr());
       insert_free_if_dead(res, left);
       insert_exchange(left);
       do_rename(left, res);
@@ -812,8 +813,9 @@
 
     case lir_tan:
     case lir_sin:
-    case lir_cos: {
-      // sin and cos need two temporary fpu stack slots, so there are two temporary
+    case lir_cos:
+    case lir_exp: {
+      // sin, cos and exp need two temporary fpu stack slots, so there are two temporary
       // registers (stored in right and temp of the operation).
       // the stack allocator must guarantee that the stack slots are really free,
       // otherwise there might be a stack overflow.
@@ -821,11 +823,11 @@
       assert(res->is_fpu_register(), "must be");
       // assert(left->is_last_use(), "old value gets destroyed");
       assert(right->is_fpu_register(), "right is used as the first temporary register");
-      assert(op2->tmp_opr()->is_fpu_register(), "temp is used as the second temporary register");
-      assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp_opr()) && fpu_num(op2->tmp_opr()) != fpu_num(res), "need distinct temp registers");
+      assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register");
+      assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
 
       insert_free_if_dead(right);
-      insert_free_if_dead(op2->tmp_opr());
+      insert_free_if_dead(op2->tmp1_opr());
 
       insert_free_if_dead(res, left);
       insert_exchange(left);
@@ -839,6 +841,53 @@
       break;
     }
 
+    case lir_pow: {
+      // pow needs two temporary fpu stack slots, so there are two temporary
+      // registers (stored in tmp1 and tmp2 of the operation).
+      // the stack allocator must guarantee that the stack slots are really free,
+      // otherwise there might be a stack overflow.
+      assert(left->is_fpu_register(), "must be");
+      assert(right->is_fpu_register(), "must be");
+      assert(res->is_fpu_register(), "must be");
+
+      assert(op2->tmp1_opr()->is_fpu_register(), "tmp1 is the first temporary register");
+      assert(op2->tmp2_opr()->is_fpu_register(), "tmp2 is the second temporary register");
+      assert(fpu_num(left) != fpu_num(right) && fpu_num(left) != fpu_num(op2->tmp1_opr()) && fpu_num(left) != fpu_num(op2->tmp2_opr()) && fpu_num(left) != fpu_num(res), "need distinct temp registers");
+      assert(fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(right) != fpu_num(op2->tmp2_opr()) && fpu_num(right) != fpu_num(res), "need distinct temp registers");
+      assert(fpu_num(op2->tmp1_opr()) != fpu_num(op2->tmp2_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
+      assert(fpu_num(op2->tmp2_opr()) != fpu_num(res), "need distinct temp registers");
+
+      insert_free_if_dead(op2->tmp1_opr());
+      insert_free_if_dead(op2->tmp2_opr());
+
+      // Must bring both operands to the top of the stack with the following operand ordering:
+      // * fpu stack before pow: ... right left
+      // * fpu stack after pow:  ... left
+
+      insert_free_if_dead(res, right);
+
+      if (tos_offset(right) != 1) {
+        insert_exchange(right);
+        insert_exchange(1);
+      }
+      insert_exchange(left);
+      assert(tos_offset(right) == 1, "check");
+      assert(tos_offset(left) == 0, "check");
+
+      new_left = to_fpu_stack_top(left);
+      new_right = to_fpu_stack(right);
+
+      op2->set_fpu_stack_size(sim()->stack_size());
+      assert(sim()->stack_size() <= 6, "at least two stack slots must be free");
+
+      sim()->pop();
+
+      do_rename(right, res);
+
+      new_res = to_fpu_stack_top(res);
+      break;
+    }
+
     default: {
       assert(false, "missed a fpu-operation");
     }
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -47,6 +47,12 @@
   assert(!(oop_result1->is_valid() || oop_result2->is_valid()) || oop_result1 != oop_result2, "registers must be different");
   assert(oop_result1 != thread && oop_result2 != thread, "registers must be different");
   assert(args_size >= 0, "illegal args_size");
+  bool align_stack = false;
+#ifdef _LP64
+  // At a method handle call, the stack may not be properly aligned
+  // when returning with an exception.
+  align_stack = (stub_id() == Runtime1::handle_exception_from_callee_id);
+#endif
 
 #ifdef _LP64
   mov(c_rarg0, thread);
@@ -59,11 +65,21 @@
   push(thread);
 #endif // _LP64
 
-  set_last_Java_frame(thread, noreg, rbp, NULL);
+  int call_offset;
+  if (!align_stack) {
+    set_last_Java_frame(thread, noreg, rbp, NULL);
+  } else {
+    address the_pc = pc();
+    call_offset = offset();
+    set_last_Java_frame(thread, noreg, rbp, the_pc);
+    andptr(rsp, -(StackAlignmentInBytes));    // Align stack
+  }
 
   // do the call
   call(RuntimeAddress(entry));
-  int call_offset = offset();
+  if (!align_stack) {
+    call_offset = offset();
+  }
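
The andptr above rounds rsp down to the nearest alignment boundary. A plain
C++ illustration of the same mask arithmetic (assuming StackAlignmentInBytes
is a power of two, 16 on x86_64; align_down is illustrative only):

    // -(intptr_t)16 == ~15, so the AND clears the low four bits and moves
    // the pointer down to the previous 16-byte boundary.
    uintptr_t align_down(uintptr_t sp, uintptr_t alignment /* power of two */) {
      return sp & ~(alignment - 1);   // same effect as andptr(rsp, -alignment)
    }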
   // verify callee-saved register
 #ifdef ASSERT
   guarantee(thread != rax, "change this code");
@@ -78,7 +94,7 @@
   }
   pop(rax);
 #endif
-  reset_last_Java_frame(thread, true, false);
+  reset_last_Java_frame(thread, true, align_stack);
 
   // discard thread and arguments
   NOT_LP64(addptr(rsp, num_rt_args()*BytesPerWord));
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -481,7 +481,8 @@
   __ xorptr(rdx, rdx);
   __ movptr(STATE(_oop_temp), rdx);                     // state->_oop_temp = NULL (only really needed for native)
   __ movptr(STATE(_mdx), rdx);                          // state->_mdx = NULL
-  __ movptr(rdx, Address(rbx, methodOopDesc::constants_offset()));
+  __ movptr(rdx, Address(rbx, methodOopDesc::const_offset()));
+  __ movptr(rdx, Address(rdx, constMethodOopDesc::constants_offset()));
   __ movptr(rdx, Address(rdx, constantPoolOopDesc::cache_offset_in_bytes()));
   __ movptr(STATE(_constants), rdx);                    // state->_constants = constants()
 
@@ -516,7 +517,8 @@
     __ testl(rax, JVM_ACC_STATIC);
     __ movptr(rax, Address(locals, 0));                   // get receiver (assume this is frequent case)
     __ jcc(Assembler::zero, done);
-    __ movptr(rax, Address(rbx, methodOopDesc::constants_offset()));
+    __ movptr(rax, Address(rbx, methodOopDesc::const_offset()));
+    __ movptr(rax, Address(rax, constMethodOopDesc::constants_offset()));
     __ movptr(rax, Address(rax, constantPoolOopDesc::pool_holder_offset_in_bytes()));
     __ movptr(rax, Address(rax, mirror_offset));
     __ bind(done);
@@ -769,7 +771,8 @@
     __ testl(rax, JVM_ACC_STATIC);
     __ movptr(rax, Address(rdi, 0));                                    // get receiver (assume this is frequent case)
     __ jcc(Assembler::zero, done);
-    __ movptr(rax, Address(rbx, methodOopDesc::constants_offset()));
+    __ movptr(rax, Address(rbx, methodOopDesc::const_offset()));
+    __ movptr(rax, Address(rax, constMethodOopDesc::constants_offset()));
     __ movptr(rax, Address(rax, constantPoolOopDesc::pool_holder_offset_in_bytes()));
     __ movptr(rax, Address(rax, mirror_offset));
     __ bind(done);
@@ -821,9 +824,9 @@
     __ testptr(rax, rax);
     __ jcc(Assembler::zero, slow_path);
 
-    __ movptr(rdi, Address(rbx, methodOopDesc::constants_offset()));
     // read first instruction word and extract bytecode @ 1 and index @ 2
     __ movptr(rdx, Address(rbx, methodOopDesc::const_offset()));
+    __ movptr(rdi, Address(rdx, constMethodOopDesc::constants_offset()));
     __ movl(rdx, Address(rdx, constMethodOopDesc::codes_offset()));
     // Shift codes right to get the index on the right.
     // The bytecode fetched looks like <index><0xb4><0x2a>
@@ -868,9 +871,9 @@
     // Need to differentiate between igetfield, agetfield, bgetfield etc.
     // because they are different sizes.
     // Use the type from the constant pool cache
-    __ shrl(rdx, ConstantPoolCacheEntry::tosBits);
-    // Make sure we don't need to mask rdx for tosBits after the above shift
-    ConstantPoolCacheEntry::verify_tosBits();
+    __ shrl(rdx, ConstantPoolCacheEntry::tos_state_shift);
+    // Make sure we don't need to mask rdx after the above shift
+    ConstantPoolCacheEntry::verify_tos_state_shift();
 #ifdef _LP64
     Label notObj;
     __ cmpl(rdx, atos);
@@ -1185,7 +1188,8 @@
     __ testl(t, JVM_ACC_STATIC);
     __ jcc(Assembler::zero, L);
     // get mirror
-    __ movptr(t, Address(method, methodOopDesc:: constants_offset()));
+    __ movptr(t, Address(method, methodOopDesc::const_offset()));
+    __ movptr(t, Address(t, constMethodOopDesc::constants_offset()));
     __ movptr(t, Address(t, constantPoolOopDesc::pool_holder_offset_in_bytes()));
     __ movptr(t, Address(t, mirror_offset));
     // copy mirror into activation object
--- a/src/cpu/x86/vm/frame_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/frame_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -439,7 +439,6 @@
 // frame::sender_for_compiled_frame
 frame frame::sender_for_compiled_frame(RegisterMap* map) const {
   assert(map != NULL, "map must be set");
-  assert(!is_ricochet_frame(), "caller must handle this");
 
   // frame owned by optimizing compiler
   assert(_cb->frame_size() >= 0, "must have non-zero frame size");
@@ -483,7 +482,6 @@
   if (is_entry_frame())       return sender_for_entry_frame(map);
   if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
   assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
-  if (is_ricochet_frame())    return sender_for_ricochet_frame(map);
 
   if (_cb != NULL) {
     return sender_for_compiled_frame(map);
@@ -658,9 +656,7 @@
   values.describe(frame_no, fp() + frame::name##_offset, #name)
 
 void frame::describe_pd(FrameValues& values, int frame_no) {
-  if (is_ricochet_frame()) {
-    MethodHandles::RicochetFrame::describe(this, values, frame_no);
-  } else if (is_interpreted_frame()) {
+  if (is_interpreted_frame()) {
     DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
     DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
     DESCRIBE_FP_OFFSET(interpreter_frame_method);
@@ -682,12 +678,7 @@
   if (_cb != NULL) {
     // use the frame size if valid
     int size = _cb->frame_size();
-    if ((size > 0) &&
-        (! is_ricochet_frame())) {
-      // Work-around: ricochet explicitly excluded because frame size is not
-      // constant for the ricochet blob but its frame_size could not, for
-      // some reasons, be declared as <= 0. This potentially confusing
-      // size declaration should be fixed as another CR.
+    if (size > 0) {
       return unextended_sp() + size;
     }
   }
--- a/src/cpu/x86/vm/globals_x86.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/globals_x86.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -78,4 +78,53 @@
 
 // GC Ergo Flags
 define_pd_global(intx, CMSYoungGenPerWorker, 64*M);  // default max size of CMS young gen, per GC worker thread
+
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
+                                                                            \
+  develop(bool, IEEEPrecision, true,                                        \
+          "Enables IEEE precision (for INTEL only)")                        \
+                                                                            \
+  product(intx, FenceInstruction, 0,                                        \
+          "(Unsafe,Unstable) Experimental")                                 \
+                                                                            \
+  product(intx,  ReadPrefetchInstr, 0,                                      \
+          "Prefetch instruction to prefetch ahead")                         \
+                                                                            \
+  product(bool, UseStoreImmI16, true,                                       \
+          "Use store immediate 16-bits value instruction on x86")           \
+                                                                            \
+  product(intx, UseAVX, 99,                                                 \
+          "Highest supported AVX instructions set on x86/x64")              \
+                                                                            \
+  diagnostic(bool, UseIncDec, true,                                         \
+          "Use INC, DEC instructions on x86")                               \
+                                                                            \
+  product(bool, UseNewLongLShift, false,                                    \
+          "Use optimized bitwise shift left")                               \
+                                                                            \
+  product(bool, UseAddressNop, false,                                       \
+          "Use '0F 1F [addr]' NOP instructions on x86 cpus")                \
+                                                                            \
+  product(bool, UseXmmLoadAndClearUpper, true,                              \
+          "Load low part of XMM register and clear upper part")             \
+                                                                            \
+  product(bool, UseXmmRegToRegMoveAll, false,                               \
+          "Copy all XMM register bits when moving value between registers") \
+                                                                            \
+  product(bool, UseXmmI2D, false,                                           \
+          "Use SSE2 CVTDQ2PD instruction to convert Integer to Double")     \
+                                                                            \
+  product(bool, UseXmmI2F, false,                                           \
+          "Use SSE2 CVTDQ2PS instruction to convert Integer to Float")      \
+                                                                            \
+  product(bool, UseUnalignedLoadStores, false,                              \
+          "Use SSE2 MOVDQU instruction for Arraycopy")                      \
+                                                                            \
+  /* assembler */                                                           \
+  product(bool, Use486InstrsOnly, false,                                    \
+          "Use 80486 Compliant instruction subset")                         \
+                                                                            \
+  product(bool, UseCountLeadingZerosInstruction, false,                     \
+          "Use count leading zeros instruction")                            \
+
 #endif // CPU_X86_VM_GLOBALS_X86_HPP
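
ARCH_FLAGS above is an X-macro table: the shared flags machinery invokes it
with concrete develop/product/diagnostic/... macros, producing one declaration
or definition per entry. A minimal standalone sketch of the pattern (names
hypothetical, not HotSpot's actual macros):

    #define MY_FLAGS(product)                          \
      product(bool, UseThing, true, "Use the thing")

    #define DECLARE_FLAG(type, name, value, doc) extern type name;
    #define DEFINE_FLAG(type, name, value, doc)  type name = value;

    MY_FLAGS(DECLARE_FLAG)   // expands to: extern bool UseThing;
    MY_FLAGS(DEFINE_FLAG)    // expands to: bool UseThing = true;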
--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -253,8 +253,12 @@
   get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
   movptr(bytecode, Address(cache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
   const int shift_count = (1 + byte_no) * BitsPerByte;
+  assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) ||
+         (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift),
+         "correct shift count");
   shrptr(bytecode, shift_count);
-  andptr(bytecode, 0xFF);
+  assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask");
+  andptr(bytecode, ConstantPoolCacheEntry::bytecode_1_mask);
 }
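
For reference, a hedged C++ sketch of the extraction the new asserts pin
down: the cache entry's indices word carries the two resolved bytecodes in
adjacent bytes (assuming TemplateTable::f1_byte == 1 and f2_byte == 2, giving
shifts of 16 and 24; resolved_bytecode is illustrative only):

    #include <cstdint>
    // Mirrors shrptr(bytecode, shift_count) followed by andptr(bytecode, 0xFF).
    static inline int resolved_bytecode(std::uintptr_t indices, int byte_no /* 1 or 2 */) {
      const int shift = (1 + byte_no) * 8;                 // (1 + byte_no) * BitsPerByte
      return static_cast<int>((indices >> shift) & 0xFF);  // bytecode_{1,2}_mask assumed 0xFF
    }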
 
 
--- a/src/cpu/x86/vm/interp_masm_x86_32.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86_32.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -77,7 +77,8 @@
 
   // Helpers for runtime call arguments/results
   void get_method(Register reg)                            { movptr(reg, Address(rbp, frame::interpreter_frame_method_offset * wordSize)); }
-  void get_constant_pool(Register reg)                     { get_method(reg); movptr(reg, Address(reg, methodOopDesc::constants_offset())); }
+  void get_const(Register reg)                             { get_method(reg); movptr(reg, Address(reg, methodOopDesc::const_offset())); }
+  void get_constant_pool(Register reg)                     { get_const(reg); movptr(reg, Address(reg, constMethodOopDesc::constants_offset())); }
   void get_constant_pool_cache(Register reg)               { get_constant_pool(reg); movptr(reg, Address(reg, constantPoolOopDesc::cache_offset_in_bytes())); }
   void get_cpool_and_tags(Register cpool, Register tags)   { get_constant_pool(cpool); movptr(tags, Address(cpool, constantPoolOopDesc::tags_offset_in_bytes()));
   }
--- a/src/cpu/x86/vm/interp_masm_x86_64.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -256,8 +256,12 @@
   // little-endian machines allow us that.
   movl(bytecode, Address(cache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
   const int shift_count = (1 + byte_no) * BitsPerByte;
+  assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) ||
+         (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift),
+         "correct shift count");
   shrl(bytecode, shift_count);
-  andl(bytecode, 0xFF);
+  assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask");
+  andl(bytecode, ConstantPoolCacheEntry::bytecode_1_mask);
 }
 
 
--- a/src/cpu/x86/vm/interp_masm_x86_64.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86_64.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -84,9 +84,14 @@
     movptr(reg, Address(rbp, frame::interpreter_frame_method_offset * wordSize));
   }
 
-  void get_constant_pool(Register reg) {
+  void get_const(Register reg) {
     get_method(reg);
-    movptr(reg, Address(reg, methodOopDesc::constants_offset()));
+    movptr(reg, Address(reg, methodOopDesc::const_offset()));
+  }
+
+  void get_constant_pool(Register reg) {
+    get_const(reg);
+    movptr(reg, Address(reg, constMethodOopDesc::constants_offset()));
   }
 
   void get_constant_pool_cache(Register reg) {
--- a/src/cpu/x86/vm/interpreterGenerator_x86.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/interpreterGenerator_x86.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -35,7 +35,6 @@
   address generate_normal_entry(bool synchronized);
   address generate_native_entry(bool synchronized);
   address generate_abstract_entry(void);
-  address generate_method_handle_entry(void);
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
   address generate_empty_entry(void);
   address generate_accessor_entry(void);
--- a/src/cpu/x86/vm/interpreter_x86_32.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/interpreter_x86_32.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -181,6 +181,19 @@
         __ push_fTOS();
         __ pop_fTOS();
         break;
+    case Interpreter::java_lang_math_pow:
+      __ fld_d(Address(rsp, 3*wordSize)); // second argument
+      __ pow_with_fallback(0);
+      // Store to stack to convert 80bit precision back to 64bits
+      __ push_fTOS();
+      __ pop_fTOS();
+      break;
+    case Interpreter::java_lang_math_exp:
+      __ exp_with_fallback(0);
+      // Store to stack to convert 80bit precision back to 64bits
+      __ push_fTOS();
+      __ pop_fTOS();
+      break;
     default                              :
         ShouldNotReachHere();
   }
@@ -230,18 +243,6 @@
 }
 
 
-// Method handle invoker
-// Dispatch a method of the form java.lang.invoke.MethodHandles::invoke(...)
-address InterpreterGenerator::generate_method_handle_entry(void) {
-  if (!EnableInvokeDynamic) {
-    return generate_abstract_entry();
-  }
-
-  address entry_point = MethodHandles::generate_method_handle_interpreter_entry(_masm);
-
-  return entry_point;
-}
-
 void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
 
   // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
--- a/src/cpu/x86/vm/interpreter_x86_64.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/interpreter_x86_64.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -271,6 +271,14 @@
       case Interpreter::java_lang_math_log10:
           __ flog10();
           break;
+      case Interpreter::java_lang_math_pow:
+          __ fld_d(Address(rsp, 3*wordSize)); // second argument (one
+                                              // empty stack slot)
+          __ pow_with_fallback(0);
+          break;
+      case Interpreter::java_lang_math_exp:
+          __ exp_with_fallback(0);
+          break;
       default                              :
           ShouldNotReachHere();
     }
@@ -317,19 +325,6 @@
 }
 
 
-// Method handle invoker
-// Dispatch a method of the form java.lang.invoke.MethodHandles::invoke(...)
-address InterpreterGenerator::generate_method_handle_entry(void) {
-  if (!EnableInvokeDynamic) {
-    return generate_abstract_entry();
-  }
-
-  address entry_point = MethodHandles::generate_method_handle_interpreter_entry(_masm);
-
-  return entry_point;
-}
-
-
 // Empty method, generate a very fast return.
 
 address InterpreterGenerator::generate_empty_entry(void) {
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -32,8 +32,10 @@
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) stop(error)
 #else
 #define BLOCK_COMMENT(str) __ block_comment(str)
+#define STOP(error) block_comment(error); __ stop(error)
 #endif
 
 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
@@ -43,483 +45,24 @@
   return RegisterOrConstant(value);
 }
 
-address MethodHandleEntry::start_compiled_entry(MacroAssembler* _masm,
-                                                address interpreted_entry) {
-  // Just before the actual machine code entry point, allocate space
-  // for a MethodHandleEntry::Data record, so that we can manage everything
-  // from one base pointer.
-  __ align(wordSize);
-  address target = __ pc() + sizeof(Data);
-  while (__ pc() < target) {
-    __ nop();
-    __ align(wordSize);
-  }
-
-  MethodHandleEntry* me = (MethodHandleEntry*) __ pc();
-  me->set_end_address(__ pc());         // set a temporary end_address
-  me->set_from_interpreted_entry(interpreted_entry);
-  me->set_type_checking_entry(NULL);
-
-  return (address) me;
-}
-
-MethodHandleEntry* MethodHandleEntry::finish_compiled_entry(MacroAssembler* _masm,
-                                                address start_addr) {
-  MethodHandleEntry* me = (MethodHandleEntry*) start_addr;
-  assert(me->end_address() == start_addr, "valid ME");
-
-  // Fill in the real end_address:
-  __ align(wordSize);
-  me->set_end_address(__ pc());
-
-  return me;
-}
-
-// stack walking support
-
-frame MethodHandles::ricochet_frame_sender(const frame& fr, RegisterMap *map) {
-  RicochetFrame* f = RicochetFrame::from_frame(fr);
-  if (map->update_map())
-    frame::update_map_with_saved_link(map, &f->_sender_link);
-  return frame(f->extended_sender_sp(), f->exact_sender_sp(), f->sender_link(), f->sender_pc());
-}
-
-void MethodHandles::ricochet_frame_oops_do(const frame& fr, OopClosure* blk, const RegisterMap* reg_map) {
-  RicochetFrame* f = RicochetFrame::from_frame(fr);
-
-  // pick up the argument type descriptor:
-  Thread* thread = Thread::current();
-  Handle cookie(thread, f->compute_saved_args_layout(true, true));
-
-  // process fixed part
-  blk->do_oop((oop*)f->saved_target_addr());
-  blk->do_oop((oop*)f->saved_args_layout_addr());
-
-  // process variable arguments:
-  if (cookie.is_null())  return;  // no arguments to describe
-
-  // the cookie is actually the invokeExact method for my target
-  // his argument signature is what I'm interested in
-  assert(cookie->is_method(), "");
-  methodHandle invoker(thread, methodOop(cookie()));
-  assert(invoker->name() == vmSymbols::invokeExact_name(), "must be this kind of method");
-  assert(!invoker->is_static(), "must have MH argument");
-  int slot_count = invoker->size_of_parameters();
-  assert(slot_count >= 1, "must include 'this'");
-  intptr_t* base = f->saved_args_base();
-  intptr_t* retval = NULL;
-  if (f->has_return_value_slot())
-    retval = f->return_value_slot_addr();
-  int slot_num = slot_count;
-  intptr_t* loc = &base[slot_num -= 1];
-  //blk->do_oop((oop*) loc);   // original target, which is irrelevant
-  int arg_num = 0;
-  for (SignatureStream ss(invoker->signature()); !ss.is_done(); ss.next()) {
-    if (ss.at_return_type())  continue;
-    BasicType ptype = ss.type();
-    if (ptype == T_ARRAY)  ptype = T_OBJECT; // fold all refs to T_OBJECT
-    assert(ptype >= T_BOOLEAN && ptype <= T_OBJECT, "not array or void");
-    loc = &base[slot_num -= type2size[ptype]];
-    bool is_oop = (ptype == T_OBJECT && loc != retval);
-    if (is_oop)  blk->do_oop((oop*)loc);
-    arg_num += 1;
-  }
-  assert(slot_num == 0, "must have processed all the arguments");
-}
-
-oop MethodHandles::RicochetFrame::compute_saved_args_layout(bool read_cache, bool write_cache) {
-  oop cookie = NULL;
-  if (read_cache) {
-    cookie = saved_args_layout();
-    if (cookie != NULL)  return cookie;
-  }
-  oop target = saved_target();
-  oop mtype  = java_lang_invoke_MethodHandle::type(target);
-  oop mtform = java_lang_invoke_MethodType::form(mtype);
-  cookie = java_lang_invoke_MethodTypeForm::vmlayout(mtform);
-  if (write_cache)  {
-    (*saved_args_layout_addr()) = cookie;
-  }
-  return cookie;
-}
-
-void MethodHandles::RicochetFrame::generate_ricochet_blob(MacroAssembler* _masm,
-                                                          // output params:
-                                                          int* bounce_offset,
-                                                          int* exception_offset,
-                                                          int* frame_size_in_words) {
-  (*frame_size_in_words) = RicochetFrame::frame_size_in_bytes() / wordSize;
-
-  address start = __ pc();
-
-#ifdef ASSERT
-  __ hlt(); __ hlt(); __ hlt();
-  // here's a hint of something special:
-  __ push(MAGIC_NUMBER_1);
-  __ push(MAGIC_NUMBER_2);
-#endif //ASSERT
-  __ hlt();  // not reached
-
-  // A return PC has just been popped from the stack.
-  // Return values are in registers.
-  // The ebp points into the RicochetFrame, which contains
-  // a cleanup continuation we must return to.
-
-  (*bounce_offset) = __ pc() - start;
-  BLOCK_COMMENT("ricochet_blob.bounce");
-
-  if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
-  trace_method_handle(_masm, "return/ricochet_blob.bounce");
-
-  __ jmp(frame_address(continuation_offset_in_bytes()));
-  __ hlt();
-  DEBUG_ONLY(__ push(MAGIC_NUMBER_2));
-
-  (*exception_offset) = __ pc() - start;
-  BLOCK_COMMENT("ricochet_blob.exception");
-
-  // compare this to Interpreter::rethrow_exception_entry, which is parallel code
-  // for example, see TemplateInterpreterGenerator::generate_throw_exception
-  // Live registers in:
-  //   rax: exception
-  //   rdx: return address/pc that threw exception (ignored, always equal to bounce addr)
-  __ verify_oop(rax);
-
-  // no need to empty_FPU_stack or reinit_heapbase, since caller frame will do the same if needed
-
-  // Take down the frame.
-
-  // Cf. InterpreterMacroAssembler::remove_activation.
-  leave_ricochet_frame(_masm, /*rcx_recv=*/ noreg,
-                       saved_last_sp_register(),
-                       /*sender_pc_reg=*/ rdx);
-
-  // In between activations - previous activation type unknown yet
-  // compute continuation point - the continuation point expects the
-  // following registers set up:
-  //
-  // rax: exception
-  // rdx: return address/pc that threw exception
-  // rsp: expression stack of caller
-  // rbp: ebp of caller
-  __ push(rax);                                  // save exception
-  __ push(rdx);                                  // save return address
-  Register thread_reg = LP64_ONLY(r15_thread) NOT_LP64(rdi);
-  NOT_LP64(__ get_thread(thread_reg));
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address,
-                                   SharedRuntime::exception_handler_for_return_address),
-                  thread_reg, rdx);
-  __ mov(rbx, rax);                              // save exception handler
-  __ pop(rdx);                                   // restore return address
-  __ pop(rax);                                   // restore exception
-  __ jmp(rbx);                                   // jump to exception
-                                                 // handler of caller
-}
-
-void MethodHandles::RicochetFrame::enter_ricochet_frame(MacroAssembler* _masm,
-                                                        Register rcx_recv,
-                                                        Register rax_argv,
-                                                        address return_handler,
-                                                        Register rbx_temp) {
-  const Register saved_last_sp = saved_last_sp_register();
-  Address rcx_mh_vmtarget(    rcx_recv, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes() );
-  Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() );
-
-  // Push the RicochetFrame a word at a time.
-  // This creates something similar to an interpreter frame.
-  // Cf. TemplateInterpreterGenerator::generate_fixed_frame.
-  BLOCK_COMMENT("push RicochetFrame {");
-  DEBUG_ONLY(int rfo = (int) sizeof(RicochetFrame));
-  assert((rfo -= wordSize) == RicochetFrame::sender_pc_offset_in_bytes(), "");
-#define RF_FIELD(push_value, name)                                      \
-  { push_value;                                                         \
-    assert((rfo -= wordSize) == RicochetFrame::name##_offset_in_bytes(), ""); }
-  RF_FIELD(__ push(rbp),                   sender_link);
-  RF_FIELD(__ push(saved_last_sp),         exact_sender_sp);  // rsi/r13
-  RF_FIELD(__ pushptr(rcx_amh_conversion), conversion);
-  RF_FIELD(__ push(rax_argv),              saved_args_base);   // can be updated if args are shifted
-  RF_FIELD(__ push((int32_t) NULL_WORD),   saved_args_layout); // cache for GC layout cookie
-  if (UseCompressedOops) {
-    __ load_heap_oop(rbx_temp, rcx_mh_vmtarget);
-    RF_FIELD(__ push(rbx_temp),            saved_target);
-  } else {
-    RF_FIELD(__ pushptr(rcx_mh_vmtarget),  saved_target);
-  }
-  __ lea(rbx_temp, ExternalAddress(return_handler));
-  RF_FIELD(__ push(rbx_temp),              continuation);
-#undef RF_FIELD
-  assert(rfo == 0, "fully initialized the RicochetFrame");
-  // compute new frame pointer:
-  __ lea(rbp, Address(rsp, RicochetFrame::sender_link_offset_in_bytes()));
-  // Push guard word #1 in debug mode.
-  DEBUG_ONLY(__ push((int32_t) RicochetFrame::MAGIC_NUMBER_1));
-  // For debugging, leave behind an indication of which stub built this frame.
-  DEBUG_ONLY({ Label L; __ call(L, relocInfo::none); __ bind(L); });
-  BLOCK_COMMENT("} RicochetFrame");
-}
-
-void MethodHandles::RicochetFrame::leave_ricochet_frame(MacroAssembler* _masm,
-                                                        Register rcx_recv,
-                                                        Register new_sp_reg,
-                                                        Register sender_pc_reg) {
-  assert_different_registers(rcx_recv, new_sp_reg, sender_pc_reg);
-  const Register saved_last_sp = saved_last_sp_register();
-  // Take down the frame.
-  // Cf. InterpreterMacroAssembler::remove_activation.
-  BLOCK_COMMENT("end_ricochet_frame {");
-  // TO DO: If (exact_sender_sp - extended_sender_sp) > THRESH, compact the frame down.
-  // This will keep stack in bounds even with unlimited tailcalls, each with an adapter.
-  if (rcx_recv->is_valid())
-    __ movptr(rcx_recv,    RicochetFrame::frame_address(RicochetFrame::saved_target_offset_in_bytes()));
-  __ movptr(sender_pc_reg, RicochetFrame::frame_address(RicochetFrame::sender_pc_offset_in_bytes()));
-  __ movptr(saved_last_sp, RicochetFrame::frame_address(RicochetFrame::exact_sender_sp_offset_in_bytes()));
-  __ movptr(rbp,           RicochetFrame::frame_address(RicochetFrame::sender_link_offset_in_bytes()));
-  __ mov(rsp, new_sp_reg);
-  BLOCK_COMMENT("} end_ricochet_frame");
-}
-
-// Emit code to verify that RBP is pointing at a valid ricochet frame.
-#ifndef PRODUCT
-enum {
-  ARG_LIMIT = 255, SLOP = 4,
-  // use this parameter for checking for garbage stack movements:
-  UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP)
-  // the slop defends against false alarms due to fencepost errors
-};
-#endif
-
-#ifdef ASSERT
-void MethodHandles::RicochetFrame::verify_clean(MacroAssembler* _masm) {
-  // The stack should look like this:
-  //    ... keep1 | dest=42 | keep2 | RF | magic | handler | magic | recursive args |
-  // Check various invariants.
-  verify_offsets();
-
-  Register rdi_temp = rdi;
-  Register rcx_temp = rcx;
-  { __ push(rdi_temp); __ push(rcx_temp); }
-#define UNPUSH_TEMPS \
-  { __ pop(rcx_temp);  __ pop(rdi_temp); }
-
-  Address magic_number_1_addr  = RicochetFrame::frame_address(RicochetFrame::magic_number_1_offset_in_bytes());
-  Address magic_number_2_addr  = RicochetFrame::frame_address(RicochetFrame::magic_number_2_offset_in_bytes());
-  Address continuation_addr    = RicochetFrame::frame_address(RicochetFrame::continuation_offset_in_bytes());
-  Address conversion_addr      = RicochetFrame::frame_address(RicochetFrame::conversion_offset_in_bytes());
-  Address saved_args_base_addr = RicochetFrame::frame_address(RicochetFrame::saved_args_base_offset_in_bytes());
-
-  Label L_bad, L_ok;
-  BLOCK_COMMENT("verify_clean {");
-  // Magic numbers must check out:
-  __ cmpptr(magic_number_1_addr, (int32_t) MAGIC_NUMBER_1);
-  __ jcc(Assembler::notEqual, L_bad);
-  __ cmpptr(magic_number_2_addr, (int32_t) MAGIC_NUMBER_2);
-  __ jcc(Assembler::notEqual, L_bad);
-
-  // Arguments pointer must look reasonable:
-  __ movptr(rcx_temp, saved_args_base_addr);
-  __ cmpptr(rcx_temp, rbp);
-  __ jcc(Assembler::below, L_bad);
-  __ subptr(rcx_temp, UNREASONABLE_STACK_MOVE * Interpreter::stackElementSize);
-  __ cmpptr(rcx_temp, rbp);
-  __ jcc(Assembler::above, L_bad);
-
-  load_conversion_dest_type(_masm, rdi_temp, conversion_addr);
-  __ cmpl(rdi_temp, T_VOID);
-  __ jcc(Assembler::equal, L_ok);
-  __ movptr(rcx_temp, saved_args_base_addr);
-  load_conversion_vminfo(_masm, rdi_temp, conversion_addr);
-  __ cmpptr(Address(rcx_temp, rdi_temp, Interpreter::stackElementScale()),
-            (int32_t) RETURN_VALUE_PLACEHOLDER);
-  __ jcc(Assembler::equal, L_ok);
-  __ BIND(L_bad);
-  UNPUSH_TEMPS;
-  __ stop("damaged ricochet frame");
-  __ BIND(L_ok);
-  UNPUSH_TEMPS;
-  BLOCK_COMMENT("} verify_clean");
-
-#undef UNPUSH_TEMPS
-
-}
-#endif //ASSERT
-
 void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) {
   if (VerifyMethodHandles)
     verify_klass(_masm, klass_reg, SystemDictionaryHandles::Class_klass(),
-                 "AMH argument is a Class");
+                 "MH argument is a Class");
   __ load_heap_oop(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes()));
 }
 
-void MethodHandles::load_conversion_vminfo(MacroAssembler* _masm, Register reg, Address conversion_field_addr) {
-  int bits   = BitsPerByte;
-  int offset = (CONV_VMINFO_SHIFT / bits);
-  int shift  = (CONV_VMINFO_SHIFT % bits);
-  __ load_unsigned_byte(reg, conversion_field_addr.plus_disp(offset));
-  assert(CONV_VMINFO_MASK == right_n_bits(bits - shift), "else change type of previous load");
-  assert(shift == 0, "no shift needed");
-}
-
-void MethodHandles::load_conversion_dest_type(MacroAssembler* _masm, Register reg, Address conversion_field_addr) {
-  int bits   = BitsPerByte;
-  int offset = (CONV_DEST_TYPE_SHIFT / bits);
-  int shift  = (CONV_DEST_TYPE_SHIFT % bits);
-  __ load_unsigned_byte(reg, conversion_field_addr.plus_disp(offset));
-  assert(CONV_TYPE_MASK == right_n_bits(bits - shift), "else change type of previous load");
-  __ shrl(reg, shift);
-  DEBUG_ONLY(int conv_type_bits = (int) exact_log2(CONV_TYPE_MASK+1));
-  assert((shift + conv_type_bits) == bits, "left justified in byte");
-}
-
-void MethodHandles::load_stack_move(MacroAssembler* _masm,
-                                    Register rdi_stack_move,
-                                    Register rcx_amh,
-                                    bool might_be_negative) {
-  BLOCK_COMMENT("load_stack_move {");
-  Address rcx_amh_conversion(rcx_amh, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes());
-  __ movl(rdi_stack_move, rcx_amh_conversion);
-  __ sarl(rdi_stack_move, CONV_STACK_MOVE_SHIFT);
-#ifdef _LP64
-  if (might_be_negative) {
-    // clean high bits of stack motion register (was loaded as an int)
-    __ movslq(rdi_stack_move, rdi_stack_move);
-  }
-#endif //_LP64
-#ifdef ASSERT
-  if (VerifyMethodHandles) {
-    Label L_ok, L_bad;
-    int32_t stack_move_limit = 0x4000;  // extra-large
-    __ cmpptr(rdi_stack_move, stack_move_limit);
-    __ jcc(Assembler::greaterEqual, L_bad);
-    __ cmpptr(rdi_stack_move, -stack_move_limit);
-    __ jcc(Assembler::greater, L_ok);
-    __ bind(L_bad);
-    __ stop("load_stack_move of garbage value");
-    __ BIND(L_ok);
-  }
-#endif
-  BLOCK_COMMENT("} load_stack_move");
-}
-
 #ifdef ASSERT
-void MethodHandles::RicochetFrame::verify_offsets() {
-  // Check compatibility of this struct with the more generally used offsets of class frame:
-  int ebp_off = sender_link_offset_in_bytes();  // offset from struct base to local rbp value
-  assert(ebp_off + wordSize*frame::interpreter_frame_method_offset      == saved_args_base_offset_in_bytes(), "");
-  assert(ebp_off + wordSize*frame::interpreter_frame_last_sp_offset     == conversion_offset_in_bytes(), "");
-  assert(ebp_off + wordSize*frame::interpreter_frame_sender_sp_offset   == exact_sender_sp_offset_in_bytes(), "");
-  // These last two have to be exact:
-  assert(ebp_off + wordSize*frame::link_offset                          == sender_link_offset_in_bytes(), "");
-  assert(ebp_off + wordSize*frame::return_addr_offset                   == sender_pc_offset_in_bytes(), "");
+static int check_nonzero(const char* xname, int x) {
+  assert(x != 0, err_msg("%s should be nonzero", xname));
+  return x;
 }
-
-void MethodHandles::RicochetFrame::verify() const {
-  verify_offsets();
-  assert(magic_number_1() == MAGIC_NUMBER_1, err_msg(PTR_FORMAT " == " PTR_FORMAT, magic_number_1(), MAGIC_NUMBER_1));
-  assert(magic_number_2() == MAGIC_NUMBER_2, err_msg(PTR_FORMAT " == " PTR_FORMAT, magic_number_2(), MAGIC_NUMBER_2));
-  if (!Universe::heap()->is_gc_active()) {
-    if (saved_args_layout() != NULL) {
-      assert(saved_args_layout()->is_method(), "must be valid oop");
-    }
-    if (saved_target() != NULL) {
-      assert(java_lang_invoke_MethodHandle::is_instance(saved_target()), "checking frame value");
-    }
-  }
-  int conv_op = adapter_conversion_op(conversion());
-  assert(conv_op == java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS ||
-         conv_op == java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS ||
-         conv_op == java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF,
-         "must be a sane conversion");
-  if (has_return_value_slot()) {
-    assert(*return_value_slot_addr() == RETURN_VALUE_PLACEHOLDER, "");
-  }
-}
-#endif //PRODUCT
+#define NONZERO(x) check_nonzero(#x, x)
+#else //ASSERT
+#define NONZERO(x) (x)
+#endif //ASSERT
 
 #ifdef ASSERT
-void MethodHandles::verify_argslot(MacroAssembler* _masm,
-                                   Register argslot_reg,
-                                   const char* error_message) {
-  // Verify that argslot lies within (rsp, rbp].
-  Label L_ok, L_bad;
-  BLOCK_COMMENT("verify_argslot {");
-  __ cmpptr(argslot_reg, rbp);
-  __ jccb(Assembler::above, L_bad);
-  __ cmpptr(rsp, argslot_reg);
-  __ jccb(Assembler::below, L_ok);
-  __ bind(L_bad);
-  __ stop(error_message);
-  __ BIND(L_ok);
-  BLOCK_COMMENT("} verify_argslot");
-}
-
-void MethodHandles::verify_argslots(MacroAssembler* _masm,
-                                    RegisterOrConstant arg_slots,
-                                    Register arg_slot_base_reg,
-                                    bool negate_argslots,
-                                    const char* error_message) {
-  // Verify that [argslot..argslot+size) lies within (rsp, rbp).
-  Label L_ok, L_bad;
-  Register rdi_temp = rdi;
-  BLOCK_COMMENT("verify_argslots {");
-  __ push(rdi_temp);
-  if (negate_argslots) {
-    if (arg_slots.is_constant()) {
-      arg_slots = -1 * arg_slots.as_constant();
-    } else {
-      __ movptr(rdi_temp, arg_slots);
-      __ negptr(rdi_temp);
-      arg_slots = rdi_temp;
-    }
-  }
-  __ lea(rdi_temp, Address(arg_slot_base_reg, arg_slots, Interpreter::stackElementScale()));
-  __ cmpptr(rdi_temp, rbp);
-  __ pop(rdi_temp);
-  __ jcc(Assembler::above, L_bad);
-  __ cmpptr(rsp, arg_slot_base_reg);
-  __ jcc(Assembler::below, L_ok);
-  __ bind(L_bad);
-  __ stop(error_message);
-  __ BIND(L_ok);
-  BLOCK_COMMENT("} verify_argslots");
-}
-
-// Make sure that arg_slots has the same sign as the given direction.
-// If (and only if) arg_slots is an assembly-time constant, also allow it to be zero.
-void MethodHandles::verify_stack_move(MacroAssembler* _masm,
-                                      RegisterOrConstant arg_slots, int direction) {
-  bool allow_zero = arg_slots.is_constant();
-  if (direction == 0) { direction = +1; allow_zero = true; }
-  assert(stack_move_unit() == -1, "else add extra checks here");
-  if (arg_slots.is_register()) {
-    Label L_ok, L_bad;
-    BLOCK_COMMENT("verify_stack_move {");
-    // testl(arg_slots.as_register(), -stack_move_unit() - 1);  // no need
-    // jcc(Assembler::notZero, L_bad);
-    __ cmpptr(arg_slots.as_register(), (int32_t) NULL_WORD);
-    if (direction > 0) {
-      __ jcc(allow_zero ? Assembler::less : Assembler::lessEqual, L_bad);
-      __ cmpptr(arg_slots.as_register(), (int32_t) UNREASONABLE_STACK_MOVE);
-      __ jcc(Assembler::less, L_ok);
-    } else {
-      __ jcc(allow_zero ? Assembler::greater : Assembler::greaterEqual, L_bad);
-      __ cmpptr(arg_slots.as_register(), (int32_t) -UNREASONABLE_STACK_MOVE);
-      __ jcc(Assembler::greater, L_ok);
-    }
-    __ bind(L_bad);
-    if (direction > 0)
-      __ stop("assert arg_slots > 0");
-    else
-      __ stop("assert arg_slots < 0");
-    __ BIND(L_ok);
-    BLOCK_COMMENT("} verify_stack_move");
-  } else {
-    intptr_t size = arg_slots.as_constant();
-    if (direction < 0)  size = -size;
-    assert(size >= 0, "correct direction of constant move");
-    assert(size < UNREASONABLE_STACK_MOVE, "reasonable size of constant move");
-  }
-}
-
 void MethodHandles::verify_klass(MacroAssembler* _masm,
                                  Register obj, KlassHandle klass,
                                  const char* error_message) {
@@ -528,12 +71,15 @@
          klass_addr <= SystemDictionaryHandles::Long_klass().raw_value(),
          "must be one of the SystemDictionaryHandles");
   Register temp = rdi;
+  Register temp2 = noreg;
+  LP64_ONLY(temp2 = rscratch1);  // used by MacroAssembler::cmpptr
   Label L_ok, L_bad;
   BLOCK_COMMENT("verify_klass {");
   __ verify_oop(obj);
   __ testptr(obj, obj);
   __ jcc(Assembler::zero, L_bad);
-  __ push(temp);
+  __ push(temp); if (temp2 != noreg)  __ push(temp2);
+#define UNPUSH { if (temp2 != noreg)  __ pop(temp2);  __ pop(temp); }
   __ load_klass(temp, obj);
   __ cmpptr(temp, ExternalAddress((address) klass_addr));
   __ jcc(Assembler::equal, L_ok);
@@ -541,17 +87,42 @@
   __ movptr(temp, Address(temp, super_check_offset));
   __ cmpptr(temp, ExternalAddress((address) klass_addr));
   __ jcc(Assembler::equal, L_ok);
-  __ pop(temp);
+  UNPUSH;
   __ bind(L_bad);
-  __ stop(error_message);
+  __ STOP(error_message);
   __ BIND(L_ok);
-  __ pop(temp);
+  UNPUSH;
   BLOCK_COMMENT("} verify_klass");
 }
+
+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {
+  Label L;
+  BLOCK_COMMENT("verify_ref_kind {");
+  __ movl(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes())));
+  __ shrl(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT);
+  __ andl(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK);
+  __ cmpl(temp, ref_kind);
+  __ jcc(Assembler::equal, L);
+  { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal);
+    jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind);
+    if (ref_kind == JVM_REF_invokeVirtual ||
+        ref_kind == JVM_REF_invokeSpecial)
+      // could do this for all ref_kinds, but would explode assembly code size
+      trace_method_handle(_masm, buf);
+    __ STOP(buf);
+  }
+  BLOCK_COMMENT("} verify_ref_kind");
+  __ bind(L);
+}
+
 #endif //ASSERT
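
verify_ref_kind above recovers the reference kind from the packed MemberName.flags word with a shift and a mask. The same decode in plain C++ (the shift/mask values below follow the JVMS MethodHandle reference-kind encoding but are assumptions here, not quoted from this file):

    // Sketch: recovering a JVM_REF_* kind from MemberName.flags.
    // Shift 24 / 4-bit mask are assumed stand-ins for
    // MN_REFERENCE_KIND_SHIFT / MN_REFERENCE_KIND_MASK.
    static int ref_kind_of(int flags) {
      const int kRefKindShift = 24;
      const int kRefKindMask  = 0x0F;
      return (flags >> kRefKindShift) & kRefKindMask;
    }
    // e.g. ref_kind_of(flags) == 7 would mean JVM_REF_invokeSpecial
    // under the standard numbering (assumed).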
 
-void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp) {
-  if (JvmtiExport::can_post_interpreter_events()) {
+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp,
+                                            bool for_compiler_entry) {
+  assert(method == rbx, "interpreter calling convention");
+  __ verify_oop(method);
+
+  if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) {
     Label run_compiled_code;
     // JVMTI events, such as single-stepping, are implemented partly by avoiding running
     // compiled code in threads for which the event is enabled.  Check here for
@@ -567,462 +138,380 @@
     __ cmpb(Address(rthread, JavaThread::interp_only_mode_offset()), 0);
     __ jccb(Assembler::zero, run_compiled_code);
     __ jmp(Address(method, methodOopDesc::interpreter_entry_offset()));
-    __ bind(run_compiled_code);
+    __ BIND(run_compiled_code);
   }
-  __ jmp(Address(method, methodOopDesc::from_interpreted_offset()));
+
+  const ByteSize entry_offset = for_compiler_entry ? methodOopDesc::from_compiled_offset() :
+                                                     methodOopDesc::from_interpreted_offset();
+  __ jmp(Address(method, entry_offset));
 }
 
+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
+                                        Register recv, Register method_temp,
+                                        Register temp2,
+                                        bool for_compiler_entry) {
+  BLOCK_COMMENT("jump_to_lambda_form {");
+  // This is the initial entry point of a lazy method handle.
+  // After type checking, it picks up the invoker from the LambdaForm.
+  assert_different_registers(recv, method_temp, temp2);
+  assert(recv != noreg, "required register");
+  assert(method_temp == rbx, "required register for loading method");
+
+  //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); });
+
+  // Load the invoker, as MH -> MH.form -> LF.vmentry
+  __ verify_oop(recv);
+  __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())));
+  __ verify_oop(method_temp);
+  __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())));
+  __ verify_oop(method_temp);
+  // the following assumes that a methodOop is normally compressed in the vmtarget field:
+  __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())));
+  __ verify_oop(method_temp);
+
+  if (VerifyMethodHandles && !for_compiler_entry) {
+    // make sure recv is already on stack
+    __ load_sized_value(temp2,
+                        Address(method_temp, methodOopDesc::size_of_parameters_offset()),
+                        sizeof(u2), /*is_signed*/ false);
+    // assert(sizeof(u2) == sizeof(methodOopDesc::_size_of_parameters), "");
+    Label L;
+    __ cmpptr(recv, __ argument_address(temp2, -1));
+    __ jcc(Assembler::equal, L);
+    __ movptr(rax, __ argument_address(temp2, -1));
+    __ STOP("receiver not on stack");
+    __ BIND(L);
+  }
+
+  jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry);
+  BLOCK_COMMENT("} jump_to_lambda_form");
+}
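
The three load_heap_oop calls in jump_to_lambda_form walk recv.form.vmentry.vmtarget to reach the invoker methodOop. A toy C++ model of that pointer chase (struct names mirror the Java classes; the layout is illustrative only):

    // Sketch of the MH -> LambdaForm -> MemberName -> methodOop chase.
    struct MethodOop    { /* interpreter/compiled entry points */ };
    struct MemberName   { MethodOop*  vmtarget; };
    struct LambdaForm   { MemberName* vmentry;  };
    struct MethodHandle { LambdaForm* form;     };

    // Same three dependent loads the stub emits, minus the oop verification:
    static MethodOop* resolve_invoker(const MethodHandle* recv) {
      return recv->form->vmentry->vmtarget;
    }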
+
+
 // Code generation
-address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm) {
-  // rbx: methodOop
-  // rcx: receiver method handle (must load from sp[MethodTypeForm.vmslots])
-  // rsi/r13: sender SP (must preserve; see prepare_to_jump_from_interpreted)
-  // rdx, rdi: garbage temp, blown away
+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm,
+                                                                vmIntrinsics::ID iid) {
+  const bool not_for_compiler_entry = false;  // this is the interpreter entry
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+  if (iid == vmIntrinsics::_invokeGeneric ||
+      iid == vmIntrinsics::_compiledLambdaForm) {
+    // Perhaps surprisingly, the symbolic references visible to Java are not directly used.
+    // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod.
+    // They all allow an appendix argument.
+    __ hlt();           // empty stubs make SG sick
+    return NULL;
+  }
 
-  Register rbx_method = rbx;
-  Register rcx_recv   = rcx;
-  Register rax_mtype  = rax;
-  Register rdx_temp   = rdx;
-  Register rdi_temp   = rdi;
+  // rsi/r13: sender SP (must preserve; see prepare_to_jump_from_interpreted)
+  // rbx: methodOop
+  // rdx: argument locator (parameter slot count, added to rsp)
+  // rcx: used as temp to hold mh or receiver
+  // rax, rdi: garbage temps, blown away
+  Register rdx_argp   = rdx;   // argument list ptr, live on error paths
+  Register rax_temp   = rax;
+  Register rcx_mh     = rcx;   // MH receiver; dies quickly and is recycled
+  Register rbx_method = rbx;   // eventual target of this invocation
 
-  // emit WrongMethodType path first, to enable jccb back-branch from main path
-  Label wrong_method_type;
-  __ bind(wrong_method_type);
-  Label invoke_generic_slow_path, invoke_exact_error_path;
-  assert(methodOopDesc::intrinsic_id_size_in_bytes() == sizeof(u1), "");
-  __ cmpb(Address(rbx_method, methodOopDesc::intrinsic_id_offset_in_bytes()), (int) vmIntrinsics::_invokeExact);
-  __ jcc(Assembler::notEqual, invoke_generic_slow_path);
-  __ jmp(invoke_exact_error_path);
+  address code_start = __ pc();
 
   // here's where control starts out:
   __ align(CodeEntryAlignment);
   address entry_point = __ pc();
 
-  // fetch the MethodType from the method handle into rax (the 'check' register)
-  // FIXME: Interpreter should transmit pre-popped stack pointer, to locate base of arg list.
-  // This would simplify several touchy bits of code.
-  // See 6984712: JSR 292 method handle calls need a clean argument base pointer
-  {
-    Register tem = rbx_method;
-    for (jint* pchase = methodOopDesc::method_type_offsets_chain(); (*pchase) != -1; pchase++) {
-      __ movptr(rax_mtype, Address(tem, *pchase));
-      tem = rax_mtype;          // in case there is another indirection
+  if (VerifyMethodHandles) {
+    Label L;
+    BLOCK_COMMENT("verify_intrinsic_id {");
+    __ cmpb(Address(rbx_method, methodOopDesc::intrinsic_id_offset_in_bytes()), (int) iid);
+    __ jcc(Assembler::equal, L);
+    if (iid == vmIntrinsics::_linkToVirtual ||
+        iid == vmIntrinsics::_linkToSpecial) {
+      // could do this for all kinds, but would explode assembly code size
+      trace_method_handle(_masm, "bad methodOop::intrinsic_id");
     }
+    __ STOP("bad methodOop::intrinsic_id");
+    __ bind(L);
+    BLOCK_COMMENT("} verify_intrinsic_id");
+  }
+
+  // First task:  Find out how big the argument list is.
+  Address rdx_first_arg_addr;
+  int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
+  assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
+  if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
+    __ load_sized_value(rdx_argp,
+                        Address(rbx_method, methodOopDesc::size_of_parameters_offset()),
+                        sizeof(u2), /*is_signed*/ false);
+    // assert(sizeof(u2) == sizeof(methodOopDesc::_size_of_parameters), "");
+    rdx_first_arg_addr = __ argument_address(rdx_argp, -1);
+  } else {
+    DEBUG_ONLY(rdx_argp = noreg);
+  }
+
+  if (!is_signature_polymorphic_static(iid)) {
+    __ movptr(rcx_mh, rdx_first_arg_addr);
+    DEBUG_ONLY(rdx_argp = noreg);
   }
 
-  // given the MethodType, find out where the MH argument is buried
-  __ load_heap_oop(rdx_temp, Address(rax_mtype, __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, rdi_temp)));
-  Register rdx_vmslots = rdx_temp;
-  __ movl(rdx_vmslots, Address(rdx_temp, __ delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, rdi_temp)));
-  Address mh_receiver_slot_addr = __ argument_address(rdx_vmslots);
-  __ movptr(rcx_recv, mh_receiver_slot_addr);
-
-  trace_method_handle(_masm, "invokeExact");
-
-  __ check_method_handle_type(rax_mtype, rcx_recv, rdi_temp, wrong_method_type);
+  // rdx_first_arg_addr is live!
 
-  // Nobody uses the MH receiver slot after this.  Make sure.
-  DEBUG_ONLY(__ movptr(mh_receiver_slot_addr, (int32_t)0x999999));
-
-  __ jump_to_method_handle_entry(rcx_recv, rdi_temp);
+  if (TraceMethodHandles) {
+    const char* name = vmIntrinsics::name_at(iid);
+    if (*name == '_')  name += 1;
+    const size_t len = strlen(name) + 50;
+    char* qname = NEW_C_HEAP_ARRAY(char, len, mtInternal);
+    const char* suffix = "";
+    if (vmIntrinsics::method_for(iid) == NULL ||
+        !vmIntrinsics::method_for(iid)->access_flags().is_public()) {
+      if (is_signature_polymorphic_static(iid))
+        suffix = "/static";
+      else
+        suffix = "/private";
+    }
+    jio_snprintf(qname, len, "MethodHandle::interpreter_entry::%s%s", name, suffix);
+    // note: stub looks for mh in rcx
+    trace_method_handle(_masm, qname);
+  }
 
-  // error path for invokeExact (only)
-  __ bind(invoke_exact_error_path);
-  // ensure that the top of stack is properly aligned.
-  __ mov(rdi, rsp);
-  __ andptr(rsp, -StackAlignmentInBytes); // Align the stack for the ABI
-  __ pushptr(Address(rdi, 0));  // Pick up the return address
-
-  // Stub wants expected type in rax and the actual type in rcx
-  __ jump(ExternalAddress(StubRoutines::throw_WrongMethodTypeException_entry()));
+  if (iid == vmIntrinsics::_invokeBasic) {
+    generate_method_handle_dispatch(_masm, iid, rcx_mh, noreg, not_for_compiler_entry);
 
-  // for invokeGeneric (only), apply argument and result conversions on the fly
-  __ bind(invoke_generic_slow_path);
-#ifdef ASSERT
-  if (VerifyMethodHandles) {
-    Label L;
-    __ cmpb(Address(rbx_method, methodOopDesc::intrinsic_id_offset_in_bytes()), (int) vmIntrinsics::_invokeGeneric);
-    __ jcc(Assembler::equal, L);
-    __ stop("bad methodOop::intrinsic_id");
-    __ bind(L);
+  } else {
+    // Adjust argument list by popping the trailing MemberName argument.
+    Register rcx_recv = noreg;
+    if (MethodHandles::ref_kind_has_receiver(ref_kind)) {
+      // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack.
+      __ movptr(rcx_recv = rcx, rdx_first_arg_addr);
+    }
+    DEBUG_ONLY(rdx_argp = noreg);
+    Register rbx_member = rbx_method;  // MemberName ptr; incoming method ptr is dead now
+    __ pop(rax_temp);           // return address
+    __ pop(rbx_member);         // extract last argument
+    __ push(rax_temp);          // re-push return address
+    generate_method_handle_dispatch(_masm, iid, rcx_recv, rbx_member, not_for_compiler_entry);
   }
-#endif //ASSERT
-  Register rbx_temp = rbx_method;  // don't need it now
 
-  // make room on the stack for another pointer:
-  Register rcx_argslot = rcx_recv;
-  __ lea(rcx_argslot, __ argument_address(rdx_vmslots, 1));
-  insert_arg_slots(_masm, 2 * stack_move_unit(),
-                   rcx_argslot, rbx_temp, rdx_temp);
-
-  // load up an adapter from the calling type (Java weaves this)
-  Register rdx_adapter = rdx_temp;
-  __ load_heap_oop(rdx_temp,    Address(rax_mtype, __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes,               rdi_temp)));
-  __ load_heap_oop(rdx_adapter, Address(rdx_temp,  __ delayed_value(java_lang_invoke_MethodTypeForm::genericInvoker_offset_in_bytes, rdi_temp)));
-  __ verify_oop(rdx_adapter);
-  __ movptr(Address(rcx_argslot, 1 * Interpreter::stackElementSize), rdx_adapter);
-  // As a trusted first argument, pass the type being called, so the adapter knows
-  // the actual types of the arguments and return values.
-  // (Generic invokers are shared among form-families of method-type.)
-  __ movptr(Address(rcx_argslot, 0 * Interpreter::stackElementSize), rax_mtype);
-  // FIXME: assert that rdx_adapter is of the right method-type.
-  __ mov(rcx, rdx_adapter);
-  trace_method_handle(_masm, "invokeGeneric");
-  __ jump_to_method_handle_entry(rcx, rdi_temp);
+  if (PrintMethodHandleStubs) {
+    address code_end = __ pc();
+    tty->print_cr("--------");
+    tty->print_cr("method handle interpreter entry for %s", vmIntrinsics::name_at(iid));
+    Disassembler::decode(code_start, code_end);
+    tty->cr();
+  }
 
   return entry_point;
 }
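
For the linkTo* intrinsics, the entry above rotates the trailing MemberName out from under the return address with a pop/pop/push sequence. Modeled on a word stack (a sketch; std::vector's back() plays the role of the x86 top-of-stack):

    #include <cstdint>
    #include <vector>

    // Sketch of the pop(rax_temp)/pop(rbx_member)/push(rax_temp) shuffle.
    // On entry the return address is on top, the MemberName just below it.
    static std::intptr_t pop_trailing_member(std::vector<std::intptr_t>& stack) {
      std::intptr_t ret    = stack.back(); stack.pop_back(); // return address
      std::intptr_t member = stack.back(); stack.pop_back(); // last argument
      stack.push_back(ret);                                  // re-push ret addr
      return member;                                         // MemberName ptr
    }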
 
-// Helper to insert argument slots into the stack.
-// arg_slots must be a multiple of stack_move_unit() and < 0
-// rax_argslot is decremented to point to the new (shifted) location of the argslot
-// But, rdx_temp ends up holding the original value of rax_argslot.
-void MethodHandles::insert_arg_slots(MacroAssembler* _masm,
-                                     RegisterOrConstant arg_slots,
-                                     Register rax_argslot,
-                                     Register rbx_temp, Register rdx_temp) {
-  // allow constant zero
-  if (arg_slots.is_constant() && arg_slots.as_constant() == 0)
-    return;
-  assert_different_registers(rax_argslot, rbx_temp, rdx_temp,
-                             (!arg_slots.is_register() ? rsp : arg_slots.as_register()));
-  if (VerifyMethodHandles)
-    verify_argslot(_masm, rax_argslot, "insertion point must fall within current frame");
-  if (VerifyMethodHandles)
-    verify_stack_move(_masm, arg_slots, -1);
-
-  // Make space on the stack for the inserted argument(s).
-  // Then pull down everything shallower than rax_argslot.
-  // The stacked return address gets pulled down with everything else.
-  // That is, copy [rsp, argslot) downward by -size words.  In pseudo-code:
-  //   rsp -= size;
-  //   for (rdx = rsp + size; rdx < argslot; rdx++)
-  //     rdx[-size] = rdx[0]
-  //   argslot -= size;
-  BLOCK_COMMENT("insert_arg_slots {");
-  __ mov(rdx_temp, rsp);                        // source pointer for copy
-  __ lea(rsp, Address(rsp, arg_slots, Interpreter::stackElementScale()));
-  {
-    Label loop;
-    __ BIND(loop);
-    // pull one word down each time through the loop
-    __ movptr(rbx_temp, Address(rdx_temp, 0));
-    __ movptr(Address(rdx_temp, arg_slots, Interpreter::stackElementScale()), rbx_temp);
-    __ addptr(rdx_temp, wordSize);
-    __ cmpptr(rdx_temp, rax_argslot);
-    __ jcc(Assembler::below, loop);
-  }
-
-  // Now move the argslot down, to point to the opened-up space.
-  __ lea(rax_argslot, Address(rax_argslot, arg_slots, Interpreter::stackElementScale()));
-  BLOCK_COMMENT("} insert_arg_slots");
-}
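
The pseudo-code comment in the removed insert_arg_slots is easy to check in plain C++. With word indices where a smaller index means deeper on a downward-growing stack, the copy-down looks like this (a sketch of the removed logic, not HotSpot code):

    #include <cstdint>

    // Sketch: open 'count' slots below 'argslot', pulling everything
    // shallower (including the stacked return address) down by 'count'.
    static void insert_slots(std::intptr_t* stack, int& sp, int& argslot, int count) {
      int old_sp = sp;
      sp -= count;                            // rsp -= size
      for (int a = old_sp; a < argslot; a++)  // rdx walks rsp+size .. argslot
        stack[a - count] = stack[a];          // rdx[-size] = rdx[0]
      argslot -= count;                       // argslot now names the opened gap
    }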
-
-// Helper to remove argument slots from the stack.
-// arg_slots must be a multiple of stack_move_unit() and > 0
-void MethodHandles::remove_arg_slots(MacroAssembler* _masm,
-                                     RegisterOrConstant arg_slots,
-                                     Register rax_argslot,
-                                     Register rbx_temp, Register rdx_temp) {
-  // allow constant zero
-  if (arg_slots.is_constant() && arg_slots.as_constant() == 0)
-    return;
-  assert_different_registers(rax_argslot, rbx_temp, rdx_temp,
-                             (!arg_slots.is_register() ? rsp : arg_slots.as_register()));
-  if (VerifyMethodHandles)
-    verify_argslots(_masm, arg_slots, rax_argslot, false,
-                    "deleted argument(s) must fall within current frame");
-  if (VerifyMethodHandles)
-    verify_stack_move(_masm, arg_slots, +1);
-
-  BLOCK_COMMENT("remove_arg_slots {");
-  // Pull up everything shallower than rax_argslot.
-  // Then remove the excess space on the stack.
-  // The stacked return address gets pulled up with everything else.
-  // That is, copy [rsp, argslot) upward by size words.  In pseudo-code:
-  //   for (rdx = argslot-1; rdx >= rsp; --rdx)
-  //     rdx[size] = rdx[0]
-  //   argslot += size;
-  //   rsp += size;
-  __ lea(rdx_temp, Address(rax_argslot, -wordSize)); // source pointer for copy
-  {
-    Label loop;
-    __ BIND(loop);
-    // pull one word up each time through the loop
-    __ movptr(rbx_temp, Address(rdx_temp, 0));
-    __ movptr(Address(rdx_temp, arg_slots, Interpreter::stackElementScale()), rbx_temp);
-    __ addptr(rdx_temp, -wordSize);
-    __ cmpptr(rdx_temp, rsp);
-    __ jcc(Assembler::aboveEqual, loop);
+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
+                                                    vmIntrinsics::ID iid,
+                                                    Register receiver_reg,
+                                                    Register member_reg,
+                                                    bool for_compiler_entry) {
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+  Register rbx_method = rbx;   // eventual target of this invocation
+  // temps used in this code are not used in *either* compiled or interpreted calling sequences
+#ifdef _LP64
+  Register temp1 = rscratch1;
+  Register temp2 = rscratch2;
+  Register temp3 = rax;
+  if (for_compiler_entry) {
+    assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment");
+    assert_different_registers(temp1,        j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5);
+    assert_different_registers(temp2,        j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5);
+    assert_different_registers(temp3,        j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5);
   }
-
-  // Now move the argslot up, to point to the just-copied block.
-  __ lea(rsp, Address(rsp, arg_slots, Interpreter::stackElementScale()));
-  // And adjust the argslot address to point at the deletion point.
-  __ lea(rax_argslot, Address(rax_argslot, arg_slots, Interpreter::stackElementScale()));
-  BLOCK_COMMENT("} remove_arg_slots");
-}
-
-// Helper to copy argument slots to the top of the stack.
-// The sequence starts with rax_argslot and is counted by slot_count
-// slot_count must be a multiple of stack_move_unit() and >= 0
-// This function blows the temps but does not change rax_argslot.
-void MethodHandles::push_arg_slots(MacroAssembler* _masm,
-                                   Register rax_argslot,
-                                   RegisterOrConstant slot_count,
-                                   int skip_words_count,
-                                   Register rbx_temp, Register rdx_temp) {
-  assert_different_registers(rax_argslot, rbx_temp, rdx_temp,
-                             (!slot_count.is_register() ? rbp : slot_count.as_register()),
-                             rsp);
-  assert(Interpreter::stackElementSize == wordSize, "else change this code");
-
-  if (VerifyMethodHandles)
-    verify_stack_move(_masm, slot_count, 0);
-
-  // allow constant zero
-  if (slot_count.is_constant() && slot_count.as_constant() == 0)
-    return;
-
-  BLOCK_COMMENT("push_arg_slots {");
-
-  Register rbx_top = rbx_temp;
-
-  // There is at most 1 word to carry down with the TOS.
-  switch (skip_words_count) {
-  case 1: __ pop(rdx_temp); break;
-  case 0:                   break;
-  default: ShouldNotReachHere();
-  }
-
-  if (slot_count.is_constant()) {
-    for (int i = slot_count.as_constant() - 1; i >= 0; i--) {
-      __ pushptr(Address(rax_argslot, i * wordSize));
-    }
-  } else {
-    Label L_plural, L_loop, L_break;
-    // Emit code to dynamically check for the common cases, zero and one slot.
-    __ cmpl(slot_count.as_register(), (int32_t) 1);
-    __ jccb(Assembler::greater, L_plural);
-    __ jccb(Assembler::less, L_break);
-    __ pushptr(Address(rax_argslot, 0));
-    __ jmpb(L_break);
-    __ BIND(L_plural);
-
-    // Loop for 2 or more:
-    //   rbx = &rax[slot_count]
-    //   while (rbx > rax)  *(--rsp) = *(--rbx)
-    __ lea(rbx_top, Address(rax_argslot, slot_count, Address::times_ptr));
-    __ BIND(L_loop);
-    __ subptr(rbx_top, wordSize);
-    __ pushptr(Address(rbx_top, 0));
-    __ cmpptr(rbx_top, rax_argslot);
-    __ jcc(Assembler::above, L_loop);
-    __ bind(L_break);
-  }
-  switch (skip_words_count) {
-  case 1: __ push(rdx_temp); break;
-  case 0:                    break;
-  default: ShouldNotReachHere();
-  }
-  BLOCK_COMMENT("} push_arg_slots");
-}
-
-// in-place movement; no change to rsp
-// blows rax_temp, rdx_temp
-void MethodHandles::move_arg_slots_up(MacroAssembler* _masm,
-                                      Register rbx_bottom,  // invariant
-                                      Address  top_addr,     // can use rax_temp
-                                      RegisterOrConstant positive_distance_in_slots,
-                                      Register rax_temp, Register rdx_temp) {
-  BLOCK_COMMENT("move_arg_slots_up {");
-  assert_different_registers(rbx_bottom,
-                             rax_temp, rdx_temp,
-                             positive_distance_in_slots.register_or_noreg());
-  Label L_loop, L_break;
-  Register rax_top = rax_temp;
-  if (!top_addr.is_same_address(Address(rax_top, 0)))
-    __ lea(rax_top, top_addr);
-  // Detect empty (or broken) loop:
-#ifdef ASSERT
-  if (VerifyMethodHandles) {
-    // Verify that &bottom < &top (non-empty interval)
-    Label L_ok, L_bad;
-    if (positive_distance_in_slots.is_register()) {
-      __ cmpptr(positive_distance_in_slots.as_register(), (int32_t) 0);
-      __ jcc(Assembler::lessEqual, L_bad);
-    }
-    __ cmpptr(rbx_bottom, rax_top);
-    __ jcc(Assembler::below, L_ok);
-    __ bind(L_bad);
-    __ stop("valid bounds (copy up)");
-    __ BIND(L_ok);
+#else
+  Register temp1 = (for_compiler_entry ? rsi : rdx);
+  Register temp2 = rdi;
+  Register temp3 = rax;
+  if (for_compiler_entry) {
+    assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : rcx), "only valid assignment");
+    assert_different_registers(temp1,        rcx, rdx);
+    assert_different_registers(temp2,        rcx, rdx);
+    assert_different_registers(temp3,        rcx, rdx);
   }
 #endif
-  __ cmpptr(rbx_bottom, rax_top);
-  __ jccb(Assembler::aboveEqual, L_break);
-  // work rax down to rbx, copying contiguous data upwards
-  // In pseudo-code:
-  //   [rbx, rax) = &[bottom, top)
-  //   while (--rax >= rbx) *(rax + distance) = *(rax + 0), rax--;
-  __ BIND(L_loop);
-  __ subptr(rax_top, wordSize);
-  __ movptr(rdx_temp, Address(rax_top, 0));
-  __ movptr(          Address(rax_top, positive_distance_in_slots, Address::times_ptr), rdx_temp);
-  __ cmpptr(rax_top, rbx_bottom);
-  __ jcc(Assembler::above, L_loop);
-  assert(Interpreter::stackElementSize == wordSize, "else change loop");
-  __ bind(L_break);
-  BLOCK_COMMENT("} move_arg_slots_up");
-}
+  assert_different_registers(temp1, temp2, temp3, receiver_reg);
+  assert_different_registers(temp1, temp2, temp3, member_reg);
+  if (!for_compiler_entry)
+    assert_different_registers(temp1, temp2, temp3, saved_last_sp_register());  // don't trash lastSP
+
+  if (iid == vmIntrinsics::_invokeBasic) {
+    // indirect through MH.form.vmentry.vmtarget
+    jump_to_lambda_form(_masm, receiver_reg, rbx_method, temp1, for_compiler_entry);
+
+  } else {
+    // The method is a member invoker used by direct method handles.
+    if (VerifyMethodHandles) {
+      // make sure the trailing argument really is a MemberName (caller responsibility)
+      verify_klass(_masm, member_reg, SystemDictionaryHandles::MemberName_klass(),
+                   "MemberName required for invokeVirtual etc.");
+    }
+
+    Address member_clazz(    member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()));
+    Address member_vmindex(  member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()));
+    Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()));
 
-// in-place movement; no change to rsp
-// blows rax_temp, rdx_temp
-void MethodHandles::move_arg_slots_down(MacroAssembler* _masm,
-                                        Address  bottom_addr,  // can use rax_temp
-                                        Register rbx_top,      // invariant
-                                        RegisterOrConstant negative_distance_in_slots,
-                                        Register rax_temp, Register rdx_temp) {
-  BLOCK_COMMENT("move_arg_slots_down {");
-  assert_different_registers(rbx_top,
-                             negative_distance_in_slots.register_or_noreg(),
-                             rax_temp, rdx_temp);
-  Label L_loop, L_break;
-  Register rax_bottom = rax_temp;
-  if (!bottom_addr.is_same_address(Address(rax_bottom, 0)))
-    __ lea(rax_bottom, bottom_addr);
-  // Detect empty (or broken) loop:
-#ifdef ASSERT
-  assert(!negative_distance_in_slots.is_constant() || negative_distance_in_slots.as_constant() < 0, "");
-  if (VerifyMethodHandles) {
-    // Verify that &bottom < &top (non-empty interval)
-    Label L_ok, L_bad;
-    if (negative_distance_in_slots.is_register()) {
-      __ cmpptr(negative_distance_in_slots.as_register(), (int32_t) 0);
-      __ jcc(Assembler::greaterEqual, L_bad);
+    Register temp1_recv_klass = temp1;
+    if (iid != vmIntrinsics::_linkToStatic) {
+      __ verify_oop(receiver_reg);
+      if (iid == vmIntrinsics::_linkToSpecial) {
+        // Don't actually load the klass; just null-check the receiver.
+        __ null_check(receiver_reg);
+      } else {
+        // load receiver klass itself
+        __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes());
+        __ load_klass(temp1_recv_klass, receiver_reg);
+        __ verify_oop(temp1_recv_klass);
+      }
+      BLOCK_COMMENT("check_receiver {");
+      // The receiver for the MemberName must be in receiver_reg.
+      // Check the receiver against the MemberName.clazz
+      if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) {
+        // Did not load it above...
+        __ load_klass(temp1_recv_klass, receiver_reg);
+        __ verify_oop(temp1_recv_klass);
+      }
+      if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
+        Label L_ok;
+        Register temp2_defc = temp2;
+        __ load_heap_oop(temp2_defc, member_clazz);
+        load_klass_from_Class(_masm, temp2_defc);
+        __ verify_oop(temp2_defc);
+        __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok);
+        // If we get here, the type check failed!
+        __ STOP("receiver class disagrees with MemberName.clazz");
+        __ bind(L_ok);
+      }
+      BLOCK_COMMENT("} check_receiver");
     }
-    __ cmpptr(rax_bottom, rbx_top);
-    __ jcc(Assembler::below, L_ok);
-    __ bind(L_bad);
-    __ stop("valid bounds (copy down)");
-    __ BIND(L_ok);
-  }
-#endif
-  __ cmpptr(rax_bottom, rbx_top);
-  __ jccb(Assembler::aboveEqual, L_break);
-  // work rax up to rbx, copying contiguous data downwards
-  // In pseudo-code:
-  //   [rax, rbx) = &[bottom, top)
-  //   while (rax < rbx) *(rax - distance) = *(rax + 0), rax++;
-  __ BIND(L_loop);
-  __ movptr(rdx_temp, Address(rax_bottom, 0));
-  __ movptr(          Address(rax_bottom, negative_distance_in_slots, Address::times_ptr), rdx_temp);
-  __ addptr(rax_bottom, wordSize);
-  __ cmpptr(rax_bottom, rbx_top);
-  __ jcc(Assembler::below, L_loop);
-  assert(Interpreter::stackElementSize == wordSize, "else change loop");
-  __ bind(L_break);
-  BLOCK_COMMENT("} move_arg_slots_down");
-}
+    if (iid == vmIntrinsics::_linkToSpecial ||
+        iid == vmIntrinsics::_linkToStatic) {
+      DEBUG_ONLY(temp1_recv_klass = noreg);  // these guys didn't load the recv_klass
+    }
+
+    // Live registers at this point:
+    //  member_reg - MemberName that was the trailing argument
+    //  temp1_recv_klass - klass of stacked receiver, if needed
+    //  rsi/r13 - interpreter linkage (if interpreted)
+    //  rcx, rdx, rsi, rdi, r8, r9 - compiler arguments (if compiled)
+
+    bool method_is_live = false;
+    switch (iid) {
+    case vmIntrinsics::_linkToSpecial:
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3);
+      }
+      __ load_heap_oop(rbx_method, member_vmtarget);
+      method_is_live = true;
+      break;
+
+    case vmIntrinsics::_linkToStatic:
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3);
+      }
+      __ load_heap_oop(rbx_method, member_vmtarget);
+      method_is_live = true;
+      break;
 
-// Copy from a field or array element to a stacked argument slot.
-// is_element (ignored) says whether caller is loading an array element instead of an instance field.
-void MethodHandles::move_typed_arg(MacroAssembler* _masm,
-                                   BasicType type, bool is_element,
-                                   Address slot_dest, Address value_src,
-                                   Register rbx_temp, Register rdx_temp) {
-  BLOCK_COMMENT(!is_element ? "move_typed_arg {" : "move_typed_arg { (array element)");
-  if (type == T_OBJECT || type == T_ARRAY) {
-    __ load_heap_oop(rbx_temp, value_src);
-    __ movptr(slot_dest, rbx_temp);
-  } else if (type != T_VOID) {
-    int  arg_size      = type2aelembytes(type);
-    bool arg_is_signed = is_signed_subword_type(type);
-    int  slot_size     = (arg_size > wordSize) ? arg_size : wordSize;
-    __ load_sized_value(  rdx_temp,  value_src, arg_size, arg_is_signed, rbx_temp);
-    __ store_sized_value( slot_dest, rdx_temp,  slot_size,               rbx_temp);
-  }
-  BLOCK_COMMENT("} move_typed_arg");
-}
+    case vmIntrinsics::_linkToVirtual:
+    {
+      // same as TemplateTable::invokevirtual,
+      // minus the CP setup and profiling:
+
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3);
+      }
+
+      // pick out the vtable index from the MemberName, and then we can discard it:
+      Register temp2_index = temp2;
+      __ movptr(temp2_index, member_vmindex);
 
-void MethodHandles::move_return_value(MacroAssembler* _masm, BasicType type,
-                                      Address return_slot) {
-  BLOCK_COMMENT("move_return_value {");
-  // Old versions of the JVM must clean the FPU stack after every return.
-#ifndef _LP64
-#ifdef COMPILER2
-  // The FPU stack is clean if UseSSE >= 2 but must be cleaned in other cases
-  if ((type == T_FLOAT && UseSSE < 1) || (type == T_DOUBLE && UseSSE < 2)) {
-    for (int i = 1; i < 8; i++) {
-        __ ffree(i);
+      if (VerifyMethodHandles) {
+        Label L_index_ok;
+        __ cmpl(temp2_index, 0);
+        __ jcc(Assembler::greaterEqual, L_index_ok);
+        __ STOP("no virtual index");
+        __ BIND(L_index_ok);
+      }
+
+      // Note:  The verifier invariants allow us to ignore MemberName.clazz and vmtarget
+      // at this point.  And VerifyMethodHandles has already checked clazz, if needed.
+
+      // get target methodOop & entry point
+      __ lookup_virtual_method(temp1_recv_klass, temp2_index, rbx_method);
+      method_is_live = true;
+      break;
     }
-  } else if (UseSSE < 2) {
-    __ empty_FPU_stack();
-  }
-#endif //COMPILER2
-#endif //!_LP64
+
+    case vmIntrinsics::_linkToInterface:
+    {
+      // same as TemplateTable::invokeinterface
+      // (minus the CP setup and profiling, with different argument motion)
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3);
+      }
+
+      Register temp3_intf = temp3;
+      __ load_heap_oop(temp3_intf, member_clazz);
+      load_klass_from_Class(_masm, temp3_intf);
+      __ verify_oop(temp3_intf);
+
+      Register rbx_index = rbx_method;
+      __ movptr(rbx_index, member_vmindex);
+      if (VerifyMethodHandles) {
+        Label L;
+        __ cmpl(rbx_index, 0);
+        __ jcc(Assembler::greaterEqual, L);
+        __ STOP("invalid vtable index for MH.invokeInterface");
+        __ bind(L);
+      }
 
-  // Look at the type and pull the value out of the corresponding register.
-  if (type == T_VOID) {
-    // nothing to do
-  } else if (type == T_OBJECT) {
-    __ movptr(return_slot, rax);
-  } else if (type == T_INT || is_subword_type(type)) {
-    // write the whole word, even if only 32 bits is significant
-    __ movptr(return_slot, rax);
-  } else if (type == T_LONG) {
-    // store the value by parts
-    // Note: We assume longs are contiguous (if misaligned) on the interpreter stack.
-    __ store_sized_value(return_slot, rax, BytesPerLong, rdx);
-  } else if (NOT_LP64((type == T_FLOAT  && UseSSE < 1) ||
-                      (type == T_DOUBLE && UseSSE < 2) ||)
-             false) {
-    // Use old x86 FPU registers:
-    if (type == T_FLOAT)
-      __ fstp_s(return_slot);
-    else
-      __ fstp_d(return_slot);
-  } else if (type == T_FLOAT) {
-    __ movflt(return_slot, xmm0);
-  } else if (type == T_DOUBLE) {
-    __ movdbl(return_slot, xmm0);
-  } else {
-    ShouldNotReachHere();
+      // given intf, index, and recv klass, dispatch to the implementation method
+      Label L_no_such_interface;
+      __ lookup_interface_method(temp1_recv_klass, temp3_intf,
+                                 // note: next two args must be the same:
+                                 rbx_index, rbx_method,
+                                 temp2,
+                                 L_no_such_interface);
+
+      __ verify_oop(rbx_method);
+      jump_from_method_handle(_masm, rbx_method, temp2, for_compiler_entry);
+      __ hlt();
+
+      __ bind(L_no_such_interface);
+      __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
+      break;
+    }
+
+    default:
+      fatal(err_msg("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
+      break;
+    }
+
+    if (method_is_live) {
+      // live at this point:  rbx_method, rsi/r13 (if interpreted)
+
+      // After figuring out which concrete method to call, jump into it.
+      // Note that this works in the interpreter with no data motion.
+      // But the compiled version will require that rcx_recv be shifted out.
+      __ verify_oop(rbx_method);
+      jump_from_method_handle(_masm, rbx_method, temp1, for_compiler_entry);
+    }
   }
-  BLOCK_COMMENT("} move_return_value");
 }
 
 #ifndef PRODUCT
-#define DESCRIBE_RICOCHET_OFFSET(rf, name) \
-  values.describe(frame_no, (intptr_t *) (((uintptr_t)rf) + MethodHandles::RicochetFrame::name##_offset_in_bytes()), #name)
-
-void MethodHandles::RicochetFrame::describe(const frame* fr, FrameValues& values, int frame_no)  {
-    address bp = (address) fr->fp();
-    RicochetFrame* rf = (RicochetFrame*)(bp - sender_link_offset_in_bytes());
-
-    // ricochet slots
-    DESCRIBE_RICOCHET_OFFSET(rf, exact_sender_sp);
-    DESCRIBE_RICOCHET_OFFSET(rf, conversion);
-    DESCRIBE_RICOCHET_OFFSET(rf, saved_args_base);
-    DESCRIBE_RICOCHET_OFFSET(rf, saved_args_layout);
-    DESCRIBE_RICOCHET_OFFSET(rf, saved_target);
-    DESCRIBE_RICOCHET_OFFSET(rf, continuation);
-
-    // relevant ricochet targets (in caller frame)
-    values.describe(-1, rf->saved_args_base(),  err_msg("*saved_args_base for #%d", frame_no));
-}
-#endif // ASSERT
-
-#ifndef PRODUCT
-extern "C" void print_method_handle(oop mh);
 void trace_method_handle_stub(const char* adaptername,
                               oop mh,
                               intptr_t* saved_regs,
                               intptr_t* entry_sp) {
   // called as a leaf from native code: do not block the JVM!
-  bool has_mh = (strstr(adaptername, "return/") == NULL);  // return adapters don't have rcx_mh
+  bool has_mh = (strstr(adaptername, "/static") == NULL &&
+                 strstr(adaptername, "linkTo") == NULL);    // static linkers don't have MH
   const char* mh_reg_name = has_mh ? "rcx_mh" : "rcx";
-  tty->print_cr("MH %s %s="PTR_FORMAT" sp="PTR_FORMAT, adaptername, mh_reg_name, mh, entry_sp);
+  tty->print_cr("MH %s %s="PTR_FORMAT" sp="PTR_FORMAT,
+                adaptername, mh_reg_name,
+                mh, entry_sp);
 
   if (Verbose) {
     tty->print_cr("Registers:");
@@ -1086,12 +575,18 @@
         values.describe(-1, dump_fp, "fp for #1 <not parsed, cannot trust pc>");
         values.describe(-1, dump_sp, "sp for #1");
       }
+      values.describe(-1, entry_sp, "raw top of stack");
 
       tty->print_cr("Stack layout:");
       values.print(p);
     }
-    if (has_mh)
-      print_method_handle(mh);
+    if (has_mh && mh->is_oop()) {
+      mh->print();
+      if (java_lang_invoke_MethodHandle::is_instance(mh)) {
+        if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0)
+          java_lang_invoke_MethodHandle::form(mh)->print();
+      }
+    }
   }
 }
 
@@ -1159,1363 +654,3 @@
 }
 #endif //PRODUCT
 
-// which conversion op types are implemented here?
-int MethodHandles::adapter_conversion_ops_supported_mask() {
-  return ((1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_ONLY)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_RAW)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_CHECK_CAST)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_PRIM)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_REF_TO_PRIM)
-          //OP_PRIM_TO_REF is below...
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SWAP_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_ROT_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DUP_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS)
-          //OP_COLLECT_ARGS is below...
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS)
-         |(
-           java_lang_invoke_MethodTypeForm::vmlayout_offset_in_bytes() <= 0 ? 0 :
-           ((1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF)
-           |(1<<java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS)
-           |(1<<java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS)
-            ))
-         );
-}
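
The removed adapter_conversion_ops_supported_mask advertised adapter support as a bitset indexed by OP_* id; a membership test is then a single and-mask (sketch):

    // Sketch: querying a supported-ops bitset like the one built above.
    static bool conversion_op_supported(int mask, int op) {
      return (mask & (1 << op)) != 0;
    }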
-
-//------------------------------------------------------------------------------
-// MethodHandles::generate_method_handle_stub
-//
-// Generate an "entry" field for a method handle.
-// This determines how the method handle will respond to calls.
-void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHandles::EntryKind ek) {
-  MethodHandles::EntryKind ek_orig = ek_original_kind(ek);
-
-  // Here is the register state during an interpreted call,
-  // as set up by generate_method_handle_interpreter_entry():
-  // - rbx: garbage temp (was MethodHandle.invoke methodOop, unused)
-  // - rcx: receiver method handle
-  // - rax: method handle type (only used by the check_mtype entry point)
-  // - rsi/r13: sender SP (must preserve; see prepare_to_jump_from_interpreted)
-  // - rdx: garbage temp, can blow away
-
-  const Register rcx_recv    = rcx;
-  const Register rax_argslot = rax;
-  const Register rbx_temp    = rbx;
-  const Register rdx_temp    = rdx;
-  const Register rdi_temp    = rdi;
-
-  // This guy is set up by prepare_to_jump_from_interpreted (from interpreted calls)
-  // and gen_c2i_adapter (from compiled calls):
-  const Register saved_last_sp = saved_last_sp_register();
-
-  // Argument registers for _raise_exception.
-  // 32-bit: Pass first two oop/int args in registers ECX and EDX.
-  const Register rarg0_code     = LP64_ONLY(j_rarg0) NOT_LP64(rcx);
-  const Register rarg1_actual   = LP64_ONLY(j_rarg1) NOT_LP64(rdx);
-  const Register rarg2_required = LP64_ONLY(j_rarg2) NOT_LP64(rdi);
-  assert_different_registers(rarg0_code, rarg1_actual, rarg2_required, saved_last_sp);
-
-  guarantee(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes() != 0, "must have offsets");
-
-  // some handy addresses
-  Address rcx_mh_vmtarget(    rcx_recv, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes() );
-  Address rcx_dmh_vmindex(    rcx_recv, java_lang_invoke_DirectMethodHandle::vmindex_offset_in_bytes() );
-
-  Address rcx_bmh_vmargslot(  rcx_recv, java_lang_invoke_BoundMethodHandle::vmargslot_offset_in_bytes() );
-  Address rcx_bmh_argument(   rcx_recv, java_lang_invoke_BoundMethodHandle::argument_offset_in_bytes() );
-
-  Address rcx_amh_vmargslot(  rcx_recv, java_lang_invoke_AdapterMethodHandle::vmargslot_offset_in_bytes() );
-  Address rcx_amh_argument(   rcx_recv, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes() );
-  Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() );
-  Address vmarg;                // __ argument_address(vmargslot)
-
-  const int java_mirror_offset = in_bytes(Klass::java_mirror_offset());
-
-  if (have_entry(ek)) {
-    __ nop();                   // empty stubs make SG sick
-    return;
-  }
-
-#ifdef ASSERT
-  __ push((int32_t) 0xEEEEEEEE);
-  __ push((int32_t) (intptr_t) entry_name(ek));
-  LP64_ONLY(__ push((int32_t) high((intptr_t) entry_name(ek))));
-  __ push((int32_t) 0x33333333);
-#endif //ASSERT
-
-  address interp_entry = __ pc();
-
-  trace_method_handle(_masm, entry_name(ek));
-
-  BLOCK_COMMENT(err_msg("Entry %s {", entry_name(ek)));
-
-  switch ((int) ek) {
-  case _raise_exception:
-    {
-      // Not a real MH entry, but rather shared code for raising an
-      // exception.  Since we use the compiled entry, arguments are
-      // expected in compiler argument registers.
-      assert(raise_exception_method(), "must be set");
-      assert(raise_exception_method()->from_compiled_entry(), "method must be linked");
-
-      const Register rax_pc = rax;
-      __ pop(rax_pc);  // caller PC
-      __ mov(rsp, saved_last_sp);  // cut the stack back to where the caller started
-
-      Register rbx_method = rbx_temp;
-      __ movptr(rbx_method, ExternalAddress((address) &_raise_exception_method));
-
-      const int jobject_oop_offset = 0;
-      __ movptr(rbx_method, Address(rbx_method, jobject_oop_offset));  // dereference the jobject
-
-      __ movptr(saved_last_sp, rsp);
-      __ subptr(rsp, 3 * wordSize);
-      __ push(rax_pc);         // restore caller PC
-
-      __ movl  (__ argument_address(constant(2)), rarg0_code);
-      __ movptr(__ argument_address(constant(1)), rarg1_actual);
-      __ movptr(__ argument_address(constant(0)), rarg2_required);
-      jump_from_method_handle(_masm, rbx_method, rax);
-    }
-    break;
-
-  case _invokestatic_mh:
-  case _invokespecial_mh:
-    {
-      Register rbx_method = rbx_temp;
-      __ load_heap_oop(rbx_method, rcx_mh_vmtarget); // target is a methodOop
-      __ verify_oop(rbx_method);
-      // same as TemplateTable::invokestatic or invokespecial,
-      // minus the CP setup and profiling:
-      if (ek == _invokespecial_mh) {
-        // Must load & check the first argument before entering the target method.
-        __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp);
-        __ movptr(rcx_recv, __ argument_address(rax_argslot, -1));
-        __ null_check(rcx_recv);
-        __ verify_oop(rcx_recv);
-      }
-      jump_from_method_handle(_masm, rbx_method, rax);
-    }
-    break;
-
-  case _invokevirtual_mh:
-    {
-      // same as TemplateTable::invokevirtual,
-      // minus the CP setup and profiling:
-
-      // pick out the vtable index and receiver offset from the MH,
-      // and then we can discard it:
-      __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp);
-      Register rbx_index = rbx_temp;
-      __ movl(rbx_index, rcx_dmh_vmindex);
-      // Note:  The verifier allows us to ignore rcx_mh_vmtarget.
-      __ movptr(rcx_recv, __ argument_address(rax_argslot, -1));
-      __ null_check(rcx_recv, oopDesc::klass_offset_in_bytes());
-
-      // get receiver klass
-      Register rax_klass = rax_argslot;
-      __ load_klass(rax_klass, rcx_recv);
-      __ verify_oop(rax_klass);
-
-      // get target methodOop & entry point
-      const int base = instanceKlass::vtable_start_offset() * wordSize;
-      assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
-      Address vtable_entry_addr(rax_klass,
-                                rbx_index, Address::times_ptr,
-                                base + vtableEntry::method_offset_in_bytes());
-      Register rbx_method = rbx_temp;
-      __ movptr(rbx_method, vtable_entry_addr);
-
-      __ verify_oop(rbx_method);
-      jump_from_method_handle(_masm, rbx_method, rax);
-    }
-    break;
-
-  case _invokeinterface_mh:
-    {
-      // same as TemplateTable::invokeinterface,
-      // minus the CP setup and profiling:
-
-      // pick out the interface and itable index from the MH.
-      __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp);
-      Register rdx_intf  = rdx_temp;
-      Register rbx_index = rbx_temp;
-      __ load_heap_oop(rdx_intf, rcx_mh_vmtarget);
-      __ movl(rbx_index, rcx_dmh_vmindex);
-      __ movptr(rcx_recv, __ argument_address(rax_argslot, -1));
-      __ null_check(rcx_recv, oopDesc::klass_offset_in_bytes());
-
-      // get receiver klass
-      Register rax_klass = rax_argslot;
-      __ load_klass(rax_klass, rcx_recv);
-      __ verify_oop(rax_klass);
-
-      Register rbx_method = rbx_index;
-
-      // get interface klass
-      Label no_such_interface;
-      __ verify_oop(rdx_intf);
-      __ lookup_interface_method(rax_klass, rdx_intf,
-                                 // note: next two args must be the same:
-                                 rbx_index, rbx_method,
-                                 rdi_temp,
-                                 no_such_interface);
-
-      __ verify_oop(rbx_method);
-      jump_from_method_handle(_masm, rbx_method, rax);
-      __ hlt();
-
-      __ bind(no_such_interface);
-      // Throw an exception.
-      // For historical reasons, it will be IncompatibleClassChangeError.
-      __ mov(rbx_temp, rcx_recv);  // rarg2_required might be RCX
-      assert_different_registers(rarg2_required, rbx_temp);
-      __ movptr(rarg2_required, Address(rdx_intf, java_mirror_offset));  // required interface
-      __ mov(   rarg1_actual,   rbx_temp);                               // bad receiver
-      __ movl(  rarg0_code,     (int) Bytecodes::_invokeinterface);      // who is complaining?
-      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));
-    }
-    break;
-
-  case _bound_ref_mh:
-  case _bound_int_mh:
-  case _bound_long_mh:
-  case _bound_ref_direct_mh:
-  case _bound_int_direct_mh:
-  case _bound_long_direct_mh:
-    {
-      const bool direct_to_method = (ek >= _bound_ref_direct_mh);
-      BasicType arg_type  = ek_bound_mh_arg_type(ek);
-      int       arg_slots = type2size[arg_type];
-
-      // make room for the new argument:
-      __ movl(rax_argslot, rcx_bmh_vmargslot);
-      __ lea(rax_argslot, __ argument_address(rax_argslot));
-
-      insert_arg_slots(_masm, arg_slots * stack_move_unit(), rax_argslot, rbx_temp, rdx_temp);
-
-      // store bound argument into the new stack slot:
-      __ load_heap_oop(rbx_temp, rcx_bmh_argument);
-      if (arg_type == T_OBJECT) {
-        __ movptr(Address(rax_argslot, 0), rbx_temp);
-      } else {
-        Address prim_value_addr(rbx_temp, java_lang_boxing_object::value_offset_in_bytes(arg_type));
-        move_typed_arg(_masm, arg_type, false,
-                       Address(rax_argslot, 0),
-                       prim_value_addr,
-                       rbx_temp, rdx_temp);
-      }
-
-      if (direct_to_method) {
-        Register rbx_method = rbx_temp;
-        __ load_heap_oop(rbx_method, rcx_mh_vmtarget);
-        __ verify_oop(rbx_method);
-        jump_from_method_handle(_masm, rbx_method, rax);
-      } else {
-        __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
-        __ verify_oop(rcx_recv);
-        __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
-      }
-    }
-    break;
-
-  case _adapter_opt_profiling:
-    if (java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes() != 0) {
-      Address rcx_mh_vmcount(rcx_recv, java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes());
-      __ incrementl(rcx_mh_vmcount);
-    }
-    // fall through
-
-  case _adapter_retype_only:
-  case _adapter_retype_raw:
-    // immediately jump to the next MH layer:
-    __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
-    __ verify_oop(rcx_recv);
-    __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
-    // This is OK when all parameter types widen.
-    // It is also OK when a return type narrows.
-    break;
-
-  case _adapter_check_cast:
-    {
-      // temps:
-      Register rbx_klass = rbx_temp; // interesting AMH data
-
-      // check a reference argument before jumping to the next layer of MH:
-      __ movl(rax_argslot, rcx_amh_vmargslot);
-      vmarg = __ argument_address(rax_argslot);
-
-      // What class are we casting to?
-      __ load_heap_oop(rbx_klass, rcx_amh_argument); // this is a Class object!
-      load_klass_from_Class(_masm, rbx_klass);
-
-      Label done;
-      __ movptr(rdx_temp, vmarg);
-      __ testptr(rdx_temp, rdx_temp);
-      __ jcc(Assembler::zero, done);         // no cast if null
-      __ load_klass(rdx_temp, rdx_temp);
-
-      // live at this point:
-      // - rbx_klass:  klass required by the target method
-      // - rdx_temp:   argument klass to test
-      // - rcx_recv:   adapter method handle
-      __ check_klass_subtype(rdx_temp, rbx_klass, rax_argslot, done);
-
-      // If we get here, the type check failed!
-      // Call the wrong_method_type stub, passing the failing argument type in rax.
-      Register rax_mtype = rax_argslot;
-      __ movl(rax_argslot, rcx_amh_vmargslot);  // reload argslot field
-      __ movptr(rdx_temp, vmarg);
-
-      assert_different_registers(rarg2_required, rdx_temp);
-      __ load_heap_oop(rarg2_required, rcx_amh_argument);             // required class
-      __ mov(          rarg1_actual,   rdx_temp);                     // bad object
-      __ movl(         rarg0_code,     (int) Bytecodes::_checkcast);  // who is complaining?
-      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));
-
-      __ bind(done);
-      // get the new MH:
-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
-    }
-    break;
-
-  case _adapter_prim_to_prim:
-  case _adapter_ref_to_prim:
-  case _adapter_prim_to_ref:
-    // handled completely by optimized cases
-    __ stop("init_AdapterMethodHandle should not issue this");
-    break;
-
-  case _adapter_opt_i2i:        // optimized subcase of adapt_prim_to_prim
-//case _adapter_opt_f2i:        // optimized subcase of adapt_prim_to_prim
-  case _adapter_opt_l2i:        // optimized subcase of adapt_prim_to_prim
-  case _adapter_opt_unboxi:     // optimized subcase of adapt_ref_to_prim
-    {
-      // perform an in-place conversion to int or an int subword
-      __ movl(rax_argslot, rcx_amh_vmargslot);
-      vmarg = __ argument_address(rax_argslot);
-
-      switch (ek) {
-      case _adapter_opt_i2i:
-        __ movl(rdx_temp, vmarg);
-        break;
-      case _adapter_opt_l2i:
-        {
-          // just delete the extra slot; on a little-endian machine we keep the first
-          __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
-          remove_arg_slots(_masm, -stack_move_unit(),
-                           rax_argslot, rbx_temp, rdx_temp);
-          vmarg = Address(rax_argslot, -Interpreter::stackElementSize);
-          __ movl(rdx_temp, vmarg);
-        }
-        break;
-      case _adapter_opt_unboxi:
-        {
-          // Load the value up from the heap.
-          __ movptr(rdx_temp, vmarg);
-          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_INT);
-#ifdef ASSERT
-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
-            if (is_subword_type(BasicType(bt)))
-              assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(BasicType(bt)), "");
-          }
-#endif
-          __ null_check(rdx_temp, value_offset);
-          __ movl(rdx_temp, Address(rdx_temp, value_offset));
-          // We load this as a word.  Because we are little-endian,
-          // the low bits will be correct, but the high bits may need cleaning.
-          // The vminfo will guide us to clean those bits.
-        }
-        break;
-      default:
-        ShouldNotReachHere();
-      }
-
-      // Do the requested conversion and store the value.
-      Register rbx_vminfo = rbx_temp;
-      load_conversion_vminfo(_masm, rbx_vminfo, rcx_amh_conversion);
-
-      // get the new MH:
-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
-      // (now we are done with the old MH)
-
-      // original 32-bit vmdata word must be of this form:
-      //    | MBZ:6 | signBitCount:8 | srcDstTypes:8 | conversionOp:8 |
-      __ xchgptr(rcx, rbx_vminfo);                // free rcx for shifts
-      __ shll(rdx_temp /*, rcx*/);
-      Label zero_extend, done;
-      __ testl(rcx, CONV_VMINFO_SIGN_FLAG);
-      __ jccb(Assembler::zero, zero_extend);
-
-      // this path is taken for int->byte, int->short
-      __ sarl(rdx_temp /*, rcx*/);
-      __ jmpb(done);
-
-      __ bind(zero_extend);
-      // this path is taken for int->char
-      __ shrl(rdx_temp /*, rcx*/);
-
-      __ bind(done);
-      __ movl(vmarg, rdx_temp);  // Store the value.
-      __ xchgptr(rcx, rbx_vminfo);                // restore rcx_recv
-
-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
-    }
-    break;
-
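
The xchg/shift sequence above is the standard subword-normalization trick:
shift the loaded word left until the subword's sign bit reaches bit 31, then
shift back arithmetically (sign-extend: byte, short) or logically
(zero-extend: char).  A minimal C++ sketch, assuming the shift count has
already been decoded from the vminfo word described above:

    #include <cstdint>

    // shift = 32 - significant bits: 24 for byte, 16 for short and char.
    static int32_t normalize_subword(int32_t raw, int shift, bool is_signed) {
      uint32_t up = (uint32_t)raw << shift;        // push sign bit to bit 31
      return is_signed ? (int32_t)up >> shift      // sarl: int->byte, int->short
                       : (int32_t)(up >> shift);   // shrl: int->char
    }
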
-  case _adapter_opt_i2l:        // optimized subcase of adapt_prim_to_prim
-  case _adapter_opt_unboxl:     // optimized subcase of adapt_ref_to_prim
-    {
-      // perform an in-place int-to-long or ref-to-long conversion
-      __ movl(rax_argslot, rcx_amh_vmargslot);
-
-      // on a little-endian machine we keep the first slot and add another after
-      __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
-      insert_arg_slots(_masm, stack_move_unit(),
-                       rax_argslot, rbx_temp, rdx_temp);
-      Address vmarg1(rax_argslot, -Interpreter::stackElementSize);
-      Address vmarg2 = vmarg1.plus_disp(Interpreter::stackElementSize);
-
-      switch (ek) {
-      case _adapter_opt_i2l:
-        {
-#ifdef _LP64
-          __ movslq(rdx_temp, vmarg1);  // Load sign-extended
-          __ movq(vmarg1, rdx_temp);    // Store into first slot
-#else
-          __ movl(rdx_temp, vmarg1);
-          __ sarl(rdx_temp, BitsPerInt - 1);  // __ extend_sign()
-          __ movl(vmarg2, rdx_temp); // store second word
-#endif
-        }
-        break;
-      case _adapter_opt_unboxl:
-        {
-          // Load the value up from the heap.
-          __ movptr(rdx_temp, vmarg1);
-          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_LONG);
-          assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(T_DOUBLE), "");
-          __ null_check(rdx_temp, value_offset);
-#ifdef _LP64
-          __ movq(rbx_temp, Address(rdx_temp, value_offset));
-          __ movq(vmarg1, rbx_temp);
-#else
-          __ movl(rbx_temp, Address(rdx_temp, value_offset + 0*BytesPerInt));
-          __ movl(rdx_temp, Address(rdx_temp, value_offset + 1*BytesPerInt));
-          __ movl(vmarg1, rbx_temp);
-          __ movl(vmarg2, rdx_temp);
-#endif
-        }
-        break;
-      default:
-        ShouldNotReachHere();
-      }
-
-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
-    }
-    break;
-
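
On 32-bit, the widening above synthesizes the high word of the long purely
from the sign of the low word; that is all the sarl by BitsPerInt-1 does.  A
minimal sketch of the two-slot result (little-endian slot order, as above):

    #include <cstdint>

    static void widen_i2l(int32_t lo, int32_t slot[2]) {
      slot[0] = lo;        // low word is already in place
      slot[1] = lo >> 31;  // all ones if negative, all zeros otherwise
    }
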
-  case _adapter_opt_f2d:        // optimized subcase of adapt_prim_to_prim
-  case _adapter_opt_d2f:        // optimized subcase of adapt_prim_to_prim
-    {
-      // perform an in-place floating primitive conversion
-      __ movl(rax_argslot, rcx_amh_vmargslot);
-      __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
-      if (ek == _adapter_opt_f2d) {
-        insert_arg_slots(_masm, stack_move_unit(),
-                         rax_argslot, rbx_temp, rdx_temp);
-      }
-      Address vmarg(rax_argslot, -Interpreter::stackElementSize);
-
-#ifdef _LP64
-      if (ek == _adapter_opt_f2d) {
-        __ movflt(xmm0, vmarg);
-        __ cvtss2sd(xmm0, xmm0);
-        __ movdbl(vmarg, xmm0);
-      } else {
-        __ movdbl(xmm0, vmarg);
-        __ cvtsd2ss(xmm0, xmm0);
-        __ movflt(vmarg, xmm0);
-      }
-#else //_LP64
-      if (ek == _adapter_opt_f2d) {
-        __ fld_s(vmarg);        // load float to ST0
-        __ fstp_d(vmarg);       // store double
-      } else {
-        __ fld_d(vmarg);        // load double to ST0
-        __ fstp_s(vmarg);       // store single
-      }
-#endif //_LP64
-
-      if (ek == _adapter_opt_d2f) {
-        remove_arg_slots(_masm, -stack_move_unit(),
-                         rax_argslot, rbx_temp, rdx_temp);
-      }
-
-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
-    }
-    break;
-
-  case _adapter_swap_args:
-  case _adapter_rot_args:
-    // handled completely by optimized cases
-    __ stop("init_AdapterMethodHandle should not issue this");
-    break;
-
-  case _adapter_opt_swap_1:
-  case _adapter_opt_swap_2:
-  case _adapter_opt_rot_1_up:
-  case _adapter_opt_rot_1_down:
-  case _adapter_opt_rot_2_up:
-  case _adapter_opt_rot_2_down:
-    {
-      int swap_slots = ek_adapter_opt_swap_slots(ek);
-      int rotate     = ek_adapter_opt_swap_mode(ek);
-
-      // 'argslot' is the position of the first argument to swap
-      __ movl(rax_argslot, rcx_amh_vmargslot);
-      __ lea(rax_argslot, __ argument_address(rax_argslot));
-
-      // 'vminfo' is the second (the destination slot)
-      Register rbx_destslot = rbx_temp;
-      load_conversion_vminfo(_masm, rbx_destslot, rcx_amh_conversion);
-      __ lea(rbx_destslot, __ argument_address(rbx_destslot));
-      if (VerifyMethodHandles)
-        verify_argslot(_masm, rbx_destslot, "swap point must fall within current frame");
-
-      assert(Interpreter::stackElementSize == wordSize, "else rethink use of wordSize here");
-      if (!rotate) {
-        // simple swap
-        for (int i = 0; i < swap_slots; i++) {
-          __ movptr(rdi_temp, Address(rax_argslot,  i * wordSize));
-          __ movptr(rdx_temp, Address(rbx_destslot, i * wordSize));
-          __ movptr(Address(rax_argslot,  i * wordSize), rdx_temp);
-          __ movptr(Address(rbx_destslot, i * wordSize), rdi_temp);
-        }
-      } else {
-        // A rotate is actually a pair of moves, with an "odd slot" (or pair)
-        // changing place with a series of other slots.
-        // First, push the "odd slot", which is going to get overwritten
-        for (int i = swap_slots - 1; i >= 0; i--) {
-          // handle one with rdi_temp instead of a push:
-          if (i == 0)  __ movptr(rdi_temp, Address(rax_argslot, i * wordSize));
-          else         __ pushptr(         Address(rax_argslot, i * wordSize));
-        }
-        if (rotate > 0) {
-          // Here is rotate > 0:
-          // (low mem)                                          (high mem)
-          //     | dest:     more_slots...     | arg: odd_slot :arg+1 |
-          // =>
-          //     | dest: odd_slot | dest+1: more_slots...      :arg+1 |
-          // work argslot down to destslot, copying contiguous data upwards
-          // pseudo-code:
-          //   rax = src_addr - swap_bytes
-          //   rbx = dest_addr
-          //   while (rax >= rbx) *(rax + swap_bytes) = *(rax + 0), rax--;
-          move_arg_slots_up(_masm,
-                            rbx_destslot,
-                            Address(rax_argslot, 0),
-                            swap_slots,
-                            rax_argslot, rdx_temp);
-        } else {
-          // Here is the other direction, rotate < 0:
-          // (low mem)                                          (high mem)
-          //     | arg: odd_slot | arg+1: more_slots...       :dest+1 |
-          // =>
-          //     | arg:    more_slots...     | dest: odd_slot :dest+1 |
-          // work argslot up to destslot, copying contiguous data downwards
-          // pseudo-code:
-          //   rax = src_addr + swap_bytes
-          //   rbx = dest_addr
-          //   while (rax <= rbx) *(rax - swap_bytes) = *(rax + 0), rax++;
-          // dest_slot denotes an exclusive upper limit
-          int limit_bias = OP_ROT_ARGS_DOWN_LIMIT_BIAS;
-          if (limit_bias != 0)
-            __ addptr(rbx_destslot, - limit_bias * wordSize);
-          move_arg_slots_down(_masm,
-                              Address(rax_argslot, swap_slots * wordSize),
-                              rbx_destslot,
-                              -swap_slots,
-                              rax_argslot, rdx_temp);
-          __ subptr(rbx_destslot, swap_slots * wordSize);
-        }
-        // pop the original first chunk into the destination slot, now free
-        for (int i = 0; i < swap_slots; i++) {
-          if (i == 0)  __ movptr(Address(rbx_destslot, i * wordSize), rdi_temp);
-          else         __ popptr(Address(rbx_destslot, i * wordSize));
-        }
-      }
-
-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
-    }
-    break;
-
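
The rotate above can be modeled on a flat array of slots (a sketch only; the
real code works on the downward-growing stack, parks the "odd slot" in
rdi_temp or pushes it, and moves the rest with move_arg_slots_up/down):

    #include <cstdint>
    #include <cstring>

    // Move 'swap_slots' values (1 or 2) from index 'arg' to index 'dest',
    // shifting the run in between over by the same amount.
    static void rotate_slots(intptr_t* slots, int arg, int dest, int swap_slots) {
      intptr_t odd[2];
      std::memcpy(odd, &slots[arg], swap_slots * sizeof(intptr_t));
      if (dest < arg)    // shift the run [dest, arg) up by swap_slots
        std::memmove(&slots[dest + swap_slots], &slots[dest],
                     (arg - dest) * sizeof(intptr_t));
      else               // shift the run [arg+swap_slots, dest+swap_slots) down
        std::memmove(&slots[arg], &slots[arg + swap_slots],
                     (dest - arg) * sizeof(intptr_t));
      std::memcpy(&slots[dest], odd, swap_slots * sizeof(intptr_t));
    }
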
-  case _adapter_dup_args:
-    {
-      // 'argslot' is the position of the first argument to duplicate
-      __ movl(rax_argslot, rcx_amh_vmargslot);
-      __ lea(rax_argslot, __ argument_address(rax_argslot));
-
-      // 'stack_move' is the negative of the number of words to duplicate
-      Register rdi_stack_move = rdi_temp;
-      load_stack_move(_masm, rdi_stack_move, rcx_recv, true);
-
-      if (VerifyMethodHandles) {
-        verify_argslots(_masm, rdi_stack_move, rax_argslot, true,
-                        "copied argument(s) must fall within current frame");
-      }
-
-      // insert location is always the bottom of the argument list:
-      Address insert_location = __ argument_address(constant(0));
-      int pre_arg_words = insert_location.disp() / wordSize;   // return PC is pushed
-      assert(insert_location.base() == rsp, "");
-
-      __ negl(rdi_stack_move);
-      push_arg_slots(_masm, rax_argslot, rdi_stack_move,
-                     pre_arg_words, rbx_temp, rdx_temp);
-
-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
-    }
-    break;
-
-  case _adapter_drop_args:
-    {
-      // 'argslot' is the position of the first argument to nuke
-      __ movl(rax_argslot, rcx_amh_vmargslot);
-      __ lea(rax_argslot, __ argument_address(rax_argslot));
-
-      // (any pushes must be done after the argslot address is taken)
-
-      // 'stack_move' is number of words to drop
-      Register rdi_stack_move = rdi_temp;
-      load_stack_move(_masm, rdi_stack_move, rcx_recv, false);
-      remove_arg_slots(_masm, rdi_stack_move,
-                       rax_argslot, rbx_temp, rdx_temp);
-
-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
-    }
-    break;
-
-  case _adapter_collect_args:
-  case _adapter_fold_args:
-  case _adapter_spread_args:
-    // handled completely by optimized cases
-    __ stop("init_AdapterMethodHandle should not issue this");
-    break;
-
-  case _adapter_opt_collect_ref:
-  case _adapter_opt_collect_int:
-  case _adapter_opt_collect_long:
-  case _adapter_opt_collect_float:
-  case _adapter_opt_collect_double:
-  case _adapter_opt_collect_void:
-  case _adapter_opt_collect_0_ref:
-  case _adapter_opt_collect_1_ref:
-  case _adapter_opt_collect_2_ref:
-  case _adapter_opt_collect_3_ref:
-  case _adapter_opt_collect_4_ref:
-  case _adapter_opt_collect_5_ref:
-  case _adapter_opt_filter_S0_ref:
-  case _adapter_opt_filter_S1_ref:
-  case _adapter_opt_filter_S2_ref:
-  case _adapter_opt_filter_S3_ref:
-  case _adapter_opt_filter_S4_ref:
-  case _adapter_opt_filter_S5_ref:
-  case _adapter_opt_collect_2_S0_ref:
-  case _adapter_opt_collect_2_S1_ref:
-  case _adapter_opt_collect_2_S2_ref:
-  case _adapter_opt_collect_2_S3_ref:
-  case _adapter_opt_collect_2_S4_ref:
-  case _adapter_opt_collect_2_S5_ref:
-  case _adapter_opt_fold_ref:
-  case _adapter_opt_fold_int:
-  case _adapter_opt_fold_long:
-  case _adapter_opt_fold_float:
-  case _adapter_opt_fold_double:
-  case _adapter_opt_fold_void:
-  case _adapter_opt_fold_1_ref:
-  case _adapter_opt_fold_2_ref:
-  case _adapter_opt_fold_3_ref:
-  case _adapter_opt_fold_4_ref:
-  case _adapter_opt_fold_5_ref:
-    {
-      // Given a fresh incoming stack frame, build a new ricochet frame.
-      // On entry, TOS points at a return PC, and RBP is the caller's frame ptr.
-      // RSI/R13 has the caller's exact stack pointer, which we must also preserve.
-      // RCX contains an AdapterMethodHandle of the indicated kind.
-
-      // Relevant AMH fields:
-      // amh.vmargslot:
-      //   points to the trailing edge of the arguments
-      //   to filter, collect, or fold.  For a boxing operation,
-      //   it points just after the single primitive value.
-      // amh.argument:
-      //   recursively called MH, on |collect| arguments
-      // amh.vmtarget:
-      //   final destination MH, on return value, etc.
-      // amh.conversion.dest:
-      //   gives the type of the return value
-      //   (not needed here, since dest is also derived from ek)
-      // amh.conversion.vminfo:
-      //   points to the trailing edge of the return value
-      //   when the vmtarget is to be called; this is
-      //   equal to vmargslot + (retained ? |collect| : 0)
-
-      // Pass 0 or more argument slots to the recursive target.
-      int collect_count_constant = ek_adapter_opt_collect_count(ek);
-
-      // The collected arguments are copied from the saved argument list:
-      int collect_slot_constant = ek_adapter_opt_collect_slot(ek);
-
-      assert(ek_orig == _adapter_collect_args ||
-             ek_orig == _adapter_fold_args, "");
-      bool retain_original_args = (ek_orig == _adapter_fold_args);
-
-      // The return value is replaced (or inserted) at the 'vminfo' argslot.
-      // Sometimes we can compute this statically.
-      int dest_slot_constant = -1;
-      if (!retain_original_args)
-        dest_slot_constant = collect_slot_constant;
-      else if (collect_slot_constant >= 0 && collect_count_constant >= 0)
-        // We are preserving all the arguments, and the return value is prepended,
-        // so the return slot is to the left of (above) the |collect| sequence.
-        dest_slot_constant = collect_slot_constant + collect_count_constant;
-
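      // Illustration (not part of the generator): the destination-slot rule
      // above, restated with concrete numbers.  With collect_slot == 2 and
      // collect_count == 3, a fold puts the return value at slot 5 (just past
      // the |collect| run), while a collect reuses slot 2 (the |collect|
      // sequence itself is replaced by the result).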
-      // Replace all those slots by the result of the recursive call.
-      // The result type can be one of ref, int, long, float, double, void.
-      // In the case of void, nothing is pushed on the stack after return.
-      BasicType dest = ek_adapter_opt_collect_type(ek);
-      assert(dest == type2wfield[dest], "dest is a stack slot type");
-      int dest_count = type2size[dest];
-      assert(dest_count == 1 || dest_count == 2 || (dest_count == 0 && dest == T_VOID), "dest has a size");
-
-      // Choose a return continuation.
-      EntryKind ek_ret = _adapter_opt_return_any;
-      if (dest != T_CONFLICT && OptimizeMethodHandles) {
-        switch (dest) {
-        case T_INT    : ek_ret = _adapter_opt_return_int;     break;
-        case T_LONG   : ek_ret = _adapter_opt_return_long;    break;
-        case T_FLOAT  : ek_ret = _adapter_opt_return_float;   break;
-        case T_DOUBLE : ek_ret = _adapter_opt_return_double;  break;
-        case T_OBJECT : ek_ret = _adapter_opt_return_ref;     break;
-        case T_VOID   : ek_ret = _adapter_opt_return_void;    break;
-        default       : ShouldNotReachHere();
-        }
-        if (dest == T_OBJECT && dest_slot_constant >= 0) {
-          EntryKind ek_try = EntryKind(_adapter_opt_return_S0_ref + dest_slot_constant);
-          if (ek_try <= _adapter_opt_return_LAST &&
-              ek_adapter_opt_return_slot(ek_try) == dest_slot_constant) {
-            ek_ret = ek_try;
-          }
-        }
-        assert(ek_adapter_opt_return_type(ek_ret) == dest, "");
-      }
-
-      // Already pushed:  ... keep1 | collect | keep2 | sender_pc |
-      // push(sender_pc);
-
-      // Compute argument base:
-      Register rax_argv = rax_argslot;
-      __ lea(rax_argv, __ argument_address(constant(0)));
-
-      // Push a few extra argument words, if we need them to store the return value.
-      {
-        int extra_slots = 0;
-        if (retain_original_args) {
-          extra_slots = dest_count;
-        } else if (collect_count_constant == -1) {
-          extra_slots = dest_count;  // collect_count might be zero; be generous
-        } else if (dest_count > collect_count_constant) {
-          extra_slots = (dest_count - collect_count_constant);
-        } else {
-          // else we know we have enough dead space in |collect| to repurpose for return values
-        }
-        DEBUG_ONLY(extra_slots += 1);
-        if (extra_slots > 0) {
-          __ pop(rbx_temp);   // return value
-          __ subptr(rsp, (extra_slots * Interpreter::stackElementSize));
-          // Push guard word #2 in debug mode.
-          DEBUG_ONLY(__ movptr(Address(rsp, 0), (int32_t) RicochetFrame::MAGIC_NUMBER_2));
-          __ push(rbx_temp);
-        }
-      }
-
-      RicochetFrame::enter_ricochet_frame(_masm, rcx_recv, rax_argv,
-                                          entry(ek_ret)->from_interpreted_entry(), rbx_temp);
-
-      // Now pushed:  ... keep1 | collect | keep2 | RF |
-      // some handy frame slots:
-      Address exact_sender_sp_addr = RicochetFrame::frame_address(RicochetFrame::exact_sender_sp_offset_in_bytes());
-      Address conversion_addr      = RicochetFrame::frame_address(RicochetFrame::conversion_offset_in_bytes());
-      Address saved_args_base_addr = RicochetFrame::frame_address(RicochetFrame::saved_args_base_offset_in_bytes());
-
-#ifdef ASSERT
-      if (VerifyMethodHandles && dest != T_CONFLICT) {
-        BLOCK_COMMENT("verify AMH.conv.dest");
-        load_conversion_dest_type(_masm, rbx_temp, conversion_addr);
-        Label L_dest_ok;
-        __ cmpl(rbx_temp, (int) dest);
-        __ jcc(Assembler::equal, L_dest_ok);
-        if (dest == T_INT) {
-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
-            if (is_subword_type(BasicType(bt))) {
-              __ cmpl(rbx_temp, (int) bt);
-              __ jcc(Assembler::equal, L_dest_ok);
-            }
-          }
-        }
-        __ stop("bad dest in AMH.conv");
-        __ BIND(L_dest_ok);
-      }
-#endif //ASSERT
-
-      // Find out where the original copy of the recursive argument sequence begins.
-      Register rax_coll = rax_argv;
-      {
-        RegisterOrConstant collect_slot = collect_slot_constant;
-        if (collect_slot_constant == -1) {
-          __ movl(rdi_temp, rcx_amh_vmargslot);
-          collect_slot = rdi_temp;
-        }
-        if (collect_slot_constant != 0)
-          __ lea(rax_coll, Address(rax_argv, collect_slot, Interpreter::stackElementScale()));
-        // rax_coll now points at the trailing edge of |collect| and leading edge of |keep2|
-      }
-
-      // Replace the old AMH with the recursive MH.  (No going back now.)
-      // In the case of a boxing call, the recursive call is to a 'boxer' method,
-      // such as Integer.valueOf or Long.valueOf.  In the case of a filter
-      // or collect call, it will take one or more arguments, transform them,
-      // and return some result, to store back into argument_base[vminfo].
-      __ load_heap_oop(rcx_recv, rcx_amh_argument);
-      if (VerifyMethodHandles)  verify_method_handle(_masm, rcx_recv);
-
-      // Push a space for the recursively called MH first:
-      __ push((int32_t)NULL_WORD);
-
-      // Calculate |collect|, the number of arguments we are collecting.
-      Register rdi_collect_count = rdi_temp;
-      RegisterOrConstant collect_count;
-      if (collect_count_constant >= 0) {
-        collect_count = collect_count_constant;
-      } else {
-        __ load_method_handle_vmslots(rdi_collect_count, rcx_recv, rdx_temp);
-        collect_count = rdi_collect_count;
-      }
-#ifdef ASSERT
-      if (VerifyMethodHandles && collect_count_constant >= 0) {
-        __ load_method_handle_vmslots(rbx_temp, rcx_recv, rdx_temp);
-        Label L_count_ok;
-        __ cmpl(rbx_temp, collect_count_constant);
-        __ jcc(Assembler::equal, L_count_ok);
-        __ stop("bad vminfo in AMH.conv");
-        __ BIND(L_count_ok);
-      }
-#endif //ASSERT
-
-      // copy |collect| slots directly to TOS:
-      push_arg_slots(_masm, rax_coll, collect_count, 0, rbx_temp, rdx_temp);
-      // Now pushed:  ... keep1 | collect | keep2 | RF... | collect |
-      // rax_coll still points at the trailing edge of |collect| and leading edge of |keep2|
-
-      // If necessary, adjust the saved arguments to make room for the eventual return value.
-      // Normal adjustment:  ... keep1 | +dest+ | -collect- | keep2 | RF... | collect |
-      // If retaining args:  ... keep1 | +dest+ |  collect  | keep2 | RF... | collect |
-      // In the non-retaining case, this might move keep2 either up or down.
-      // We don't have to copy the whole | RF... collect | complex,
-      // but we must adjust RF.saved_args_base.
-      // Also, from now on, we will forget about the original copy of |collect|.
-      // If we are retaining it, we will treat it as part of |keep2|.
-      // For clarity we will define |keep3| = |collect|keep2| or |keep2|.
-
-      BLOCK_COMMENT("adjust trailing arguments {");
-      // Compare the sizes of |+dest+| and |-collect-|, which are opposed opening and closing movements.
-      int                open_count  = dest_count;
-      RegisterOrConstant close_count = collect_count_constant;
-      Register rdi_close_count = rdi_collect_count;
-      if (retain_original_args) {
-        close_count = constant(0);
-      } else if (collect_count_constant == -1) {
-        close_count = rdi_collect_count;
-      }
-
-      // How many slots need moving?  This is simply dest_slot (0 => no |keep3|).
-      RegisterOrConstant keep3_count;
-      Register rsi_keep3_count = rsi;  // can repair from RF.exact_sender_sp
-      if (dest_slot_constant >= 0) {
-        keep3_count = dest_slot_constant;
-      } else {
-        load_conversion_vminfo(_masm, rsi_keep3_count, conversion_addr);
-        keep3_count = rsi_keep3_count;
-      }
-#ifdef ASSERT
-      if (VerifyMethodHandles && dest_slot_constant >= 0) {
-        load_conversion_vminfo(_masm, rbx_temp, conversion_addr);
-        Label L_vminfo_ok;
-        __ cmpl(rbx_temp, dest_slot_constant);
-        __ jcc(Assembler::equal, L_vminfo_ok);
-        __ stop("bad vminfo in AMH.conv");
-        __ BIND(L_vminfo_ok);
-      }
-#endif //ASSERT
-
-      // tasks remaining:
-      bool move_keep3 = (!keep3_count.is_constant() || keep3_count.as_constant() != 0);
-      bool stomp_dest = (NOT_DEBUG(dest == T_OBJECT) DEBUG_ONLY(dest_count != 0));
-      bool fix_arg_base = (!close_count.is_constant() || open_count != close_count.as_constant());
-
-      if (stomp_dest | fix_arg_base) {
-        // we will probably need an updated rax_argv value
-        if (collect_slot_constant >= 0) {
-          // rax_coll already holds the leading edge of |keep2|, so tweak it
-          assert(rax_coll == rax_argv, "elided a move");
-          if (collect_slot_constant != 0)
-            __ subptr(rax_argv, collect_slot_constant * Interpreter::stackElementSize);
-        } else {
-          // Just reload from RF.saved_args_base.
-          __ movptr(rax_argv, saved_args_base_addr);
-        }
-      }
-
-      // Old and new argument locations (based at slot 0).
-      // Net shift (&new_argv - &old_argv) is (close_count - open_count).
-      bool zero_open_count = (open_count == 0);  // remember this bit of info
-      if (move_keep3 && fix_arg_base) {
-        // It will be easier to have everything in one register:
-        if (close_count.is_register()) {
-          // Deduct open_count from close_count register to get a clean +/- value.
-          __ subptr(close_count.as_register(), open_count);
-        } else {
-          close_count = close_count.as_constant() - open_count;
-        }
-        open_count = 0;
-      }
-      Address old_argv(rax_argv, 0);
-      Address new_argv(rax_argv, close_count,  Interpreter::stackElementScale(),
-                                - open_count * Interpreter::stackElementSize);
-
-      // First decide if any actual data are to be moved.
-      // We can skip if (a) |keep3| is empty, or (b) the argument list size didn't change.
-      // (As it happens, all movements involve an argument list size change.)
-
-      // If there are variable parameters, use dynamic checks to skip around the whole mess.
-      Label L_done;
-      if (!keep3_count.is_constant()) {
-        __ testl(keep3_count.as_register(), keep3_count.as_register());
-        __ jcc(Assembler::zero, L_done);
-      }
-      if (!close_count.is_constant()) {
-        __ cmpl(close_count.as_register(), open_count);
-        __ jcc(Assembler::equal, L_done);
-      }
-
-      if (move_keep3 && fix_arg_base) {
-        bool emit_move_down = false, emit_move_up = false, emit_guard = false;
-        if (!close_count.is_constant()) {
-          emit_move_down = emit_guard = !zero_open_count;
-          emit_move_up   = true;
-        } else if (open_count != close_count.as_constant()) {
-          emit_move_down = (open_count > close_count.as_constant());
-          emit_move_up   = !emit_move_down;
-        }
-        Label L_move_up;
-        if (emit_guard) {
-          __ cmpl(close_count.as_register(), open_count);
-          __ jcc(Assembler::greater, L_move_up);
-        }
-
-        if (emit_move_down) {
-          // Move arguments down if |+dest+| > |-collect-|
-          // (This is rare, except when arguments are retained.)
-          // This opens space for the return value.
-          if (keep3_count.is_constant()) {
-            for (int i = 0; i < keep3_count.as_constant(); i++) {
-              __ movptr(rdx_temp, old_argv.plus_disp(i * Interpreter::stackElementSize));
-              __ movptr(          new_argv.plus_disp(i * Interpreter::stackElementSize), rdx_temp);
-            }
-          } else {
-            Register rbx_argv_top = rbx_temp;
-            __ lea(rbx_argv_top, old_argv.plus_disp(keep3_count, Interpreter::stackElementScale()));
-            move_arg_slots_down(_masm,
-                                old_argv,     // beginning of old argv
-                                rbx_argv_top, // end of old argv
-                                close_count,  // distance to move down (must be negative)
-                                rax_argv, rdx_temp);
-            // Used argv as an iteration variable; reload from RF.saved_args_base.
-            __ movptr(rax_argv, saved_args_base_addr);
-          }
-        }
-
-        if (emit_guard) {
-          __ jmp(L_done);  // assumes emit_move_up is true also
-          __ BIND(L_move_up);
-        }
-
-        if (emit_move_up) {
-
-          // Move arguments up if |+dest+| < |-collect-|
-          // (This is usual, except when |keep3| is empty.)
-          // This closes up the space occupied by the now-deleted collect values.
-          if (keep3_count.is_constant()) {
-            for (int i = keep3_count.as_constant() - 1; i >= 0; i--) {
-              __ movptr(rdx_temp, old_argv.plus_disp(i * Interpreter::stackElementSize));
-              __ movptr(          new_argv.plus_disp(i * Interpreter::stackElementSize), rdx_temp);
-            }
-          } else {
-            Address argv_top = old_argv.plus_disp(keep3_count, Interpreter::stackElementScale());
-            move_arg_slots_up(_masm,
-                              rax_argv,     // beginning of old argv
-                              argv_top,     // end of old argv
-                              close_count,  // distance to move up (must be positive)
-                              rbx_temp, rdx_temp);
-          }
-        }
-      }
-      __ BIND(L_done);
-
-      if (fix_arg_base) {
-        // adjust RF.saved_args_base by adding (close_count - open_count)
-        if (!new_argv.is_same_address(Address(rax_argv, 0)))
-          __ lea(rax_argv, new_argv);
-        __ movptr(saved_args_base_addr, rax_argv);
-      }
-
-      if (stomp_dest) {
-        // Stomp the return slot, so it doesn't hold garbage.
-        // This isn't strictly necessary, but it may help detect bugs.
-        int forty_two = RicochetFrame::RETURN_VALUE_PLACEHOLDER;
-        __ movptr(Address(rax_argv, keep3_count, Address::times_ptr),
-                  (int32_t) forty_two);
-        // uses rsi_keep3_count
-      }
-      BLOCK_COMMENT("} adjust trailing arguments");
-
-      BLOCK_COMMENT("do_recursive_call");
-      __ mov(saved_last_sp, rsp);    // set rsi/r13 for callee
-      __ pushptr(ExternalAddress(SharedRuntime::ricochet_blob()->bounce_addr()).addr());
-      // The globally unique bounce address has two purposes:
-      // 1. It helps the JVM recognize this frame (frame::is_ricochet_frame).
-      // 2. When returned to, it cuts back the stack and redirects control flow
-      //    to the return handler.
-      // The return handler will further cut back the stack when it takes
-      // down the RF.  Perhaps there is a way to streamline this further.
-
-      // State during recursive call:
-      // ... keep1 | dest | dest=42 | keep3 | RF... | collect | bounce_pc |
-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
-
-      break;
-    }
-
-  case _adapter_opt_return_ref:
-  case _adapter_opt_return_int:
-  case _adapter_opt_return_long:
-  case _adapter_opt_return_float:
-  case _adapter_opt_return_double:
-  case _adapter_opt_return_void:
-  case _adapter_opt_return_S0_ref:
-  case _adapter_opt_return_S1_ref:
-  case _adapter_opt_return_S2_ref:
-  case _adapter_opt_return_S3_ref:
-  case _adapter_opt_return_S4_ref:
-  case _adapter_opt_return_S5_ref:
-    {
-      BasicType dest_type_constant = ek_adapter_opt_return_type(ek);
-      int       dest_slot_constant = ek_adapter_opt_return_slot(ek);
-
-      if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
-
-      if (dest_slot_constant == -1) {
-        // The current stub is a general handler for this dest_type.
-        // It can be called from _adapter_opt_return_any below.
-        // Stash the address in a little table.
-        assert((dest_type_constant & CONV_TYPE_MASK) == dest_type_constant, "oob");
-        address return_handler = __ pc();
-        _adapter_return_handlers[dest_type_constant] = return_handler;
-        if (dest_type_constant == T_INT) {
-          // do the subword types too
-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
-            if (is_subword_type(BasicType(bt)) &&
-                _adapter_return_handlers[bt] == NULL) {
-              _adapter_return_handlers[bt] = return_handler;
-            }
-          }
-        }
-      }
-
-      Register rbx_arg_base = rbx_temp;
-      assert_different_registers(rax, rdx,  // possibly live return value registers
-                                 rdi_temp, rbx_arg_base);
-
-      Address conversion_addr      = RicochetFrame::frame_address(RicochetFrame::conversion_offset_in_bytes());
-      Address saved_args_base_addr = RicochetFrame::frame_address(RicochetFrame::saved_args_base_offset_in_bytes());
-
-      __ movptr(rbx_arg_base, saved_args_base_addr);
-      RegisterOrConstant dest_slot = dest_slot_constant;
-      if (dest_slot_constant == -1) {
-        load_conversion_vminfo(_masm, rdi_temp, conversion_addr);
-        dest_slot = rdi_temp;
-      }
-      // Store the result back into the argslot.
-      // This code uses the interpreter calling sequence, in which the return value
-      // is usually left in the TOS register, as defined by InterpreterMacroAssembler::pop.
-      // There are certain irregularities with floating point values, which can be seen
-      // in TemplateInterpreterGenerator::generate_return_entry_for.
-      move_return_value(_masm, dest_type_constant, Address(rbx_arg_base, dest_slot, Interpreter::stackElementScale()));
-
-      RicochetFrame::leave_ricochet_frame(_masm, rcx_recv, rbx_arg_base, rdx_temp);
-      __ push(rdx_temp);  // repush the return PC
-
-      // Load the final target and go.
-      if (VerifyMethodHandles)  verify_method_handle(_masm, rcx_recv);
-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
-      __ hlt(); // --------------------
-      break;
-    }
-
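
The stub above registers itself in a small per-type table, with the subword
types reusing the T_INT handler, so that _adapter_opt_return_any below can
dispatch on any conversion dest type.  A self-contained sketch (the enum is a
simplified stand-in for HotSpot's BasicType, where all values below BT_INT
are subword types):

    // Simplified type ordering: everything below BT_INT is a subword type.
    enum BT { BT_BOOLEAN, BT_CHAR, BT_BYTE, BT_SHORT, BT_INT, BT_LONG, BT_COUNT };
    typedef void (*ReturnHandler)();
    static ReturnHandler return_handlers[BT_COUNT];

    static void register_return_handler(BT type, ReturnHandler h) {
      return_handlers[type] = h;
      if (type == BT_INT) {                 // do the subword types too
        for (int bt = BT_BOOLEAN; bt < BT_INT; bt++)
          if (return_handlers[bt] == nullptr)
            return_handlers[bt] = h;
      }
    }
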
-  case _adapter_opt_return_any:
-    {
-      if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
-      Register rdi_conv = rdi_temp;
-      assert_different_registers(rax, rdx,  // possibly live return value registers
-                                 rdi_conv, rbx_temp);
-
-      Address conversion_addr = RicochetFrame::frame_address(RicochetFrame::conversion_offset_in_bytes());
-      load_conversion_dest_type(_masm, rdi_conv, conversion_addr);
-      __ lea(rbx_temp, ExternalAddress((address) &_adapter_return_handlers[0]));
-      __ movptr(rbx_temp, Address(rbx_temp, rdi_conv, Address::times_ptr));
-
-#ifdef ASSERT
-      { Label L_badconv;
-        __ testptr(rbx_temp, rbx_temp);
-        __ jccb(Assembler::zero, L_badconv);
-        __ jmp(rbx_temp);
-        __ bind(L_badconv);
-        __ stop("bad method handle return");
-      }
-#else //ASSERT
-      __ jmp(rbx_temp);
-#endif //ASSERT
-      break;
-    }
-
-  case _adapter_opt_spread_0:
-  case _adapter_opt_spread_1_ref:
-  case _adapter_opt_spread_2_ref:
-  case _adapter_opt_spread_3_ref:
-  case _adapter_opt_spread_4_ref:
-  case _adapter_opt_spread_5_ref:
-  case _adapter_opt_spread_ref:
-  case _adapter_opt_spread_byte:
-  case _adapter_opt_spread_char:
-  case _adapter_opt_spread_short:
-  case _adapter_opt_spread_int:
-  case _adapter_opt_spread_long:
-  case _adapter_opt_spread_float:
-  case _adapter_opt_spread_double:
-    {
-      // spread an array out into a group of arguments
-      int length_constant = ek_adapter_opt_spread_count(ek);
-      bool length_can_be_zero = (length_constant == 0);
-      if (length_constant < 0) {
-        // some adapters with variable length must handle the zero case
-        if (!OptimizeMethodHandles ||
-            ek_adapter_opt_spread_type(ek) != T_OBJECT)
-          length_can_be_zero = true;
-      }
-
-      // find the address of the array argument
-      __ movl(rax_argslot, rcx_amh_vmargslot);
-      __ lea(rax_argslot, __ argument_address(rax_argslot));
-
-      // grab another temp
-      Register rsi_temp = rsi;
-
-      // rax_argslot points both to the array and to the first output arg
-      vmarg = Address(rax_argslot, 0);
-
-      // Get the array value.
-      Register  rdi_array       = rdi_temp;
-      Register  rdx_array_klass = rdx_temp;
-      BasicType elem_type = ek_adapter_opt_spread_type(ek);
-      int       elem_slots = type2size[elem_type];  // 1 or 2
-      int       array_slots = 1;  // array is always a T_OBJECT
-      int       length_offset   = arrayOopDesc::length_offset_in_bytes();
-      int       elem0_offset    = arrayOopDesc::base_offset_in_bytes(elem_type);
-      __ movptr(rdi_array, vmarg);
-
-      Label L_array_is_empty, L_insert_arg_space, L_copy_args, L_args_done;
-      if (length_can_be_zero) {
-        // handle the null pointer case, if zero is allowed
-        Label L_skip;
-        if (length_constant < 0) {
-          load_conversion_vminfo(_masm, rbx_temp, rcx_amh_conversion);
-          __ testl(rbx_temp, rbx_temp);
-          __ jcc(Assembler::notZero, L_skip);
-        }
-        __ testptr(rdi_array, rdi_array);
-        __ jcc(Assembler::notZero, L_skip);
-
-        // If 'rsi' contains the 'saved_last_sp' (this is only the
-        // case in a 32-bit version of the VM) we have to save 'rsi'
-        // on the stack because later on (at 'L_array_is_empty') 'rsi'
-        // will be overwritten.
-        { if (rsi_temp == saved_last_sp)  __ push(saved_last_sp); }
-        // Also prepare a handy macro which restores 'rsi' if required.
-#define UNPUSH_RSI                                                      \
-        { if (rsi_temp == saved_last_sp)  __ pop(saved_last_sp); }
-
-        __ jmp(L_array_is_empty);
-        __ bind(L_skip);
-      }
-      __ null_check(rdi_array, oopDesc::klass_offset_in_bytes());
-      __ load_klass(rdx_array_klass, rdi_array);
-
-      // Save 'rsi' if required (see comment above).  Do this only
-      // after the null check, so that the exception handler invoked
-      // on a null pointer exception is not confused by the extra
-      // value on the stack (it expects the return PC on top of the
-      // stack).
-      { if (rsi_temp == saved_last_sp)  __ push(saved_last_sp); }
-
-      // Check the array type.
-      Register rbx_klass = rbx_temp;
-      __ load_heap_oop(rbx_klass, rcx_amh_argument); // this is a Class object!
-      load_klass_from_Class(_masm, rbx_klass);
-
-      Label ok_array_klass, bad_array_klass, bad_array_length;
-      __ check_klass_subtype(rdx_array_klass, rbx_klass, rsi_temp, ok_array_klass);
-      // If we get here, the type check failed!
-      __ jmp(bad_array_klass);
-      __ BIND(ok_array_klass);
-
-      // Check length.
-      if (length_constant >= 0) {
-        __ cmpl(Address(rdi_array, length_offset), length_constant);
-      } else {
-        Register rbx_vminfo = rbx_temp;
-        load_conversion_vminfo(_masm, rbx_vminfo, rcx_amh_conversion);
-        __ cmpl(rbx_vminfo, Address(rdi_array, length_offset));
-      }
-      __ jcc(Assembler::notEqual, bad_array_length);
-
-      Register rdx_argslot_limit = rdx_temp;
-
-      // Array length checks out.  Now insert any required stack slots.
-      if (length_constant == -1) {
-        // Form a pointer to the end of the affected region.
-        __ lea(rdx_argslot_limit, Address(rax_argslot, Interpreter::stackElementSize));
-        // 'stack_move' is the negative of the number of words to insert
-        // This number already accounts for elem_slots.
-        Register rsi_stack_move = rsi_temp;
-        load_stack_move(_masm, rsi_stack_move, rcx_recv, true);
-        __ cmpptr(rsi_stack_move, 0);
-        assert(stack_move_unit() < 0, "else change this comparison");
-        __ jcc(Assembler::less, L_insert_arg_space);
-        __ jcc(Assembler::equal, L_copy_args);
-        // single argument case, with no array movement
-        __ BIND(L_array_is_empty);
-        remove_arg_slots(_masm, -stack_move_unit() * array_slots,
-                         rax_argslot, rbx_temp, rdx_temp);
-        __ jmp(L_args_done);  // no spreading to do
-        __ BIND(L_insert_arg_space);
-        // come here in the usual case, stack_move < 0 (2 or more spread arguments)
-        Register rdi_temp = rdi_array;  // spill this
-        insert_arg_slots(_masm, rsi_stack_move,
-                         rax_argslot, rbx_temp, rdi_temp);
-        // reload the array since rsi was killed
-        // reload from rdx_argslot_limit since rax_argslot is now decremented
-        __ movptr(rdi_array, Address(rdx_argslot_limit, -Interpreter::stackElementSize));
-      } else if (length_constant >= 1) {
-        int new_slots = (length_constant * elem_slots) - array_slots;
-        insert_arg_slots(_masm, new_slots * stack_move_unit(),
-                         rax_argslot, rbx_temp, rdx_temp);
-      } else if (length_constant == 0) {
-        __ BIND(L_array_is_empty);
-        remove_arg_slots(_masm, -stack_move_unit() * array_slots,
-                         rax_argslot, rbx_temp, rdx_temp);
-      } else {
-        ShouldNotReachHere();
-      }
-
-      // Copy from the array to the new slots.
-      // Note: Stack change code preserves integrity of rax_argslot pointer.
-      // So even after slot insertions, rax_argslot still points to first argument.
-      // Beware:  Arguments that are shallow on the stack are deep in the array,
-      // and vice versa.  So a downward-growing stack (the usual) has to be copied
-      // elementwise in reverse order from the source array.
-      __ BIND(L_copy_args);
-      if (length_constant == -1) {
-        // [rax_argslot, rdx_argslot_limit) is the area we are inserting into.
-        // Array element [0] goes at rdx_argslot_limit[-wordSize].
-        Register rdi_source = rdi_array;
-        __ lea(rdi_source, Address(rdi_array, elem0_offset));
-        Register rdx_fill_ptr = rdx_argslot_limit;
-        Label loop;
-        __ BIND(loop);
-        __ addptr(rdx_fill_ptr, -Interpreter::stackElementSize * elem_slots);
-        move_typed_arg(_masm, elem_type, true,
-                       Address(rdx_fill_ptr, 0), Address(rdi_source, 0),
-                       rbx_temp, rsi_temp);
-        __ addptr(rdi_source, type2aelembytes(elem_type));
-        __ cmpptr(rdx_fill_ptr, rax_argslot);
-        __ jcc(Assembler::above, loop);
-      } else if (length_constant == 0) {
-        // nothing to copy
-      } else {
-        int elem_offset = elem0_offset;
-        int slot_offset = length_constant * Interpreter::stackElementSize;
-        for (int index = 0; index < length_constant; index++) {
-          slot_offset -= Interpreter::stackElementSize * elem_slots;  // fill backward
-          move_typed_arg(_masm, elem_type, true,
-                         Address(rax_argslot, slot_offset), Address(rdi_array, elem_offset),
-                         rbx_temp, rsi_temp);
-          elem_offset += type2aelembytes(elem_type);
-        }
-      }
-      __ BIND(L_args_done);
-
-      // Arguments are spread.  Move to next method handle.
-      UNPUSH_RSI;
-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
-
-      __ bind(bad_array_klass);
-      UNPUSH_RSI;
-      assert(!vmarg.uses(rarg2_required), "must be different registers");
-      __ load_heap_oop( rarg2_required, Address(rdx_array_klass, java_mirror_offset));  // required type
-      __ movptr(        rarg1_actual,   vmarg);                                         // bad array
-      __ movl(          rarg0_code,     (int) Bytecodes::_aaload);                      // who is complaining?
-      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));
-
-      __ bind(bad_array_length);
-      UNPUSH_RSI;
-      assert(!vmarg.uses(rarg2_required), "must be different registers");
-      __ mov(    rarg2_required, rcx_recv);                       // AMH requiring a certain length
-      __ movptr( rarg1_actual,   vmarg);                          // bad array
-      __ movl(   rarg0_code,     (int) Bytecodes::_arraylength);  // who is complaining?
-      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));
-#undef UNPUSH_RSI
-
-      break;
-    }
-
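
Both copy paths above fill the slot region from its high end downward while
reading the array from index 0 upward, since argument 0 must land deepest on
a downward-growing stack.  A minimal model of that reversal:

    #include <cstddef>
    #include <cstdint>

    // 'slot_base' is the lowest-addressed slot of the region; element [0]
    // lands at the highest address, mirroring rdx_argslot_limit[-wordSize].
    static void spread_into_slots(const intptr_t* array, size_t len,
                                  intptr_t* slot_base) {
      intptr_t* fill = slot_base + len;   // one past the highest slot
      for (size_t i = 0; i < len; i++)
        *--fill = array[i];               // arg 0 ends up deepest
    }
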
-  default:
-    // do not require all platforms to recognize all adapter types
-    __ nop();
-    return;
-  }
-  BLOCK_COMMENT(err_msg("} Entry %s", entry_name(ek)));
-  __ hlt();
-
-  address me_cookie = MethodHandleEntry::start_compiled_entry(_masm, interp_entry);
-  __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
-
-  init_entry(ek, MethodHandleEntry::finish_compiled_entry(_masm, me_cookie));
-}
--- a/src/cpu/x86/vm/methodHandles_x86.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/methodHandles_x86.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -27,266 +27,12 @@
 
 // Adapters
 enum /* platform_dependent_constants */ {
-  adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 15000)) LP64_ONLY(32000 DEBUG_ONLY(+ 120000))
-};
-
-public:
-
-// The stack just after the recursive call from a ricochet frame
-// looks something like this.  Offsets are marked in words, not bytes.
-// rsi (r13 on LP64) is part of the interpreter calling sequence
-// which tells the callee where my real rsp is (for frame walking).
-// (...lower memory addresses)
-// rsp:     [ return pc                 ]   always the global RicochetBlob::bounce_addr
-// rsp+1:   [ recursive arg N           ]
-// rsp+2:   [ recursive arg N-1         ]
-// ...
-// rsp+N:   [ recursive arg 1           ]
-// rsp+N+1: [ recursive method handle   ]
-// ...
-// rbp-6:   [ cleanup continuation pc   ]   <-- (struct RicochetFrame)
-// rbp-5:   [ saved target MH           ]   the MH we will call on the saved args
-// rbp-4:   [ saved args layout oop     ]   an int[] array which describes argument layout
-// rbp-3:   [ saved args pointer        ]   address of transformed adapter arg M (slot 0)
-// rbp-2:   [ conversion                ]   information about how the return value is used
-// rbp-1:   [ exact sender sp           ]   exact TOS (rsi/r13) of original sender frame
-// rbp+0:   [ saved sender fp           ]   (for original sender of AMH)
-// rbp+1:   [ saved sender pc           ]   (back to original sender of AMH)
-// rbp+2:   [ transformed adapter arg M ]   <-- (extended TOS of original sender)
-// rbp+3:   [ transformed adapter arg M-1 ]
-// ...
-// rbp+M+1: [ transformed adapter arg 1 ]
-// rbp+M+2: [ padding                   ] <-- (rbp + saved args base offset)
-// ...      [ optional padding ]
-// (higher memory addresses...)
-//
-// The arguments originally passed by the original sender
-// are lost, and arbitrary amounts of stack motion might have
-// happened due to argument transformation.
-// (This is done by C2I/I2C adapters and non-direct method handles.)
-// This is why there is an unpredictable amount of memory between
-// the extended and exact TOS of the sender.
-// The ricochet adapter itself will also (in general) perform
-// transformations before the recursive call.
-//
-// The transformed and saved arguments, immediately above the saved
-// return PC, are a well-formed method handle invocation ready to execute.
-// When the GC needs to walk the stack, these arguments are described
-// via the saved arg types oop, an int[] array with a private format.
-// This array is derived from the type of the transformed adapter
-// method handle, which also sits at the base of the saved argument
-// bundle.  Since the GC may not be able to fish out the int[]
-// array, it is pushed explicitly on the stack.  This may be
-// an unnecessary expense.
-//
-// The following register conventions are significant at this point:
-// rsp       the thread stack, as always; preserved by caller
-// rsi/r13   exact TOS of recursive frame (contents of [rbp-2])
-// rcx       recursive method handle (contents of [rsp+N+1])
-// rbp       preserved by caller (not used by caller)
-// Unless otherwise specified, all registers can be blown by the call.
-//
-// If this frame must be walked, the transformed adapter arguments
-// will be found with the help of the saved arguments descriptor.
-//
-// Therefore, the descriptor must match the referenced arguments.
-// The arguments must be followed by at least one word of padding,
-// which will be necessary to complete the final method handle call.
-// That word is not treated as holding an oop.
-//
-// The word pointed to by the return argument pointer is not
-// treated as an oop either, even if it points to a saved argument.
-// This allows the saved argument list to have a "hole" in it
-// to receive an oop from the recursive call.
-// (The hole might temporarily contain RETURN_VALUE_PLACEHOLDER.)
-//
-// When the recursive callee returns, RicochetBlob::bounce_addr will
-// immediately jump to the continuation stored in the RF.
-// This continuation will merge the recursive return value
-// into the saved argument list.  At that point, the original
-// rsi, rbp, and rsp will be reloaded, the ricochet frame will
-// disappear, and the final target of the adapter method handle
-// will be invoked on the transformed argument list.
-
-class RicochetFrame {
-  friend class MethodHandles;
-  friend class VMStructs;
-
- private:
-  intptr_t* _continuation;          // what to do when control gets back here
-  oopDesc*  _saved_target;          // target method handle to invoke on saved_args
-  oopDesc*  _saved_args_layout;     // caching point for MethodTypeForm.vmlayout cookie
-  intptr_t* _saved_args_base;       // base of pushed arguments (slot 0, arg N) (-3)
-  intptr_t  _conversion;            // misc. information from original AdapterMethodHandle (-2)
-  intptr_t* _exact_sender_sp;       // parallel to interpreter_frame_sender_sp (-1)
-  intptr_t* _sender_link;           // *must* coincide with frame::link_offset (0)
-  address   _sender_pc;             // *must* coincide with frame::return_addr_offset (1)
-
- public:
-  intptr_t* continuation() const        { return _continuation; }
-  oop       saved_target() const        { return _saved_target; }
-  oop       saved_args_layout() const   { return _saved_args_layout; }
-  intptr_t* saved_args_base() const     { return _saved_args_base; }
-  intptr_t  conversion() const          { return _conversion; }
-  intptr_t* exact_sender_sp() const     { return _exact_sender_sp; }
-  intptr_t* sender_link() const         { return _sender_link; }
-  address   sender_pc() const           { return _sender_pc; }
-
-  intptr_t* extended_sender_sp() const {
-    // The extended sender SP is above the current RicochetFrame.
-    return (intptr_t*) (((address) this) + sizeof(RicochetFrame));
-  }
-
-  intptr_t  return_value_slot_number() const {
-    return adapter_conversion_vminfo(conversion());
-  }
-  BasicType return_value_type() const {
-    return adapter_conversion_dest_type(conversion());
-  }
-  bool has_return_value_slot() const {
-    return return_value_type() != T_VOID;
-  }
-  intptr_t* return_value_slot_addr() const {
-    assert(has_return_value_slot(), "");
-    return saved_arg_slot_addr(return_value_slot_number());
-  }
-  intptr_t* saved_target_slot_addr() const {
-    return saved_arg_slot_addr(saved_args_length());
-  }
-  intptr_t* saved_arg_slot_addr(int slot) const {
-    assert(slot >= 0, "");
-    return (intptr_t*)( (address)saved_args_base() + (slot * Interpreter::stackElementSize) );
-  }
-
-  jint      saved_args_length() const;
-  jint      saved_arg_offset(int arg) const;
-
-  // GC interface
-  oop*  saved_target_addr()                     { return (oop*)&_saved_target; }
-  oop*  saved_args_layout_addr()                { return (oop*)&_saved_args_layout; }
-
-  oop  compute_saved_args_layout(bool read_cache, bool write_cache);
-
-  // Compiler/assembler interface.
-  static int continuation_offset_in_bytes()     { return offset_of(RicochetFrame, _continuation); }
-  static int saved_target_offset_in_bytes()     { return offset_of(RicochetFrame, _saved_target); }
-  static int saved_args_layout_offset_in_bytes(){ return offset_of(RicochetFrame, _saved_args_layout); }
-  static int saved_args_base_offset_in_bytes()  { return offset_of(RicochetFrame, _saved_args_base); }
-  static int conversion_offset_in_bytes()       { return offset_of(RicochetFrame, _conversion); }
-  static int exact_sender_sp_offset_in_bytes()  { return offset_of(RicochetFrame, _exact_sender_sp); }
-  static int sender_link_offset_in_bytes()      { return offset_of(RicochetFrame, _sender_link); }
-  static int sender_pc_offset_in_bytes()        { return offset_of(RicochetFrame, _sender_pc); }
-
-  // This value is not used for much, but it apparently must be nonzero.
-  static int frame_size_in_bytes()              { return sender_link_offset_in_bytes(); }
-
-#ifdef ASSERT
-  // The magic number is supposed to help find ricochet frames within the bytes of stack dumps.
-  enum { MAGIC_NUMBER_1 = 0xFEED03E, MAGIC_NUMBER_2 = 0xBEEF03E };
-  static int magic_number_1_offset_in_bytes()   { return -wordSize; }
-  static int magic_number_2_offset_in_bytes()   { return sizeof(RicochetFrame); }
-  intptr_t magic_number_1() const               { return *(intptr_t*)((address)this + magic_number_1_offset_in_bytes()); };
-  intptr_t magic_number_2() const               { return *(intptr_t*)((address)this + magic_number_2_offset_in_bytes()); };
-#endif //ASSERT
-
-  enum { RETURN_VALUE_PLACEHOLDER = (NOT_DEBUG(0) DEBUG_ONLY(42)) };
-
-  static void verify_offsets() NOT_DEBUG_RETURN;
-  void verify() const NOT_DEBUG_RETURN; // check for MAGIC_NUMBER, etc.
-  void zap_arguments() NOT_DEBUG_RETURN;
-
-  static void generate_ricochet_blob(MacroAssembler* _masm,
-                                     // output params:
-                                     int* bounce_offset,
-                                     int* exception_offset,
-                                     int* frame_size_in_words);
-
-  static void enter_ricochet_frame(MacroAssembler* _masm,
-                                   Register rcx_recv,
-                                   Register rax_argv,
-                                   address return_handler,
-                                   Register rbx_temp);
-  static void leave_ricochet_frame(MacroAssembler* _masm,
-                                   Register rcx_recv,
-                                   Register new_sp_reg,
-                                   Register sender_pc_reg);
-
-  static Address frame_address(int offset = 0) {
-    // The RicochetFrame is found by subtracting a constant offset from rbp.
-    return Address(rbp, - sender_link_offset_in_bytes() + offset);
-  }
-
-  static RicochetFrame* from_frame(const frame& fr) {
-    address bp = (address) fr.fp();
-    RicochetFrame* rf = (RicochetFrame*)(bp - sender_link_offset_in_bytes());
-    rf->verify();
-    return rf;
-  }
-
-  static void verify_clean(MacroAssembler* _masm) NOT_DEBUG_RETURN;
-
-  static void describe(const frame* fr, FrameValues& values, int frame_no) PRODUCT_RETURN;
+  adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000))
 };
 
 // Additional helper methods for MethodHandles code generation:
 public:
   static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg);
-  static void load_conversion_vminfo(MacroAssembler* _masm, Register reg, Address conversion_field_addr);
-  static void load_conversion_dest_type(MacroAssembler* _masm, Register reg, Address conversion_field_addr);
-
-  static void load_stack_move(MacroAssembler* _masm,
-                              Register rdi_stack_move,
-                              Register rcx_amh,
-                              bool might_be_negative);
-
-  static void insert_arg_slots(MacroAssembler* _masm,
-                               RegisterOrConstant arg_slots,
-                               Register rax_argslot,
-                               Register rbx_temp, Register rdx_temp);
-
-  static void remove_arg_slots(MacroAssembler* _masm,
-                               RegisterOrConstant arg_slots,
-                               Register rax_argslot,
-                               Register rbx_temp, Register rdx_temp);
-
-  static void push_arg_slots(MacroAssembler* _masm,
-                                   Register rax_argslot,
-                                   RegisterOrConstant slot_count,
-                                   int skip_words_count,
-                                   Register rbx_temp, Register rdx_temp);
-
-  static void move_arg_slots_up(MacroAssembler* _masm,
-                                Register rbx_bottom,  // invariant
-                                Address  top_addr,    // can use rax_temp
-                                RegisterOrConstant positive_distance_in_slots,
-                                Register rax_temp, Register rdx_temp);
-
-  static void move_arg_slots_down(MacroAssembler* _masm,
-                                  Address  bottom_addr,  // can use rax_temp
-                                  Register rbx_top,      // invariant
-                                  RegisterOrConstant negative_distance_in_slots,
-                                  Register rax_temp, Register rdx_temp);
-
-  static void move_typed_arg(MacroAssembler* _masm,
-                             BasicType type, bool is_element,
-                             Address slot_dest, Address value_src,
-                             Register rbx_temp, Register rdx_temp);
-
-  static void move_return_value(MacroAssembler* _masm, BasicType type,
-                                Address return_slot);
-
-  static void verify_argslot(MacroAssembler* _masm, Register argslot_reg,
-                             const char* error_message) NOT_DEBUG_RETURN;
-
-  static void verify_argslots(MacroAssembler* _masm,
-                              RegisterOrConstant argslot_count,
-                              Register argslot_reg,
-                              bool negate_argslot,
-                              const char* error_message) NOT_DEBUG_RETURN;
-
-  static void verify_stack_move(MacroAssembler* _masm,
-                                RegisterOrConstant arg_slots,
-                                int direction) NOT_DEBUG_RETURN;
 
   static void verify_klass(MacroAssembler* _masm,
                            Register obj, KlassHandle klass,
@@ -297,9 +43,17 @@
                  "reference is a MH");
   }
 
+  static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
+
   // Similar to InterpreterMacroAssembler::jump_from_interpreted.
   // Takes care of special dispatch from single stepping too.
-  static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp);
+  static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp,
+                                      bool for_compiler_entry);
+
+  static void jump_to_lambda_form(MacroAssembler* _masm,
+                                  Register recv, Register method_temp,
+                                  Register temp2,
+                                  bool for_compiler_entry);
 
   static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN;
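[Editor's sketch] The adapter_code_size constant that replaces the removed RicochetFrame machinery sizes the method-handle adapter blob per word size and build mode. A sketch of how the expression expands, assuming the standard HotSpot definitions of NOT_LP64/LP64_ONLY (select on _LP64) and DEBUG_ONLY (adds its argument when ASSERT is defined):

    #ifdef _LP64
    # ifdef ASSERT
      enum { adapter_code_size = 32000 + 150000 };  // 64-bit debug
    # else
      enum { adapter_code_size = 32000 };           // 64-bit product
    # endif
    #else
    # ifdef ASSERT
      enum { adapter_code_size = 16000 + 25000 };   // 32-bit debug
    # else
      enum { adapter_code_size = 16000 };           // 32-bit product
    # endif
    #endif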
 
--- a/src/cpu/x86/vm/register_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/register_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,7 +35,7 @@
 const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr +
                                                                  2 * FloatRegisterImpl::number_of_registers;
 const int ConcreteRegisterImpl::max_xmm = ConcreteRegisterImpl::max_fpr +
-                                                                 2 * XMMRegisterImpl::number_of_registers;
+                                                                 8 * XMMRegisterImpl::number_of_registers;
 const char* RegisterImpl::name() const {
   const char* names[number_of_registers] = {
 #ifndef AMD64
--- a/src/cpu/x86/vm/register_x86.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/register_x86.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -158,7 +158,7 @@
   XMMRegister successor() const                          { return as_XMMRegister(encoding() + 1); }
 
   // accessors
-  int   encoding() const                          { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+  int   encoding() const                          { assert(is_valid(), err_msg("invalid register (%d)", (int)(intptr_t)this )); return (intptr_t)this; }
   bool  is_valid() const                          { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
   const char* name() const;
 };
@@ -216,7 +216,7 @@
                                RegisterImpl::number_of_registers +  // "H" half of a 64bit register
 #endif // AMD64
                            2 * FloatRegisterImpl::number_of_registers +
-                           2 * XMMRegisterImpl::number_of_registers +
+                           8 * XMMRegisterImpl::number_of_registers +
                            1 // eflags
   };
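[Editor's sketch] Both register_x86 hunks widen the VMReg map from 2 to 8 slots per XMM register; a plausible reading is that each chain of 32-bit slots must now describe a full 256-bit AVX (YMM) register instead of a 64-bit SSE value. The arithmetic under that assumption:

    // Hypothetical sanity check, not part of the source:
    const int bits_per_vmreg_slot = 32;
    const int old_bits_per_xmm    = 2 * bits_per_vmreg_slot;  //  64 bits (SSE scalar)
    const int new_bits_per_xmm    = 8 * bits_per_vmreg_slot;  // 256 bits (full YMM)
    // On AMD64, 16 XMM registers * (8 - 2) = 96 extra ConcreteRegister slots.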
 
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -643,6 +643,19 @@
   __ movdbl(r, Address(saved_sp, next_val_off));
 }
 
+static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
+                        address code_start, address code_end,
+                        Label& L_ok) {
+  Label L_fail;
+  __ lea(temp_reg, ExternalAddress(code_start));
+  __ cmpptr(pc_reg, temp_reg);
+  __ jcc(Assembler::belowEqual, L_fail);
+  __ lea(temp_reg, ExternalAddress(code_end));
+  __ cmpptr(pc_reg, temp_reg);
+  __ jcc(Assembler::below, L_ok);
+  __ bind(L_fail);
+}
+
 static void gen_i2c_adapter(MacroAssembler *masm,
                             int total_args_passed,
                             int comp_args_on_stack,
@@ -653,9 +666,53 @@
   // we may do a i2c -> c2i transition if we lose a race where compiled
   // code goes non-entrant while we get args ready.
 
+  // Adapters can be frameless because they do not require the caller
+  // to perform additional cleanup work, such as correcting the stack pointer.
+  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
+  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
+  // even if a callee has modified the stack pointer.
+  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
+  // routinely repairs its caller's stack pointer (from sender_sp, which is set
+  // up via the senderSP register).
+  // In other words, if *either* the caller or callee is interpreted, we can
+  // get the stack pointer repaired after a call.
+  // This is why c2i and i2c adapters cannot be indefinitely composed.
+  // In particular, if a c2i adapter were to somehow call an i2c adapter,
+  // both caller and callee would be compiled methods, and neither would
+  // clean up the stack pointer changes performed by the two adapters.
+  // If this happens, control eventually transfers back to the compiled
+  // caller, but with an uncorrected stack, causing delayed havoc.
+
   // Pick up the return address
   __ movptr(rax, Address(rsp, 0));
 
+  if (VerifyAdapterCalls &&
+      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
+    // So, let's test for cascading c2i/i2c adapters right now.
+    //  assert(Interpreter::contains($return_addr) ||
+    //         StubRoutines::contains($return_addr),
+    //         "i2c adapter must return to an interpreter frame");
+    __ block_comment("verify_i2c { ");
+    Label L_ok;
+    if (Interpreter::code() != NULL)
+      range_check(masm, rax, rdi,
+                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
+                  L_ok);
+    if (StubRoutines::code1() != NULL)
+      range_check(masm, rax, rdi,
+                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
+                  L_ok);
+    if (StubRoutines::code2() != NULL)
+      range_check(masm, rax, rdi,
+                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
+                  L_ok);
+    const char* msg = "i2c adapter must return to an interpreter frame";
+    __ block_comment(msg);
+    __ stop(msg);
+    __ bind(L_ok);
+    __ block_comment("} verify_i2ce ");
+  }
+
   // Must preserve original SP for loading incoming arguments because
   // we need to align the outgoing SP for compiled code.
   __ movptr(rdi, rsp);
@@ -1293,6 +1350,89 @@
   __ bind(done);
 }
 
+static void verify_oop_args(MacroAssembler* masm,
+                            int total_args_passed,
+                            const BasicType* sig_bt,
+                            const VMRegPair* regs) {
+  Register temp_reg = rbx;  // not part of any compiled calling seq
+  if (VerifyOops) {
+    for (int i = 0; i < total_args_passed; i++) {
+      if (sig_bt[i] == T_OBJECT ||
+          sig_bt[i] == T_ARRAY) {
+        VMReg r = regs[i].first();
+        assert(r->is_valid(), "bad oop arg");
+        if (r->is_stack()) {
+          __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
+          __ verify_oop(temp_reg);
+        } else {
+          __ verify_oop(r->as_Register());
+        }
+      }
+    }
+  }
+}
+
+static void gen_special_dispatch(MacroAssembler* masm,
+                                 int total_args_passed,
+                                 int comp_args_on_stack,
+                                 vmIntrinsics::ID special_dispatch,
+                                 const BasicType* sig_bt,
+                                 const VMRegPair* regs) {
+  verify_oop_args(masm, total_args_passed, sig_bt, regs);
+
+  // Pick out the receiver and the trailing MemberName argument from the incoming compiled-convention args.
+  bool     has_receiver   = false;
+  Register receiver_reg   = noreg;
+  int      member_arg_pos = -1;
+  Register member_reg     = noreg;
+  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
+  if (ref_kind != 0) {
+    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
+    member_reg = rbx;  // known to be free at this point
+    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
+  } else if (special_dispatch == vmIntrinsics::_invokeBasic) {
+    has_receiver = true;
+  } else {
+    guarantee(false, err_msg("special_dispatch=%d", special_dispatch));
+  }
+
+  if (member_reg != noreg) {
+    // Load the member_arg into register, if necessary.
+    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
+    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
+    VMReg r = regs[member_arg_pos].first();
+    assert(r->is_valid(), "bad member arg");
+    if (r->is_stack()) {
+      __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
+    } else {
+      // no data motion is needed
+      member_reg = r->as_Register();
+    }
+  }
+
+  if (has_receiver) {
+    // Make sure the receiver is loaded into a register.
+    assert(total_args_passed > 0, "oob");
+    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
+    VMReg r = regs[0].first();
+    assert(r->is_valid(), "bad receiver arg");
+    if (r->is_stack()) {
+      // Porting note:  This assumes that compiled calling conventions always
+      // pass the receiver oop in a register.  If this is not true on some
+      // platform, pick a temp and load the receiver from stack.
+      assert(false, "receiver always in a register");
+      receiver_reg = rcx;  // known to be free at this point
+      __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
+    } else {
+      // no data motion is needed
+      receiver_reg = r->as_Register();
+    }
+  }
+
+  // Figure out which address we are really jumping to:
+  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
+                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
+}
 
 // ---------------------------------------------------------------------------
 // Generate a native wrapper for a given method.  The method takes arguments
@@ -1323,14 +1463,37 @@
 //    transition back to thread_in_Java
 //    return to caller
 //
-nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
+nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
                                                 methodHandle method,
                                                 int compile_id,
                                                 int total_in_args,
                                                 int comp_args_on_stack,
-                                                BasicType *in_sig_bt,
-                                                VMRegPair *in_regs,
+                                                BasicType* in_sig_bt,
+                                                VMRegPair* in_regs,
                                                 BasicType ret_type) {
+  if (method->is_method_handle_intrinsic()) {
+    vmIntrinsics::ID iid = method->intrinsic_id();
+    intptr_t start = (intptr_t)__ pc();
+    int vep_offset = ((intptr_t)__ pc()) - start;
+    gen_special_dispatch(masm,
+                         total_in_args,
+                         comp_args_on_stack,
+                         method->intrinsic_id(),
+                         in_sig_bt,
+                         in_regs);
+    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
+    __ flush();
+    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
+    return nmethod::new_native_nmethod(method,
+                                       compile_id,
+                                       masm->code(),
+                                       vep_offset,
+                                       frame_complete,
+                                       stack_slots / VMRegImpl::slots_per_word,
+                                       in_ByteSize(-1),
+                                       in_ByteSize(-1),
+                                       (OopMapSet*)NULL);
+  }
   bool is_critical_native = true;
   address native_func = method->critical_native_function();
   if (native_func == NULL) {
@@ -1436,7 +1599,7 @@
       if (in_regs[i].first()->is_Register()) {
         const Register reg = in_regs[i].first()->as_Register();
         switch (in_sig_bt[i]) {
-          case T_ARRAY:
+          case T_ARRAY:  // critical array (uses 2 slots on LP64)
           case T_BOOLEAN:
           case T_BYTE:
           case T_SHORT:
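[Editor's sketch] For reference, the range_check helper added to both sharedRuntime files emits unsigned compares that only branch to L_ok when the return pc lies strictly inside a code blob; otherwise control falls through past L_fail to the next check. A minimal C++ sketch of the predicate the generated code implements (the function name is illustrative):

    #include <stdint.h>

    // Equivalent of the lea/cmpptr/jcc sequence (unsigned, open interval):
    static bool pc_in_blob(const void* pc, const void* code_start, const void* code_end) {
      uintptr_t p = (uintptr_t)pc;
      return p > (uintptr_t)code_start && p < (uintptr_t)code_end;
    }

    // gen_i2c_adapter chains one such check per blob; any hit jumps to L_ok,
    // and falling past the last check reaches __ stop(msg).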
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -590,6 +590,19 @@
   __ jmp(rcx);
 }
 
+static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
+                        address code_start, address code_end,
+                        Label& L_ok) {
+  Label L_fail;
+  __ lea(temp_reg, ExternalAddress(code_start));
+  __ cmpptr(pc_reg, temp_reg);
+  __ jcc(Assembler::belowEqual, L_fail);
+  __ lea(temp_reg, ExternalAddress(code_end));
+  __ cmpptr(pc_reg, temp_reg);
+  __ jcc(Assembler::below, L_ok);
+  __ bind(L_fail);
+}
+
 static void gen_i2c_adapter(MacroAssembler *masm,
                             int total_args_passed,
                             int comp_args_on_stack,
@@ -605,9 +618,53 @@
   // save code can segv when fxsave instructions find improperly
   // aligned stack pointer.
 
+  // Adapters can be frameless because they do not require the caller
+  // to perform additional cleanup work, such as correcting the stack pointer.
+  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
+  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
+  // even if a callee has modified the stack pointer.
+  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
+  // routinely repairs its caller's stack pointer (from sender_sp, which is set
+  // up via the senderSP register).
+  // In other words, if *either* the caller or callee is interpreted, we can
+  // get the stack pointer repaired after a call.
+  // This is why c2i and i2c adapters cannot be indefinitely composed.
+  // In particular, if a c2i adapter were to somehow call an i2c adapter,
+  // both caller and callee would be compiled methods, and neither would
+  // clean up the stack pointer changes performed by the two adapters.
+  // If this happens, control eventually transfers back to the compiled
+  // caller, but with an uncorrected stack, causing delayed havoc.
+
   // Pick up the return address
   __ movptr(rax, Address(rsp, 0));
 
+  if (VerifyAdapterCalls &&
+      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
+    // So, let's test for cascading c2i/i2c adapters right now.
+    //  assert(Interpreter::contains($return_addr) ||
+    //         StubRoutines::contains($return_addr),
+    //         "i2c adapter must return to an interpreter frame");
+    __ block_comment("verify_i2c { ");
+    Label L_ok;
+    if (Interpreter::code() != NULL)
+      range_check(masm, rax, r11,
+                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
+                  L_ok);
+    if (StubRoutines::code1() != NULL)
+      range_check(masm, rax, r11,
+                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
+                  L_ok);
+    if (StubRoutines::code2() != NULL)
+      range_check(masm, rax, r11,
+                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
+                  L_ok);
+    const char* msg = "i2c adapter must return to an interpreter frame";
+    __ block_comment(msg);
+    __ stop(msg);
+    __ bind(L_ok);
+    __ block_comment("} verify_i2ce ");
+  }
+
   // Must preserve original SP for loading incoming arguments because
   // we need to align the outgoing SP for compiled code.
   __ movptr(r11, rsp);
@@ -1366,6 +1423,14 @@
 }
 
 
+// Different signatures may require very different orders for the moves
+// to avoid clobbering other arguments, and there is no single fixed
+// order that is always safe.  Compute a safe order for issuing the
+// stores and break any cycles in those stores.  This code is fairly
+// general but it's not necessary on the other platforms, so we keep it
+// in the platform-dependent code instead of moving it into a shared file.
+// (See bugs 7013347 & 7145024.)
+// Note that this code is specific to LP64.
 class ComputeMoveOrder: public StackObj {
   class MoveOperation: public ResourceObj {
     friend class ComputeMoveOrder;
@@ -1532,6 +1597,89 @@
   }
 };
 
+static void verify_oop_args(MacroAssembler* masm,
+                            int total_args_passed,
+                            const BasicType* sig_bt,
+                            const VMRegPair* regs) {
+  Register temp_reg = rbx;  // not part of any compiled calling seq
+  if (VerifyOops) {
+    for (int i = 0; i < total_args_passed; i++) {
+      if (sig_bt[i] == T_OBJECT ||
+          sig_bt[i] == T_ARRAY) {
+        VMReg r = regs[i].first();
+        assert(r->is_valid(), "bad oop arg");
+        if (r->is_stack()) {
+          __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
+          __ verify_oop(temp_reg);
+        } else {
+          __ verify_oop(r->as_Register());
+        }
+      }
+    }
+  }
+}
+
+static void gen_special_dispatch(MacroAssembler* masm,
+                                 int total_args_passed,
+                                 int comp_args_on_stack,
+                                 vmIntrinsics::ID special_dispatch,
+                                 const BasicType* sig_bt,
+                                 const VMRegPair* regs) {
+  verify_oop_args(masm, total_args_passed, sig_bt, regs);
+
+  // Pick out the receiver and the trailing MemberName argument from the incoming compiled-convention args.
+  bool     has_receiver   = false;
+  Register receiver_reg   = noreg;
+  int      member_arg_pos = -1;
+  Register member_reg     = noreg;
+  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
+  if (ref_kind != 0) {
+    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
+    member_reg = rbx;  // known to be free at this point
+    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
+  } else if (special_dispatch == vmIntrinsics::_invokeBasic) {
+    has_receiver = true;
+  } else {
+    guarantee(false, err_msg("special_dispatch=%d", special_dispatch));
+  }
+
+  if (member_reg != noreg) {
+    // Load the member_arg into register, if necessary.
+    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
+    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
+    VMReg r = regs[member_arg_pos].first();
+    assert(r->is_valid(), "bad member arg");
+    if (r->is_stack()) {
+      __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
+    } else {
+      // no data motion is needed
+      member_reg = r->as_Register();
+    }
+  }
+
+  if (has_receiver) {
+    // Make sure the receiver is loaded into a register.
+    assert(total_args_passed > 0, "oob");
+    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
+    VMReg r = regs[0].first();
+    assert(r->is_valid(), "bad receiver arg");
+    if (r->is_stack()) {
+      // Porting note:  This assumes that compiled calling conventions always
+      // pass the receiver oop in a register.  If this is not true on some
+      // platform, pick a temp and load the receiver from stack.
+      assert(false, "receiver always in a register");
+      receiver_reg = j_rarg0;  // known to be free at this point
+      __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
+    } else {
+      // no data motion is needed
+      receiver_reg = r->as_Register();
+    }
+  }
+
+  // Figure out which address we are really jumping to:
+  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
+                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
+}
 
 // ---------------------------------------------------------------------------
 // Generate a native wrapper for a given method.  The method takes arguments
@@ -1539,14 +1687,60 @@
 // convention (handlizes oops, etc), transitions to native, makes the call,
 // returns to java state (possibly blocking), unhandlizes any result and
 // returns.
-nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
+//
+// Critical native functions are a shorthand for the use of
+// GetPrimitiveArrayCritical and disallow the use of any other JNI
+// functions.  The wrapper is expected to unpack the arguments before
+// passing them to the callee and perform checks before and after the
+// native call to ensure that the GC_locker
+// lock_critical/unlock_critical semantics are followed.  Some other
+// parts of JNI setup are skipped, like the tear-down of the JNI handle
+// block and the check for pending exceptions, since it's impossible
+// for them to be thrown.
+//
+// They are roughly structured like this:
+//    if (GC_locker::needs_gc())
+//      SharedRuntime::block_for_jni_critical();
+//    transition to thread_in_native
+//    unpack array arguments and call native entry point
+//    check for safepoint in progress
+//    check if any thread suspend flags are set
+//      call into the JVM and possibly unlock the JNI critical
+//      if a GC was suppressed while in the critical native.
+//    transition back to thread_in_Java
+//    return to caller
+//
+nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
                                                 methodHandle method,
                                                 int compile_id,
                                                 int total_in_args,
                                                 int comp_args_on_stack,
-                                                BasicType *in_sig_bt,
-                                                VMRegPair *in_regs,
+                                                BasicType* in_sig_bt,
+                                                VMRegPair* in_regs,
                                                 BasicType ret_type) {
+  if (method->is_method_handle_intrinsic()) {
+    vmIntrinsics::ID iid = method->intrinsic_id();
+    intptr_t start = (intptr_t)__ pc();
+    int vep_offset = ((intptr_t)__ pc()) - start;
+    gen_special_dispatch(masm,
+                         total_in_args,
+                         comp_args_on_stack,
+                         method->intrinsic_id(),
+                         in_sig_bt,
+                         in_regs);
+    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
+    __ flush();
+    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
+    return nmethod::new_native_nmethod(method,
+                                       compile_id,
+                                       masm->code(),
+                                       vep_offset,
+                                       frame_complete,
+                                       stack_slots / VMRegImpl::slots_per_word,
+                                       in_ByteSize(-1),
+                                       in_ByteSize(-1),
+                                       (OopMapSet*)NULL);
+  }
   bool is_critical_native = true;
   address native_func = method->critical_native_function();
   if (native_func == NULL) {
@@ -1658,7 +1852,7 @@
           case T_SHORT:
           case T_CHAR:
           case T_INT:  single_slots++; break;
-          case T_ARRAY:
+          case T_ARRAY:  // specific to LP64 (7145024)
           case T_LONG: double_slots++; break;
           default:  ShouldNotReachHere();
         }
@@ -3792,8 +3986,12 @@
   //
   // address OptoRuntime::handle_exception_C(JavaThread* thread)
 
-  __ set_last_Java_frame(noreg, noreg, NULL);
+  // At a method handle call, the stack may not be properly aligned
+  // when returning with an exception.
+  address the_pc = __ pc();
+  __ set_last_Java_frame(noreg, noreg, the_pc);
   __ mov(c_rarg0, r15_thread);
+  __ andptr(rsp, -(StackAlignmentInBytes));    // Align stack
   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)));
 
   // Set an oopmap for the call site.  This oopmap will only be used if we
@@ -3804,9 +4002,9 @@
 
   OopMapSet* oop_maps = new OopMapSet();
 
-  oop_maps->add_gc_map( __ pc()-start, new OopMap(SimpleRuntimeFrame::framesize, 0));
-
-  __ reset_last_Java_frame(false, false);
+  oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
+
+  __ reset_last_Java_frame(false, true);
 
   // Restore callee-saved registers
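[Editor's sketch] The ComputeMoveOrder comment above describes a parallel-move problem: each argument store may clobber the source of a later store, and the dependences can form cycles. A minimal, self-contained sketch of the cycle-breaking idea, not HotSpot's actual implementation (register numbers and the temp are illustrative):

    #include <vector>

    struct Move { int dst, src; };  // abstract register numbers

    // Order dst <- src moves so no source is clobbered before it is read;
    // 'temp' must not occur in 'pending'.
    std::vector<Move> sequentialize(std::vector<Move> pending, int temp) {
      std::vector<Move> order;
      while (!pending.empty()) {
        bool emitted = false;
        for (size_t i = 0; i < pending.size(); i++) {
          bool dst_still_read = false;
          for (size_t j = 0; j < pending.size(); j++)
            if (j != i && pending[j].src == pending[i].dst)
              dst_still_read = true;
          if (!dst_still_read) {         // safe: no pending move reads this dst
            order.push_back(pending[i]);
            pending.erase(pending.begin() + i);
            emitted = true;
            break;
          }
        }
        if (!emitted) {                  // every dst is still read: a cycle
          int d = pending[0].dst;
          Move save = { temp, d };       // park the about-to-be-clobbered value
          order.push_back(save);
          for (size_t j = 0; j < pending.size(); j++)
            if (pending[j].src == d) pending[j].src = temp;
        }
      }
      return order;
    }

For the swap {A<-B, B<-A} this yields temp<-A, A<-B, B<-temp, which is the behavior the real ComputeMoveOrder must reproduce with actual scratch registers.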
 
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -2136,11 +2136,23 @@
       __ trigfunc('t');
       __ ret(0);
     }
+    {
+      StubCodeMark mark(this, "StubRoutines", "exp");
+      StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc();
 
-    // The intrinsic version of these seem to return the same value as
-    // the strict version.
-    StubRoutines::_intrinsic_exp = SharedRuntime::dexp;
-    StubRoutines::_intrinsic_pow = SharedRuntime::dpow;
+      __ fld_d(Address(rsp, 4));
+      __ exp_with_fallback(0);
+      __ ret(0);
+    }
+    {
+      StubCodeMark mark(this, "StubRoutines", "pow");
+      StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
+
+      __ fld_d(Address(rsp, 12));
+      __ fld_d(Address(rsp, 4));
+      __ pow_with_fallback(0);
+      __ ret(0);
+    }
   }
 
  public:
@@ -2315,12 +2327,6 @@
                                                                                    CAST_FROM_FN_PTR(address, SharedRuntime::d2l));
 
     // Build this early so it's available for the interpreter
-    StubRoutines::_throw_WrongMethodTypeException_entry =
-      generate_throw_exception("WrongMethodTypeException throw_exception",
-                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
-                               rax, rcx);
-
-    // Build this early so it's available for the interpreter
     StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
   }
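[Editor's sketch] The 32-bit exp and pow stubs installed above pull their double arguments straight off the caller's stack through the x87 unit. A sketch of the frame layout the fld_d offsets assume; which argument is which is inferred from the load order, not stated in the source:

    // On entry to the 32-bit pow stub (args passed on the stack):
    //   [esp + 0]   4-byte return address
    //   [esp + 4]   first  double argument (8 bytes) -- loaded second -> ST(0)
    //   [esp + 12]  second double argument (8 bytes) -- loaded first  -> ST(1)
    // The exp stub reads its single double at [esp + 4].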
 
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -710,6 +710,21 @@
     return start;
   }
 
+  // Support for intptr_t get_previous_sp()
+  //
+  // This routine is used to find the previous stack pointer for the
+  // caller.
+  address generate_get_previous_sp() {
+    StubCodeMark mark(this, "StubRoutines", "get_previous_sp");
+    address start = __ pc();
+
+    __ movptr(rax, rsp);
+    __ addptr(rax, 8); // return address is at the top of the stack.
+    __ ret(0);
+
+    return start;
+  }
+
   //----------------------------------------------------------------------------------------------------
   // Support for void verify_mxcsr()
   //
@@ -2913,11 +2928,34 @@
       __ addq(rsp, 8);
       __ ret(0);
     }
-
-    // The intrinsic version of these seem to return the same value as
-    // the strict version.
-    StubRoutines::_intrinsic_exp = SharedRuntime::dexp;
-    StubRoutines::_intrinsic_pow = SharedRuntime::dpow;
+    {
+      StubCodeMark mark(this, "StubRoutines", "exp");
+      StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc();
+
+      __ subq(rsp, 8);
+      __ movdbl(Address(rsp, 0), xmm0);
+      __ fld_d(Address(rsp, 0));
+      __ exp_with_fallback(0);
+      __ fstp_d(Address(rsp, 0));
+      __ movdbl(xmm0, Address(rsp, 0));
+      __ addq(rsp, 8);
+      __ ret(0);
+    }
+    {
+      StubCodeMark mark(this, "StubRoutines", "pow");
+      StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
+
+      __ subq(rsp, 8);
+      __ movdbl(Address(rsp, 0), xmm1);
+      __ fld_d(Address(rsp, 0));
+      __ movdbl(Address(rsp, 0), xmm0);
+      __ fld_d(Address(rsp, 0));
+      __ pow_with_fallback(0);
+      __ fstp_d(Address(rsp, 0));
+      __ movdbl(xmm0, Address(rsp, 0));
+      __ addq(rsp, 8);
+      __ ret(0);
+    }
   }
 
 #undef __
@@ -3060,17 +3098,10 @@
 
     // platform dependent
     StubRoutines::x86::_get_previous_fp_entry = generate_get_previous_fp();
+    StubRoutines::x86::_get_previous_sp_entry = generate_get_previous_sp();
 
     StubRoutines::x86::_verify_mxcsr_entry    = generate_verify_mxcsr();
 
-    // Build this early so it's available for the interpreter.  Stub
-    // expects the required and actual types as register arguments in
-    // j_rarg0 and j_rarg1 respectively.
-    StubRoutines::_throw_WrongMethodTypeException_entry =
-      generate_throw_exception("WrongMethodTypeException throw_exception",
-                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
-                               rax, rcx);
-
     // Build this early so it's available for the interpreter.
     StubRoutines::_throw_StackOverflowError_entry =
       generate_throw_exception("StackOverflowError throw_exception",
--- a/src/cpu/x86/vm/stubRoutines_x86_64.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/stubRoutines_x86_64.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -43,6 +43,7 @@
 // a description of how to extend it, see the stubRoutines.hpp file.
 
 address StubRoutines::x86::_get_previous_fp_entry = NULL;
+address StubRoutines::x86::_get_previous_sp_entry = NULL;
 
 address StubRoutines::x86::_verify_mxcsr_entry = NULL;
 
--- a/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -41,6 +41,7 @@
 
  private:
   static address _get_previous_fp_entry;
+  static address _get_previous_sp_entry;
   static address _verify_mxcsr_entry;
 
   static address _f2i_fixup;
@@ -61,6 +62,11 @@
     return _get_previous_fp_entry;
   }
 
+  static address get_previous_sp_entry()
+  {
+    return _get_previous_sp_entry;
+  }
+
   static address verify_mxcsr_entry()
   {
     return _verify_mxcsr_entry;
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -566,7 +566,8 @@
     __ testl(rax, JVM_ACC_STATIC);
     __ movptr(rax, Address(rdi, Interpreter::local_offset_in_bytes(0)));  // get receiver (assume this is frequent case)
     __ jcc(Assembler::zero, done);
-    __ movptr(rax, Address(rbx, methodOopDesc::constants_offset()));
+    __ movptr(rax, Address(rbx, methodOopDesc::const_offset()));
+    __ movptr(rax, Address(rax, constMethodOopDesc::constants_offset()));
     __ movptr(rax, Address(rax, constantPoolOopDesc::pool_holder_offset_in_bytes()));
     __ movptr(rax, Address(rax, mirror_offset));
     __ bind(done);
@@ -606,7 +607,8 @@
     __ push(0);
   }
 
-  __ movptr(rdx, Address(rbx, methodOopDesc::constants_offset()));
+  __ movptr(rdx, Address(rbx, methodOopDesc::const_offset()));
+  __ movptr(rdx, Address(rdx, constMethodOopDesc::constants_offset()));
   __ movptr(rdx, Address(rdx, constantPoolOopDesc::cache_offset_in_bytes()));
   __ push(rdx);                                       // set constant pool cache
   __ push(rdi);                                       // set locals pointer
@@ -661,9 +663,9 @@
     __ testptr(rax, rax);
     __ jcc(Assembler::zero, slow_path);
 
-    __ movptr(rdi, Address(rbx, methodOopDesc::constants_offset()));
     // read first instruction word and extract bytecode @ 1 and index @ 2
     __ movptr(rdx, Address(rbx, methodOopDesc::const_offset()));
+    __ movptr(rdi, Address(rdx, constMethodOopDesc::constants_offset()));
     __ movl(rdx, Address(rdx, constMethodOopDesc::codes_offset()));
     // Shift codes right to get the index on the right.
     // The bytecode fetched looks like <index><0xb4><0x2a>
@@ -708,9 +710,9 @@
     // Need to differentiate between igetfield, agetfield, bgetfield etc.
     // because they are different sizes.
     // Use the type from the constant pool cache
-    __ shrl(rdx, ConstantPoolCacheEntry::tosBits);
-    // Make sure we don't need to mask rdx for tosBits after the above shift
-    ConstantPoolCacheEntry::verify_tosBits();
+    __ shrl(rdx, ConstantPoolCacheEntry::tos_state_shift);
+    // Make sure we don't need to mask rdx after the above shift
+    ConstantPoolCacheEntry::verify_tos_state_shift();
     __ cmpl(rdx, btos);
     __ jcc(Assembler::notEqual, notByte);
     __ load_signed_byte(rax, field_address);
@@ -1026,7 +1028,8 @@
     __ testl(t, JVM_ACC_STATIC);
     __ jcc(Assembler::zero, L);
     // get mirror
-    __ movptr(t, Address(method, methodOopDesc:: constants_offset()));
+    __ movptr(t, Address(method, methodOopDesc::const_offset()));
+    __ movptr(t, Address(t, constMethodOopDesc::constants_offset()));
     __ movptr(t, Address(t, constantPoolOopDesc::pool_holder_offset_in_bytes()));
     __ movptr(t, Address(t, mirror_offset));
     // copy mirror into activation frame
@@ -1510,7 +1513,6 @@
     case Interpreter::empty                  : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry();        break;
     case Interpreter::accessor               : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry();     break;
     case Interpreter::abstract               : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry();     break;
-    case Interpreter::method_handle          : entry_point = ((InterpreterGenerator*)this)->generate_method_handle_entry(); break;
 
     case Interpreter::java_lang_math_sin     : // fall thru
     case Interpreter::java_lang_math_cos     : // fall thru
@@ -1518,10 +1520,14 @@
     case Interpreter::java_lang_math_abs     : // fall thru
     case Interpreter::java_lang_math_log     : // fall thru
     case Interpreter::java_lang_math_log10   : // fall thru
-    case Interpreter::java_lang_math_sqrt    : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind);     break;
+    case Interpreter::java_lang_math_sqrt    : // fall thru
+    case Interpreter::java_lang_math_pow     : // fall thru
+    case Interpreter::java_lang_math_exp     : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind);     break;
     case Interpreter::java_lang_ref_reference_get
                                              : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
-    default                                  : ShouldNotReachHere();                                                       break;
+    default:
+      fatal(err_msg("unexpected method kind: %d", kind));
+      break;
   }
 
   if (entry_point) return entry_point;
@@ -1540,7 +1546,9 @@
     case Interpreter::java_lang_math_abs     : // fall thru
     case Interpreter::java_lang_math_log     : // fall thru
     case Interpreter::java_lang_math_log10   : // fall thru
-    case Interpreter::java_lang_math_sqrt    :
+    case Interpreter::java_lang_math_sqrt    : // fall thru
+    case Interpreter::java_lang_math_pow     : // fall thru
+    case Interpreter::java_lang_math_exp     :
       return false;
     default:
       return true;
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -522,7 +522,8 @@
     // get receiver (assume this is frequent case)
     __ movptr(rax, Address(r14, Interpreter::local_offset_in_bytes(0)));
     __ jcc(Assembler::zero, done);
-    __ movptr(rax, Address(rbx, methodOopDesc::constants_offset()));
+    __ movptr(rax, Address(rbx, methodOopDesc::const_offset()));
+    __ movptr(rax, Address(rax, constMethodOopDesc::constants_offset()));
     __ movptr(rax, Address(rax,
                            constantPoolOopDesc::pool_holder_offset_in_bytes()));
     __ movptr(rax, Address(rax, mirror_offset));
@@ -579,7 +580,8 @@
     __ push(0);
   }
 
-  __ movptr(rdx, Address(rbx, methodOopDesc::constants_offset()));
+  __ movptr(rdx, Address(rbx, methodOopDesc::const_offset()));
+  __ movptr(rdx, Address(rdx, constMethodOopDesc::constants_offset()));
   __ movptr(rdx, Address(rdx, constantPoolOopDesc::cache_offset_in_bytes()));
   __ push(rdx); // set constant pool cache
   __ push(r14); // set locals pointer
@@ -629,9 +631,9 @@
     __ testptr(rax, rax);
     __ jcc(Assembler::zero, slow_path);
 
-    __ movptr(rdi, Address(rbx, methodOopDesc::constants_offset()));
     // read first instruction word and extract bytecode @ 1 and index @ 2
     __ movptr(rdx, Address(rbx, methodOopDesc::const_offset()));
+    __ movptr(rdi, Address(rdx, constMethodOopDesc::constants_offset()));
     __ movl(rdx, Address(rdx, constMethodOopDesc::codes_offset()));
     // Shift codes right to get the index on the right.
     // The bytecode fetched looks like <index><0xb4><0x2a>
@@ -681,9 +683,9 @@
     // Need to differentiate between igetfield, agetfield, bgetfield etc.
     // because they are different sizes.
     // Use the type from the constant pool cache
-    __ shrl(rdx, ConstantPoolCacheEntry::tosBits);
-    // Make sure we don't need to mask edx for tosBits after the above shift
-    ConstantPoolCacheEntry::verify_tosBits();
+    __ shrl(rdx, ConstantPoolCacheEntry::tos_state_shift);
+    // Make sure we don't need to mask edx after the above shift
+    ConstantPoolCacheEntry::verify_tos_state_shift();
 
     __ cmpl(rdx, atos);
     __ jcc(Assembler::notEqual, notObj);
@@ -1020,7 +1022,8 @@
     __ testl(t, JVM_ACC_STATIC);
     __ jcc(Assembler::zero, L);
     // get mirror
-    __ movptr(t, Address(method, methodOopDesc::constants_offset()));
+    __ movptr(t, Address(method, methodOopDesc::const_offset()));
+    __ movptr(t, Address(t, constMethodOopDesc::constants_offset()));
     __ movptr(t, Address(t, constantPoolOopDesc::pool_holder_offset_in_bytes()));
     __ movptr(t, Address(t, mirror_offset));
     // copy mirror into activation frame
@@ -1521,12 +1524,11 @@
   switch (kind) {
   case Interpreter::zerolocals             :                                                                             break;
   case Interpreter::zerolocals_synchronized: synchronized = true;                                                        break;
-  case Interpreter::native                 : entry_point = ((InterpreterGenerator*) this)->generate_native_entry(false); break;
-  case Interpreter::native_synchronized    : entry_point = ((InterpreterGenerator*) this)->generate_native_entry(true);  break;
-  case Interpreter::empty                  : entry_point = ((InterpreterGenerator*) this)->generate_empty_entry();       break;
-  case Interpreter::accessor               : entry_point = ((InterpreterGenerator*) this)->generate_accessor_entry();    break;
-  case Interpreter::abstract               : entry_point = ((InterpreterGenerator*) this)->generate_abstract_entry();    break;
-  case Interpreter::method_handle          : entry_point = ((InterpreterGenerator*) this)->generate_method_handle_entry();break;
+  case Interpreter::native                 : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); break;
+  case Interpreter::native_synchronized    : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true);  break;
+  case Interpreter::empty                  : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry();       break;
+  case Interpreter::accessor               : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry();    break;
+  case Interpreter::abstract               : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry();    break;
 
   case Interpreter::java_lang_math_sin     : // fall thru
   case Interpreter::java_lang_math_cos     : // fall thru
@@ -1534,10 +1536,14 @@
   case Interpreter::java_lang_math_abs     : // fall thru
   case Interpreter::java_lang_math_log     : // fall thru
   case Interpreter::java_lang_math_log10   : // fall thru
-  case Interpreter::java_lang_math_sqrt    : entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind);    break;
+  case Interpreter::java_lang_math_sqrt    : // fall thru
+  case Interpreter::java_lang_math_pow     : // fall thru
+  case Interpreter::java_lang_math_exp     : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind);    break;
   case Interpreter::java_lang_ref_reference_get
                                            : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
-  default                                  : ShouldNotReachHere();                                                       break;
+  default:
+    fatal(err_msg("unexpected method kind: %d", kind));
+    break;
   }
 
   if (entry_point) {
@@ -1558,7 +1564,9 @@
     case Interpreter::java_lang_math_abs     : // fall thru
     case Interpreter::java_lang_math_log     : // fall thru
     case Interpreter::java_lang_math_log10   : // fall thru
-    case Interpreter::java_lang_math_sqrt    :
+    case Interpreter::java_lang_math_sqrt    : // fall thru
+    case Interpreter::java_lang_math_pow     : // fall thru
+    case Interpreter::java_lang_math_exp     :
       return false;
     default:
       return true;
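[Editor's sketch] The recurring two-movptr sequences in both template interpreters reflect a field move: the constant-pool pointer now lives in the constMethodOop rather than the methodOop. A hedged sketch of the pointer chase, using the accessor names implied by the offsets above (HotSpot's own types assumed):

    // Old layout:  cpool = method->constants();                  // one load
    // New layout:  cpool = method->constMethod()->constants();   // two loads
    constantPoolOop constants_of(methodOop method) {
      constMethodOop cm = method->constMethod(); // Address(rbx, methodOopDesc::const_offset())
      return cm->constants();                    // Address(cm, constMethodOopDesc::constants_offset())
    }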
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -446,13 +446,13 @@
   const Register cache = rcx;
   const Register index = rdx;
 
-  resolve_cache_and_index(f1_oop, rax, cache, index, wide ? sizeof(u2) : sizeof(u1));
+  resolve_cache_and_index(f12_oop, rax, cache, index, wide ? sizeof(u2) : sizeof(u1));
   if (VerifyOops) {
     __ verify_oop(rax);
   }
 
   Label L_done, L_throw_exception;
-  const Register con_klass_temp = rcx;  // same as Rcache
+  const Register con_klass_temp = rcx;  // same as cache
   __ load_klass(con_klass_temp, rax);
   __ cmpptr(con_klass_temp, ExternalAddress((address)Universe::systemObjArrayKlassObj_addr()));
   __ jcc(Assembler::notEqual, L_done);
@@ -2084,15 +2084,15 @@
                                             Register Rcache,
                                             Register index,
                                             size_t index_size) {
-  Register temp = rbx;
-
+  const Register temp = rbx;
   assert_different_registers(result, Rcache, index, temp);
 
   Label resolved;
-  if (byte_no == f1_oop) {
-    // We are resolved if the f1 field contains a non-null object (CallSite, etc.)
-    // This kind of CP cache entry does not need to match the flags byte, because
+  if (byte_no == f12_oop) {
+    // We are resolved if the f1 field contains a non-null object (CallSite, MethodType, etc.)
+    // This kind of CP cache entry does not need to match bytecode_1 or bytecode_2, because
     // there is a 1-1 relation between bytecode type and CP entry type.
+    // The caller will also load a methodOop from f2.
     assert(result != noreg, ""); //else do cmpptr(Address(...), (int32_t) NULL_WORD)
     __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
     __ movptr(result, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset()));
@@ -2112,15 +2112,18 @@
     case Bytecodes::_getstatic      : // fall through
     case Bytecodes::_putstatic      : // fall through
     case Bytecodes::_getfield       : // fall through
-    case Bytecodes::_putfield       : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); break;
+    case Bytecodes::_putfield       : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);        break;
     case Bytecodes::_invokevirtual  : // fall through
     case Bytecodes::_invokespecial  : // fall through
     case Bytecodes::_invokestatic   : // fall through
-    case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);  break;
-    case Bytecodes::_invokedynamic  : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); break;
-    case Bytecodes::_fast_aldc      : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);     break;
-    case Bytecodes::_fast_aldc_w    : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);     break;
-    default                         : ShouldNotReachHere();                                 break;
+    case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);         break;
+    case Bytecodes::_invokehandle   : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);   break;
+    case Bytecodes::_invokedynamic  : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);  break;
+    case Bytecodes::_fast_aldc      : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);            break;
+    case Bytecodes::_fast_aldc_w    : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);            break;
+    default:
+      fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
+      break;
   }
   __ movl(temp, (int)bytecode());
   __ call_VM(noreg, entry, temp);
@@ -2149,7 +2152,7 @@
   __ movl(flags, Address(cache, index, Address::times_ptr,
            in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())));
 
-  // klass     overwrite register
+  // klass overwrite register
   if (is_static) {
     __ movptr(obj, Address(cache, index, Address::times_ptr,
                            in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())));
@@ -2161,7 +2164,7 @@
                                                Register itable_index,
                                                Register flags,
                                                bool is_invokevirtual,
-                                               bool is_invokevfinal /*unused*/,
+                                               bool is_invokevfinal, /*unused*/
                                                bool is_invokedynamic) {
   // setup registers
   const Register cache = rcx;
@@ -2171,28 +2174,33 @@
   assert_different_registers(itable_index, flags);
   assert_different_registers(itable_index, cache, index);
   // determine constant pool cache field offsets
+  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
   const int method_offset = in_bytes(
     constantPoolCacheOopDesc::base_offset() +
-      (is_invokevirtual
+      ((byte_no == f2_byte)
        ? ConstantPoolCacheEntry::f2_offset()
-       : ConstantPoolCacheEntry::f1_offset()
-      )
-    );
+       : ConstantPoolCacheEntry::f1_offset()));
   const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
                                     ConstantPoolCacheEntry::flags_offset());
   // access constant pool cache fields
   const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
                                     ConstantPoolCacheEntry::f2_offset());
 
-  if (byte_no == f1_oop) {
-    // Resolved f1_oop goes directly into 'method' register.
-    assert(is_invokedynamic, "");
-    resolve_cache_and_index(byte_no, method, cache, index, sizeof(u4));
+  if (byte_no == f12_oop) {
+    // Resolved f1_oop (CallSite, MethodType, etc.) goes into 'itable_index'.
+    // Resolved f2_oop (methodOop invoker) will go into 'method' (at index_offset).
+    // See ConstantPoolCacheEntry::set_dynamic_call and set_method_handle.
+    size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
+    resolve_cache_and_index(byte_no, itable_index, cache, index, index_size);
+    __ movptr(method, Address(cache, index, Address::times_ptr, index_offset));
+    itable_index = noreg;  // hack to disable load below
   } else {
     resolve_cache_and_index(byte_no, noreg, cache, index, sizeof(u2));
     __ movptr(method, Address(cache, index, Address::times_ptr, method_offset));
   }
   if (itable_index != noreg) {
+    // pick up itable index from f2 also:
+    assert(byte_no == f1_byte, "already picked up f1");
     __ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
   }
   __ movl(flags, Address(cache, index, Address::times_ptr, flags_offset));
@@ -2260,10 +2268,10 @@
 
   Label Done, notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
 
-  __ shrl(flags, ConstantPoolCacheEntry::tosBits);
+  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
   assert(btos == 0, "change code, btos != 0");
   // btos
-  __ andptr(flags, 0x0f);
+  __ andptr(flags, ConstantPoolCacheEntry::tos_state_mask);
   __ jcc(Assembler::notZero, notByte);
 
   __ load_signed_byte(rax, lo );
@@ -2415,9 +2423,9 @@
       __ movl(rcx, Address(rax, rdx, Address::times_ptr, in_bytes(cp_base_offset +
                                    ConstantPoolCacheEntry::flags_offset())));
       __ mov(rbx, rsp);
-      __ shrl(rcx, ConstantPoolCacheEntry::tosBits);
-      // Make sure we don't need to mask rcx for tosBits after the above shift
-      ConstantPoolCacheEntry::verify_tosBits();
+      __ shrl(rcx, ConstantPoolCacheEntry::tos_state_shift);
+      // Make sure we don't need to mask rcx after the above shift
+      ConstantPoolCacheEntry::verify_tos_state_shift();
       __ cmpl(rcx, ltos);
       __ jccb(Assembler::equal, two_word);
       __ cmpl(rcx, dtos);
@@ -2467,7 +2475,7 @@
 
   Label notVolatile, Done;
   __ movl(rdx, flags);
-  __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
+  __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
   __ andl(rdx, 0x1);
 
   // field addresses
@@ -2476,9 +2484,9 @@
 
   Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
 
-  __ shrl(flags, ConstantPoolCacheEntry::tosBits);
+  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
   assert(btos == 0, "change code, btos != 0");
-  __ andl(flags, 0x0f);
+  __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);
   __ jcc(Assembler::notZero, notByte);
 
   // btos
@@ -2651,56 +2659,49 @@
     // Check to see if a field modification watch has been set before we take
     // the time to call into the VM.
     Label L2;
-    __ mov32(rcx, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
-    __ testl(rcx,rcx);
-    __ jcc(Assembler::zero, L2);
-    __ pop_ptr(rbx);               // copy the object pointer from tos
-    __ verify_oop(rbx);
-    __ push_ptr(rbx);              // put the object pointer back on tos
-    __ subptr(rsp, sizeof(jvalue));  // add space for a jvalue object
-    __ mov(rcx, rsp);
-    __ push_ptr(rbx);                 // save object pointer so we can steal rbx,
-    __ xorptr(rbx, rbx);
-    const Address lo_value(rcx, rbx, Address::times_1, 0*wordSize);
-    const Address hi_value(rcx, rbx, Address::times_1, 1*wordSize);
-    switch (bytecode()) {          // load values into the jvalue object
-    case Bytecodes::_fast_bputfield: __ movb(lo_value, rax); break;
-    case Bytecodes::_fast_sputfield: __ movw(lo_value, rax); break;
-    case Bytecodes::_fast_cputfield: __ movw(lo_value, rax); break;
-    case Bytecodes::_fast_iputfield: __ movl(lo_value, rax);                         break;
-    case Bytecodes::_fast_lputfield:
-      NOT_LP64(__ movptr(hi_value, rdx));
-      __ movptr(lo_value, rax);
-      break;
-
-    // need to call fld_s() after fstp_s() to restore the value for below
-    case Bytecodes::_fast_fputfield: __ fstp_s(lo_value); __ fld_s(lo_value);        break;
-
-    // need to call fld_d() after fstp_d() to restore the value for below
-    case Bytecodes::_fast_dputfield: __ fstp_d(lo_value); __ fld_d(lo_value);        break;
-
-    // since rcx is not an object we don't call store_check() here
-    case Bytecodes::_fast_aputfield: __ movptr(lo_value, rax);                       break;
-
-    default:  ShouldNotReachHere();
-    }
-    __ pop_ptr(rbx);  // restore copy of object pointer
-
-    // Save rax, and sometimes rdx because call_VM() will clobber them,
-    // then use them for JVM/DI purposes
-    __ push(rax);
-    if (bytecode() == Bytecodes::_fast_lputfield) __ push(rdx);
-    // access constant pool cache entry
-    __ get_cache_entry_pointer_at_bcp(rax, rdx, 1);
-    __ verify_oop(rbx);
-    // rbx,: object pointer copied above
-    // rax,: cache entry pointer
-    // rcx: jvalue object on the stack
-    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, rax, rcx);
-    if (bytecode() == Bytecodes::_fast_lputfield) __ pop(rdx);  // restore high value
-    __ pop(rax);     // restore lower value
-    __ addptr(rsp, sizeof(jvalue));  // release jvalue object space
-    __ bind(L2);
+     __ mov32(rcx, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
+     __ testl(rcx,rcx);
+     __ jcc(Assembler::zero, L2);
+     __ pop_ptr(rbx);               // copy the object pointer from tos
+     __ verify_oop(rbx);
+     __ push_ptr(rbx);              // put the object pointer back on tos
+
+     // Save tos values before call_VM() clobbers them. Since we have
+     // to do it for every data type, we use the saved values as the
+     // jvalue object.
+     switch (bytecode()) {          // load values into the jvalue object
+     case Bytecodes::_fast_aputfield: __ push_ptr(rax); break;
+     case Bytecodes::_fast_bputfield: // fall through
+     case Bytecodes::_fast_sputfield: // fall through
+     case Bytecodes::_fast_cputfield: // fall through
+     case Bytecodes::_fast_iputfield: __ push_i(rax); break;
+     case Bytecodes::_fast_dputfield: __ push_d(); break;
+     case Bytecodes::_fast_fputfield: __ push_f(); break;
+     case Bytecodes::_fast_lputfield: __ push_l(rax); break;
+
+     default:
+       ShouldNotReachHere();
+     }
+     __ mov(rcx, rsp);              // points to jvalue on the stack
+     // access constant pool cache entry
+     __ get_cache_entry_pointer_at_bcp(rax, rdx, 1);
+     __ verify_oop(rbx);
+     // rbx,: object pointer copied above
+     // rax,: cache entry pointer
+     // rcx: jvalue object on the stack
+     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, rax, rcx);
+
+     switch (bytecode()) {             // restore tos values
+     case Bytecodes::_fast_aputfield: __ pop_ptr(rax); break;
+     case Bytecodes::_fast_bputfield: // fall through
+     case Bytecodes::_fast_sputfield: // fall through
+     case Bytecodes::_fast_cputfield: // fall through
+     case Bytecodes::_fast_iputfield: __ pop_i(rax); break;
+     case Bytecodes::_fast_dputfield: __ pop_d(); break;
+     case Bytecodes::_fast_fputfield: __ pop_f(); break;
+     case Bytecodes::_fast_lputfield: __ pop_l(rax); break;
+     }
+     __ bind(L2);
   }
 }
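
The hunk above replaces the old scratch-jvalue stores with plain interpreter pushes: the tos value pushed before the VM call is itself the jvalue handed to post_field_modification, and popping it afterwards restores tos. A minimal standalone model of that trick follows; the union layout and names are illustrative, not HotSpot's real jvalue.

    #include <cstdint>
    #include <cstdio>

    // Model: the slot just pushed on the operand stack doubles as the jvalue,
    // so nothing has to survive call_VM in a register.
    union jvalue_model { int32_t i; int64_t j; float f; double d; void* l; };

    static void post_field_modification(const jvalue_model* jv) {
      std::printf("JVMTI sees new value %d\n", (int)jv->i);  // stands in for call_VM
    }

    int main() {
      jvalue_model stack[8];
      int top = 0;
      stack[top++].i = 42;                      // push_i(rax): save tos
      post_field_modification(&stack[top - 1]); // rcx = rsp points at the jvalue
      int restored = (int)stack[--top].i;       // pop_i(rax): restore tos
      std::printf("tos restored to %d\n", restored);
      return 0;
    }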
 
@@ -2726,7 +2727,7 @@
   // volatile_barrier( );
 
   Label notVolatile, Done;
-  __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
+  __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
   __ andl(rdx, 0x1);
   // Check for volatile store
   __ testl(rdx, rdx);
@@ -2892,19 +2893,29 @@
 }
 
 
-void TemplateTable::prepare_invoke(Register method, Register index, int byte_no) {
+void TemplateTable::prepare_invoke(int byte_no,
+                                   Register method,  // linked method (or i-klass)
+                                   Register index,   // itable index, MethodType, etc.
+                                   Register recv,    // if caller wants to see it
+                                   Register flags    // if caller wants to test it
+                                   ) {
   // determine flags
-  Bytecodes::Code code = bytecode();
+  const Bytecodes::Code code = bytecode();
   const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
   const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
+  const bool is_invokehandle     = code == Bytecodes::_invokehandle;
   const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
   const bool is_invokespecial    = code == Bytecodes::_invokespecial;
-  const bool load_receiver      = (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic);
-  const bool receiver_null_check = is_invokespecial;
-  const bool save_flags = is_invokeinterface || is_invokevirtual;
+  const bool load_receiver       = (recv  != noreg);
+  const bool save_flags          = (flags != noreg);
+  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
+  assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
+  assert(flags == noreg || flags == rdx, "");
+  assert(recv  == noreg || recv  == rcx, "");
+
   // setup registers & access constant pool cache
-  const Register recv   = rcx;
-  const Register flags  = rdx;
+  if (recv  == noreg)  recv  = rcx;
+  if (flags == noreg)  flags = rdx;
   assert_different_registers(method, index, recv, flags);
 
   // save 'interpreter return address'
@@ -2912,37 +2923,43 @@
 
   load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
 
+  // maybe push appendix to arguments (just before return address)
+  if (is_invokedynamic || is_invokehandle) {
+    Label L_no_push;
+    __ verify_oop(index);
+    __ testl(flags, (1 << ConstantPoolCacheEntry::has_appendix_shift));
+    __ jccb(Assembler::zero, L_no_push);
+    // Push the appendix as a trailing parameter.
+    // This must be done before we get the receiver,
+    // since the parameter_size includes it.
+    __ push(index);  // push appendix (MethodType, CallSite, etc.)
+    __ bind(L_no_push);
+  }
+
   // load receiver if needed (note: no return address pushed yet)
   if (load_receiver) {
-    assert(!is_invokedynamic, "");
     __ movl(recv, flags);
-    __ andl(recv, 0xFF);
-    // recv count is 0 based?
-    Address recv_addr(rsp, recv, Interpreter::stackElementScale(), -Interpreter::expr_offset_in_bytes(1));
+    __ andl(recv, ConstantPoolCacheEntry::parameter_size_mask);
+    const int no_return_pc_pushed_yet = -1;  // argument slot correction before we push return address
+    const int receiver_is_at_end      = -1;  // back off one slot to get receiver
+    Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
     __ movptr(recv, recv_addr);
     __ verify_oop(recv);
   }
 
-  // do null check if needed
-  if (receiver_null_check) {
-    __ null_check(recv);
-  }
-
   if (save_flags) {
     __ mov(rsi, flags);
   }
 
   // compute return type
-  __ shrl(flags, ConstantPoolCacheEntry::tosBits);
-  // Make sure we don't need to mask flags for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
+  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
+  // Make sure we don't need to mask flags after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
   // load return address
   {
-    address table_addr;
-    if (is_invokeinterface || is_invokedynamic)
-      table_addr = (address)Interpreter::return_5_addrs_by_index_table();
-    else
-      table_addr = (address)Interpreter::return_3_addrs_by_index_table();
+    const address table_addr = (is_invokeinterface || is_invokedynamic) ?
+        (address)Interpreter::return_5_addrs_by_index_table() :
+        (address)Interpreter::return_3_addrs_by_index_table();
     ExternalAddress table(table_addr);
     __ movptr(flags, ArrayAddress(table, Address(noreg, flags, Address::times_ptr)));
   }
@@ -2950,7 +2967,7 @@
   // push return address
   __ push(flags);
 
-  // Restore flag value from the constant pool cache, and restore rsi
+  // Restore flags value from the constant pool cache, and restore rsi
   // for later null checks.  rsi is the bytecode pointer
   if (save_flags) {
     __ mov(flags, rsi);
@@ -2959,22 +2976,26 @@
 }
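
In the rewritten prepare_invoke the receiver slot is derived from the parameter size held in the low flags bits, and the optional appendix is pushed first because parameter_size already counts it. With no return pc pushed yet, the receiver sits parameter_size - 1 slots above the stack pointer. A standalone sketch of that arithmetic, with an assumed mask value:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t parameter_size_mask = 0xFF;  // assumption for the sketch
      const int wordSize = 4;                     // 32-bit interpreter stack slots
      uint32_t flags = 0x03;                      // low bits: parameter_size == 3
      uint32_t parameter_size = flags & parameter_size_mask;
      uintptr_t rsp = 0x7000;                     // pretend stack pointer
      // receiver is the deepest argument and no return pc is pushed yet:
      uintptr_t recv_addr = rsp + (parameter_size - 1) * wordSize;
      std::printf("receiver at rsp + %u bytes\n", (unsigned)(recv_addr - rsp));
      return 0;
    }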
 
 
-void TemplateTable::invokevirtual_helper(Register index, Register recv,
-                        Register flags) {
-
+void TemplateTable::invokevirtual_helper(Register index,
+                                         Register recv,
+                                         Register flags) {
   // Uses temporary registers rax, rdx
   assert_different_registers(index, recv, rax, rdx);
+  assert(index == rbx, "");
+  assert(recv  == rcx, "");
 
   // Test for an invoke of a final method
   Label notFinal;
   __ movl(rax, flags);
-  __ andl(rax, (1 << ConstantPoolCacheEntry::vfinalMethod));
+  __ andl(rax, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
   __ jcc(Assembler::zero, notFinal);
 
-  Register method = index;  // method must be rbx,
-  assert(method == rbx, "methodOop must be rbx, for interpreter calling convention");
+  const Register method = index;  // method must be rbx
+  assert(method == rbx,
+         "methodOop must be rbx for interpreter calling convention");
 
   // do the call - the index is actually the method to call
+  // that is, f2 is a vtable index if !is_vfinal, else f2 is a methodOop
   __ verify_oop(method);
 
   // It's final, need a null check here!
@@ -2989,7 +3010,6 @@
 
   // get receiver klass
   __ null_check(recv, oopDesc::klass_offset_in_bytes());
-  // Keep recv in rcx for callee expects it there
   __ load_klass(rax, recv);
   __ verify_oop(rax);
 
@@ -2997,9 +3017,7 @@
   __ profile_virtual_call(rax, rdi, rdx);
 
   // get target methodOop & entry point
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
-  __ movptr(method, Address(rax, index, Address::times_ptr, base + vtableEntry::method_offset_in_bytes()));
+  __ lookup_virtual_method(rax, index, method);
   __ jump_from_interpreted(method, rdx);
 }
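
invokevirtual_helper keys off the is_vfinal bit: the same f2 word is a methodOop for final methods and a vtable index otherwise. A tiny model of that decision; the shift value is an assumption, not the real ConstantPoolCacheEntry constant.

    #include <cstdint>
    #include <cstdio>

    const int is_vfinal_shift = 21;  // illustrative bit position

    int main() {
      uint32_t flags = 1u << is_vfinal_shift;  // pretend the call site is vfinal
      if (flags & (1u << is_vfinal_shift)) {
        std::puts("f2 is the methodOop: null-check receiver, call it directly");
      } else {
        std::puts("f2 is a vtable index: load klass, lookup_virtual_method");
      }
      return 0;
    }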
 
@@ -3007,9 +3025,12 @@
 void TemplateTable::invokevirtual(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f2_byte, "use this argument");
-  prepare_invoke(rbx, noreg, byte_no);
-
-  // rbx,: index
+  prepare_invoke(byte_no,
+                 rbx,    // method or vtable index
+                 noreg,  // unused itable index
+                 rcx, rdx); // recv, flags
+
+  // rbx: index
   // rcx: receiver
   // rdx: flags
 
@@ -3020,7 +3041,10 @@
 void TemplateTable::invokespecial(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
-  prepare_invoke(rbx, noreg, byte_no);
+  prepare_invoke(byte_no, rbx, noreg,  // get f1 methodOop
+                 rcx);  // get receiver also for null check
+  __ verify_oop(rcx);
+  __ null_check(rcx);
   // do the call
   __ verify_oop(rbx);
   __ profile_call(rax);
@@ -3031,7 +3055,7 @@
 void TemplateTable::invokestatic(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
-  prepare_invoke(rbx, noreg, byte_no);
+  prepare_invoke(byte_no, rbx);  // get f1 methodOop
   // do the call
   __ verify_oop(rbx);
   __ profile_call(rax);
@@ -3049,10 +3073,11 @@
 void TemplateTable::invokeinterface(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
-  prepare_invoke(rax, rbx, byte_no);
-
-  // rax,: Interface
-  // rbx,: index
+  prepare_invoke(byte_no, rax, rbx,  // get f1 klassOop, f2 itable index
+                 rcx, rdx); // recv, flags
+
+  // rax: interface klass (from f1)
+  // rbx: itable index (from f2)
   // rcx: receiver
   // rdx: flags
 
@@ -3062,7 +3087,7 @@
   // another compliant java compiler.
   Label notMethod;
   __ movl(rdi, rdx);
-  __ andl(rdi, (1 << ConstantPoolCacheEntry::methodInterface));
+  __ andl(rdi, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift));
   __ jcc(Assembler::zero, notMethod);
 
   invokevirtual_helper(rbx, rcx, rdx);
@@ -3070,6 +3095,7 @@
 
   // Get receiver klass into rdx - also a null check
   __ restore_locals();  // restore rdi
+  __ null_check(rcx, oopDesc::klass_offset_in_bytes());
   __ load_klass(rdx, rcx);
   __ verify_oop(rdx);
 
@@ -3084,7 +3110,7 @@
                              rbx, rsi,
                              no_such_interface);
 
-  // rbx,: methodOop to call
+  // rbx: methodOop to call
   // rcx: receiver
   // Check for abstract method error
   // Note: This should be done more efficiently via a throw_abstract_method_error
@@ -3123,9 +3149,39 @@
   __ should_not_reach_here();
 }
 
+void TemplateTable::invokehandle(int byte_no) {
+  transition(vtos, vtos);
+  assert(byte_no == f12_oop, "use this argument");
+  const Register rbx_method = rbx;  // (from f2)
+  const Register rax_mtype  = rax;  // (from f1)
+  const Register rcx_recv   = rcx;
+  const Register rdx_flags  = rdx;
+
+  if (!EnableInvokeDynamic) {
+    // rewriter does not generate this bytecode
+    __ should_not_reach_here();
+    return;
+  }
+
+  prepare_invoke(byte_no,
+                 rbx_method, rax_mtype,  // get f2 methodOop, f1 MethodType
+                 rcx_recv);
+  __ verify_oop(rbx_method);
+  __ verify_oop(rcx_recv);
+  __ null_check(rcx_recv);
+
+  // Note:  rax_mtype is already pushed (if necessary) by prepare_invoke
+
+  // FIXME: profile the LambdaForm also
+  __ profile_final_call(rax);
+
+  __ jump_from_interpreted(rbx_method, rdx);
+}
+
+
 void TemplateTable::invokedynamic(int byte_no) {
   transition(vtos, vtos);
-  assert(byte_no == f1_oop, "use this argument");
+  assert(byte_no == f12_oop, "use this argument");
 
   if (!EnableInvokeDynamic) {
     // We should not encounter this bytecode if !EnableInvokeDynamic.
@@ -3138,26 +3194,23 @@
     return;
   }
 
-  prepare_invoke(rax, rbx, byte_no);
-
-  // rax: CallSite object (f1)
-  // rbx: unused (f2)
-  // rcx: receiver address
-  // rdx: flags (unused)
-
-  Register rax_callsite      = rax;
-  Register rcx_method_handle = rcx;
+  const Register rbx_method   = rbx;
+  const Register rax_callsite = rax;
+
+  prepare_invoke(byte_no, rbx_method, rax_callsite);
+
+  // rax: CallSite object (from f1)
+  // rbx: MH.linkToCallSite method (from f2)
+
+  // Note:  rax_callsite is already pushed by prepare_invoke
 
   // %%% should make a type profile for any invokedynamic that takes a ref argument
   // profile this call
   __ profile_call(rsi);
 
   __ verify_oop(rax_callsite);
-  __ load_heap_oop(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rdx)));
-  __ null_check(rcx_method_handle);
-  __ verify_oop(rcx_method_handle);
-  __ prepare_to_jump_from_interpreted();
-  __ jump_to_method_handle_entry(rcx_method_handle, rdx);
+
+  __ jump_from_interpreted(rbx_method, rdx);
 }
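
The rewritten invokedynamic no longer loads the CallSite target and jumps through a method-handle entry on every call; it jumps straight to the f2 linker method, while the CallSite oop rides along as the appendix argument pushed by prepare_invoke. A toy model of the new shape, with illustrative struct names:

    #include <cstdio>

    struct MethodModel   { const char* name; };
    struct CallSiteModel { MethodModel* linker; };

    int main() {
      MethodModel   link_to_call_site{"MH.linkToCallSite"};  // resolved into f2
      CallSiteModel cs{&link_to_call_site};
      // the CallSite itself was pushed as the trailing appendix argument,
      // so the linker method can locate it without a per-call target load
      std::printf("jump_from_interpreted -> %s\n", cs.linker->name);
      return 0;
    }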
 
 //----------------------------------------------------------------------------------------------------
--- a/src/cpu/x86/vm/templateTable_x86_32.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/templateTable_x86_32.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -25,10 +25,15 @@
 #ifndef CPU_X86_VM_TEMPLATETABLE_X86_32_HPP
 #define CPU_X86_VM_TEMPLATETABLE_X86_32_HPP
 
-  static void prepare_invoke(Register method, Register index, int byte_no);
+  static void prepare_invoke(int byte_no,
+                             Register method,         // linked method (or i-klass)
+                             Register index = noreg,  // itable index, MethodType, etc.
+                             Register recv  = noreg,  // if caller wants to see it
+                             Register flags = noreg   // if caller wants to test it
+                             );
   static void invokevirtual_helper(Register index, Register recv,
                                    Register flags);
-  static void volatile_barrier(Assembler::Membar_mask_bits order_constraint );
+  static void volatile_barrier(Assembler::Membar_mask_bits order_constraint);
 
   // Helpers
   static void index_check(Register array, Register index);
--- a/src/cpu/x86/vm/templateTable_x86_64.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/templateTable_x86_64.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -458,7 +458,7 @@
   const Register cache = rcx;
   const Register index = rdx;
 
-  resolve_cache_and_index(f1_oop, rax, cache, index, wide ? sizeof(u2) : sizeof(u1));
+  resolve_cache_and_index(f12_oop, rax, cache, index, wide ? sizeof(u2) : sizeof(u1));
   if (VerifyOops) {
     __ verify_oop(rax);
   }
@@ -2125,10 +2125,11 @@
   assert_different_registers(result, Rcache, index, temp);
 
   Label resolved;
-  if (byte_no == f1_oop) {
-    // We are resolved if the f1 field contains a non-null object (CallSite, etc.)
-    // This kind of CP cache entry does not need to match the flags byte, because
+  if (byte_no == f12_oop) {
+    // We are resolved if the f1 field contains a non-null object (CallSite, MethodType, etc.)
+    // This kind of CP cache entry does not need to match bytecode_1 or bytecode_2, because
     // there is a 1-1 relation between bytecode type and CP entry type.
+    // The caller will also load a methodOop from f2.
     assert(result != noreg, ""); //else do cmpptr(Address(...), (int32_t) NULL_WORD)
     __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
     __ movptr(result, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset()));
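
As the comment in this hunk says, an f12_oop entry counts as resolved exactly when f1 holds a non-null oop; no bytecode byte needs comparing because entry type and bytecode correspond one-to-one. A standalone model of the check:

    #include <cstdio>

    struct CacheEntryModel { void* f1; void* f2; };  // models ConstantPoolCacheEntry

    static bool is_resolved(const CacheEntryModel& e) { return e.f1 != nullptr; }

    int main() {
      CacheEntryModel e{nullptr, nullptr};
      std::printf("resolved? %d\n", is_resolved(e));  // 0: take the resolve path
      static int call_site;                           // stands in for a CallSite oop
      e.f1 = &call_site;                              // resolver fills f1 (and f2)
      std::printf("resolved? %d\n", is_resolved(e));  // 1: branch to 'resolved'
      return 0;
    }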
@@ -2157,6 +2158,9 @@
   case Bytecodes::_invokeinterface:
     entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
     break;
+  case Bytecodes::_invokehandle:
+    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);
+    break;
   case Bytecodes::_invokedynamic:
     entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
     break;
@@ -2167,7 +2171,7 @@
     entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
     break;
   default:
-    ShouldNotReachHere();
+    fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
     break;
   }
   __ movl(temp, (int) bytecode());
@@ -2180,7 +2184,7 @@
   __ bind(resolved);
 }
 
-// The Rcache and index registers must be set before call
+// The cache and index registers must be set before call
 void TemplateTable::load_field_cp_cache_entry(Register obj,
                                               Register cache,
                                               Register index,
@@ -2191,17 +2195,17 @@
 
   ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
   // Field offset
-  __ movptr(off, Address(cache, index, Address::times_8,
+  __ movptr(off, Address(cache, index, Address::times_ptr,
                          in_bytes(cp_base_offset +
                                   ConstantPoolCacheEntry::f2_offset())));
   // Flags
-  __ movl(flags, Address(cache, index, Address::times_8,
+  __ movl(flags, Address(cache, index, Address::times_ptr,
                          in_bytes(cp_base_offset +
                                   ConstantPoolCacheEntry::flags_offset())));
 
   // klass overwrite register
   if (is_static) {
-    __ movptr(obj, Address(cache, index, Address::times_8,
+    __ movptr(obj, Address(cache, index, Address::times_ptr,
                            in_bytes(cp_base_offset +
                                     ConstantPoolCacheEntry::f1_offset())));
   }
@@ -2222,9 +2226,10 @@
   assert_different_registers(itable_index, flags);
   assert_different_registers(itable_index, cache, index);
   // determine constant pool cache field offsets
+  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
   const int method_offset = in_bytes(
     constantPoolCacheOopDesc::base_offset() +
-      (is_invokevirtual
+      ((byte_no == f2_byte)
        ? ConstantPoolCacheEntry::f2_offset()
        : ConstantPoolCacheEntry::f1_offset()));
   const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
@@ -2233,15 +2238,21 @@
   const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
                                     ConstantPoolCacheEntry::f2_offset());
 
-  if (byte_no == f1_oop) {
-    // Resolved f1_oop goes directly into 'method' register.
-    assert(is_invokedynamic, "");
-    resolve_cache_and_index(byte_no, method, cache, index, sizeof(u4));
+  if (byte_no == f12_oop) {
+    // Resolved f1_oop (CallSite, MethodType, etc.) goes into 'itable_index'.
+    // Resolved f2_oop (methodOop invoker) will go into 'method' (at index_offset).
+    // See ConstantPoolCacheEntry::set_dynamic_call and set_method_handle.
+    size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
+    resolve_cache_and_index(byte_no, itable_index, cache, index, index_size);
+    __ movptr(method, Address(cache, index, Address::times_ptr, index_offset));
+    itable_index = noreg;  // hack to disable load below
   } else {
     resolve_cache_and_index(byte_no, noreg, cache, index, sizeof(u2));
     __ movptr(method, Address(cache, index, Address::times_ptr, method_offset));
   }
   if (itable_index != noreg) {
+    // pick up itable index from f2 also:
+    assert(byte_no == f1_byte, "already picked up f1");
     __ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
   }
   __ movl(flags, Address(cache, index, Address::times_ptr, flags_offset));
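
load_invoke_cp_cache_entry picks the method word by bytecode kind: f2 for invokevirtual, f1 for the other ordinary invokes, while f12_oop takes the special branch above that loads both words. A compact model of the ordinary-path offset selection, using an illustrative entry layout:

    #include <cstddef>
    #include <cstdio>

    struct EntryModel { void* f1; void* f2; };  // models ConstantPoolCacheEntry

    enum ByteNo { f1_byte, f2_byte };           // tag values are illustrative

    static size_t method_word_offset(ByteNo byte_no) {
      return (byte_no == f2_byte) ? offsetof(EntryModel, f2)
                                  : offsetof(EntryModel, f1);
    }

    int main() {
      std::printf("invokevirtual reads entry + %zu\n", method_word_offset(f2_byte));
      std::printf("invokespecial reads entry + %zu\n", method_word_offset(f1_byte));
      return 0;
    }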
@@ -2317,10 +2328,11 @@
   Label Done, notByte, notInt, notShort, notChar,
               notLong, notFloat, notObj, notDouble;
 
-  __ shrl(flags, ConstantPoolCacheEntry::tosBits);
+  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
+  // Make sure we don't need to mask edx after the above shift
   assert(btos == 0, "change code, btos != 0");
 
-  __ andl(flags, 0x0F);
+  __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);
   __ jcc(Assembler::notZero, notByte);
   // btos
   __ load_signed_byte(rax, field);
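
getfield decodes the tos state by shifting the flags word right and masking; because btos == 0, the first dispatch is a plain zero test. A standalone model in the spirit of tos_state_shift/tos_state_mask (both values below are assumptions):

    #include <cstdint>
    #include <cstdio>

    const int      tos_state_shift = 28;   // assumed bit position
    const uint32_t tos_state_mask  = 0x0F;
    const uint32_t btos            = 0;    // byte state is zero by design

    int main() {
      uint32_t flags = btos << tos_state_shift;           // pretend a byte field
      uint32_t state = (flags >> tos_state_shift) & tos_state_mask;
      if (state == btos) std::puts("btos: load_signed_byte");
      else               std::printf("dispatch on tos state %u\n", state);
      return 0;
    }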
@@ -2466,10 +2478,9 @@
                            Address::times_8,
                            in_bytes(cp_base_offset +
                                      ConstantPoolCacheEntry::flags_offset())));
-      __ shrl(c_rarg3, ConstantPoolCacheEntry::tosBits);
-      // Make sure we don't need to mask rcx for tosBits after the
-      // above shift
-      ConstantPoolCacheEntry::verify_tosBits();
+      __ shrl(c_rarg3, ConstantPoolCacheEntry::tos_state_shift);
+      // Make sure we don't need to mask rcx after the above shift
+      ConstantPoolCacheEntry::verify_tos_state_shift();
       __ movptr(c_rarg1, at_tos_p1());  // initially assume a one word jvalue
       __ cmpl(c_rarg3, ltos);
       __ cmovptr(Assembler::equal,
@@ -2516,7 +2527,7 @@
 
   Label notVolatile, Done;
   __ movl(rdx, flags);
-  __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
+  __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
   __ andl(rdx, 0x1);
 
   // field address
@@ -2525,10 +2536,10 @@
   Label notByte, notInt, notShort, notChar,
         notLong, notFloat, notObj, notDouble;
 
-  __ shrl(flags, ConstantPoolCacheEntry::tosBits);
+  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
 
   assert(btos == 0, "change code, btos != 0");
-  __ andl(flags, 0x0f);
+  __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);
   __ jcc(Assembler::notZero, notByte);
 
   // btos
@@ -2685,26 +2696,23 @@
     __ pop_ptr(rbx);                  // copy the object pointer from tos
     __ verify_oop(rbx);
     __ push_ptr(rbx);                 // put the object pointer back on tos
-    __ subptr(rsp, sizeof(jvalue));  // add space for a jvalue object
-    __ mov(c_rarg3, rsp);
-    const Address field(c_rarg3, 0);
-
+    // Save tos values before call_VM() clobbers them. Since we have
+    // to do it for every data type, we use the saved values as the
+    // jvalue object.
     switch (bytecode()) {          // load values into the jvalue object
-    case Bytecodes::_fast_aputfield: __ movq(field, rax); break;
-    case Bytecodes::_fast_lputfield: __ movq(field, rax); break;
-    case Bytecodes::_fast_iputfield: __ movl(field, rax); break;
-    case Bytecodes::_fast_bputfield: __ movb(field, rax); break;
+    case Bytecodes::_fast_aputfield: __ push_ptr(rax); break;
+    case Bytecodes::_fast_bputfield: // fall through
     case Bytecodes::_fast_sputfield: // fall through
-    case Bytecodes::_fast_cputfield: __ movw(field, rax); break;
-    case Bytecodes::_fast_fputfield: __ movflt(field, xmm0); break;
-    case Bytecodes::_fast_dputfield: __ movdbl(field, xmm0); break;
+    case Bytecodes::_fast_cputfield: // fall through
+    case Bytecodes::_fast_iputfield: __ push_i(rax); break;
+    case Bytecodes::_fast_dputfield: __ push_d(); break;
+    case Bytecodes::_fast_fputfield: __ push_f(); break;
+    case Bytecodes::_fast_lputfield: __ push_l(rax); break;
+
     default:
       ShouldNotReachHere();
     }
-
-    // Save rax because call_VM() will clobber it, then use it for
-    // JVMTI purposes
-    __ push(rax);
+    __ mov(c_rarg3, rsp);             // points to jvalue on the stack
     // access constant pool cache entry
     __ get_cache_entry_pointer_at_bcp(c_rarg2, rax, 1);
     __ verify_oop(rbx);
@@ -2715,8 +2723,17 @@
                CAST_FROM_FN_PTR(address,
                                 InterpreterRuntime::post_field_modification),
                rbx, c_rarg2, c_rarg3);
-    __ pop(rax);     // restore lower value
-    __ addptr(rsp, sizeof(jvalue));  // release jvalue object space
+
+    switch (bytecode()) {             // restore tos values
+    case Bytecodes::_fast_aputfield: __ pop_ptr(rax); break;
+    case Bytecodes::_fast_bputfield: // fall through
+    case Bytecodes::_fast_sputfield: // fall through
+    case Bytecodes::_fast_cputfield: // fall through
+    case Bytecodes::_fast_iputfield: __ pop_i(rax); break;
+    case Bytecodes::_fast_dputfield: __ pop_d(); break;
+    case Bytecodes::_fast_fputfield: __ pop_f(); break;
+    case Bytecodes::_fast_lputfield: __ pop_l(rax); break;
+    }
     __ bind(L2);
   }
 }
@@ -2745,7 +2762,7 @@
   //                                              Assembler::StoreStore));
 
   Label notVolatile;
-  __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
+  __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
   __ andl(rdx, 0x1);
 
   // Get object from stack
@@ -2826,7 +2843,7 @@
   //   __ movl(rdx, Address(rcx, rbx, Address::times_8,
   //                        in_bytes(constantPoolCacheOopDesc::base_offset() +
   //                                 ConstantPoolCacheEntry::flags_offset())));
-  //   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
+  //   __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
   //   __ andl(rdx, 0x1);
   // }
   __ movptr(rbx, Address(rcx, rbx, Address::times_8,
@@ -2914,7 +2931,7 @@
   //   __ movl(rdx, Address(rcx, rdx, Address::times_8,
   //                        in_bytes(constantPoolCacheOopDesc::base_offset() +
   //                                 ConstantPoolCacheEntry::flags_offset())));
-  //   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
+  //   __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
   //   __ testl(rdx, 0x1);
   //   __ jcc(Assembler::zero, notVolatile);
   //   __ membar(Assembler::LoadLoad);
@@ -2934,19 +2951,29 @@
   ShouldNotReachHere();
 }
 
-void TemplateTable::prepare_invoke(Register method, Register index, int byte_no) {
+void TemplateTable::prepare_invoke(int byte_no,
+                                   Register method,  // linked method (or i-klass)
+                                   Register index,   // itable index, MethodType, etc.
+                                   Register recv,    // if caller wants to see it
+                                   Register flags    // if caller wants to test it
+                                   ) {
   // determine flags
-  Bytecodes::Code code = bytecode();
+  const Bytecodes::Code code = bytecode();
   const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
   const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
+  const bool is_invokehandle     = code == Bytecodes::_invokehandle;
   const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
   const bool is_invokespecial    = code == Bytecodes::_invokespecial;
-  const bool load_receiver      = (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic);
-  const bool receiver_null_check = is_invokespecial;
-  const bool save_flags = is_invokeinterface || is_invokevirtual;
+  const bool load_receiver       = (recv  != noreg);
+  const bool save_flags          = (flags != noreg);
+  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
+  assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
+  assert(flags == noreg || flags == rdx, "");
+  assert(recv  == noreg || recv  == rcx, "");
+
   // setup registers & access constant pool cache
-  const Register recv   = rcx;
-  const Register flags  = rdx;
+  if (recv  == noreg)  recv  = rcx;
+  if (flags == noreg)  flags = rdx;
   assert_different_registers(method, index, recv, flags);
 
   // save 'interpreter return address'
@@ -2954,36 +2981,44 @@
 
   load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
 
-  // load receiver if needed (note: no return address pushed yet)
+  // maybe push appendix to arguments (just before return address)
+  if (is_invokedynamic || is_invokehandle) {
+    Label L_no_push;
+    __ verify_oop(index);
+    __ testl(flags, (1 << ConstantPoolCacheEntry::has_appendix_shift));
+    __ jccb(Assembler::zero, L_no_push);
+    // Push the appendix as a trailing parameter.
+    // This must be done before we get the receiver,
+    // since the parameter_size includes it.
+    __ push(index);  // push appendix (MethodType, CallSite, etc.)
+    __ bind(L_no_push);
+  }
+
+  // load receiver if needed (after appendix is pushed so parameter size is correct)
+  // Note: no return address pushed yet
   if (load_receiver) {
-    assert(!is_invokedynamic, "");
     __ movl(recv, flags);
-    __ andl(recv, 0xFF);
-    Address recv_addr(rsp, recv, Address::times_8, -Interpreter::expr_offset_in_bytes(1));
+    __ andl(recv, ConstantPoolCacheEntry::parameter_size_mask);
+    const int no_return_pc_pushed_yet = -1;  // argument slot correction before we push return address
+    const int receiver_is_at_end      = -1;  // back off one slot to get receiver
+    Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
     __ movptr(recv, recv_addr);
     __ verify_oop(recv);
   }
 
-  // do null check if needed
-  if (receiver_null_check) {
-    __ null_check(recv);
-  }
-
   if (save_flags) {
     __ movl(r13, flags);
   }
 
   // compute return type
-  __ shrl(flags, ConstantPoolCacheEntry::tosBits);
-  // Make sure we don't need to mask flags for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
+  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
+  // Make sure we don't need to mask flags after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
   // load return address
   {
-    address table_addr;
-    if (is_invokeinterface || is_invokedynamic)
-      table_addr = (address)Interpreter::return_5_addrs_by_index_table();
-    else
-      table_addr = (address)Interpreter::return_3_addrs_by_index_table();
+    const address table_addr = (is_invokeinterface || is_invokedynamic) ?
+        (address)Interpreter::return_5_addrs_by_index_table() :
+        (address)Interpreter::return_3_addrs_by_index_table();
     ExternalAddress table(table_addr);
     __ lea(rscratch1, table);
     __ movptr(flags, Address(rscratch1, flags, Address::times_ptr));
@@ -2992,7 +3027,7 @@
   // push return address
   __ push(flags);
 
-  // Restore flag field from the constant pool cache, and restore esi
+  // Restore flags value from the constant pool cache, and restore r13
   // for later null checks.  r13 is the bytecode pointer
   if (save_flags) {
     __ movl(flags, r13);
@@ -3006,11 +3041,13 @@
                                          Register flags) {
   // Uses temporary registers rax, rdx
   assert_different_registers(index, recv, rax, rdx);
+  assert(index == rbx, "");
+  assert(recv  == rcx, "");
 
   // Test for an invoke of a final method
   Label notFinal;
   __ movl(rax, flags);
-  __ andl(rax, (1 << ConstantPoolCacheEntry::vfinalMethod));
+  __ andl(rax, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
   __ jcc(Assembler::zero, notFinal);
 
   const Register method = index;  // method must be rbx
@@ -3018,6 +3055,7 @@
          "methodOop must be rbx for interpreter calling convention");
 
   // do the call - the index is actually the method to call
+  // that is, f2 is a vtable index if !is_vfinal, else f2 is a methodOop
   __ verify_oop(method);
 
   // It's final, need a null check here!
@@ -3033,20 +3071,13 @@
   // get receiver klass
   __ null_check(recv, oopDesc::klass_offset_in_bytes());
   __ load_klass(rax, recv);
-
   __ verify_oop(rax);
 
   // profile this call
   __ profile_virtual_call(rax, r14, rdx);
 
   // get target methodOop & entry point
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  assert(vtableEntry::size() * wordSize == 8,
-         "adjust the scaling in the code below");
-  __ movptr(method, Address(rax, index,
-                                 Address::times_8,
-                                 base + vtableEntry::method_offset_in_bytes()));
-  __ movptr(rdx, Address(method, methodOopDesc::interpreter_entry_offset()));
+  __ lookup_virtual_method(rax, index, method);
   __ jump_from_interpreted(method, rdx);
 }
 
@@ -3054,7 +3085,10 @@
 void TemplateTable::invokevirtual(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f2_byte, "use this argument");
-  prepare_invoke(rbx, noreg, byte_no);
+  prepare_invoke(byte_no,
+                 rbx,    // method or vtable index
+                 noreg,  // unused itable index
+                 rcx, rdx); // recv, flags
 
   // rbx: index
   // rcx: receiver
@@ -3067,7 +3101,10 @@
 void TemplateTable::invokespecial(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
-  prepare_invoke(rbx, noreg, byte_no);
+  prepare_invoke(byte_no, rbx, noreg,  // get f1 methodOop
+                 rcx);  // get receiver also for null check
+  __ verify_oop(rcx);
+  __ null_check(rcx);
   // do the call
   __ verify_oop(rbx);
   __ profile_call(rax);
@@ -3078,7 +3115,7 @@
 void TemplateTable::invokestatic(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
-  prepare_invoke(rbx, noreg, byte_no);
+  prepare_invoke(byte_no, rbx);  // get f1 methodOop
   // do the call
   __ verify_oop(rbx);
   __ profile_call(rax);
@@ -3094,10 +3131,11 @@
 void TemplateTable::invokeinterface(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
-  prepare_invoke(rax, rbx, byte_no);
-
-  // rax: Interface
-  // rbx: index
+  prepare_invoke(byte_no, rax, rbx,  // get f1 klassOop, f2 itable index
+                 rcx, rdx); // recv, flags
+
+  // rax: interface klass (from f1)
+  // rbx: itable index (from f2)
   // rcx: receiver
   // rdx: flags
 
@@ -3107,14 +3145,15 @@
   // another compliant java compiler.
   Label notMethod;
   __ movl(r14, rdx);
-  __ andl(r14, (1 << ConstantPoolCacheEntry::methodInterface));
+  __ andl(r14, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift));
   __ jcc(Assembler::zero, notMethod);
 
   invokevirtual_helper(rbx, rcx, rdx);
   __ bind(notMethod);
 
   // Get receiver klass into rdx - also a null check
-  __ restore_locals(); // restore r14
+  __ restore_locals();  // restore r14
+  __ null_check(rcx, oopDesc::klass_offset_in_bytes());
   __ load_klass(rdx, rcx);
   __ verify_oop(rdx);
 
@@ -3129,7 +3168,7 @@
                              rbx, r13,
                              no_such_interface);
 
-  // rbx,: methodOop to call
+  // rbx: methodOop to call
   // rcx: receiver
   // Check for abstract method error
   // Note: This should be done more efficiently via a throw_abstract_method_error
@@ -3166,12 +3205,42 @@
                    InterpreterRuntime::throw_IncompatibleClassChangeError));
   // the call_VM checks for exception, so we should never return here.
   __ should_not_reach_here();
-  return;
 }
 
+
+void TemplateTable::invokehandle(int byte_no) {
+  transition(vtos, vtos);
+  assert(byte_no == f12_oop, "use this argument");
+  const Register rbx_method = rbx;  // f2
+  const Register rax_mtype  = rax;  // f1
+  const Register rcx_recv   = rcx;
+  const Register rdx_flags  = rdx;
+
+  if (!EnableInvokeDynamic) {
+    // rewriter does not generate this bytecode
+    __ should_not_reach_here();
+    return;
+  }
+
+  prepare_invoke(byte_no,
+                 rbx_method, rax_mtype,  // get f2 methodOop, f1 MethodType
+                 rcx_recv);
+  __ verify_oop(rbx_method);
+  __ verify_oop(rcx_recv);
+  __ null_check(rcx_recv);
+
+  // Note:  rax_mtype is already pushed (if necessary) by prepare_invoke
+
+  // FIXME: profile the LambdaForm also
+  __ profile_final_call(rax);
+
+  __ jump_from_interpreted(rbx_method, rdx);
+}
+
+
 void TemplateTable::invokedynamic(int byte_no) {
   transition(vtos, vtos);
-  assert(byte_no == f1_oop, "use this argument");
+  assert(byte_no == f12_oop, "use this argument");
 
   if (!EnableInvokeDynamic) {
     // We should not encounter this bytecode if !EnableInvokeDynamic.
@@ -3184,26 +3253,23 @@
     return;
   }
 
-  prepare_invoke(rax, rbx, byte_no);
-
-  // rax: CallSite object (f1)
-  // rbx: unused (f2)
-  // rcx: receiver address
-  // rdx: flags (unused)
-
-  Register rax_callsite      = rax;
-  Register rcx_method_handle = rcx;
+  const Register rbx_method   = rbx;
+  const Register rax_callsite = rax;
+
+  prepare_invoke(byte_no, rbx_method, rax_callsite);
+
+  // rax: CallSite object (from f1)
+  // rbx: MH.linkToCallSite method (from f2)
+
+  // Note:  rax_callsite is already pushed by prepare_invoke
 
   // %%% should make a type profile for any invokedynamic that takes a ref argument
   // profile this call
   __ profile_call(r13);
 
   __ verify_oop(rax_callsite);
-  __ load_heap_oop(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rdx)));
-  __ null_check(rcx_method_handle);
-  __ verify_oop(rcx_method_handle);
-  __ prepare_to_jump_from_interpreted();
-  __ jump_to_method_handle_entry(rcx_method_handle, rdx);
+
+  __ jump_from_interpreted(rbx_method, rdx);
 }
 
 
--- a/src/cpu/x86/vm/templateTable_x86_64.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/templateTable_x86_64.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -25,7 +25,12 @@
 #ifndef CPU_X86_VM_TEMPLATETABLE_X86_64_HPP
 #define CPU_X86_VM_TEMPLATETABLE_X86_64_HPP
 
-  static void prepare_invoke(Register method, Register index, int byte_no);
+  static void prepare_invoke(int byte_no,
+                             Register method,         // linked method (or i-klass)
+                             Register index = noreg,  // itable index, MethodType, etc.
+                             Register recv  = noreg,  // if caller wants to see it
+                             Register flags = noreg   // if caller wants to test it
+                             );
   static void invokevirtual_helper(Register index, Register recv,
                                    Register flags);
   static void volatile_barrier(Assembler::Membar_mask_bits order_constraint);
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -467,6 +467,32 @@
   if (!supports_avx ()) // Drop to 0 if no AVX  support
     UseAVX = 0;
 
+#ifdef COMPILER2
+  if (UseFPUForSpilling) {
+    if (UseSSE < 2) {
+      // Only supported with SSE2+
+      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+    }
+  }
+  if (MaxVectorSize > 0) {
+    if (!is_power_of_2(MaxVectorSize)) {
+      warning("MaxVectorSize must be a power of 2");
+      FLAG_SET_DEFAULT(MaxVectorSize, 32);
+    }
+    if (MaxVectorSize > 32) {
+      FLAG_SET_DEFAULT(MaxVectorSize, 32);
+    }
+    if (MaxVectorSize > 16 && UseAVX == 0) {
+      // Only supported with AVX+
+      FLAG_SET_DEFAULT(MaxVectorSize, 16);
+    }
+    if (UseSSE < 2) {
+      // Only supported with SSE2+
+      FLAG_SET_DEFAULT(MaxVectorSize, 0);
+    }
+  }
+#endif
+
   // On new cpus instructions which update whole XMM register should be used
   // to prevent partial register stall due to dependencies on high half.
   //
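
The MaxVectorSize checks added in this hunk compose into a small clamping rule; a standalone model follows, with FLAG_SET_DEFAULT replaced by plain assignment and the warning elided:

    #include <cstdio>

    static bool is_power_of_2(long x) { return x > 0 && (x & (x - 1)) == 0; }

    static long clamp_max_vector_size(long mvs, int use_sse, int use_avx) {
      if (mvs <= 0) return mvs;                // vectors disabled: leave alone
      if (!is_power_of_2(mvs)) mvs = 32;       // the real code also warns here
      if (mvs > 32) mvs = 32;                  // cap at 256-bit AVX registers
      if (mvs > 16 && use_avx == 0) mvs = 16;  // wider than 16 bytes needs AVX
      if (use_sse < 2) mvs = 0;                // any vectors need at least SSE2
      return mvs;
    }

    int main() {
      std::printf("%ld\n", clamp_max_vector_size(24, 4, 0));  // 24 -> 32 -> 16
      std::printf("%ld\n", clamp_max_vector_size(32, 1, 1));  // 32 -> 0, no SSE2
      return 0;
    }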
@@ -536,7 +562,7 @@
         AllocatePrefetchInstr = 3;
       }
       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
-      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+      if( supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
         UseXMMForArrayCopy = true;
       }
       if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
@@ -544,6 +570,12 @@
       }
     }
 
+#ifdef COMPILER2
+    if (MaxVectorSize > 16) {
+      // Limit vectors size to 16 bytes on current AMD cpus.
+      FLAG_SET_DEFAULT(MaxVectorSize, 16);
+    }
+#endif // COMPILER2
   }
 
   if( is_intel() ) { // Intel cpus specific settings
@@ -606,15 +638,6 @@
     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
   }
 
-#ifdef COMPILER2
-  if (UseFPUForSpilling) {
-    if (UseSSE < 2) {
-      // Only supported with SSE2+
-      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
-    }
-  }
-#endif
-
   assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
   assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
 
--- a/src/cpu/x86/vm/vmreg_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/vmreg_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -48,8 +48,9 @@
 
   XMMRegister xreg = ::as_XMMRegister(0);
   for ( ; i < ConcreteRegisterImpl::max_xmm ; ) {
-    regName[i++] = xreg->name();
-    regName[i++] = xreg->name();
+    for (int j = 0 ; j < 8 ; j++) {
+      regName[i++] = xreg->name();
+    }
     xreg = xreg->successor();
   }
   for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) {
--- a/src/cpu/x86/vm/vmreg_x86.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/vmreg_x86.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,7 +39,7 @@
 }
 
 inline VMReg XMMRegisterImpl::as_VMReg() {
-  return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_fpr);
+  return VMRegImpl::as_VMReg((encoding() << 3) + ConcreteRegisterImpl::max_fpr);
 }
 
 
@@ -75,7 +75,7 @@
 inline XMMRegister VMRegImpl::as_XMMRegister() {
   assert( is_XMMRegister() && is_even(value()), "must be" );
   // Yuk
-  return ::as_XMMRegister((value() - ConcreteRegisterImpl::max_fpr) >> 1);
+  return ::as_XMMRegister((value() - ConcreteRegisterImpl::max_fpr) >> 3);
 }
 
 inline   bool VMRegImpl::is_concrete() {
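
The encoding change in this file gives each XMM register eight 32-bit VMReg slots instead of two, so the conversions shift by 3 rather than 1. A round-trip sketch with an assumed slot base:

    #include <cstdio>

    const int max_fpr = 32;  // assumption: first VMReg index used for XMM slots

    static int xmm_to_vmreg(int xmm_encoding) { return (xmm_encoding << 3) + max_fpr; }
    static int vmreg_to_xmm(int vmreg_value)  { return (vmreg_value - max_fpr) >> 3; }

    int main() {
      for (int enc = 0; enc < 3; enc++) {
        int v = xmm_to_vmreg(enc);
        std::printf("xmm%d -> VMReg %d -> xmm%d\n", enc, v, vmreg_to_xmm(v));
      }
      return 0;
    }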
--- a/src/cpu/x86/vm/vtableStubs_x86_32.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/vtableStubs_x86_32.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -76,8 +76,7 @@
   // get receiver klass
   address npe_addr = __ pc();
   __ movptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
-  // compute entry offset (in words)
-  int entry_offset = instanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
+
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L;
@@ -93,7 +92,8 @@
   const Register method = rbx;
 
   // load methodOop and target address
-  __ movptr(method, Address(rax, entry_offset*wordSize + vtableEntry::method_offset_in_bytes()));
+  __ lookup_virtual_method(rax, vtable_index, method);
+
   if (DebugVtables) {
     Label L;
     __ cmpptr(method, (int32_t)NULL_WORD);
--- a/src/cpu/x86/vm/vtableStubs_x86_64.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/vtableStubs_x86_64.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -69,10 +69,6 @@
   address npe_addr = __ pc();
   __ load_klass(rax, j_rarg0);
 
-  // compute entry offset (in words)
-  int entry_offset =
-    instanceKlass::vtable_start_offset() + vtable_index * vtableEntry::size();
-
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L;
@@ -90,9 +86,8 @@
   // load methodOop and target address
   const Register method = rbx;
 
-  __ movptr(method, Address(rax,
-                            entry_offset * wordSize +
-                            vtableEntry::method_offset_in_bytes()));
+  __ lookup_virtual_method(rax, vtable_index, method);
+
   if (DebugVtables) {
     Label L;
     __ cmpptr(method, (int32_t)NULL_WORD);
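
Both vtable stubs now delegate to lookup_virtual_method instead of computing the entry offset inline; the deleted lines show the arithmetic it centralizes. A standalone model of that address computation, with assumed layout constants:

    #include <cstdio>

    int main() {
      const int wordSize            = 8;   // 64-bit words
      const int vtable_start_words  = 28;  // assumed instanceKlass header size
      const int vtableEntry_words   = 1;   // one methodOop per entry, per the old assert
      const int method_offset_bytes = 0;   // assumed offset within the entry
      int vtable_index = 5;
      long offset = (long)(vtable_start_words + vtable_index * vtableEntry_words)
                      * wordSize + method_offset_bytes;
      std::printf("methodOop loaded from klass + %ld bytes\n", offset);
      return 0;
    }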
--- a/src/cpu/x86/vm/x86.ad	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/x86.ad	Sat Oct 20 00:08:35 2012 -0700
@@ -24,6 +24,456 @@
 
 // X86 Common Architecture Description File
 
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+
+register %{
+//----------Architecture Description Register Definitions----------------------
+// General Registers
+// "reg_def"  name ( register save type, C convention save type,
+//                   ideal register type, encoding );
+// Register Save Types:
+//
+// NS  = No-Save:       The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method, &
+//                      that they do not need to be saved at call sites.
+//
+// SOC = Save-On-Call:  The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method,
+//                      but that they must be saved at call sites.
+//
+// SOE = Save-On-Entry: The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, but they do not need to be saved at call
+//                      sites.
+//
+// AS  = Always-Save:   The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, & that they must be saved at call sites.
+//
+// Ideal Register Type is used to determine how to save & restore a
+// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
+// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
+//
+// The encoding number is the actual bit-pattern placed into the opcodes.
+
+// XMM registers.  256-bit registers, or 8 words each, labeled (a)-h.
+// Word a in each register holds a Float, words ab hold a Double.
+// The whole registers are used in SSE4.2 version intrinsics,
+// array copy stubs and superword operations (see UseSSE42Intrinsics,
+// UseXMMForArrayCopy and UseSuperword flags).
+// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
+// Linux ABI:   No register preserved across function calls
+//              XMM0-XMM7 might hold parameters
+// Windows ABI: XMM6-XMM15 preserved across function calls
+//              XMM0-XMM3 might hold parameters
+
+reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
+reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
+reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
+reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
+reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
+reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
+reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
+reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
+
+reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
+reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
+reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
+reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
+reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
+reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
+reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
+reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
+
+reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
+reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
+reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
+reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
+reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
+reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
+reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
+reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
+
+reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
+reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
+reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
+reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
+reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
+reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
+reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
+reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
+
+reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
+reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
+reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
+reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
+reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
+reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
+reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
+reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
+
+reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
+reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
+reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
+reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
+reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
+reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
+reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
+reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
+
+#ifdef _WIN64
+
+reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
+reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
+reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
+reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
+reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
+reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
+reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
+reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
+
+reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
+reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
+reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
+reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
+reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
+reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
+reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
+reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
+
+reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
+reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
+reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
+reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
+reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
+reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
+reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
+reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
+
+reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
+reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
+reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
+reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
+reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
+reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
+reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
+reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
+
+reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
+reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
+reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
+reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
+reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
+reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
+reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
+reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
+
+reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
+reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
+reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
+reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
+reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
+reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
+reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
+reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
+
+reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
+reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
+reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
+reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
+reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
+reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
+reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
+reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
+
+reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
+reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
+reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
+reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
+reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
+reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
+reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
+reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
+
+reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
+reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
+reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
+reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
+reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
+reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
+reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
+reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
+
+reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
+reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
+reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
+reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
+reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
+reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
+reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
+reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
+
+#else // _WIN64
+
+reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
+reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
+reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
+reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
+reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
+reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
+reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
+reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
+
+reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
+reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
+reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
+reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
+reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
+reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
+reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
+reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
+
+#ifdef _LP64
+
+reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
+reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
+reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
+reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
+reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
+reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
+reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
+reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
+
+reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
+reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
+reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
+reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
+reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
+reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
+reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
+reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
+
+reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
+reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
+reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
+reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
+reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
+reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
+reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
+reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
+
+reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
+reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
+reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
+reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
+reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
+reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
+reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
+reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
+
+reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
+reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
+reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
+reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
+reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
+reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
+reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
+reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
+
+reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
+reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
+reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
+reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
+reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
+reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
+reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
+reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
+
+reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
+reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
+reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
+reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
+reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
+reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
+reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
+reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
+
+reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
+reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
+reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
+reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
+reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
+reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
+reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
+reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
+
+#endif // _LP64
+
+#endif // _WIN64
+
+#ifdef _LP64
+reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
+#else
+reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
+#endif // _LP64
+
+alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
+                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
+                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
+                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
+                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
+                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
+                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
+                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
+#ifdef _LP64
+                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
+                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
+                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
+                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
+                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
+                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
+                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
+                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
+#endif
+                   );
+
+// flags allocation class should be last.
+alloc_class chunk2(RFLAGS);
+
+// Singleton class for condition codes
+reg_class int_flags(RFLAGS);
+
+// Class for all float registers
+reg_class float_reg(XMM0,
+                    XMM1,
+                    XMM2,
+                    XMM3,
+                    XMM4,
+                    XMM5,
+                    XMM6,
+                    XMM7
+#ifdef _LP64
+                   ,XMM8,
+                    XMM9,
+                    XMM10,
+                    XMM11,
+                    XMM12,
+                    XMM13,
+                    XMM14,
+                    XMM15
+#endif
+                    );
+
+// Class for all double registers
+reg_class double_reg(XMM0,  XMM0b,
+                     XMM1,  XMM1b,
+                     XMM2,  XMM2b,
+                     XMM3,  XMM3b,
+                     XMM4,  XMM4b,
+                     XMM5,  XMM5b,
+                     XMM6,  XMM6b,
+                     XMM7,  XMM7b
+#ifdef _LP64
+                    ,XMM8,  XMM8b,
+                     XMM9,  XMM9b,
+                     XMM10, XMM10b,
+                     XMM11, XMM11b,
+                     XMM12, XMM12b,
+                     XMM13, XMM13b,
+                     XMM14, XMM14b,
+                     XMM15, XMM15b
+#endif
+                     );
+
+// Class for all 32bit vector registers
+reg_class vectors_reg(XMM0,
+                      XMM1,
+                      XMM2,
+                      XMM3,
+                      XMM4,
+                      XMM5,
+                      XMM6,
+                      XMM7
+#ifdef _LP64
+                     ,XMM8,
+                      XMM9,
+                      XMM10,
+                      XMM11,
+                      XMM12,
+                      XMM13,
+                      XMM14,
+                      XMM15
+#endif
+                      );
+
+// Class for all 64bit vector registers
+reg_class vectord_reg(XMM0,  XMM0b,
+                      XMM1,  XMM1b,
+                      XMM2,  XMM2b,
+                      XMM3,  XMM3b,
+                      XMM4,  XMM4b,
+                      XMM5,  XMM5b,
+                      XMM6,  XMM6b,
+                      XMM7,  XMM7b
+#ifdef _LP64
+                     ,XMM8,  XMM8b,
+                      XMM9,  XMM9b,
+                      XMM10, XMM10b,
+                      XMM11, XMM11b,
+                      XMM12, XMM12b,
+                      XMM13, XMM13b,
+                      XMM14, XMM14b,
+                      XMM15, XMM15b
+#endif
+                      );
+
+// Class for all 128bit vector registers
+reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
+                      XMM1,  XMM1b,  XMM1c,  XMM1d,
+                      XMM2,  XMM2b,  XMM2c,  XMM2d,
+                      XMM3,  XMM3b,  XMM3c,  XMM3d,
+                      XMM4,  XMM4b,  XMM4c,  XMM4d,
+                      XMM5,  XMM5b,  XMM5c,  XMM5d,
+                      XMM6,  XMM6b,  XMM6c,  XMM6d,
+                      XMM7,  XMM7b,  XMM7c,  XMM7d
+#ifdef _LP64
+                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
+                      XMM9,  XMM9b,  XMM9c,  XMM9d,
+                      XMM10, XMM10b, XMM10c, XMM10d,
+                      XMM11, XMM11b, XMM11c, XMM11d,
+                      XMM12, XMM12b, XMM12c, XMM12d,
+                      XMM13, XMM13b, XMM13c, XMM13d,
+                      XMM14, XMM14b, XMM14c, XMM14d,
+                      XMM15, XMM15b, XMM15c, XMM15d
+#endif
+                      );
+
+// Class for all 256bit vector registers
+reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
+                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
+                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
+                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
+                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
+                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
+                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
+                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
+#ifdef _LP64
+                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
+                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
+                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
+                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
+                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
+                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
+                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
+                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
+#endif
+                      );
+
+%}
+
 source %{
   // Float masks come from different places depending on platform.
 #ifdef _LP64
@@ -38,6 +488,270 @@
   static address double_signflip() { return (address)double_signflip_pool; }
 #endif
 
+// Map Types to machine register types
+const int Matcher::base2reg[Type::lastype] = {
+  Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
+  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
+  Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
+  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
+  0, 0/*abio*/,
+  Op_RegP /* Return address */, 0, /* the memories */
+  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
+  0  /*bottom*/
+};
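+// Illustrative note (not part of the original change): the table is indexed
+// in Type::TYPES order, so e.g. Type::Int maps to Op_RegI, the vector types
+// map to Op_VecS..Op_VecY, and every pointer flavor maps to Op_RegP.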
+
+const bool Matcher::match_rule_supported(int opcode) {
+  if (!has_match_rule(opcode))
+    return false;
+
+  switch (opcode) {
+    case Op_PopCountI:
+    case Op_PopCountL:
+      if (!UsePopCountInstruction)
+        return false;
+      break;
+    case Op_MulVI:
+      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
+        return false;
+      break;
+  }
+
+  return true;  // Match rules are supported by default.
+}
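+// Illustrative example (not in the original change): with UseSSE == 2 and
+// UseAVX == 0, Op_MulVI is rejected above because a packed 32-bit multiply
+// (pmulld) first appears in SSE4.1; all other rules remain supported.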
+
+// Max vector size in bytes. 0 if not supported.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+  assert(is_java_primitive(bt), "only primitive type vectors");
+  if (UseSSE < 2) return 0;
+  // SSE2 supports 128bit vectors for all types.
+  // AVX2 supports 256bit vectors for all types.
+  int size = (UseAVX > 1) ? 32 : 16;
+  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
+  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
+    size = 32;
+  // Use flag to limit vector size.
+  size = MIN2(size,(int)MaxVectorSize);
+  // Minimum 2 values in vector (or 4 for bytes).
+  switch (bt) {
+  case T_DOUBLE:
+  case T_LONG:
+    if (size < 16) return 0;
+  case T_FLOAT:
+  case T_INT:
+    if (size < 8) return 0;
+  case T_BOOLEAN:
+  case T_BYTE:
+  case T_CHAR:
+  case T_SHORT:
+    if (size < 4) return 0;
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  return size;
+}
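+// Worked example (illustrative): with UseAVX == 1 and MaxVectorSize == 32,
+// T_FLOAT and T_DOUBLE get 32 bytes (AVX1 has 256-bit FP ops) while T_INT
+// stays at 16 (256-bit integer ops need AVX2); with UseSSE < 2 the width is
+// 0 and vectorization is disabled.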
+
+// Limits on vector size (number of elements) loaded into vector.
+const int Matcher::max_vector_size(const BasicType bt) {
+  return vector_width_in_bytes(bt)/type2aelembytes(bt);
+}
+const int Matcher::min_vector_size(const BasicType bt) {
+  int max_size = max_vector_size(bt);
+  // The smallest vector that can be loaded is 4 bytes, so byte elements need at least 4 lanes; wider elements need at least 2.
+  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
+  return MIN2(size,max_size);
+}
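+// Worked example (illustrative): for T_BYTE the minimum is 4 elements and,
+// at a 16-byte width, the maximum is 16; for T_INT the minimum is 2 and the
+// maximum is vector_width_in_bytes(T_INT) / 4.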
+
+// Vector ideal reg corresponding to specified size in bytes
+const int Matcher::vector_ideal_reg(int size) {
+  assert(MaxVectorSize >= size, "");
+  switch(size) {
+    case  4: return Op_VecS;
+    case  8: return Op_VecD;
+    case 16: return Op_VecX;
+    case 32: return Op_VecY;
+  }
+  ShouldNotReachHere();
+  return 0;
+}
+
+// x86 supports misaligned vector stores/loads.
+const bool Matcher::misaligned_vectors_ok() {
+  return !AlignVector; // can be changed by flag
+}
+
+// Helper methods for MachSpillCopyNode::implementation().
+static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
+  // In the 64-bit VM size calculation is complex, so instructions are
+  // emitted into a scratch buffer and measured instead.
+  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
+  assert(ireg == Op_VecS || // 32bit vector
+         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
+         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
+         "no non-adjacent vector moves" );
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    int offset = __ offset();
+    switch (ireg) {
+    case Op_VecS: // copy whole register
+    case Op_VecD:
+    case Op_VecX:
+      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+      break;
+    case Op_VecY:
+      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+    int size = __ offset() - offset;
+#ifdef ASSERT
+    // The 2-byte VEX prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
+    assert(!do_size || size == 4, "incorrect size calculation");
+#endif
+    return size;
+#ifndef PRODUCT
+  } else if (!do_size) {
+    switch (ireg) {
+    case Op_VecS:
+    case Op_VecD:
+    case Op_VecX:
+      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
+      break;
+    case Op_VecY:
+      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+#endif
+  }
+  // The 2-byte VEX prefix is used if UseAVX > 0, and it takes the same 2 bytes as the SIMD prefix.
+  return 4;
+}
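+// Size note (illustrative): a register-to-register movdqu encodes as
+// F3 0F 6F /r, i.e. three opcode bytes plus ModRM = 4 bytes; the VEX form
+// used under AVX has the same length, which is why 4 is returned above.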
+
+static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+                            int stack_offset, int reg, uint ireg, outputStream* st) {
+  // In the 64-bit VM size calculation is complex, so instructions are
+  // emitted into a scratch buffer and measured instead.
+  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    int offset = __ offset();
+    if (is_load) {
+      switch (ireg) {
+      case Op_VecS:
+        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        break;
+      case Op_VecD:
+        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        break;
+      case Op_VecX:
+        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        break;
+      case Op_VecY:
+        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else { // store
+      switch (ireg) {
+      case Op_VecS:
+        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        break;
+      case Op_VecD:
+        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        break;
+      case Op_VecX:
+        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        break;
+      case Op_VecY:
+        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    }
+    int size = __ offset() - offset;
+#ifdef ASSERT
+    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
+    // The 2-byte VEX prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
+    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
+#endif
+    return size;
+#ifndef PRODUCT
+  } else if (!do_size) {
+    if (is_load) {
+      switch (ireg) {
+      case Op_VecS:
+        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+        break;
+      case Op_VecD:
+        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+        break;
+       case Op_VecX:
+        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+        break;
+      case Op_VecY:
+        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else { // store
+      switch (ireg) {
+      case Op_VecS:
+        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+        break;
+      case Op_VecD:
+        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+        break;
+       case Op_VecX:
+        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+        break;
+      case Op_VecY:
+        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    }
+#endif
+  }
+  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
+  // The 2-byte VEX prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
+  return 5+offset_size;
+}
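+// Size note (illustrative): movdqu to or from [rsp + disp] is F3 0F 6F/7F
+// plus ModRM and SIB, i.e. 5 bytes, plus 0, 1 or 4 displacement bytes, which
+// is exactly the 5 + offset_size returned above.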
+
+static inline jfloat replicate4_imm(int con, int width) {
+  // Replicate a constant of "width" (in bytes) across 32 bits.
+  assert(width == 1 || width == 2, "only byte or short types here");
+  int bit_width = width * 8;
+  jint val = con;
+  val &= (1 << bit_width) - 1;  // mask off sign bits
+  while(bit_width < 32) {
+    val |= (val << bit_width);
+    bit_width <<= 1;
+  }
+  jfloat fval = *((jfloat*) &val);  // coerce to float type
+  return fval;
+}
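+// Worked example (illustrative): replicate4_imm(0x41, 1) masks the constant
+// to 8 bits (0x41), widens it to 0x4141 and then to 0x41414141; the bits are
+// returned as a jfloat only so they can be stored in the constant table.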
+
+static inline jdouble replicate8_imm(int con, int width) {
+  // Replicate a constant of "width" (in bytes) across 64 bits.
+  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
+  int bit_width = width * 8;
+  jlong val = con;
+  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
+  while(bit_width < 64) {
+    val |= (val << bit_width);
+    bit_width <<= 1;
+  }
+  jdouble dval = *((jdouble*) &val);  // coerce to double type
+  return dval;
+}
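+// Worked example (illustrative): replicate8_imm(0x12345678, 4) masks to 32
+// bits and doubles once, yielding the 0x1234567812345678 bit pattern as a
+// jdouble for an 8-byte constant-table load.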
+
 #ifndef PRODUCT
   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
     st->print("nop \t# %d bytes pad for loops and calls", _count);
@@ -103,6 +817,46 @@
 
 %}
 
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+
+// Vectors
+operand vecS() %{
+  constraint(ALLOC_IN_RC(vectors_reg));
+  match(VecS);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vecD() %{
+  constraint(ALLOC_IN_RC(vectord_reg));
+  match(VecD);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vecX() %{
+  constraint(ALLOC_IN_RC(vectorx_reg));
+  match(VecX);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vecY() %{
+  constraint(ALLOC_IN_RC(vectory_reg));
+  match(VecY);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+
 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
 
 // ============================================================================
@@ -153,7 +907,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddF_reg(regF dst, regF src1, regF src2) %{
+instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddF src1 src2));
 
@@ -165,7 +919,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddF_mem(regF dst, regF src1, memory src2) %{
+instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddF src1 (LoadF src2)));
 
@@ -177,7 +931,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddF_imm(regF dst, regF src, immF con) %{
+instruct addF_reg_imm(regF dst, regF src, immF con) %{
   predicate(UseAVX > 0);
   match(Set dst (AddF src con));
 
@@ -224,7 +978,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddD_reg(regD dst, regD src1, regD src2) %{
+instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddD src1 src2));
 
@@ -236,7 +990,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddD_mem(regD dst, regD src1, memory src2) %{
+instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddD src1 (LoadD src2)));
 
@@ -248,7 +1002,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddD_imm(regD dst, regD src, immD con) %{
+instruct addD_reg_imm(regD dst, regD src, immD con) %{
   predicate(UseAVX > 0);
   match(Set dst (AddD src con));
 
@@ -295,7 +1049,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubF_reg(regF dst, regF src1, regF src2) %{
+instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
   predicate(UseAVX > 0);
   match(Set dst (SubF src1 src2));
 
@@ -307,7 +1061,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubF_mem(regF dst, regF src1, memory src2) %{
+instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (SubF src1 (LoadF src2)));
 
@@ -319,7 +1073,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubF_imm(regF dst, regF src, immF con) %{
+instruct subF_reg_imm(regF dst, regF src, immF con) %{
   predicate(UseAVX > 0);
   match(Set dst (SubF src con));
 
@@ -366,7 +1120,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubD_reg(regD dst, regD src1, regD src2) %{
+instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
   predicate(UseAVX > 0);
   match(Set dst (SubD src1 src2));
 
@@ -378,7 +1132,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubD_mem(regD dst, regD src1, memory src2) %{
+instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (SubD src1 (LoadD src2)));
 
@@ -390,7 +1144,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubD_imm(regD dst, regD src, immD con) %{
+instruct subD_reg_imm(regD dst, regD src, immD con) %{
   predicate(UseAVX > 0);
   match(Set dst (SubD src con));
 
@@ -437,7 +1191,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulF_reg(regF dst, regF src1, regF src2) %{
+instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulF src1 src2));
 
@@ -449,7 +1203,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulF_mem(regF dst, regF src1, memory src2) %{
+instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulF src1 (LoadF src2)));
 
@@ -461,7 +1215,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulF_imm(regF dst, regF src, immF con) %{
+instruct mulF_reg_imm(regF dst, regF src, immF con) %{
   predicate(UseAVX > 0);
   match(Set dst (MulF src con));
 
@@ -508,7 +1262,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulD_reg(regD dst, regD src1, regD src2) %{
+instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulD src1 src2));
 
@@ -520,7 +1274,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulD_mem(regD dst, regD src1, memory src2) %{
+instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulD src1 (LoadD src2)));
 
@@ -532,7 +1286,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulD_imm(regD dst, regD src, immD con) %{
+instruct mulD_reg_imm(regD dst, regD src, immD con) %{
   predicate(UseAVX > 0);
   match(Set dst (MulD src con));
 
@@ -579,7 +1333,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivF_reg(regF dst, regF src1, regF src2) %{
+instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
   predicate(UseAVX > 0);
   match(Set dst (DivF src1 src2));
 
@@ -591,7 +1345,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivF_mem(regF dst, regF src1, memory src2) %{
+instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (DivF src1 (LoadF src2)));
 
@@ -603,7 +1357,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivF_imm(regF dst, regF src, immF con) %{
+instruct divF_reg_imm(regF dst, regF src, immF con) %{
   predicate(UseAVX > 0);
   match(Set dst (DivF src con));
 
@@ -650,7 +1404,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivD_reg(regD dst, regD src1, regD src2) %{
+instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
   predicate(UseAVX > 0);
   match(Set dst (DivD src1 src2));
 
@@ -662,7 +1416,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivD_mem(regD dst, regD src1, memory src2) %{
+instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (DivD src1 (LoadD src2)));
 
@@ -674,7 +1428,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivD_imm(regD dst, regD src, immD con) %{
+instruct divD_reg_imm(regD dst, regD src, immD con) %{
   predicate(UseAVX > 0);
   match(Set dst (DivD src con));
 
@@ -697,14 +1451,15 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vabsF_reg(regF dst, regF src) %{
+instruct absF_reg_reg(regF dst, regF src) %{
   predicate(UseAVX > 0);
   match(Set dst (AbsF src));
   ins_cost(150);
   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
   ins_encode %{
+    bool vector256 = false;
     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(float_signmask()));
+              ExternalAddress(float_signmask()), vector256);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -721,15 +1476,16 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vabsD_reg(regD dst, regD src) %{
+instruct absD_reg_reg(regD dst, regD src) %{
   predicate(UseAVX > 0);
   match(Set dst (AbsD src));
   ins_cost(150);
   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
             "# abs double by sign masking" %}
   ins_encode %{
+    bool vector256 = false;
     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(double_signmask()));
+              ExternalAddress(double_signmask()), vector256);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -745,14 +1501,15 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vnegF_reg(regF dst, regF src) %{
+instruct negF_reg_reg(regF dst, regF src) %{
   predicate(UseAVX > 0);
   match(Set dst (NegF src));
   ins_cost(150);
   format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
   ins_encode %{
+    bool vector256 = false;
     __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(float_signflip()));
+              ExternalAddress(float_signflip()), vector256);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -769,15 +1526,16 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vnegD_reg(regD dst, regD src) %{
+instruct negD_reg_reg(regD dst, regD src) %{
   predicate(UseAVX > 0);
   match(Set dst (NegD src));
   ins_cost(150);
   format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
             "# neg double by sign flipping" %}
   ins_encode %{
+    bool vector256 = false;
     __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(double_signflip()));
+              ExternalAddress(double_signflip()), vector256);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -852,3 +1610,3210 @@
   ins_pipe(pipe_slow);
 %}
 
+
+// ====================VECTOR INSTRUCTIONS=====================================
+
+// Load vectors (4 bytes long)
+instruct loadV4(vecS dst, memory mem) %{
+  predicate(n->as_LoadVector()->memory_size() == 4);
+  match(Set dst (LoadVector mem));
+  ins_cost(125);
+  format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $mem$$Address);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Load vectors (8 bytes long)
+instruct loadV8(vecD dst, memory mem) %{
+  predicate(n->as_LoadVector()->memory_size() == 8);
+  match(Set dst (LoadVector mem));
+  ins_cost(125);
+  format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Load vectors (16 bytes long)
+instruct loadV16(vecX dst, memory mem) %{
+  predicate(n->as_LoadVector()->memory_size() == 16);
+  match(Set dst (LoadVector mem));
+  ins_cost(125);
+  format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
+  ins_encode %{
+    __ movdqu($dst$$XMMRegister, $mem$$Address);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Load vectors (32 bytes long)
+instruct loadV32(vecY dst, memory mem) %{
+  predicate(n->as_LoadVector()->memory_size() == 32);
+  match(Set dst (LoadVector mem));
+  ins_cost(125);
+  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
+  ins_encode %{
+    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Store vectors
+instruct storeV4(memory mem, vecS src) %{
+  predicate(n->as_StoreVector()->memory_size() == 4);
+  match(Set mem (StoreVector mem src));
+  ins_cost(145);
+  format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
+  ins_encode %{
+    __ movdl($mem$$Address, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct storeV8(memory mem, vecD src) %{
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem src));
+  ins_cost(145);
+  format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct storeV16(memory mem, vecX src) %{
+  predicate(n->as_StoreVector()->memory_size() == 16);
+  match(Set mem (StoreVector mem src));
+  ins_cost(145);
+  format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
+  ins_encode %{
+    __ movdqu($mem$$Address, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct storeV32(memory mem, vecY src) %{
+  predicate(n->as_StoreVector()->memory_size() == 32);
+  match(Set mem (StoreVector mem src));
+  ins_cost(145);
+  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
+  ins_encode %{
+    __ vmovdqu($mem$$Address, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate byte scalar to be vector
+instruct Repl4B(vecS dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateB src));
+  format %{ "movd    $dst,$src\n\t"
+            "punpcklbw $dst,$dst\n\t"
+            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8B(vecD dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateB src));
+  format %{ "movd    $dst,$src\n\t"
+            "punpcklbw $dst,$dst\n\t"
+            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16B(vecX dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateB src));
+  format %{ "movd    $dst,$src\n\t"
+            "punpcklbw $dst,$dst\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\t! replicate16B" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32B(vecY dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 32);
+  match(Set dst (ReplicateB src));
+  format %{ "movd    $dst,$src\n\t"
+            "punpcklbw $dst,$dst\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
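+// Note (illustrative): the vinserti128h step above copies the low 128 bits
+// of $dst into its own high half, widening a 128-bit broadcast to 256 bits;
+// the wider replicate rules below reuse the same idiom.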
+
+// Replicate byte scalar immediate to be vector by loading from const table.
+instruct Repl4B_imm(vecS dst, immI con) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateB con));
+  format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8B_imm(vecD dst, immI con) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateB con));
+  format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16B_imm(vecX dst, immI con) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateB con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32B_imm(vecY dst, immI con) %{
+  predicate(n->as_Vector()->length() == 32);
+  match(Set dst (ReplicateB con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate byte scalar zero to be vector
+instruct Repl4B_zero(vecS dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateB zero));
+  format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8B_zero(vecD dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateB zero));
+  format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl16B_zero(vecX dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateB zero));
+  format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl32B_zero(vecY dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 32);
+  match(Set dst (ReplicateB zero));
+  format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
+  ins_encode %{
+    // MacroAssembler::vpxor falls back to vxorpd here: 256-bit vpxor needs AVX2.
+    bool vector256 = true;
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate char/short (2 byte) scalar to be vector
+instruct Repl2S(vecS dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4S(vecD dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8S(vecX dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\t! replicate8S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16S(vecY dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
+instruct Repl2S_imm(vecS dst, immI con) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateS con));
+  format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4S_imm(vecD dst, immI con) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateS con));
+  format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8S_imm(vecX dst, immI con) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateS con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16S_imm(vecY dst, immI con) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateS con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate char/short (2 byte) scalar zero to be vector
+instruct Repl2S_zero(vecS dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateS zero));
+  format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4S_zero(vecD dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateS zero));
+  format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8S_zero(vecX dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateS zero));
+  format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl16S_zero(vecY dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateS zero));
+  format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
+  ins_encode %{
+    // MacroAssembler::vpxor falls back to vxorpd here: 256-bit vpxor needs AVX2.
+    bool vector256 = true;
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate integer (4 byte) scalar to be vector
+instruct Repl2I(vecD dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4I(vecX dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateI src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I(vecY dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateI src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshufd  $dst,$dst,0x00\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
+instruct Repl2I_imm(vecD dst, immI con) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI con));
+  format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4I_imm(vecX dst, immI con) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateI con));
+  format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
+            "punpcklqdq $dst,$dst" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I_imm(vecY dst, immI con) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateI con));
+  format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integer could be loaded into xmm register directly from memory.
+instruct Repl2I_mem(vecD dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI (LoadI mem)));
+  format %{ "movd    $dst,$mem\n\t"
+            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $mem$$Address);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4I_mem(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateI (LoadI mem)));
+  format %{ "movd    $dst,$mem\n\t"
+            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $mem$$Address);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I_mem(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateI (LoadI mem)));
+  format %{ "movd    $dst,$mem\n\t"
+            "pshufd  $dst,$dst,0x00\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $mem$$Address);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate integer (4 byte) scalar zero to be vector
+instruct Repl2I_zero(vecD dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI zero));
+  format %{ "pxor    $dst,$dst\t! replicate2I" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4I_zero(vecX dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateI zero));
+  format %{ "pxor    $dst,$dst\t! replicate4I zero)" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8I_zero(vecY dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateI zero));
+  format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
+  ins_encode %{
+    // MacroAssembler::vpxor falls back to vxorpd here: 256-bit vpxor needs AVX2.
+    bool vector256 = true;
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate long (8 byte) scalar to be vector
+#ifdef _LP64
+instruct Repl2L(vecX dst, rRegL src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateL src));
+  format %{ "movdq   $dst,$src\n\t"
+            "punpcklqdq $dst,$dst\t! replicate2L" %}
+  ins_encode %{
+    __ movdq($dst$$XMMRegister, $src$$Register);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4L(vecY dst, rRegL src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateL src));
+  format %{ "movdq   $dst,$src\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+  ins_encode %{
+    __ movdq($dst$$XMMRegister, $src$$Register);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+#else // _LP64
+instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateL src));
+  effect(TEMP dst, USE src, TEMP tmp);
+  format %{ "movdl   $dst,$src.lo\n\t"
+            "movdl   $tmp,$src.hi\n\t"
+            "punpckldq $dst,$tmp\n\t"
+            "punpcklqdq $dst,$dst\t! replicate2L"%}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateL src));
+  effect(TEMP dst, USE src, TEMP tmp);
+  format %{ "movdl   $dst,$src.lo\n\t"
+            "movdl   $tmp,$src.hi\n\t"
+            "punpckldq $dst,$tmp\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+#endif // _LP64
+
+// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
+instruct Repl2L_imm(vecX dst, immL con) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateL con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress($con));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4L_imm(vecY dst, immL con) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateL con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress($con));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Long could be loaded into xmm register directly from memory.
+instruct Repl2L_mem(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateL (LoadL mem)));
+  format %{ "movq    $dst,$mem\n\t"
+            "punpcklqdq $dst,$dst\t! replicate2L" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4L_mem(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateL (LoadL mem)));
+  format %{ "movq    $dst,$mem\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate long (8 byte) scalar zero to be vector
+instruct Repl2L_zero(vecX dst, immL0 zero) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateL zero));
+  format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4L_zero(vecY dst, immL0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateL zero));
+  format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
+  ins_encode %{
+    // MacroAssembler::vpxor falls back to vxorpd here: 256-bit vpxor needs AVX2.
+    bool vector256 = true;
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate float (4 byte) scalar to be vector
+instruct Repl2F(vecD dst, regF src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateF src));
+  format %{ "pshufd  $dst,$dst,0x00\t! replicate2F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4F(vecX dst, regF src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateF src));
+  format %{ "pshufd  $dst,$dst,0x00\t! replicate4F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8F(vecY dst, regF src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateF src));
+  format %{ "pshufd  $dst,$src,0x00\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate float (4 byte) scalar zero to be vector
+instruct Repl2F_zero(vecD dst, immF0 zero) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateF zero));
+  format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
+  ins_encode %{
+    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4F_zero(vecX dst, immF0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateF zero));
+  format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
+  ins_encode %{
+    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8F_zero(vecY dst, immF0 zero) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateF zero));
+  format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate double (8 bytes) scalar to be vector
+instruct Repl2D(vecX dst, regD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateD src));
+  format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4D(vecY dst, regD src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateD src));
+  format %{ "pshufd  $dst,$src,0x44\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate double (8 byte) scalar zero to be vector
+instruct Repl2D_zero(vecX dst, immD0 zero) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateD zero));
+  format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
+  ins_encode %{
+    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4D_zero(vecY dst, immD0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateD zero));
+  format %{ "vxorpd  $dst,$dst,$dst,vect256\t! replicate4D zero" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// ====================VECTOR ARITHMETIC=======================================
+
+// --------------------------------- ADD --------------------------------------
+
+// Bytes vector add
+instruct vadd4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVB dst src));
+  format %{ "paddb   $dst,$src\t! add packed4B" %}
+  ins_encode %{
+    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
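+// Note (illustrative): the plain SSE rule above is destructive
+// (dst = dst + src) while the AVX rule uses the non-destructive
+// three-operand form (dst = src1 + src2); every vector op below follows
+// the same pairing.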
+
+instruct vadd8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVB dst src));
+  format %{ "paddb   $dst,$src\t! add packed8B" %}
+  ins_encode %{
+    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (AddVB dst src));
+  format %{ "paddb   $dst,$src\t! add packed16B" %}
+  ins_encode %{
+    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Shorts/Chars vector add
+instruct vadd2S(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVS dst src));
+  format %{ "paddw   $dst,$src\t! add packed2S" %}
+  ins_encode %{
+    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVS dst src));
+  format %{ "paddw   $dst,$src\t! add packed4S" %}
+  ins_encode %{
+    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVS dst src));
+  format %{ "paddw   $dst,$src\t! add packed8S" %}
+  ins_encode %{
+    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector add
+instruct vadd2I(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVI dst src));
+  format %{ "paddd   $dst,$src\t! add packed2I" %}
+  ins_encode %{
+    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVI src1 src2));
+  format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4I(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVI dst src));
+  format %{ "paddd   $dst,$src\t! add packed4I" %}
+  ins_encode %{
+    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVI src1 src2));
+  format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVI src (LoadVector mem)));
+  format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVI src1 src2));
+  format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVI src (LoadVector mem)));
+  format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
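+// Note: long vectors start at vecX, since two 64-bit lanes already fill a
+// 128-bit register; paddq wraps modulo 2^64, matching Java long addition.
+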
+// Longs vector add
+instruct vadd2L(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVL dst src));
+  format %{ "paddq   $dst,$src\t! add packed2L" %}
+  ins_encode %{
+    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVL src1 src2));
+  format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVL src (LoadVector mem)));
+  format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVL src1 src2));
+  format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVL src (LoadVector mem)));
+  format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Floats vector add
+instruct vadd2F(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVF dst src));
+  format %{ "addps   $dst,$src\t! add packed2F" %}
+  ins_encode %{
+    __ addps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVF src1 src2));
+  format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVF dst src));
+  format %{ "addps   $dst,$src\t! add packed4F" %}
+  ins_encode %{
+    __ addps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVF src1 src2));
+  format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVF src (LoadVector mem)));
+  format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
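+// Note: unlike the integer rules above, the 256-bit float and double forms
+// need only AVX1 (UseAVX > 0), because vaddps/vaddpd on YMM registers are
+// AVX1 instructions.
+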
+instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVF src1 src2));
+  format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVF src (LoadVector mem)));
+  format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Doubles vector add
+instruct vadd2D(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVD dst src));
+  format %{ "addpd   $dst,$src\t! add packed2D" %}
+  ins_encode %{
+    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVD src1 src2));
+  format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVD src (LoadVector mem)));
+  format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVD src1 src2));
+  format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVD src (LoadVector mem)));
+  format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- SUB --------------------------------------
+
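+// Note: the SUB rules mirror the ADD rules above lane for lane, substituting
+// psub*/vpsub* for padd*/vpadd* with the same SSE/AVX/AVX2 gating.
+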
+// Bytes vector sub
+instruct vsub4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVB dst src));
+  format %{ "psubb   $dst,$src\t! sub packed4B" %}
+  ins_encode %{
+    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (SubVB dst src));
+  format %{ "psubb   $dst,$src\t! sub packed8B" %}
+  ins_encode %{
+    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (SubVB dst src));
+  format %{ "psubb   $dst,$src\t! sub packed16B" %}
+  ins_encode %{
+    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Shorts/Chars vector sub
+instruct vsub2S(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVS dst src));
+  format %{ "psubw   $dst,$src\t! sub packed2S" %}
+  ins_encode %{
+    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVS dst src));
+  format %{ "psubw   $dst,$src\t! sub packed4S" %}
+  ins_encode %{
+    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (SubVS dst src));
+  format %{ "psubw   $dst,$src\t! sub packed8S" %}
+  ins_encode %{
+    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector sub
+instruct vsub2I(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVI dst src));
+  format %{ "psubd   $dst,$src\t! sub packed2I" %}
+  ins_encode %{
+    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVI src1 src2));
+  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4I(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVI dst src));
+  format %{ "psubd   $dst,$src\t! sub packed4I" %}
+  ins_encode %{
+    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVI src1 src2));
+  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVI src (LoadVector mem)));
+  format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVI src1 src2));
+  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVI src (LoadVector mem)));
+  format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Longs vector sub
+instruct vsub2L(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVL dst src));
+  format %{ "psubq   $dst,$src\t! sub packed2L" %}
+  ins_encode %{
+    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVL src1 src2));
+  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVL src (LoadVector mem)));
+  format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVL src1 src2));
+  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVL src (LoadVector mem)));
+  format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Floats vector sub
+instruct vsub2F(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVF dst src));
+  format %{ "subps   $dst,$src\t! sub packed2F" %}
+  ins_encode %{
+    __ subps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVF src1 src2));
+  format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4F(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVF dst src));
+  format %{ "subps   $dst,$src\t! sub packed4F" %}
+  ins_encode %{
+    __ subps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVF src1 src2));
+  format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVF src (LoadVector mem)));
+  format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVF src1 src2));
+  format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVF src (LoadVector mem)));
+  format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Doubles vector sub
+instruct vsub2D(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVD dst src));
+  format %{ "subpd   $dst,$src\t! sub packed2D" %}
+  ins_encode %{
+    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVD src1 src2));
+  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVD src (LoadVector mem)));
+  format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVD src1 src2));
+  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVD src (LoadVector mem)));
+  format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- MUL --------------------------------------
+
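+// Note: only short/char, int, float and double multiplies are expressed.
+// SSE/AVX provide no packed byte multiply and no packed 64-bit multiply, so
+// there are no MulVB/MulVL rules. pmullw keeps the low 16 bits of each
+// product, which matches Java's short/char multiply-then-truncate semantics.
+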
+// Shorts/Chars vector mul
+instruct vmul2S(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVS dst src));
+  format %{ "pmullw $dst,$src\t! mul packed2S" %}
+  ins_encode %{
+    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (MulVS dst src));
+  format %{ "pmullw  $dst,$src\t! mul packed4S" %}
+  ins_encode %{
+    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (MulVS dst src));
+  format %{ "pmullw  $dst,$src\t! mul packed8S" %}
+  ins_encode %{
+    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
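+// Note: pmulld (packed low 32-bit multiply) was introduced with SSE4.1,
+// hence the UseSSE > 3 predicate on the two-operand forms below; the VEX
+// forms are gated on UseAVX instead.
+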
+// Integers vector mul (sse4_1)
+instruct vmul2I(vecD dst, vecD src) %{
+  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVI dst src));
+  format %{ "pmulld  $dst,$src\t! mul packed2I" %}
+  ins_encode %{
+    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVI src1 src2));
+  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4I(vecX dst, vecX src) %{
+  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVI dst src));
+  format %{ "pmulld  $dst,$src\t! mul packed4I" %}
+  ins_encode %{
+    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVI src1 src2));
+  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVI src (LoadVector mem)));
+  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVI src1 src2));
+  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVI src (LoadVector mem)));
+  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Floats vector mul
+instruct vmul2F(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVF dst src));
+  format %{ "mulps   $dst,$src\t! mul packed2F" %}
+  ins_encode %{
+    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVF src1 src2));
+  format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4F(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (MulVF dst src));
+  format %{ "mulps   $dst,$src\t! mul packed4F" %}
+  ins_encode %{
+    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVF src1 src2));
+  format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVF src (LoadVector mem)));
+  format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVF src1 src2));
+  format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVF src (LoadVector mem)));
+  format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Doubles vector mul
+instruct vmul2D(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVD dst src));
+  format %{ "mulpd   $dst,$src\t! mul packed2D" %}
+  ins_encode %{
+    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVD src1 src2));
+  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVD src (LoadVector mem)));
+  format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVD src1 src2));
+  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVD src (LoadVector mem)));
+  format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- DIV --------------------------------------
+
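+// Note: x86 has no packed integer divide, so only float and double division
+// is vectorized here (divps/divpd and their VEX forms); integer division
+// stays scalar.
+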
+// Floats vector div
+instruct vdiv2F(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (DivVF dst src));
+  format %{ "divps   $dst,$src\t! div packed2F" %}
+  ins_encode %{
+    __ divps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (DivVF src1 src2));
+  format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4F(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (DivVF dst src));
+  format %{ "divps   $dst,$src\t! div packed4F" %}
+  ins_encode %{
+    __ divps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (DivVF src1 src2));
+  format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (DivVF src (LoadVector mem)));
+  format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (DivVF src1 src2));
+  format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (DivVF src (LoadVector mem)));
+  format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Doubles vector div
+instruct vdiv2D(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (DivVD dst src));
+  format %{ "divpd   $dst,$src\t! div packed2D" %}
+  ins_encode %{
+    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (DivVD src1 src2));
+  format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (DivVD src (LoadVector mem)));
+  format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (DivVD src1 src2));
+  format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (DivVD src (LoadVector mem)));
+  format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// ------------------------------ LeftShift -----------------------------------
+
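+// Note: each shift comes in four shapes: SSE and AVX forms, each with either
+// a variable count or an immI8 immediate. A variable count is passed in an
+// XMM register (hence the regF shift operand): psllw/pslld/psllq read the
+// count from the low 64 bits of that register and apply it to every lane.
+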
+// Shorts/Chars vector left shift
+instruct vsll2S(vecS dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_imm(vecS dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector left shift
+instruct vsll2I(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI dst shift));
+  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
+  ins_encode %{
+    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2I_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI dst shift));
+  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
+  ins_encode %{
+    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI dst shift));
+  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
+  ins_encode %{
+    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI dst shift));
+  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
+  ins_encode %{
+    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Longs vector left shift
+instruct vsll2L(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL dst shift));
+  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
+  ins_encode %{
+    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2L_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL dst shift));
+  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
+  ins_encode %{
+    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL src shift));
+  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL src shift));
+  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVL src shift));
+  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVL src shift));
+  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// ----------------------- LogicalRightShift -----------------------------------
+
+// Shorts/Chars vector logical right shift is not implemented: it would
+// produce an incorrect Java result for negative data, because Java code
+// converts a short value to int with sign extension before shifting (see the
+// example below).
+
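+// Illustrative example: for short s = -1, Java computes (short)(0xFFFFFFFF
+// >>> 1) = (short)0x7FFFFFFF = -1, whereas a 16-bit psrlw by 1 on 0xFFFF
+// would yield 0x7FFF = 32767.
+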
+// Integers vector logical right shift
+instruct vsrl2I(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI dst shift));
+  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
+  ins_encode %{
+    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2I_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI dst shift));
+  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
+  ins_encode %{
+    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI dst shift));
+  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
+  ins_encode %{
+    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI dst shift));
+  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
+  ins_encode %{
+    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Longs vector logical right shift
+instruct vsrl2L(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL dst shift));
+  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
+  ins_encode %{
+    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2L_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL dst shift));
+  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
+  ins_encode %{
+    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL src shift));
+  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL src shift));
+  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVL src shift));
+  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVL src shift));
+  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// ------------------- ArithmeticRightShift -----------------------------------
+
+// Shorts/Chars vector arithmetic right shift
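+// In the variable-count forms below the shift amount is held in an XMM
+// register (hence the regF operand): psraw/psrad take the count from the
+// low quadword of an XMM register, or as an immediate in the _imm forms.
+// The logical-shift rules above follow the same convention.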
+instruct vsra2S(vecS dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_imm(vecS dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector arithmetic right shift
+instruct vsra2I(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI dst shift));
+  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
+  ins_encode %{
+    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2I_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI dst shift));
+  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
+  ins_encode %{
+    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI dst shift));
+  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
+  ins_encode %{
+    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI dst shift));
+  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
+  ins_encode %{
+    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// There are no longs vector arithmetic right shift instructions.
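+// (SSE/AVX have psraw and psrad but no 64-bit psraq. A hedged sketch of the
+// usual emulation, not implemented here: flip the sign bit, shift logically,
+// then undo the shifted bias:
+//   m = 1 << (63 - n);                   // sign-bit bias after the shift
+//   sra64(x, n) == ((x >>u n) ^ m) - m   // >>u is the logical psrlq
+// Since no match rule exists for RShiftVL, these shifts are simply never
+// vectorized on x86.)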
+
+
+// --------------------------------- AND --------------------------------------
+
+instruct vand4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (AndV dst src));
+  format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
+  ins_encode %{
+    __ pand($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (AndV src1 src2));
+  format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (AndV dst src));
+  format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
+  ins_encode %{
+    __ pand($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (AndV src1 src2));
+  format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (AndV dst src));
+  format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
+  ins_encode %{
+    __ pand($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (AndV src1 src2));
+  format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
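+// The _mem forms match (AndV src (LoadVector mem)) so the vector load is
+// folded into the memory-operand form of vpand, saving a separate load and
+// a register. The OR and XOR sections below do the same.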
+instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (AndV src (LoadVector mem)));
+  format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (AndV src1 src2));
+  format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (AndV src (LoadVector mem)));
+  format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- OR ---------------------------------------
+
+instruct vor4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (OrV dst src));
+  format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
+  ins_encode %{
+    __ por($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (OrV src1 src2));
+  format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (OrV dst src));
+  format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
+  ins_encode %{
+    __ por($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (OrV src1 src2));
+  format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (OrV dst src));
+  format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
+  ins_encode %{
+    __ por($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (OrV src1 src2));
+  format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (OrV src (LoadVector mem)));
+  format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (OrV src1 src2));
+  format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (OrV src (LoadVector mem)));
+  format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- XOR --------------------------------------
+
+instruct vxor4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (XorV dst src));
+  format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (XorV src1 src2));
+  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (XorV dst src));
+  format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (XorV src1 src2));
+  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (XorV dst src));
+  format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (XorV src1 src2));
+  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (XorV src (LoadVector mem)));
+  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (XorV src1 src2));
+  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (XorV src (LoadVector mem)));
+  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
--- a/src/cpu/x86/vm/x86_32.ad	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Sat Oct 20 00:08:35 2012 -0700
@@ -74,9 +74,6 @@
 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
 
-// Special Registers
-reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
-
 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
 // allocator, and only shows up in the encodings.
 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
@@ -105,27 +102,6 @@
 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 
-// XMM registers.  128-bit registers or 4 words each, labeled a-d.
-// Word a in each register holds a Float, words ab hold a Double.
-// We currently do not use the SIMD capabilities, so registers cd
-// are unused at the moment.
-reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
-reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
-reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
-reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
-reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
-reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
-reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
-reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
-reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
-reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
-reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
-reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
-reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
-reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
-reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
-reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
-
 // Specify priority of register selection within phases of register
 // allocation.  Highest priority is first.  A useful heuristic is to
 // give registers a low priority when they are required by machine
@@ -138,15 +114,6 @@
                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                     FPR6L, FPR6H, FPR7L, FPR7H );
 
-alloc_class chunk1( XMM0a, XMM0b,
-                    XMM1a, XMM1b,
-                    XMM2a, XMM2b,
-                    XMM3a, XMM3b,
-                    XMM4a, XMM4b,
-                    XMM5a, XMM5b,
-                    XMM6a, XMM6b,
-                    XMM7a, XMM7b, EFLAGS);
-
 
 //----------Architecture Description Register Classes--------------------------
 // Several register classes are automatically defined based upon information in
@@ -159,12 +126,12 @@
 // Class for all registers
 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 // Class for general registers
-reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
+reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 // Class for general registers which may be used for implicit null checks on win95
 // Also safe for use by tailjump. We don't want to allocate in rbp,
-reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
+reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
 // Class of "X" registers
-reg_class x_reg(EBX, ECX, EDX, EAX);
+reg_class int_x_reg(EBX, ECX, EDX, EAX);
 // Class of registers that can appear in an address with no offset.
 // EBP and ESP require an extra instruction byte for zero offset.
 // Used in fast-unlock
@@ -193,8 +160,6 @@
 reg_class sp_reg(ESP);
 // Singleton class for instruction pointer
 // reg_class ip_reg(EIP);
-// Singleton class for condition codes
-reg_class int_flags(EFLAGS);
 // Class of integer register pairs
 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
 // Class of integer register pairs that aligns with calling convention
@@ -206,29 +171,18 @@
 // Floating point registers.  Notice FPR0 is not a choice.
 // FPR0 is not ever allocated; we use clever encodings to fake
 // 2-address instructions out of Intel's FP stack.
-reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
-
-// make a register class for SSE registers
-reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
-
-// make a double register class for SSE2 registers
-reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
-                  XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
-
-reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
-                   FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
-                   FPR7L,FPR7H );
-
-reg_class flt_reg0( FPR1L );
-reg_class dbl_reg0( FPR1L,FPR1H );
-reg_class dbl_reg1( FPR2L,FPR2H );
-reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
-                       FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
-
-// XMM6 and XMM7 could be used as temporary registers for long, float and
-// double values for SSE2.
-reg_class xdb_reg6( XMM6a,XMM6b );
-reg_class xdb_reg7( XMM7a,XMM7b );
+reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
+
+reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
+                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
+                      FPR7L,FPR7H );
+
+reg_class fp_flt_reg0( FPR1L );
+reg_class fp_dbl_reg0( FPR1L,FPR1H );
+reg_class fp_dbl_reg1( FPR2L,FPR2H );
+reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
+                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
+
 %}
 
 
@@ -412,7 +366,7 @@
   }
 }
 
-   // eRegI ereg, memory mem) %{    // emit_reg_mem
+   // rRegI ereg, memory mem) %{    // emit_reg_mem
 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
   // There is no index & no scale, use form without SIB byte
   if ((index == 0x4) &&
@@ -787,7 +741,7 @@
 #endif
   }
   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
-  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes.
+  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
   return size+5+offset_size;
 }
 
@@ -821,7 +775,7 @@
     }
 #endif
   }
-  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes.
+  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as the SIMD prefix.
   // Only MOVAPS SSE prefix uses 1 byte.
   int sz = 4;
   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
@@ -903,6 +857,108 @@
   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 }
 
+// The next two methods are shared by the 32- and 64-bit VMs. They are defined in x86.ad.
+static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+                          int src_hi, int dst_hi, uint ireg, outputStream* st);
+
+static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+                            int stack_offset, int reg, uint ireg, outputStream* st);
+
+static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
+                                     int dst_offset, uint ireg, outputStream* st) {
+  int calc_size = 0;
+  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
+  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
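+  // An offset of 0 needs no displacement byte in the ModRM encoding; offsets
+  // that fit a signed byte (< 0x80) take a disp8 and larger ones a disp32,
+  // hence the 0/1/4 sizing. The fixed constants below count opcode bytes:
+  // pushl/popl with a SIB byte are 3, movdqu/vmovdqu are 5 (6 with the
+  // disp8 used for the xmm0 save slot).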
+  switch (ireg) {
+  case Op_VecS:
+    calc_size = 3+src_offset_size + 3+dst_offset_size;
+    break;
+  case Op_VecD:
+    calc_size = 3+src_offset_size + 3+dst_offset_size;
+    src_offset += 4;
+    dst_offset += 4;
+    src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
+    dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
+    calc_size += 3+src_offset_size + 3+dst_offset_size;
+    break;
+  case Op_VecX:
+    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
+    break;
+  case Op_VecY:
+    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    int offset = __ offset();
+    switch (ireg) {
+    case Op_VecS:
+      __ pushl(Address(rsp, src_offset));
+      __ popl (Address(rsp, dst_offset));
+      break;
+    case Op_VecD:
+      __ pushl(Address(rsp, src_offset));
+      __ popl (Address(rsp, dst_offset));
+      __ pushl(Address(rsp, src_offset+4));
+      __ popl (Address(rsp, dst_offset+4));
+      break;
+    case Op_VecX:
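+      // Park xmm0 just below rsp so it can serve as a temporary for the
+      // 128-bit copy, then restore it; the Op_VecY case below does the
+      // same for 256 bits at rsp - 32.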
+      __ movdqu(Address(rsp, -16), xmm0);
+      __ movdqu(xmm0, Address(rsp, src_offset));
+      __ movdqu(Address(rsp, dst_offset), xmm0);
+      __ movdqu(xmm0, Address(rsp, -16));
+      break;
+    case Op_VecY:
+      __ vmovdqu(Address(rsp, -32), xmm0);
+      __ vmovdqu(xmm0, Address(rsp, src_offset));
+      __ vmovdqu(Address(rsp, dst_offset), xmm0);
+      __ vmovdqu(xmm0, Address(rsp, -32));
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+    int size = __ offset() - offset;
+    assert(size == calc_size, "incorrect size calculattion");
+    return size;
+#ifndef PRODUCT
+  } else if (!do_size) {
+    switch (ireg) {
+    case Op_VecS:
+      st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
+                "popl    [rsp + #%d]",
+                src_offset, dst_offset);
+      break;
+    case Op_VecD:
+      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
+                "popq    [rsp + #%d]\n\t"
+                "pushl   [rsp + #%d]\n\t"
+                "popq    [rsp + #%d]",
+                src_offset, dst_offset, src_offset+4, dst_offset+4);
+      break;
+    case Op_VecX:
+      st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
+                "movdqu  xmm0, [rsp + #%d]\n\t"
+                "movdqu  [rsp + #%d], xmm0\n\t"
+                "movdqu  xmm0, [rsp - #16]",
+                src_offset, dst_offset);
+      break;
+    case Op_VecY:
+      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
+                "vmovdqu xmm0, [rsp + #%d]\n\t"
+                "vmovdqu [rsp + #%d], xmm0\n\t"
+                "vmovdqu xmm0, [rsp - #32]",
+                src_offset, dst_offset);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+#endif
+  }
+  return calc_size;
+}
+
 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
   // Get registers to move
   OptoReg::Name src_second = ra_->get_reg_second(in(1));
@@ -923,6 +979,29 @@
   if( src_first == dst_first && src_second == dst_second )
     return size;            // Self copy, no move
 
+  if (bottom_type()->isa_vect() != NULL) {
+    uint ireg = ideal_reg();
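+    // Vector values live only in XMM registers or stack slots, never in
+    // GPRs or x87 registers; the asserts below check exactly that before
+    // dispatching on the source/destination register classes.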
+    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
+    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
+    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
+    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
+      // mem -> mem
+      int src_offset = ra_->reg2offset(src_first);
+      int dst_offset = ra_->reg2offset(dst_first);
+      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
+    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
+      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
+    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
+      int stack_offset = ra_->reg2offset(dst_first);
+      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
+    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
+      int stack_offset = ra_->reg2offset(src_first);
+      return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
+    } else {
+      ShouldNotReachHere();
+    }
+  }
+
   // --------------------------------------
   // Check for mem-mem move.  push/pop to move.
   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
@@ -1288,14 +1367,6 @@
   return offset;
 }
 
-
-const bool Matcher::match_rule_supported(int opcode) {
-  if (!has_match_rule(opcode))
-    return false;
-
-  return true;  // Per default match rules are supported.
-}
-
 int Matcher::regnum_to_fpu_offset(int regnum) {
   return regnum - 32; // The FP registers are in the second chunk
 }
@@ -1305,16 +1376,6 @@
   return true;
 }
 
-// Vector width in bytes
-const uint Matcher::vector_width_in_bytes(void) {
-  return UseSSE >= 2 ? 8 : 0;
-}
-
-// Vector ideal reg
-const uint Matcher::vector_ideal_reg(void) {
-  return Op_RegD;
-}
-
 // Is this branch offset short enough that a short branch can be used?
 //
 // NOTE: If the platform does not provide any short branch variants, then
@@ -1444,7 +1505,7 @@
 // arguments in those registers not be available to the callee.
 bool Matcher::can_be_java_arg( int reg ) {
   if(  reg == ECX_num   || reg == EDX_num   ) return true;
-  if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
+  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
   return false;
 }
@@ -1557,16 +1618,16 @@
     emit_opcode(cbuf,0x66);
   %}
 
-  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
+  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
   %}
 
-  enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{    // OpcRegReg(Many)
+  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
     emit_opcode(cbuf,$opcode$$constant);
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
   %}
 
-  enc_class mov_r32_imm0( eRegI dst ) %{
+  enc_class mov_r32_imm0( rRegI dst ) %{
     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
   %}
@@ -1613,7 +1674,7 @@
   %}
 
   // Dense encoding for older common ops
-  enc_class Opc_plus(immI opcode, eRegI reg) %{
+  enc_class Opc_plus(immI opcode, rRegI reg) %{
     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
   %}
 
@@ -1629,7 +1690,7 @@
     }
   %}
 
-  enc_class OpcSErm (eRegI dst, immI imm) %{    // OpcSEr/m
+  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
     // Emit primary opcode and set sign-extend bit
     // Check for 8-bit immediate, and set sign extend bit in opcode
     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
@@ -1674,7 +1735,7 @@
     else                               emit_d32(cbuf,con);
   %}
 
-  enc_class OpcSReg (eRegI dst) %{    // BSWAP
+  enc_class OpcSReg (rRegI dst) %{    // BSWAP
     emit_cc(cbuf, $secondary, $dst$$reg );
   %}
 
@@ -1692,7 +1753,7 @@
     emit_rm(cbuf, 0x3, destlo, desthi);
   %}
 
-  enc_class RegOpc (eRegI div) %{    // IDIV, IMOD, JMP indirect, ...
+  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
   %}
 
@@ -1883,20 +1944,20 @@
 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 //   %}
 
-  enc_class RegOpcImm (eRegI dst, immI8 shift) %{    // SHL, SAR, SHR
+  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
     $$$emit8$primary;
     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
     $$$emit8$shift$$constant;
   %}
 
-  enc_class LdImmI (eRegI dst, immI src) %{    // Load Immediate
+  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
     // Load immediate does not have a zero or sign extended version
     // for 8-bit immediates
     emit_opcode(cbuf, 0xB8 + $dst$$reg);
     $$$emit32$src$$constant;
   %}
 
-  enc_class LdImmP (eRegI dst, immI src) %{    // Load Immediate
+  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
     // Load immediate does not have a zero or sign extended version
     // for 8-bit immediates
     emit_opcode(cbuf, $primary + $dst$$reg);
@@ -1935,15 +1996,15 @@
 
 
   // Encode a reg-reg copy.  If it is useless, then empty encoding.
-  enc_class enc_Copy( eRegI dst, eRegI src ) %{
+  enc_class enc_Copy( rRegI dst, rRegI src ) %{
     encode_Copy( cbuf, $dst$$reg, $src$$reg );
   %}
 
-  enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
+  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
     encode_Copy( cbuf, $dst$$reg, $src$$reg );
   %}
 
-  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
+  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
   %}
 
@@ -1965,7 +2026,7 @@
     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
   %}
 
-  enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
+  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
   %}
 
@@ -2060,7 +2121,7 @@
     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
   %}
 
-  enc_class RegMem (eRegI ereg, memory mem) %{    // emit_reg_mem
+  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
     int reg_encoding = $ereg$$reg;
     int base  = $mem$$base;
     int index = $mem$$index;
@@ -2124,7 +2185,7 @@
 
   // Clone of RegMem but accepts an extra parameter to access each
   // half of a double in memory; it never needs relocation info.
-  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
+  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
     emit_opcode(cbuf,$opcode$$constant);
     int reg_encoding = $rm_reg$$reg;
     int base     = $mem$$base;
@@ -2160,7 +2221,7 @@
     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
   %}
 
-  enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{    // emit_reg_lea
+  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
     int reg_encoding = $dst$$reg;
     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
     int index        = 0x04;            // 0x04 indicates no index
@@ -2170,7 +2231,7 @@
     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
   %}
 
-  enc_class min_enc (eRegI dst, eRegI src) %{    // MIN
+  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
     // Compare dst,src
     emit_opcode(cbuf,0x3B);
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
@@ -2182,7 +2243,7 @@
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
   %}
 
-  enc_class max_enc (eRegI dst, eRegI src) %{    // MAX
+  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
     // Compare dst,src
     emit_opcode(cbuf,0x3B);
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
@@ -2213,7 +2274,7 @@
     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
   %}
 
-  enc_class neg_reg(eRegI dst) %{
+  enc_class neg_reg(rRegI dst) %{
     // NEG $dst
     emit_opcode(cbuf,0xF7);
     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
@@ -2243,7 +2304,7 @@
     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
   %}
 
-  enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
+  enc_class enc_cmpLTP_mem(rRegI p, rRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
     int tmpReg = $tmp$$reg;
 
     // SUB $p,$q
@@ -2382,12 +2443,12 @@
   %}
 
   // Special case for moving an integer register to a stack slot.
-  enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
+  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
   %}
 
   // Special case for moving a register to a stack slot.
-  enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
+  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
     // Opcode already emitted
     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
@@ -2528,45 +2589,6 @@
     __ fld_d(Address(rsp, 0));
   %}
 
-  // Compute X^Y using Intel's fast hardware instructions, if possible.
-  // Otherwise return a NaN.
-  enc_class pow_exp_core_encoding %{
-    // FPR1 holds Y*ln2(X).  Compute FPR1 = 2^(Y*ln2(X))
-    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0);  // fdup = fld st(0)          Q       Q
-    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC);  // frndint               int(Q)      Q
-    emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9);  // fsub st(1) -= st(0);  int(Q) frac(Q)
-    emit_opcode(cbuf,0xDB);                          // FISTP [ESP]           frac(Q)
-    emit_opcode(cbuf,0x1C);
-    emit_d8(cbuf,0x24);
-    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0);  // f2xm1                 2^frac(Q)-1
-    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8);  // fld1                  1 2^frac(Q)-1
-    emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1);  // faddp                 2^frac(Q)
-    emit_opcode(cbuf,0x8B);                          // mov rax,[esp+0]=int(Q)
-    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
-    emit_opcode(cbuf,0xC7);                          // mov rcx,0xFFFFF800 - overflow mask
-    emit_rm(cbuf, 0x3, 0x0, ECX_enc);
-    emit_d32(cbuf,0xFFFFF800);
-    emit_opcode(cbuf,0x81);                          // add rax,1023 - the double exponent bias
-    emit_rm(cbuf, 0x3, 0x0, EAX_enc);
-    emit_d32(cbuf,1023);
-    emit_opcode(cbuf,0x8B);                          // mov rbx,eax
-    emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
-    emit_opcode(cbuf,0xC1);                          // shl rax,20 - Slide to exponent position
-    emit_rm(cbuf,0x3,0x4,EAX_enc);
-    emit_d8(cbuf,20);
-    emit_opcode(cbuf,0x85);                          // test rbx,ecx - check for overflow
-    emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
-    emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45);  // CMOVne rax,ecx - overflow; stuff NAN into EAX
-    emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
-    emit_opcode(cbuf,0x89);                          // mov [esp+4],eax - Store as part of double word
-    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
-    emit_opcode(cbuf,0xC7);                          // mov [esp+0],0   - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-    emit_d32(cbuf,0);
-    emit_opcode(cbuf,0xDC);                          // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
-    encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
-  %}
-
   enc_class Push_Result_Mod_DPR( regDPR src) %{
     if ($src$$reg != FPR1L_enc) {
       // fincstp
@@ -2671,7 +2693,7 @@
 // equal_result    = 0;
 // nan_result      = -1;
 
-  enc_class CmpF_Result(eRegI dst) %{
+  enc_class CmpF_Result(rRegI dst) %{
     // fnstsw_ax();
     emit_opcode( cbuf, 0xDF);
     emit_opcode( cbuf, 0xE0);
@@ -2716,7 +2738,7 @@
 // done:
   %}
 
-  enc_class convert_int_long( regL dst, eRegI src ) %{
+  enc_class convert_int_long( regL dst, rRegI src ) %{
     // mov $dst.lo,$src
     int dst_encoding = $dst$$reg;
     int src_encoding = $src$$reg;
@@ -2785,7 +2807,7 @@
     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
   %}
 
-  enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
+  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
     // Basic idea: lo(result) = lo(x_lo * y_lo)
     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
     // MOV    $tmp,$src.lo
@@ -2811,7 +2833,7 @@
     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
   %}
 
-  enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
+  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
     // Basic idea: lo(result) = lo(src * y_lo)
     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
     // IMUL   $tmp,EDX,$src
@@ -2867,7 +2889,7 @@
     emit_d8(cbuf, 4*4);
   %}
 
-  enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
+  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
     // MOV   $tmp,$src.lo
     emit_opcode(cbuf, 0x8B);
     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
@@ -2888,7 +2910,7 @@
     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
   %}
 
-  enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
+  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
     emit_opcode( cbuf, 0x3B );
     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
@@ -2900,7 +2922,7 @@
     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
   %}
 
-  enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
+  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
     // XOR    $tmp,$tmp
     emit_opcode(cbuf,0x33);  // XOR
     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
@@ -3793,9 +3815,9 @@
     // in SSE2+ mode we want to keep the FPU stack clean so pretend
     // that C functions return float and double results in XMM0.
     if( ideal_reg == Op_RegD && UseSSE>=2 )
-      return OptoRegPair(XMM0b_num,XMM0a_num);
+      return OptoRegPair(XMM0b_num,XMM0_num);
     if( ideal_reg == Op_RegF && UseSSE>=2 )
-      return OptoRegPair(OptoReg::Bad,XMM0a_num);
+      return OptoRegPair(OptoReg::Bad,XMM0_num);
 
     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
   %}
@@ -3806,9 +3828,9 @@
     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
     if( ideal_reg == Op_RegD && UseSSE>=2 )
-      return OptoRegPair(XMM0b_num,XMM0a_num);
+      return OptoRegPair(XMM0b_num,XMM0_num);
     if( ideal_reg == Op_RegF && UseSSE>=1 )
-      return OptoRegPair(OptoReg::Bad,XMM0a_num);
+      return OptoRegPair(OptoReg::Bad,XMM0_num);
     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
   %}
 
@@ -4178,8 +4200,8 @@
 
 // Register Operands
 // Integer Register
-operand eRegI() %{
-  constraint(ALLOC_IN_RC(e_reg));
+operand rRegI() %{
+  constraint(ALLOC_IN_RC(int_reg));
   match(RegI);
   match(xRegI);
   match(eAXRegI);
@@ -4194,8 +4216,8 @@
 %}
 
 // Subset of Integer Register
-operand xRegI(eRegI reg) %{
-  constraint(ALLOC_IN_RC(x_reg));
+operand xRegI(rRegI reg) %{
+  constraint(ALLOC_IN_RC(int_x_reg));
   match(reg);
   match(eAXRegI);
   match(eBXRegI);
@@ -4210,7 +4232,7 @@
 operand eAXRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(eax_reg));
   match(reg);
-  match(eRegI);
+  match(rRegI);
 
   format %{ "EAX" %}
   interface(REG_INTER);
@@ -4220,7 +4242,7 @@
 operand eBXRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(ebx_reg));
   match(reg);
-  match(eRegI);
+  match(rRegI);
 
   format %{ "EBX" %}
   interface(REG_INTER);
@@ -4229,7 +4251,7 @@
 operand eCXRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(ecx_reg));
   match(reg);
-  match(eRegI);
+  match(rRegI);
 
   format %{ "ECX" %}
   interface(REG_INTER);
@@ -4238,7 +4260,7 @@
 operand eDXRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(edx_reg));
   match(reg);
-  match(eRegI);
+  match(rRegI);
 
   format %{ "EDX" %}
   interface(REG_INTER);
@@ -4247,7 +4269,7 @@
 operand eDIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(edi_reg));
   match(reg);
-  match(eRegI);
+  match(rRegI);
 
   format %{ "EDI" %}
   interface(REG_INTER);
@@ -4294,7 +4316,7 @@
 operand eSIRegI(xRegI reg) %{
    constraint(ALLOC_IN_RC(esi_reg));
    match(reg);
-   match(eRegI);
+   match(rRegI);
 
    format %{ "ESI" %}
    interface(REG_INTER);
@@ -4315,7 +4337,7 @@
 %}
 
 operand eRegP() %{
-  constraint(ALLOC_IN_RC(e_reg));
+  constraint(ALLOC_IN_RC(int_reg));
   match(RegP);
   match(eAXRegP);
   match(eBXRegP);
@@ -4328,7 +4350,7 @@
 
 // On windows95, EBP is not safe to use for implicit null tests.
 operand eRegP_no_EBP() %{
-  constraint(ALLOC_IN_RC(e_reg_no_rbp));
+  constraint(ALLOC_IN_RC(int_reg_no_rbp));
   match(RegP);
   match(eAXRegP);
   match(eBXRegP);
@@ -4508,7 +4530,7 @@
 // Float register operands
 operand regDPR() %{
   predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(dbl_reg));
+  constraint(ALLOC_IN_RC(fp_dbl_reg));
   match(RegD);
   match(regDPR1);
   match(regDPR2);
@@ -4518,7 +4540,7 @@
 
 operand regDPR1(regDPR reg) %{
   predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(dbl_reg0));
+  constraint(ALLOC_IN_RC(fp_dbl_reg0));
   match(reg);
   format %{ "FPR1" %}
   interface(REG_INTER);
@@ -4526,7 +4548,7 @@
 
 operand regDPR2(regDPR reg) %{
   predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(dbl_reg1));
+  constraint(ALLOC_IN_RC(fp_dbl_reg1));
   match(reg);
   format %{ "FPR2" %}
   interface(REG_INTER);
@@ -4534,45 +4556,16 @@
 
 operand regnotDPR1(regDPR reg) %{
   predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(dbl_notreg0));
+  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
   match(reg);
   format %{ %}
   interface(REG_INTER);
 %}
 
-// XMM Double register operands
-operand regD() %{
-  predicate( UseSSE>=2 );
-  constraint(ALLOC_IN_RC(xdb_reg));
-  match(RegD);
-  match(regD6);
-  match(regD7);
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-// XMM6 double register operands
-operand regD6(regD reg) %{
-  predicate( UseSSE>=2 );
-  constraint(ALLOC_IN_RC(xdb_reg6));
-  match(reg);
-  format %{ "XMM6" %}
-  interface(REG_INTER);
-%}
-
-// XMM7 double register operands
-operand regD7(regD reg) %{
-  predicate( UseSSE>=2 );
-  constraint(ALLOC_IN_RC(xdb_reg7));
-  match(reg);
-  format %{ "XMM7" %}
-  interface(REG_INTER);
-%}
-
 // Float register operands
 operand regFPR() %{
   predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(flt_reg));
+  constraint(ALLOC_IN_RC(fp_flt_reg));
   match(RegF);
   match(regFPR1);
   format %{ %}
@@ -4582,21 +4575,30 @@
 // Float register operands
 operand regFPR1(regFPR reg) %{
   predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(flt_reg0));
+  constraint(ALLOC_IN_RC(fp_flt_reg0));
   match(reg);
   format %{ "FPR1" %}
   interface(REG_INTER);
 %}
 
-// XMM register operands
+// XMM Float register operands
 operand regF() %{
   predicate( UseSSE>=1 );
-  constraint(ALLOC_IN_RC(xmm_reg));
+  constraint(ALLOC_IN_RC(float_reg));
   match(RegF);
   format %{ %}
   interface(REG_INTER);
 %}
 
+// XMM Double register operands
+operand regD() %{
+  predicate( UseSSE>=2 );
+  constraint(ALLOC_IN_RC(double_reg));
+  match(RegD);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 
 //----------Memory Operands----------------------------------------------------
 // Direct Memory Operand
@@ -4614,7 +4616,7 @@
 
 // Indirect Memory Operand
 operand indirect(eRegP reg) %{
-  constraint(ALLOC_IN_RC(e_reg));
+  constraint(ALLOC_IN_RC(int_reg));
   match(reg);
 
   format %{ "[$reg]" %}
@@ -4653,7 +4655,7 @@
 %}
 
 // Indirect Memory Plus Long Offset Operand
-operand indOffset32X(eRegI reg, immP off) %{
+operand indOffset32X(rRegI reg, immP off) %{
   match(AddP off reg);
 
   format %{ "[$reg + $off]" %}
@@ -4666,7 +4668,7 @@
 %}
 
 // Indirect Memory Plus Index Register Plus Offset Operand
-operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
+operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
   match(AddP (AddP reg ireg) off);
 
   op_cost(10);
@@ -4680,7 +4682,7 @@
 %}
 
 // Indirect Memory Plus Index Register Plus Offset Operand
-operand indIndex(eRegP reg, eRegI ireg) %{
+operand indIndex(eRegP reg, rRegI ireg) %{
   match(AddP reg ireg);
 
   op_cost(10);
@@ -4698,7 +4700,7 @@
 // // -------------------------------------------------------------------------
 // // Scaled Memory Operands
 // // Indirect Memory Times Scale Plus Offset Operand
-// operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
+// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 //   match(AddP off (LShiftI ireg scale));
 //
 //   op_cost(10);
@@ -4712,7 +4714,7 @@
 // %}
 
 // Indirect Memory Times Scale Plus Index Register
-operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
+operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
   match(AddP reg (LShiftI ireg scale));
 
   op_cost(10);
@@ -4726,7 +4728,7 @@
 %}
 
 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
-operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
+operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
   match(AddP (AddP reg (LShiftI ireg scale)) off);
 
   op_cost(10);
@@ -4854,7 +4856,7 @@
 // Indirect Memory Operand
 operand indirect_win95_safe(eRegP_no_EBP reg)
 %{
-  constraint(ALLOC_IN_RC(e_reg));
+  constraint(ALLOC_IN_RC(int_reg));
   match(reg);
 
   op_cost(100);
@@ -4898,7 +4900,7 @@
 %}
 
 // Indirect Memory Plus Index Register Plus Offset Operand
-operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
+operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
 %{
   match(AddP (AddP reg ireg) off);
 
@@ -4913,7 +4915,7 @@
 %}
 
 // Indirect Memory Times Scale Plus Index Register
-operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
+operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
 %{
   match(AddP reg (LShiftI ireg scale));
 
@@ -4928,7 +4930,7 @@
 %}
 
 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
-operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
+operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
 %{
   match(AddP (AddP reg (LShiftI ireg scale)) off);
 
@@ -5117,7 +5119,7 @@
 //   Or: _mem if it requires the big decoder and a memory unit.
 
 // Integer ALU reg operation
-pipe_class ialu_reg(eRegI dst) %{
+pipe_class ialu_reg(rRegI dst) %{
     single_instruction;
     dst    : S4(write);
     dst    : S3(read);
@@ -5135,7 +5137,7 @@
 %}
 
 // Integer ALU reg operation using big decoder
-pipe_class ialu_reg_fat(eRegI dst) %{
+pipe_class ialu_reg_fat(rRegI dst) %{
     single_instruction;
     dst    : S4(write);
     dst    : S3(read);
@@ -5153,7 +5155,7 @@
 %}
 
 // Integer ALU reg-reg operation
-pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
+pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
     single_instruction;
     dst    : S4(write);
     src    : S3(read);
@@ -5171,7 +5173,7 @@
 %}
 
 // Integer ALU reg-reg operation
-pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
+pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
     single_instruction;
     dst    : S4(write);
     src    : S3(read);
@@ -5189,7 +5191,7 @@
 %}
 
 // Integer ALU reg-mem operation
-pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
+pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
     single_instruction;
     dst    : S5(write);
     mem    : S3(read);
@@ -5218,7 +5220,7 @@
 %}
 
 // Integer Store to Memory
-pipe_class ialu_mem_reg(memory mem, eRegI src) %{
+pipe_class ialu_mem_reg(memory mem, rRegI src) %{
     single_instruction;
     mem    : S3(read);
     src    : S5(read);
@@ -5247,7 +5249,7 @@
 %}
 
 // Integer ALU0 reg-reg operation
-pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
+pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
     single_instruction;
     dst    : S4(write);
     src    : S3(read);
@@ -5256,7 +5258,7 @@
 %}
 
 // Integer ALU0 reg-mem operation
-pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
+pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
     single_instruction;
     dst    : S5(write);
     mem    : S3(read);
@@ -5266,7 +5268,7 @@
 %}
 
 // Integer ALU reg-reg operation
-pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
+pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
     single_instruction;
     cr     : S4(write);
     src1   : S3(read);
@@ -5276,7 +5278,7 @@
 %}
 
 // Integer ALU reg-imm operation
-pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
+pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
     single_instruction;
     cr     : S4(write);
     src1   : S3(read);
@@ -5285,7 +5287,7 @@
 %}
 
 // Integer ALU reg-mem operation
-pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
+pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
     single_instruction;
     cr     : S4(write);
     src1   : S3(read);
@@ -5296,7 +5298,7 @@
 %}
 
 // Conditional move reg-reg
-pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
+pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
     instruction_count(4);
     y      : S4(read);
     q      : S3(read);
@@ -5305,7 +5307,7 @@
 %}
 
 // Conditional move reg-reg
-pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
+pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
     single_instruction;
     dst    : S4(write);
     src    : S3(read);
@@ -5314,7 +5316,7 @@
 %}
 
 // Conditional move reg-mem
-pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
+pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
     single_instruction;
     dst    : S4(write);
     src    : S3(read);
@@ -5565,7 +5567,7 @@
 //               in the encode section of the architecture description.
 
 //----------BSWAP-Instruction--------------------------------------------------
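 // A hedged sketch of the source-level mapping (assuming the usual
 // Integer.reverseBytes intrinsic): the ReverseBytesI node matched below makes
 //   Integer.reverseBytes(0x11223344)    // == 0x44332211
 // cost a single BSWAP, with no scratch register and, unlike the 16-bit
 // variants further down, no flag kill.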
-instruct bytes_reverse_int(eRegI dst) %{
+instruct bytes_reverse_int(rRegI dst) %{
   match(Set dst (ReverseBytesI dst));
 
   format %{ "BSWAP  $dst" %}
@@ -5586,7 +5588,7 @@
   ins_pipe( ialu_reg_reg);
 %}
 
-instruct bytes_reverse_unsigned_short(eRegI dst, eFlagsReg cr) %{
+instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
   match(Set dst (ReverseBytesUS dst));
   effect(KILL cr);
 
@@ -5599,7 +5601,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct bytes_reverse_short(eRegI dst, eFlagsReg cr) %{
+instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
   match(Set dst (ReverseBytesS dst));
   effect(KILL cr);
 
@@ -5615,7 +5617,7 @@
 
 //---------- Zeros Count Instructions ------------------------------------------
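 // Sketch of the two strategies matched below: with the dedicated count
 // instruction available (UseCountLeadingZerosInstruction) the count is a
 // single instruction; otherwise the _bsr variants rebuild it from BSR, which
 // returns the index of the highest set bit, so for x != 0
 //   clz(x) == 31 - bsr(x)          e.g. clz(0x00010000) == 31 - 16 == 15
 // with x == 0 special-cased because BSR leaves its destination undefined.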
 
-instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
   predicate(UseCountLeadingZerosInstruction);
   match(Set dst (CountLeadingZerosI src));
   effect(KILL cr);
@@ -5627,7 +5629,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
   predicate(!UseCountLeadingZerosInstruction);
   match(Set dst (CountLeadingZerosI src));
   effect(KILL cr);
@@ -5652,7 +5654,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
+instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
   predicate(UseCountLeadingZerosInstruction);
   match(Set dst (CountLeadingZerosL src));
   effect(TEMP dst, KILL cr);
@@ -5675,7 +5677,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
+instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
   predicate(!UseCountLeadingZerosInstruction);
   match(Set dst (CountLeadingZerosL src));
   effect(TEMP dst, KILL cr);
@@ -5711,7 +5713,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (CountTrailingZerosI src));
   effect(KILL cr);
 
@@ -5730,7 +5732,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
+instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
   match(Set dst (CountTrailingZerosL src));
   effect(TEMP dst, KILL cr);
 
@@ -5762,7 +5764,7 @@
 
 //---------- Population Count Instructions -------------------------------------
 
-instruct popCountI(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountI src));
   effect(KILL cr);
@@ -5774,7 +5776,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct popCountI_mem(eRegI dst, memory mem, eFlagsReg cr) %{
+instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountI (LoadI mem)));
   effect(KILL cr);
@@ -5787,7 +5789,7 @@
 %}
 
 // Note: Long.bitCount(long) returns an int.
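 // Sketch of the 32-bit split (consistent with the TEMP tmp operand below):
 //   Long.bitCount(v) == popcnt(v.lo) + popcnt(v.hi)
 // e.g. bitCount(0x8000000000000001L) == 1 + 1 == 2; the sum of two 32-bit
 // popcounts always fits in an int, matching the note above.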
-instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountL src));
   effect(KILL cr, TEMP tmp, TEMP dst);
@@ -5804,7 +5806,7 @@
 %}
 
 // Note: Long.bitCount(long) returns an int.
-instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
+instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountL (LoadL mem)));
   effect(KILL cr, TEMP tmp, TEMP dst);
@@ -5908,7 +5910,7 @@
 %}
 
 // Load Short (16bit signed)
-instruct loadS(eRegI dst, memory mem) %{
+instruct loadS(rRegI dst, memory mem) %{
   match(Set dst (LoadS mem));
 
   ins_cost(125);
@@ -5922,7 +5924,7 @@
 %}
 
 // Load Short (16 bit signed) to Byte (8 bit signed)
-instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
+instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 
   ins_cost(125);
@@ -5953,7 +5955,7 @@
 %}
 
 // Load Unsigned Short/Char (16bit unsigned)
-instruct loadUS(eRegI dst, memory mem) %{
+instruct loadUS(rRegI dst, memory mem) %{
   match(Set dst (LoadUS mem));
 
   ins_cost(125);
@@ -5967,7 +5969,7 @@
 %}
 
 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
-instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
+instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 
   ins_cost(125);
@@ -6028,7 +6030,7 @@
 %}
 
 // Load Integer
-instruct loadI(eRegI dst, memory mem) %{
+instruct loadI(rRegI dst, memory mem) %{
   match(Set dst (LoadI mem));
 
   ins_cost(125);
@@ -6042,7 +6044,7 @@
 %}
 
 // Load Integer (32 bit signed) to Byte (8 bit signed)
-instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
+instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 
   ins_cost(125);
@@ -6054,7 +6056,7 @@
 %}
 
 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
-instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
+instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
   match(Set dst (AndI (LoadI mem) mask));
 
   ins_cost(125);
@@ -6066,7 +6068,7 @@
 %}
 
 // Load Integer (32 bit signed) to Short (16 bit signed)
-instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
+instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 
   ins_cost(125);
@@ -6078,7 +6080,7 @@
 %}
 
 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
-instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
+instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
   match(Set dst (AndI (LoadI mem) mask));
 
   ins_cost(125);
@@ -6239,7 +6241,7 @@
 %}
 
 // Load Range
-instruct loadRange(eRegI dst, memory mem) %{
+instruct loadRange(rRegI dst, memory mem) %{
   match(Set dst (LoadRange mem));
 
   ins_cost(125);
@@ -6336,66 +6338,6 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-// Load Aligned Packed Byte to XMM register
-instruct loadA8B(regD dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (Load8B mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed8B" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Short to XMM register
-instruct loadA4S(regD dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (Load4S mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed4S" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Char to XMM register
-instruct loadA4C(regD dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (Load4C mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed4C" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Integer to XMM register
-instruct load2IU(regD dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (Load2I mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed2I" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Single to XMM
-instruct loadA2F(regD dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (Load2F mem));
-  ins_cost(145);
-  format %{ "MOVQ  $dst,$mem\t! packed2F" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Load Effective Address
 instruct leaP8(eRegP dst, indOffset8 mem) %{
   match(Set dst mem);
@@ -6448,7 +6390,7 @@
 %}
 
 // Load Constant
-instruct loadConI(eRegI dst, immI src) %{
+instruct loadConI(rRegI dst, immI src) %{
   match(Set dst src);
 
   format %{ "MOV    $dst,$src" %}
@@ -6457,7 +6399,7 @@
 %}
 
 // Load Constant zero
-instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
+instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
   match(Set dst src);
   effect(KILL cr);
 
@@ -6625,7 +6567,7 @@
 %}
 
 // Load Stack Slot
-instruct loadSSI(eRegI dst, stackSlotI src) %{
+instruct loadSSI(rRegI dst, stackSlotI src) %{
   match(Set dst src);
   ins_cost(125);
 
@@ -6852,7 +6794,7 @@
 %}
 
 // Store Char/Short
-instruct storeC(memory mem, eRegI src) %{
+instruct storeC(memory mem, rRegI src) %{
   match(Set mem (StoreC mem src));
 
   ins_cost(125);
@@ -6863,7 +6805,7 @@
 %}
 
 // Store Integer
-instruct storeI(memory mem, eRegI src) %{
+instruct storeI(memory mem, rRegI src) %{
   match(Set mem (StoreI mem src));
 
   ins_cost(125);
@@ -7007,42 +6949,6 @@
   ins_pipe( ialu_mem_imm );
 %}
 
-// Store Aligned Packed Byte XMM register to memory
-instruct storeA8B(memory mem, regD src) %{
-  predicate(UseSSE>=1);
-  match(Set mem (Store8B mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed8B" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Store Aligned Packed Char/Short XMM register to memory
-instruct storeA4C(memory mem, regD src) %{
-  predicate(UseSSE>=1);
-  match(Set mem (Store4C mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed4C" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Store Aligned Packed Integer XMM register to memory
-instruct storeA2I(memory mem, regD src) %{
-  predicate(UseSSE>=1);
-  match(Set mem (Store2I mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed2I" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Store CMS card-mark Immediate
 instruct storeImmCM(memory mem, immI8 src) %{
   match(Set mem (StoreCM mem src));
@@ -7104,18 +7010,6 @@
   ins_pipe( pipe_slow );
 %}
 
-// Store Aligned Packed Single Float XMM register to memory
-instruct storeA2F(memory mem, regD src) %{
-  predicate(UseSSE>=1);
-  match(Set mem (Store2F mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed2F" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Store Float
 instruct storeFPR( memory mem, regFPR1 src) %{
   predicate(UseSSE==0);
@@ -7177,7 +7071,7 @@
 %}
 
 // Store Integer to stack slot
-instruct storeSSI(stackSlotI dst, eRegI src) %{
+instruct storeSSI(stackSlotI dst, rRegI src) %{
   match(Set dst src);
 
   ins_cost(100);
@@ -7302,7 +7196,7 @@
   ins_pipe(empty);
 %}
 
-instruct castP2X(eRegI dst, eRegP src ) %{
+instruct castP2X(rRegI dst, eRegP src ) %{
   match(Set dst (CastP2X src));
   ins_cost(50);
   format %{ "MOV    $dst, $src\t# CastP2X" %}
@@ -7312,7 +7206,7 @@
 
 //----------Conditional Move---------------------------------------------------
 // Conditional move
-instruct jmovI_reg(cmpOp cop, eFlagsReg cr, eRegI dst, eRegI src) %{
+instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
   predicate(!VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -7329,7 +7223,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src) %{
+instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
   predicate(!VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -7346,7 +7240,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
+instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
   predicate(VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -7356,7 +7250,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
+instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
   predicate(VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -7366,7 +7260,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
+instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
   predicate(VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -7376,7 +7270,7 @@
 %}
 
 // Conditional move
-instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
+instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
   predicate(VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
   ins_cost(250);
@@ -7387,7 +7281,7 @@
 %}
 
 // Conditional move
-instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
+instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
   predicate(VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
   ins_cost(250);
@@ -7397,7 +7291,7 @@
   ins_pipe( pipe_cmov_mem );
 %}
 
-instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
+instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
   predicate(VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
   ins_cost(250);
@@ -7651,7 +7545,7 @@
 //----------Arithmetic Instructions--------------------------------------------
 //----------Addition Instructions----------------------------------------------
 // Integer Addition Instructions
-instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (AddI dst src));
   effect(KILL cr);
 
@@ -7662,7 +7556,7 @@
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (AddI dst src));
   effect(KILL cr);
 
@@ -7672,7 +7566,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
   predicate(UseIncDec);
   match(Set dst (AddI dst src));
   effect(KILL cr);
@@ -7684,7 +7578,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
+instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
   match(Set dst (AddI src0 src1));
   ins_cost(110);
 
@@ -7704,7 +7598,7 @@
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
+instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
   predicate(UseIncDec);
   match(Set dst (AddI dst src));
   effect(KILL cr);
@@ -7716,7 +7610,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
+instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (AddP dst src));
   effect(KILL cr);
 
@@ -7738,7 +7632,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   match(Set dst (AddI dst (LoadI src)));
   effect(KILL cr);
 
@@ -7749,7 +7643,7 @@
   ins_pipe( ialu_reg_mem );
 %}
 
-instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
   effect(KILL cr);
 
@@ -7811,7 +7705,7 @@
   ins_pipe( empty );
 %}
 
-instruct castII( eRegI dst ) %{
+instruct castII( rRegI dst ) %{
   match(Set dst (CastII dst));
   format %{ "#castII of $dst" %}
   ins_encode( /*empty encoding*/ );
@@ -7831,50 +7725,6 @@
   ins_pipe( ialu_reg_mem );
 %}
 
-// LoadLong-locked - same as a volatile long load when used with compare-swap
-instruct loadLLocked(stackSlotL dst, memory mem) %{
-  predicate(UseSSE<=1);
-  match(Set dst (LoadLLocked mem));
-
-  ins_cost(200);
-  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
-            "FISTp  $dst" %}
-  ins_encode(enc_loadL_volatile(mem,dst));
-  ins_pipe( fpu_reg_mem );
-%}
-
-instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{
-  predicate(UseSSE>=2);
-  match(Set dst (LoadLLocked mem));
-  effect(TEMP tmp);
-  ins_cost(180);
-  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
-            "MOVSD  $dst,$tmp" %}
-  ins_encode %{
-    __ movdbl($tmp$$XMMRegister, $mem$$Address);
-    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{
-  predicate(UseSSE>=2);
-  match(Set dst (LoadLLocked mem));
-  effect(TEMP tmp);
-  ins_cost(160);
-  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
-            "MOVD   $dst.lo,$tmp\n\t"
-            "PSRLQ  $tmp,32\n\t"
-            "MOVD   $dst.hi,$tmp" %}
-  ins_encode %{
-    __ movdbl($tmp$$XMMRegister, $mem$$Address);
-    __ movdl($dst$$Register, $tmp$$XMMRegister);
-    __ psrlq($tmp$$XMMRegister, 32);
-    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Conditional-store of the updated heap-top.
 // Used during allocation of the shared heap.
 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
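 // Roughly, as a hedged pseudo-code sketch of the allocation fast path (the
 // names here are illustrative, not this file's):
 //   top     = *heap_top;
 //   new_top = top + size;
 //   if (CMPXCHG(heap_top, /*expect*/ top, /*store*/ new_top))  // sets ZF
 //     obj = top;              // success: the EQ flags feed the branch
 //   else
 //     retry_or_slow_path();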
@@ -7889,7 +7739,7 @@
 
 // Conditional-store of an int value.
 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
-instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
+instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
   match(Set cr (StoreIConditional mem (Binary oldval newval)));
   effect(KILL oldval);
   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
@@ -7922,7 +7772,7 @@
 
 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 
-instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
+instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
@@ -7935,7 +7785,7 @@
   ins_pipe( pipe_cmpxchg );
 %}
 
-instruct compareAndSwapP( eRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
+instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
@@ -7947,7 +7797,7 @@
   ins_pipe( pipe_cmpxchg );
 %}
 
-instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
+instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
@@ -7961,7 +7811,7 @@
 
 //----------Subtraction Instructions-------------------------------------------
 // Integer Subtraction Instructions
-instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (SubI dst src));
   effect(KILL cr);
 
@@ -7972,7 +7822,7 @@
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (SubI dst src));
   effect(KILL cr);
 
@@ -7983,7 +7833,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   match(Set dst (SubI dst (LoadI src)));
   effect(KILL cr);
 
@@ -7994,7 +7844,7 @@
   ins_pipe( ialu_reg_mem );
 %}
 
-instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
   effect(KILL cr);
 
@@ -8006,7 +7856,7 @@
 %}
 
 // Subtract from a pointer
-instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
+instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
   match(Set dst (AddP dst (SubI zero src)));
   effect(KILL cr);
 
@@ -8017,7 +7867,7 @@
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
+instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
   match(Set dst (SubI zero dst));
   effect(KILL cr);
 
@@ -8032,7 +7882,7 @@
 //----------Multiplication/Division Instructions-------------------------------
 // Integer Multiplication Instructions
 // Multiply Register
-instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (MulI dst src));
   effect(KILL cr);
 
@@ -8045,7 +7895,7 @@
 %}
 
 // Multiply 32-bit Immediate
-instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
+instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
   match(Set dst (MulI src imm));
   effect(KILL cr);
 
@@ -8101,7 +7951,7 @@
 %}
 
 // Multiply Memory 32-bit Immediate
-instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
+instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
   match(Set dst (MulI (LoadI src) imm));
   effect(KILL cr);
 
@@ -8113,7 +7963,7 @@
 %}
 
 // Multiply Memory
-instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
+instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
   match(Set dst (MulI dst (LoadI src)));
   effect(KILL cr);
 
@@ -8150,7 +8000,7 @@
 %}
 
 // Multiply Register Long
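 // Sketch of the 64x64->64 decomposition behind the cost below: writing
 // a = a_hi:a_lo and b = b_hi:b_lo,
 //   (a * b) mod 2^64 == ((a_lo*b_hi + a_hi*b_lo) << 32) + a_lo*b_lo
 // where a_lo*b_lo is the widening 32x32->64 MUL; the _lhi0/_rhi0 variants
 // that follow drop whichever cross term is provably zero.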
-instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
   match(Set dst (MulL dst src));
   effect(KILL cr, TEMP tmp);
   ins_cost(4*100+3*400);
@@ -8168,7 +8018,7 @@
 %}
 
 // Multiply Register Long where the left operand's high 32 bits are zero
-instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
   predicate(is_operand_hi32_zero(n->in(1)));
   match(Set dst (MulL dst src));
   effect(KILL cr, TEMP tmp);
@@ -8189,7 +8039,7 @@
 %}
 
 // Multiply Register Long where the right operand's high 32 bits are zero
-instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
   predicate(is_operand_hi32_zero(n->in(2)));
   match(Set dst (MulL dst src));
   effect(KILL cr, TEMP tmp);
@@ -8225,7 +8075,7 @@
 %}
 
 // Multiply Register Long by small constant
-instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
+instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
   match(Set dst (MulL dst src));
   effect(KILL cr, TEMP tmp);
   ins_cost(2*100+2*400);
@@ -8323,7 +8173,7 @@
 %}
 
 // Divide Register Long (no special case since divisor != -1)
-instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
+instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
   match(Set dst (DivL dst imm));
   effect( TEMP tmp, TEMP tmp2, KILL cr );
   ins_cost(1000);
@@ -8394,7 +8244,7 @@
 %}
 
 // Remainder Register Long (remainder fits into 32 bits)
-instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
+instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
   match(Set dst (ModL dst imm));
   effect( TEMP tmp, TEMP tmp2, KILL cr );
   ins_cost(1000);
@@ -8462,7 +8312,7 @@
 
 // Integer Shift Instructions
 // Shift Left by one
-instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
   match(Set dst (LShiftI dst shift));
   effect(KILL cr);
 
@@ -8474,7 +8324,7 @@
 %}
 
 // Shift Left by 8-bit immediate
-instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
   match(Set dst (LShiftI dst shift));
   effect(KILL cr);
 
@@ -8486,7 +8336,7 @@
 %}
 
 // Shift Left by variable
-instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
+instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
   match(Set dst (LShiftI dst shift));
   effect(KILL cr);
 
@@ -8498,7 +8348,7 @@
 %}
 
 // Arithmetic shift right by one
-instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
   match(Set dst (RShiftI dst shift));
   effect(KILL cr);
 
@@ -8520,7 +8370,7 @@
 %}
 
 // Arithmetic Shift Right by 8-bit immediate
-instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
   match(Set dst (RShiftI dst shift));
   effect(KILL cr);
 
@@ -8543,7 +8393,7 @@
 %}
 
 // Arithmetic Shift Right by variable
-instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
+instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
   match(Set dst (RShiftI dst shift));
   effect(KILL cr);
 
@@ -8555,7 +8405,7 @@
 %}
 
 // Logical shift right by one
-instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
   match(Set dst (URShiftI dst shift));
   effect(KILL cr);
 
@@ -8567,7 +8417,7 @@
 %}
 
 // Logical Shift Right by 8-bit immediate
-instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
   match(Set dst (URShiftI dst shift));
   effect(KILL cr);
 
@@ -8581,7 +8431,7 @@
 
 // Logical Shift Left by 24, followed by Arithmetic Shift Right by 24.
 // This idiom is used by the compiler for the i2b bytecode.
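 // Illustrative source-level form of the idiom (a sketch, values arbitrary):
 //   int b = (x << 24) >> 24;    // == (byte) x, sign-extended
 // which the matcher collapses into one byte sign-extension; the loadS2B and
 // loadI2B rules earlier fold the same shift pair straight into a narrow load.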
-instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
+instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 
   size(3);
@@ -8594,7 +8444,7 @@
 
 // Logical Shift Left by 16, followed by Arithmetic Shift Right by 16.
 // This idiom is used by the compiler for the i2s bytecode.
-instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
+instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 
   size(3);
@@ -8607,7 +8457,7 @@
 
 
 // Logical Shift Right by variable
-instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
+instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
   match(Set dst (URShiftI dst shift));
   effect(KILL cr);
 
@@ -8623,7 +8473,7 @@
 //----------Integer Logical Instructions---------------------------------------
 // And Instructions
 // And Register with Register
-instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (AndI dst src));
   effect(KILL cr);
 
@@ -8635,7 +8485,7 @@
 %}
 
 // And Register with Immediate
-instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (AndI dst src));
   effect(KILL cr);
 
@@ -8647,7 +8497,7 @@
 %}
 
 // And Register with Memory
-instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   match(Set dst (AndI dst (LoadI src)));
   effect(KILL cr);
 
@@ -8659,7 +8509,7 @@
 %}
 
 // And Memory with Register
-instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
   effect(KILL cr);
 
@@ -8685,7 +8535,7 @@
 
 // Or Instructions
 // Or Register with Register
-instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (OrI dst src));
   effect(KILL cr);
 
@@ -8696,7 +8546,7 @@
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
+instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
   match(Set dst (OrI dst (CastP2X src)));
   effect(KILL cr);
 
@@ -8709,7 +8559,7 @@
 
 
 // Or Register with Immediate
-instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (OrI dst src));
   effect(KILL cr);
 
@@ -8721,7 +8571,7 @@
 %}
 
 // Or Register with Memory
-instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   match(Set dst (OrI dst (LoadI src)));
   effect(KILL cr);
 
@@ -8733,7 +8583,7 @@
 %}
 
 // Or Memory with Register
-instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
   effect(KILL cr);
 
@@ -8759,7 +8609,7 @@
 
 // ROL/ROR
 // ROL expand
-instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
   effect(USE_DEF dst, USE shift, KILL cr);
 
   format %{ "ROL    $dst, $shift" %}
@@ -8768,7 +8618,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
   effect(USE_DEF dst, USE shift, KILL cr);
 
   format %{ "ROL    $dst, $shift" %}
@@ -8788,7 +8638,7 @@
 // end of ROL expand
 
 // ROL 32bit by one once
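 // Why rshift is immI_M1 rather than 31 (a sketch): Integer.rotateLeft(i, n)
 // is written as (i << n) | (i >>> -n), and shift counts are masked to five
 // bits, so -1 behaves as 31; for n == 1 the tree below is therefore
 //   (x << 1) | (x >>> 31)
 // and collapses to a single "ROL $dst, 1".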
-instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
+instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 
   expand %{
@@ -8797,7 +8647,7 @@
 %}
 
 // ROL 32bit var by imm8 once
-instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
+instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 
@@ -8825,7 +8675,7 @@
 %}
 
 // ROR expand
-instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
   effect(USE_DEF dst, USE shift, KILL cr);
 
   format %{ "ROR    $dst, $shift" %}
@@ -8834,7 +8684,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
   effect (USE_DEF dst, USE shift, KILL cr);
 
   format %{ "ROR    $dst, $shift" %}
@@ -8854,7 +8704,7 @@
 // end of ROR expand
 
 // ROR right once
-instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
+instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 
   expand %{
@@ -8863,7 +8713,7 @@
 %}
 
 // ROR 32bit by immI8 once
-instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
+instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 
@@ -8892,7 +8742,7 @@
 
 // Xor Instructions
 // Xor Register with Register
-instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (XorI dst src));
   effect(KILL cr);
 
@@ -8904,7 +8754,7 @@
 %}
 
 // Xor Register with Immediate -1
-instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
+instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
   match(Set dst (XorI dst imm));  
 
   size(2);
@@ -8916,7 +8766,7 @@
 %}
 
 // Xor Register with Immediate
-instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (XorI dst src));
   effect(KILL cr);
 
@@ -8928,7 +8778,7 @@
 %}
 
 // Xor Register with Memory
-instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   match(Set dst (XorI dst (LoadI src)));
   effect(KILL cr);
 
@@ -8940,7 +8790,7 @@
 %}
 
 // Xor Memory with Register
-instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
   effect(KILL cr);
 
@@ -8965,14 +8815,14 @@
 
 //----------Convert Int to Boolean---------------------------------------------
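 //
 // The NEG/ADC pair used by ci2b/cp2b below computes (src != 0) ? 1 : 0
 // without a branch (a sketch): after the copy dst == src, NEG leaves
 // dst == -src with CF == (src != 0), and ADC dst,src then produces
 //   -src + src + CF == CF
 // which is exactly the boolean.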
 
-instruct movI_nocopy(eRegI dst, eRegI src) %{
+instruct movI_nocopy(rRegI dst, rRegI src) %{
   effect( DEF dst, USE src );
   format %{ "MOV    $dst,$src" %}
   ins_encode( enc_Copy( dst, src) );
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
+instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
   effect( USE_DEF dst, USE src, KILL cr );
 
   size(4);
@@ -8983,7 +8833,7 @@
   ins_pipe( ialu_reg_reg_long );
 %}
 
-instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
+instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
   match(Set dst (Conv2B src));
 
   expand %{
@@ -8992,14 +8842,14 @@
   %}
 %}
 
-instruct movP_nocopy(eRegI dst, eRegP src) %{
+instruct movP_nocopy(rRegI dst, eRegP src) %{
   effect( DEF dst, USE src );
   format %{ "MOV    $dst,$src" %}
   ins_encode( enc_Copy( dst, src) );
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
+instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
   effect( USE_DEF dst, USE src, KILL cr );
   format %{ "NEG    $dst\n\t"
             "ADC    $dst,$src" %}
@@ -9008,7 +8858,7 @@
   ins_pipe( ialu_reg_reg_long );
 %}
 
-instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
+instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
   match(Set dst (Conv2B src));
 
   expand %{
@@ -9033,7 +8883,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
+instruct cmpLTMask0( rRegI dst, immI0 zero, eFlagsReg cr ) %{
   match(Set dst (CmpLTMask dst zero));
   effect( DEF dst, KILL cr );
   ins_cost(100);
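   // Hedged sketch (the encoding itself is not shown here): the
   // compare-with-zero mask is just the sign bit smeared across the word,
   //   mask = dst >> 31;    // arithmetic shift: -1 if dst < 0, else 0
   // so a single SAR by 31 suffices.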
@@ -9505,7 +9355,7 @@
 %}
 
 // Compare vs zero into -1,0,1
-instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE<=1);
   match(Set dst (CmpD3 src1 zero));
   effect(KILL cr, KILL rax);
@@ -9519,7 +9369,7 @@
 %}
 
 // Compare into -1,0,1
-instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE<=1);
   match(Set dst (CmpD3 src1 src2));
   effect(KILL cr, KILL rax);
@@ -10096,161 +9946,67 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
   predicate (UseSSE<=1);
   match(Set Y (PowD X Y));  // Raise X to the Yth power
-  effect(KILL rax, KILL rbx, KILL rcx);
-  format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
-            "FLD_D  $X\n\t"
-            "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"
-
-            "FDUP   \t\t\t# Q Q\n\t"
-            "FRNDINT\t\t\t# int(Q) Q\n\t"
-            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
-            "FISTP  dword [ESP]\n\t"
-            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
-            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
-            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
-            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
-            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
-            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
-            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
-            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
-            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
-            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
-            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
-            "MOV    [ESP+0],0\n\t"
-            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
-
-            "ADD    ESP,8"
-             %}
-  ins_encode( push_stack_temp_qword,
-              Push_Reg_DPR(X),
-              Opcode(0xD9), Opcode(0xF1),   // fyl2x
-              pow_exp_core_encoding,
-              pop_stack_temp_qword);
-  ins_pipe( pipe_slow );
-%}
-
-instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
+  effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
+  format %{ "fast_pow $X $Y -> $Y  // KILL $rax, $rcx, $rdx" %}
+  ins_encode %{
+    __ subptr(rsp, 8);
+    __ fld_s($X$$reg - 1);
+    __ fast_pow();
+    __ addptr(rsp, 8);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
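+// powDPR_reg above and powD_reg below now defer to the fast_pow()
+// macro-assembler routine in place of the long inline encodings this change
+// deletes; the identity is unchanged (a sketch):
+//   x^y = 2^(y*log2(x)) = 2^int(Q) * 2^frac(Q),   where Q = y*log2(x)
+// with FYL2X producing Q, F2XM1 supplying 2^frac(Q)-1, and int(Q) folded in
+// by scaling the result's exponent field.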
+instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
-  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
-  format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
-            "MOVSD  [ESP],$src1\n\t"
-            "FLD    FPR1,$src1\n\t"
-            "MOVSD  [ESP],$src0\n\t"
-            "FLD    FPR1,$src0\n\t"
-            "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"
-
-            "FDUP   \t\t\t# Q Q\n\t"
-            "FRNDINT\t\t\t# int(Q) Q\n\t"
-            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
-            "FISTP  dword [ESP]\n\t"
-            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
-            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
-            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
-            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
-            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
-            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
-            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
-            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
-            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
-            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
-            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
-            "MOV    [ESP+0],0\n\t"
-            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
-
-            "FST_D  [ESP]\n\t"
-            "MOVSD  $dst,[ESP]\n\t"
-            "ADD    ESP,8"
-             %}
-  ins_encode( push_stack_temp_qword,
-              push_xmm_to_fpr1(src1),
-              push_xmm_to_fpr1(src0),
-              Opcode(0xD9), Opcode(0xF1),   // fyl2x
-              pow_exp_core_encoding,
-              Push_ResultD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-
-instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+  effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
+  format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
+  ins_encode %{
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ fast_pow();
+    __ fstp_d(Address(rsp, 0));
+    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
+    __ addptr(rsp, 8);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+
+instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
   predicate (UseSSE<=1);
   match(Set dpr1 (ExpD dpr1));
-  effect(KILL rax, KILL rbx, KILL rcx);
-  format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding"
-            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
-            "FMULP  \t\t\t# Q=X*log2(e)\n\t"
-
-            "FDUP   \t\t\t# Q Q\n\t"
-            "FRNDINT\t\t\t# int(Q) Q\n\t"
-            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
-            "FISTP  dword [ESP]\n\t"
-            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
-            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
-            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
-            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
-            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
-            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
-            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
-            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
-            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
-            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
-            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
-            "MOV    [ESP+0],0\n\t"
-            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
-
-            "ADD    ESP,8"
-             %}
-  ins_encode( push_stack_temp_qword,
-              Opcode(0xD9), Opcode(0xEA),   // fldl2e
-              Opcode(0xDE), Opcode(0xC9),   // fmulp
-              pow_exp_core_encoding,
-              pop_stack_temp_qword);
-  ins_pipe( pipe_slow );
-%}
-
-instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+  effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
+  format %{ "fast_exp $dpr1 -> $dpr1  // KILL $rax, $rcx, $rdx" %}
+  ins_encode %{
+    __ fast_exp();
+  %}
+  ins_pipe( pipe_slow );
+%}
+
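+// fast_exp() follows the same scheme via FLDL2E/FMULP (compare the deleted
+// formats):  e^x = 2^(x*log2(e)),  then the same int/frac split as for pow.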
+instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   match(Set dst (ExpD src));
-  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
-  format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding\n\t"
-            "MOVSD  [ESP],$src\n\t"
-            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
-            "FMULP  \t\t\t# Q=X*log2(e) X\n\t"
-
-            "FDUP   \t\t\t# Q Q\n\t"
-            "FRNDINT\t\t\t# int(Q) Q\n\t"
-            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
-            "FISTP  dword [ESP]\n\t"
-            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
-            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
-            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
-            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
-            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
-            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
-            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
-            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
-            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
-            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
-            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
-            "MOV    [ESP+0],0\n\t"
-            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
-
-            "FST_D  [ESP]\n\t"
-            "MOVSD  $dst,[ESP]\n\t"
-            "ADD    ESP,8"
-             %}
-  ins_encode( Push_SrcD(src),
-              Opcode(0xD9), Opcode(0xEA),   // fldl2e
-              Opcode(0xDE), Opcode(0xC9),   // fmulp
-              pow_exp_core_encoding,
-              Push_ResultD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-
+  effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
+  format %{ "fast_exp $dst -> $src  // KILL $rax, $rcx, $rdx" %}
+  ins_encode %{
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ fast_exp();
+    __ fstp_d(Address(rsp, 0));
+    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
+    __ addptr(rsp, 8);
+  %}
+  ins_pipe( pipe_slow );
+%}
 
 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
@@ -10391,7 +10147,7 @@
 %}
 
 // Compare vs zero into -1,0,1
-instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE == 0);
   match(Set dst (CmpF3 src1 zero));
   effect(KILL cr, KILL rax);
@@ -10405,7 +10161,7 @@
 %}
 
 // Compare into -1,0,1
-instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE == 0);
   match(Set dst (CmpF3 src1 src2));
   effect(KILL cr, KILL rax);
@@ -11325,7 +11081,7 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct convI2D_reg(regD dst, eRegI src) %{
+instruct convI2D_reg(regD dst, rRegI src) %{
   predicate( UseSSE>=2 && !UseXmmI2D );
   match(Set dst (ConvI2D src));
   format %{ "CVTSI2SD $dst,$src" %}
@@ -11345,7 +11101,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct convXI2D_reg(regD dst, eRegI src)
+instruct convXI2D_reg(regD dst, rRegI src)
 %{
   predicate( UseSSE>=2 && UseXmmI2D );
   match(Set dst (ConvI2D src));
@@ -11433,7 +11189,7 @@
 %}
 
 // Convert an int to a float in xmm; no rounding step needed.
-instruct convI2F_reg(regF dst, eRegI src) %{
+instruct convI2F_reg(regF dst, rRegI src) %{
   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
   match(Set dst (ConvI2F src));
   format %{ "CVTSI2SS $dst, $src" %}
@@ -11443,7 +11199,7 @@
   ins_pipe( pipe_slow );
 %}
 
- instruct convXI2F_reg(regF dst, eRegI src)
+ instruct convXI2F_reg(regF dst, rRegI src)
 %{
   predicate( UseSSE>=2 && UseXmmI2F );
   match(Set dst (ConvI2F src));
@@ -11457,7 +11213,7 @@
   ins_pipe(pipe_slow); // XXX
 %}
 
-instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
+instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (ConvI2L src));
   effect(KILL cr);
   ins_cost(375);
@@ -11469,7 +11225,7 @@
 %}
 
 // Zero-extend convert int to long
-instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
+instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
   match(Set dst (AndL (ConvI2L src) mask) );
   effect( KILL flags );
   ins_cost(250);
@@ -11549,7 +11305,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct convL2I_reg( eRegI dst, eRegL src ) %{
+instruct convL2I_reg( rRegI dst, eRegL src ) %{
   match(Set dst (ConvL2I src));
   effect( DEF dst, USE src );
   format %{ "MOV    $dst,$src.lo" %}
@@ -11558,7 +11314,7 @@
 %}
 
 
-instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
+instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
   match(Set dst (MoveF2I src));
   effect( DEF dst, USE src );
   ins_cost(100);
@@ -11593,7 +11349,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{
+instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveF2I src));
   effect( DEF dst, USE src );
@@ -11605,7 +11361,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
+instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
   match(Set dst (MoveI2F src));
   effect( DEF dst, USE src );
 
@@ -11645,7 +11401,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{
+instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveI2F src));
   effect( DEF dst, USE src );
@@ -11779,186 +11535,6 @@
   ins_pipe( pipe_slow );
 %}
 
-// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_reg(regD dst, regD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate8B src));
-  format %{ "MOVDQA  $dst,$src\n\t"
-            "PUNPCKLBW $dst,$dst\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode %{
-    if ($dst$$reg != $src$$reg) {
-      __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
-    }
-    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_eRegI(regD dst, eRegI src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate8B src));
-  format %{ "MOVD    $dst,$src\n\t"
-            "PUNPCKLBW $dst,$dst\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Replicate scalar zero to packed byte (1 byte) values in xmm
-instruct Repl8B_immI0(regD dst, immI0 zero) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate8B zero));
-  format %{ "PXOR  $dst,$dst\t! replicate8B" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed short (2 byte) values in xmm
-instruct Repl4S_reg(regD dst, regD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate4S src));
-  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
-  ins_encode %{
-    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed short (2 byte) values in xmm
-instruct Repl4S_eRegI(regD dst, eRegI src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate4S src));
-  format %{ "MOVD    $dst,$src\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed short (2 byte) values in xmm
-instruct Repl4S_immI0(regD dst, immI0 zero) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate4S zero));
-  format %{ "PXOR  $dst,$dst\t! replicate4S" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_reg(regD dst, regD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate4C src));
-  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
-  ins_encode %{
-    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_eRegI(regD dst, eRegI src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate4C src));
-  format %{ "MOVD    $dst,$src\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed char (2 byte) values in xmm
-instruct Repl4C_immI0(regD dst, immI0 zero) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate4C zero));
-  format %{ "PXOR  $dst,$dst\t! replicate4C" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_reg(regD dst, regD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate2I src));
-  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_eRegI(regD dst, eRegI src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate2I src));
-  format %{ "MOVD   $dst,$src\n\t"
-            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed integer (4 byte) values in xmm
-instruct Repl2I_immI0(regD dst, immI0 zero) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate2I zero));
-  format %{ "PXOR  $dst,$dst\t! replicate2I" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_reg(regD dst, regD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate2F src));
-  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_regF(regD dst, regF src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate2F src));
-  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_immF0(regD dst, immF0 zero) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate2F zero));
-  format %{ "PXOR  $dst,$dst\t! replicate2F" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
 
 // =======================================================================
 // fast clearing of an array
@@ -12067,7 +11643,7 @@
 
 //----------Control Flow Instructions------------------------------------------
 // Signed compare Instructions
-instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
+instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
   match(Set cr (CmpI op1 op2));
   effect( DEF cr, USE op1, USE op2 );
   format %{ "CMP    $op1,$op2" %}
@@ -12076,7 +11652,7 @@
   ins_pipe( ialu_cr_reg_reg );
 %}
 
-instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
+instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
   match(Set cr (CmpI op1 op2));
   effect( DEF cr, USE op1 );
   format %{ "CMP    $op1,$op2" %}
@@ -12087,7 +11663,7 @@
 %}
 
 // Cisc-spilled version of cmpI_eReg
-instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
+instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
   match(Set cr (CmpI op1 (LoadI op2)));
 
   format %{ "CMP    $op1,$op2" %}
@@ -12097,7 +11673,7 @@
   ins_pipe( ialu_cr_reg_mem );
 %}
 
-instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
+instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
   match(Set cr (CmpI src zero));
   effect( DEF cr, USE src );
 
@@ -12107,7 +11683,7 @@
   ins_pipe( ialu_cr_reg_imm );
 %}
 
-instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
+instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
   match(Set cr (CmpI (AndI src con) zero));
 
   format %{ "TEST   $src,$con" %}
@@ -12116,7 +11692,7 @@
   ins_pipe( ialu_cr_reg_imm );
 %}
 
-instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
+instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
   match(Set cr (CmpI (AndI src mem) zero));
 
   format %{ "TEST   $src,$mem" %}
@@ -12127,7 +11703,7 @@
 
 // Unsigned compare Instructions; really, same as signed except they
 // produce an eFlagsRegU instead of eFlagsReg.
-instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
+instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
   match(Set cr (CmpU op1 op2));
 
   format %{ "CMPu   $op1,$op2" %}
@@ -12136,7 +11712,7 @@
   ins_pipe( ialu_cr_reg_reg );
 %}
 
-instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
+instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
   match(Set cr (CmpU op1 op2));
 
   format %{ "CMPu   $op1,$op2" %}
@@ -12146,7 +11722,7 @@
 %}
 
 // // Cisc-spilled version of cmpU_eReg
-instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
+instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
   match(Set cr (CmpU op1 (LoadI op2)));
 
   format %{ "CMPu   $op1,$op2" %}
@@ -12157,7 +11733,7 @@
 %}
 
 // // Cisc-spilled version of cmpU_eReg
-//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
+//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
 //  match(Set cr (CmpU (LoadI op1) op2));
 //
 //  format %{ "CMPu   $op1,$op2" %}
@@ -12166,7 +11742,7 @@
 //  ins_encode( OpcP, RegMem( op1, op2) );
 //%}
 
-instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
+instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
   match(Set cr (CmpU src zero));
 
   format %{ "TESTu  $src,$src" %}
@@ -12262,7 +11838,7 @@
 //   *** Min and Max using the conditional move are slower than the
 //   *** branch version on a Pentium III.
 // // Conditional move for min
-//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
+//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
 //  effect( USE_DEF op2, USE op1, USE cr );
 //  format %{ "CMOVlt $op2,$op1\t! min" %}
 //  opcode(0x4C,0x0F);
@@ -12271,7 +11847,7 @@
 //%}
 //
 //// Min Register with Register (P6 version)
-//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
+//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
 //  predicate(VM_Version::supports_cmov() );
 //  match(Set op2 (MinI op1 op2));
 //  ins_cost(200);
@@ -12283,7 +11859,7 @@
 //%}
 
 // Min Register with Register (generic version)
-instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
+instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
   match(Set dst (MinI dst src));
   effect(KILL flags);
   ins_cost(300);
@@ -12298,7 +11874,7 @@
 //   *** Min and Max using the conditional move are slower than the
 //   *** branch version on a Pentium III.
 // // Conditional move for max
-//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
+//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
 //  effect( USE_DEF op2, USE op1, USE cr );
 //  format %{ "CMOVgt $op2,$op1\t! max" %}
 //  opcode(0x4F,0x0F);
@@ -12307,7 +11883,7 @@
 //%}
 //
 // // Max Register with Register (P6 version)
-//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
+//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
 //  predicate(VM_Version::supports_cmov() );
 //  match(Set op2 (MaxI op1 op2));
 //  ins_cost(200);
@@ -12319,7 +11895,7 @@
 //%}
 
 // Max Register with Register (generic version)
-instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
+instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
   match(Set dst (MaxI dst src));
   effect(KILL flags);
   ins_cost(300);
@@ -12380,7 +11956,7 @@
 // ============================================================================
 // Branch Instructions
 // Jump Table
-instruct jumpXtnd(eRegI switch_val) %{
+instruct jumpXtnd(rRegI switch_val) %{
   match(Jump switch_val);
   ins_cost(350);
   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
@@ -12798,7 +12374,7 @@
 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
 // compares.  Can be used for LE or GT compares by reversing arguments.
 // NOT GOOD FOR EQ/NE tests.
-instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
+instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
   match( Set flags (CmpL src1 src2 ));
   effect( TEMP tmp );
   ins_cost(300);
@@ -12844,7 +12420,7 @@
 %}
 
 // Compare 2 longs and CMOVE ints.
-instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
+instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
@@ -12854,7 +12430,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
+instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
   ins_cost(250);
@@ -12915,7 +12491,7 @@
 
 //======
 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
-instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
+instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
   match( Set flags (CmpL src zero ));
   effect(TEMP tmp);
   ins_cost(200);
@@ -12972,7 +12548,7 @@
 %}
 
 // Compare 2 longs and CMOVE ints.
-instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
+instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
@@ -12982,7 +12558,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
+instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
   ins_cost(250);
@@ -13044,7 +12620,7 @@
 //======
 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
 // Same as cmpL_reg_flags_LEGT except must negate src
-instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
+instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
   match( Set flags (CmpL src zero ));
   effect( TEMP tmp );
   ins_cost(300);
@@ -13058,7 +12634,7 @@
 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
 // requires a commuted test to get the same result.
-instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
+instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
   match( Set flags (CmpL src1 src2 ));
   effect( TEMP tmp );
   ins_cost(300);
@@ -13105,7 +12681,7 @@
 %}
 
 // Compare 2 longs and CMOVE ints.
-instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
+instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
@@ -13115,7 +12691,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
+instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
   ins_cost(250);
@@ -13444,11 +13020,11 @@
 // ---------EXAMPLE----------------------------------------------------------
 //
 // // pertinent parts of existing instructions in architecture description
-// instruct movI(eRegI dst, eRegI src) %{
+// instruct movI(rRegI dst, rRegI src) %{
 //   match(Set dst (CopyI src));
 // %}
 //
-// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
 //   match(Set dst (AddI dst src));
 //   effect(KILL cr);
 // %}
@@ -13493,11 +13069,11 @@
 // %}
 
 // // Change load of spilled value to only a spill
-// instruct storeI(memory mem, eRegI src) %{
+// instruct storeI(memory mem, rRegI src) %{
 //   match(Set mem (StoreI mem src));
 // %}
 //
-// instruct loadI(eRegI dst, memory mem) %{
+// instruct loadI(rRegI dst, memory mem) %{
 //   match(Set dst (LoadI mem));
 // %}
 //
--- a/src/cpu/x86/vm/x86_64.ad	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Sat Oct 20 00:08:35 2012 -0700
@@ -131,102 +131,6 @@
 
 // Floating Point Registers
 
-// XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
-// Word a in each register holds a Float, words ab hold a Double.  We
-// currently do not use the SIMD capabilities, so registers cd are
-// unused at the moment.
-// XMM8-XMM15 must be encoded with REX.
-// Linux ABI:   No register preserved across function calls
-//              XMM0-XMM7 might hold parameters
-// Windows ABI: XMM6-XMM15 preserved across function calls
-//              XMM0-XMM3 might hold parameters
-
-reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
-reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
-
-reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
-reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
-
-reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
-reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
-
-reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
-reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
-
-reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
-reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
-
-reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
-reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
-
-#ifdef _WIN64
-
-reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
-reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
-
-reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
-reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
-
-reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
-reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
-
-reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
-reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
-
-reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
-reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
-
-reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
-reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
-
-reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
-reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
-
-reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
-reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
-
-reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
-reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
-
-reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
-reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
-
-#else
-
-reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
-reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
-
-reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
-reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
-
-reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
-reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
-
-reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
-reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
-
-reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
-reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
-
-reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
-reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
-
-reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
-reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
-
-reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
-reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
-
-reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
-reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
-
-reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
-reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
-
-#endif // _WIN64
-
-reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
-
 // Specify priority of register selection within phases of register
 // allocation.  Highest priority is first.  A useful heuristic is to
 // give registers a low priority when they are required by machine
@@ -252,26 +156,6 @@
                    R15,         R15_H,
                    RSP,         RSP_H);
 
-// XXX probably use 8-15 first on Linux
-alloc_class chunk1(XMM0,  XMM0_H,
-                   XMM1,  XMM1_H,
-                   XMM2,  XMM2_H,
-                   XMM3,  XMM3_H,
-                   XMM4,  XMM4_H,
-                   XMM5,  XMM5_H,
-                   XMM6,  XMM6_H,
-                   XMM7,  XMM7_H,
-                   XMM8,  XMM8_H,
-                   XMM9,  XMM9_H,
-                   XMM10, XMM10_H,
-                   XMM11, XMM11_H,
-                   XMM12, XMM12_H,
-                   XMM13, XMM13_H,
-                   XMM14, XMM14_H,
-                   XMM15, XMM15_H);
-
-alloc_class chunk2(RFLAGS);
-
 
 //----------Architecture Description Register Classes--------------------------
 // Several register classes are automatically defined based upon information in
@@ -501,46 +385,7 @@
 // Singleton class for instruction pointer
 // reg_class ip_reg(RIP);
 
-// Singleton class for condition codes
-reg_class int_flags(RFLAGS);
-
-// Class for all float registers
-reg_class float_reg(XMM0,
-                    XMM1,
-                    XMM2,
-                    XMM3,
-                    XMM4,
-                    XMM5,
-                    XMM6,
-                    XMM7,
-                    XMM8,
-                    XMM9,
-                    XMM10,
-                    XMM11,
-                    XMM12,
-                    XMM13,
-                    XMM14,
-                    XMM15);
-
-// Class for all double registers
-reg_class double_reg(XMM0,  XMM0_H,
-                     XMM1,  XMM1_H,
-                     XMM2,  XMM2_H,
-                     XMM3,  XMM3_H,
-                     XMM4,  XMM4_H,
-                     XMM5,  XMM5_H,
-                     XMM6,  XMM6_H,
-                     XMM7,  XMM7_H,
-                     XMM8,  XMM8_H,
-                     XMM9,  XMM9_H,
-                     XMM10, XMM10_H,
-                     XMM11, XMM11_H,
-                     XMM12, XMM12_H,
-                     XMM13, XMM13_H,
-                     XMM14, XMM14_H,
-                     XMM15, XMM15_H);
-%}
-
+%}
 
 //----------SOURCE BLOCK-------------------------------------------------------
 // This is a block of C++ code which provides values, functions, and
@@ -1027,12 +872,84 @@
   return rc_float;
 }
 
+// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
+static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+                          int src_hi, int dst_hi, uint ireg, outputStream* st);
+
+static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+                            int stack_offset, int reg, uint ireg, outputStream* st);
+
+static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
+                                      int dst_offset, uint ireg, outputStream* st) {
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    switch (ireg) {
+    case Op_VecS:
+      __ movq(Address(rsp, -8), rax);
+      __ movl(rax, Address(rsp, src_offset));
+      __ movl(Address(rsp, dst_offset), rax);
+      __ movq(rax, Address(rsp, -8));
+      break;
+    case Op_VecD:
+      __ pushq(Address(rsp, src_offset));
+      __ popq (Address(rsp, dst_offset));
+      break;
+    case Op_VecX:
+      __ pushq(Address(rsp, src_offset));
+      __ popq (Address(rsp, dst_offset));
+      __ pushq(Address(rsp, src_offset+8));
+      __ popq (Address(rsp, dst_offset+8));
+      break;
+    case Op_VecY:
+      __ vmovdqu(Address(rsp, -32), xmm0);
+      __ vmovdqu(xmm0, Address(rsp, src_offset));
+      __ vmovdqu(Address(rsp, dst_offset), xmm0);
+      __ vmovdqu(xmm0, Address(rsp, -32));
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+#ifndef PRODUCT
+  } else {
+    switch (ireg) {
+    case Op_VecS:
+      st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
+                "movl    rax, [rsp + #%d]\n\t"
+                "movl    [rsp + #%d], rax\n\t"
+                "movq    rax, [rsp - #8]",
+                src_offset, dst_offset);
+      break;
+    case Op_VecD:
+      st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
+                "popq    [rsp + #%d]",
+                src_offset, dst_offset);
+      break;
+    case Op_VecX:
+      st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
+                "popq    [rsp + #%d]\n\t"
+                "pushq   [rsp + #%d]\n\t"
+                "popq    [rsp + #%d]",
+                src_offset, dst_offset, src_offset+8, dst_offset+8);
+      break;
+    case Op_VecY:
+      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
+                "vmovdqu xmm0, [rsp + #%d]\n\t"
+                "vmovdqu [rsp + #%d], xmm0\n\t"
+                "vmovdqu xmm0, [rsp - #32]",
+                src_offset, dst_offset);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+#endif
+  }
+}
+
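
The helper above picks a copy strategy by operand width: a 32-bit slot is staged through rax (x86-64 has no 4-byte memory push/pop), 64- and 128-bit slots use one or two pushq/popq pairs, and a 256-bit slot borrows xmm0. A minimal standalone sketch of the same width dispatch over a plain byte buffer (copy_slot32 and friends are invented illustrative names, not VM code):

#include <cstdint>
#include <cstring>

// 32-bit slot: staged through a scratch value, playing the role of rax.
static void copy_slot32(uint8_t* stack, int src_off, int dst_off) {
  uint32_t scratch;
  std::memcpy(&scratch, stack + src_off, sizeof scratch);
  std::memcpy(stack + dst_off, &scratch, sizeof scratch);
}

// 64-bit slot: one move, like the single pushq/popq pair for Op_VecD.
static void copy_slot64(uint8_t* stack, int src_off, int dst_off) {
  std::memmove(stack + dst_off, stack + src_off, 8);
}

// 128-bit slot: two 64-bit moves, like the paired pushq/popq for Op_VecX.
static void copy_slot128(uint8_t* stack, int src_off, int dst_off) {
  copy_slot64(stack, src_off,     dst_off);
  copy_slot64(stack, src_off + 8, dst_off + 8);
}
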
 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
                                        PhaseRegAlloc* ra_,
                                        bool do_size,
-                                       outputStream* st) const
-{
-
+                                       outputStream* st) const {
+  assert(cbuf != NULL || st  != NULL, "sanity");
   // Get registers to move
   OptoReg::Name src_second = ra_->get_reg_second(in(1));
   OptoReg::Name src_first = ra_->get_reg_first(in(1));
@@ -1050,7 +967,30 @@
   if (src_first == dst_first && src_second == dst_second) {
     // Self copy, no move
     return 0;
-  } else if (src_first_rc == rc_stack) {
+  }
+  if (bottom_type()->isa_vect() != NULL) {
+    uint ireg = ideal_reg();
+    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
+    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
+    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
+      // mem -> mem
+      int src_offset = ra_->reg2offset(src_first);
+      int dst_offset = ra_->reg2offset(dst_first);
+      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
+    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
+      vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
+    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
+      int stack_offset = ra_->reg2offset(dst_first);
+      vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
+    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
+      int stack_offset = ra_->reg2offset(src_first);
+      vec_spill_helper(cbuf, false, true,  stack_offset, dst_first, ireg, st);
+    } else {
+      ShouldNotReachHere();
+    }
+    return 0;
+  }
+  if (src_first_rc == rc_stack) {
     // mem ->
     if (dst_first_rc == rc_stack) {
       // mem -> mem
@@ -1061,23 +1001,16 @@
         int src_offset = ra_->reg2offset(src_first);
         int dst_offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          emit_opcode(*cbuf, 0xFF);
-          encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
-
-          emit_opcode(*cbuf, 0x8F);
-          encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
-
+          MacroAssembler _masm(cbuf);
+          __ pushq(Address(rsp, src_offset));
+          __ popq (Address(rsp, dst_offset));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
-                     "popq    [rsp + #%d]",
-                     src_offset,
-                     dst_offset);
+                    "popq    [rsp + #%d]",
+                     src_offset, dst_offset);
 #endif
         }
-        return
-          3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
-          3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1086,46 +1019,22 @@
         int src_offset = ra_->reg2offset(src_first);
         int dst_offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          emit_opcode(*cbuf, Assembler::REX_W);
-          emit_opcode(*cbuf, 0x89);
-          emit_opcode(*cbuf, 0x44);
-          emit_opcode(*cbuf, 0x24);
-          emit_opcode(*cbuf, 0xF8);
-
-          emit_opcode(*cbuf, 0x8B);
-          encode_RegMem(*cbuf,
-                        RAX_enc,
-                        RSP_enc, 0x4, 0, src_offset,
-                        false);
-
-          emit_opcode(*cbuf, 0x89);
-          encode_RegMem(*cbuf,
-                        RAX_enc,
-                        RSP_enc, 0x4, 0, dst_offset,
-                        false);
-
-          emit_opcode(*cbuf, Assembler::REX_W);
-          emit_opcode(*cbuf, 0x8B);
-          emit_opcode(*cbuf, 0x44);
-          emit_opcode(*cbuf, 0x24);
-          emit_opcode(*cbuf, 0xF8);
-
+          MacroAssembler _masm(cbuf);
+          __ movq(Address(rsp, -8), rax);
+          __ movl(rax, Address(rsp, src_offset));
+          __ movl(Address(rsp, dst_offset), rax);
+          __ movq(rax, Address(rsp, -8));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
-                     "movl    rax, [rsp + #%d]\n\t"
-                     "movl    [rsp + #%d], rax\n\t"
-                     "movq    rax, [rsp - #8]",
-                     src_offset,
-                     dst_offset);
+                    "movl    rax, [rsp + #%d]\n\t"
+                    "movl    [rsp + #%d], rax\n\t"
+                    "movq    rax, [rsp - #8]",
+                     src_offset, dst_offset);
 #endif
         }
-        return
-          5 + // movq
-          3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
-          3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
-          5; // movq
       }
+      return 0;
     } else if (dst_first_rc == rc_int) {
       // mem -> gpr
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
@@ -1133,52 +1042,32 @@
         // 64-bit
         int offset = ra_->reg2offset(src_first);
         if (cbuf) {
-          if (Matcher::_regEncode[dst_first] < 8) {
-            emit_opcode(*cbuf, Assembler::REX_W);
-          } else {
-            emit_opcode(*cbuf, Assembler::REX_WR);
-          }
-          emit_opcode(*cbuf, 0x8B);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[dst_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movq    %s, [rsp + #%d]\t# spill",
                      Matcher::regName[dst_first],
                      offset);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(src_first);
         if (cbuf) {
-          if (Matcher::_regEncode[dst_first] >= 8) {
-            emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x8B);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[dst_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movl    %s, [rsp + #%d]\t# spill",
                      Matcher::regName[dst_first],
                      offset);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[dst_first] < 8)
-           ? 3
-           : 4); // REX
       }
+      return 0;
     } else if (dst_first_rc == rc_float) {
       // mem-> xmm
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
@@ -1189,18 +1078,13 @@
           MacroAssembler _masm(cbuf);
           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("%s  %s, [rsp + #%d]\t# spill",
                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                      Matcher::regName[dst_first],
                      offset);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[dst_first] >= 8)
-           ? 6
-           : (5 + ((UseAVX>0)?1:0))); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1210,18 +1094,14 @@
           MacroAssembler _masm(cbuf);
           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movss   %s, [rsp + #%d]\t# spill",
                      Matcher::regName[dst_first],
                      offset);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[dst_first] >= 8)
-           ? 6
-           : (5 + ((UseAVX>0)?1:0))); // REX
       }
+      return 0;
     }
   } else if (src_first_rc == rc_int) {
     // gpr ->
@@ -1232,113 +1112,65 @@
         // 64-bit
         int offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          if (Matcher::_regEncode[src_first] < 8) {
-            emit_opcode(*cbuf, Assembler::REX_W);
-          } else {
-            emit_opcode(*cbuf, Assembler::REX_WR);
-          }
-          emit_opcode(*cbuf, 0x89);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[src_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movq    [rsp + #%d], %s\t# spill",
                      offset,
                      Matcher::regName[src_first]);
 #endif
         }
-        return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          if (Matcher::_regEncode[src_first] >= 8) {
-            emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x89);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[src_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movl    [rsp + #%d], %s\t# spill",
                      offset,
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[src_first] < 8)
-           ? 3
-           : 4); // REX
       }
+      return 0;
     } else if (dst_first_rc == rc_int) {
       // gpr -> gpr
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         if (cbuf) {
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_W);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WB);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_WR);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WRB);
-            }
-          }
-          emit_opcode(*cbuf, 0x8B);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movq(as_Register(Matcher::_regEncode[dst_first]),
+                  as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movq    %s, %s\t# spill",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return 3; // REX
+        return 0;
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         if (cbuf) {
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_B);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x8B);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movl(as_Register(Matcher::_regEncode[dst_first]),
+                  as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movl    %s, %s\t# spill",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? 2
-          : 3; // REX
+        return 0;
       }
     } else if (dst_first_rc == rc_float) {
       // gpr -> xmm
@@ -1349,13 +1181,12 @@
           MacroAssembler _masm(cbuf);
           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movdq   %s, %s\t# spill",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return 5; // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1364,17 +1195,14 @@
           MacroAssembler _masm(cbuf);
           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movdl   %s, %s\t# spill",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
-          ? 5
-          : (4 + ((UseAVX>0)?1:0)); // REX
       }
+      return 0;
     }
   } else if (src_first_rc == rc_float) {
     // xmm ->
@@ -1388,17 +1216,12 @@
           MacroAssembler _masm(cbuf);
           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movsd   [rsp + #%d], %s\t# spill",
                      offset,
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[src_first] >= 8)
-           ? 6
-           : (5 + ((UseAVX>0)?1:0))); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1408,18 +1231,14 @@
           MacroAssembler _masm(cbuf);
           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movss   [rsp + #%d], %s\t# spill",
                      offset,
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[src_first] >=8)
-           ? 6
-           : (5 + ((UseAVX>0)?1:0))); // REX
       }
+      return 0;
     } else if (dst_first_rc == rc_int) {
       // xmm -> gpr
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
@@ -1429,13 +1248,12 @@
           MacroAssembler _masm(cbuf);
           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movdq   %s, %s\t# spill",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return 5; // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1444,17 +1262,14 @@
           MacroAssembler _masm(cbuf);
           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movdl   %s, %s\t# spill",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
-          ? 5
-          : (4 + ((UseAVX>0)?1:0)); // REX
       }
+      return 0;
     } else if (dst_first_rc == rc_float) {
       // xmm -> xmm
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
@@ -1464,17 +1279,13 @@
           MacroAssembler _masm(cbuf);
           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("%s  %s, %s\t# spill",
                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
-          ? 5
-          : (4 + ((UseAVX>0)?1:0)); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1483,42 +1294,35 @@
           MacroAssembler _masm(cbuf);
           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("%s  %s, %s\t# spill",
                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return ((UseAVX>0) ? 5:
-          ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
-           ? (UseXmmRegToRegMoveAll ? 4 : 5)
-           : (UseXmmRegToRegMoveAll ? 3 : 4))); // REX
       }
+      return 0;
     }
   }
 
   assert(0," foo ");
   Unimplemented();
-
   return 0;
 }
 
 #ifndef PRODUCT
-void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
-{
+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
   implementation(NULL, ra_, false, st);
 }
 #endif
 
-void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
-{
+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
   implementation(&cbuf, ra_, false, NULL);
 }
 
-uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
-{
-  return implementation(NULL, ra_, true, NULL);
+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);
 }
 
 //=============================================================================
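
The size() change just above is more than formatting: the hand-counted byte totals deleted earlier in this file's diff (all the "return 3; // REX" arithmetic) are replaced by MachNode::size(), which measures the instruction by actually emitting it. A sketch of that idea under invented, simplified types:

// Measure-by-emitting, so the byte count can never drift from the encoder.
struct ScratchBuffer {
  int length = 0;
  void put_bytes(int nbytes) { length += nbytes; }
};

struct MachNodeSketch {
  virtual void emit(ScratchBuffer& buf) const = 0;
  virtual ~MachNodeSketch() {}
  int size() const {
    ScratchBuffer scratch;   // throwaway buffer
    emit(scratch);           // run the real encoder
    return scratch.length;   // report what it produced
  }
};
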
@@ -1709,14 +1513,6 @@
   return offset;
 }
 
-
-const bool Matcher::match_rule_supported(int opcode) {
-  if (!has_match_rule(opcode))
-    return false;
-
-  return true;  // Per default match rules are supported.
-}
-
 int Matcher::regnum_to_fpu_offset(int regnum)
 {
   return regnum - 32; // The FP registers are in the second chunk
@@ -1727,16 +1523,6 @@
   return true;
 }
 
-// Vector width in bytes
-const uint Matcher::vector_width_in_bytes(void) {
-  return 8;
-}
-
-// Vector ideal reg
-const uint Matcher::vector_ideal_reg(void) {
-  return Op_RegD;
-}
-
 // Is this branch offset short enough that a short branch can be used?
 //
 // NOTE: If the platform does not provide any short branch variants, then
@@ -1823,21 +1609,21 @@
 bool Matcher::can_be_java_arg(int reg)
 {
   return
-    reg ==  RDI_num || reg ==  RDI_H_num ||
-    reg ==  RSI_num || reg ==  RSI_H_num ||
-    reg ==  RDX_num || reg ==  RDX_H_num ||
-    reg ==  RCX_num || reg ==  RCX_H_num ||
-    reg ==   R8_num || reg ==   R8_H_num ||
-    reg ==   R9_num || reg ==   R9_H_num ||
-    reg ==  R12_num || reg ==  R12_H_num ||
-    reg == XMM0_num || reg == XMM0_H_num ||
-    reg == XMM1_num || reg == XMM1_H_num ||
-    reg == XMM2_num || reg == XMM2_H_num ||
-    reg == XMM3_num || reg == XMM3_H_num ||
-    reg == XMM4_num || reg == XMM4_H_num ||
-    reg == XMM5_num || reg == XMM5_H_num ||
-    reg == XMM6_num || reg == XMM6_H_num ||
-    reg == XMM7_num || reg == XMM7_H_num;
+    reg ==  RDI_num || reg == RDI_H_num ||
+    reg ==  RSI_num || reg == RSI_H_num ||
+    reg ==  RDX_num || reg == RDX_H_num ||
+    reg ==  RCX_num || reg == RCX_H_num ||
+    reg ==   R8_num || reg ==  R8_H_num ||
+    reg ==   R9_num || reg ==  R9_H_num ||
+    reg ==  R12_num || reg == R12_H_num ||
+    reg == XMM0_num || reg == XMM0b_num ||
+    reg == XMM1_num || reg == XMM1b_num ||
+    reg == XMM2_num || reg == XMM2b_num ||
+    reg == XMM3_num || reg == XMM3b_num ||
+    reg == XMM4_num || reg == XMM4b_num ||
+    reg == XMM5_num || reg == XMM5b_num ||
+    reg == XMM6_num || reg == XMM6b_num ||
+    reg == XMM7_num || reg == XMM7b_num;
 }
 
 bool Matcher::is_spillable_arg(int reg)
@@ -3212,10 +2998,11 @@
       OptoReg::Bad, // Op_RegI
       RAX_H_num,    // Op_RegP
       OptoReg::Bad, // Op_RegF
-      XMM0_H_num,   // Op_RegD
+      XMM0b_num,    // Op_RegD
       RAX_H_num     // Op_RegL
     };
-    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
+    // Excluded flags and vector registers.
+    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 5, "missing type");
     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
   %}
 %}
@@ -3977,7 +3764,6 @@
   interface(REG_INTER);
 %}
 
-
 //----------Memory Operands----------------------------------------------------
 // Direct Memory Operand
 // operand direct(immP addr)
@@ -5408,61 +5194,6 @@
   ins_pipe(pipe_slow); // XXX
 %}
 
-// Load Aligned Packed Byte to XMM register
-instruct loadA8B(regD dst, memory mem) %{
-  match(Set dst (Load8B mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed8B" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Short to XMM register
-instruct loadA4S(regD dst, memory mem) %{
-  match(Set dst (Load4S mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed4S" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Char to XMM register
-instruct loadA4C(regD dst, memory mem) %{
-  match(Set dst (Load4C mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed4C" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Integer to XMM register
-instruct load2IU(regD dst, memory mem) %{
-  match(Set dst (Load2I mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed2I" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Single to XMM
-instruct loadA2F(regD dst, memory mem) %{
-  match(Set dst (Load2F mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed2F" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Load Effective Address
 instruct leaP8(rRegP dst, indOffset8 mem)
 %{
@@ -6192,39 +5923,6 @@
   ins_pipe(ialu_mem_imm);
 %}
 
-// Store Aligned Packed Byte XMM register to memory
-instruct storeA8B(memory mem, regD src) %{
-  match(Set mem (Store8B mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed8B" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Store Aligned Packed Char/Short XMM register to memory
-instruct storeA4C(memory mem, regD src) %{
-  match(Set mem (Store4C mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed4C" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Store Aligned Packed Integer XMM register to memory
-instruct storeA2I(memory mem, regD src) %{
-  match(Set mem (Store2I mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed2I" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Store CMS card-mark Immediate
 instruct storeImmCM0_reg(memory mem, immI0 zero)
 %{
@@ -6250,17 +5948,6 @@
   ins_pipe(ialu_mem_imm);
 %}
 
-// Store Aligned Packed Single Float XMM register to memory
-instruct storeA2F(memory mem, regD src) %{
-  match(Set mem (Store2F mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed2F" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Store Float
 instruct storeF(memory mem, regF src)
 %{
@@ -6724,6 +6411,31 @@
   ins_pipe(ialu_reg_reg); // XXX
 %}
 
+// Convert oop into int for vectors alignment masking
+instruct convP2I(rRegI dst, rRegP src)
+%{
+  match(Set dst (ConvL2I (CastP2X src)));
+
+  format %{ "movl    $dst, $src\t# ptr -> int" %}
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg_reg); // XXX
+%}
+
+// Convert compressed oop into int for vectors alignment masking
+// in case of 32bit oops (heap < 4Gb).
+instruct convN2I(rRegI dst, rRegN src)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
+
+  format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg_reg); // XXX
+%}
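
convN2I leans on unscaled compressed oops: when narrow_oop_shift() is 0 the heap fits below 4Gb and, under that layout, the encoding base is zero, so the low 32 bits of the decoded pointer are the narrow oop itself and the whole DecodeN/CastP2X/ConvL2I chain collapses to one movl. A hedged sketch, with base and shift as illustrative parameters rather than the real Universe accessors:

#include <cstdint>

// General compressed-oop decode: oop = base + (narrow << shift).
static uintptr_t decode_narrow(uint32_t narrow, uintptr_t base, int shift) {
  return base + ((uintptr_t)narrow << shift);
}

// What the predicate buys: in unscaled mode (shift == 0, with a zero base
// assumed for a sub-4Gb heap) the decoded pointer's low 32 bits equal the
// narrow oop, so no decode needs to be emitted at all.
static uint32_t conv_n2i(uint32_t narrow) {
  return (uint32_t)decode_narrow(narrow, /*base=*/ 0, /*shift=*/ 0);  // == narrow
}
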
 
 // Convert oop pointer into compressed form
 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
@@ -7489,18 +7201,6 @@
   ins_pipe(ialu_reg_mem); // XXX
 %}
 
-// LoadL-locked - same as a regular LoadL when used with compare-swap
-instruct loadLLocked(rRegL dst, memory mem)
-%{
-  match(Set dst (LoadLLocked mem));
-
-  ins_cost(125); // XXX
-  format %{ "movq    $dst, $mem\t# long locked" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
-  ins_pipe(ialu_reg_mem); // XXX
-%}
-
 // Conditional-store of the updated heap-top.
 // Used during allocation of the shared heap.
 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
@@ -9820,7 +9520,39 @@
   ins_pipe( pipe_slow );
 %}
 
-
+instruct powD_reg(regD dst, regD src0, regD src1, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
+  match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
+  effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
+  format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
+  ins_encode %{
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ fast_pow();
+    __ fstp_d(Address(rsp, 0));
+    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
+    __ addptr(rsp, 8);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct expD_reg(regD dst, regD src, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
+  match(Set dst (ExpD src));
+  effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
+  format %{ "fast_exp $dst -> $src  // KILL $rax, $rcx, $rdx" %}
+  ins_encode %{
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ fast_exp();
+    __ fstp_d(Address(rsp, 0));
+    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
+    __ addptr(rsp, 8);
+  %}
+  ins_pipe( pipe_slow );
+%}
 
 //----------Arithmetic Conversion Instructions---------------------------------
 
@@ -10326,11 +10058,10 @@
   ins_pipe( pipe_slow );
 %}
 
-// The next instructions have long latency and use Int unit. Set high cost.
 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
   match(Set dst (MoveI2F src));
   effect(DEF dst, USE src);
-  ins_cost(300);
+  ins_cost(100);
   format %{ "movd    $dst,$src\t# MoveI2F" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
@@ -10341,7 +10072,7 @@
 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
   match(Set dst (MoveL2D src));
   effect(DEF dst, USE src);
-  ins_cost(300);
+  ins_cost(100);
   format %{ "movd    $dst,$src\t# MoveL2D" %}
   ins_encode %{
      __ movdq($dst$$XMMRegister, $src$$Register);
@@ -10349,172 +10080,6 @@
   ins_pipe( pipe_slow );
 %}
 
-// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_reg(regD dst, regD src) %{
-  match(Set dst (Replicate8B src));
-  format %{ "MOVDQA  $dst,$src\n\t"
-            "PUNPCKLBW $dst,$dst\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode %{
-    if ($dst$$reg != $src$$reg) {
-      __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
-    }
-    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_rRegI(regD dst, rRegI src) %{
-  match(Set dst (Replicate8B src));
-  format %{ "MOVD    $dst,$src\n\t"
-            "PUNPCKLBW $dst,$dst\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Replicate scalar zero to packed byte (1 byte) values in xmm
-instruct Repl8B_immI0(regD dst, immI0 zero) %{
-  match(Set dst (Replicate8B zero));
-  format %{ "PXOR  $dst,$dst\t! replicate8B" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed short (2 byte) values in xmm
-instruct Repl4S_reg(regD dst, regD src) %{
-  match(Set dst (Replicate4S src));
-  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
-  ins_encode %{
-    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed short (2 byte) values in xmm
-instruct Repl4S_rRegI(regD dst, rRegI src) %{
-  match(Set dst (Replicate4S src));
-  format %{ "MOVD    $dst,$src\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed short (2 byte) values in xmm
-instruct Repl4S_immI0(regD dst, immI0 zero) %{
-  match(Set dst (Replicate4S zero));
-  format %{ "PXOR  $dst,$dst\t! replicate4S" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_reg(regD dst, regD src) %{
-  match(Set dst (Replicate4C src));
-  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
-  ins_encode %{
-    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_rRegI(regD dst, rRegI src) %{
-  match(Set dst (Replicate4C src));
-  format %{ "MOVD    $dst,$src\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed char (2 byte) values in xmm
-instruct Repl4C_immI0(regD dst, immI0 zero) %{
-  match(Set dst (Replicate4C zero));
-  format %{ "PXOR  $dst,$dst\t! replicate4C" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_reg(regD dst, regD src) %{
-  match(Set dst (Replicate2I src));
-  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_rRegI(regD dst, rRegI src) %{
-  match(Set dst (Replicate2I src));
-  format %{ "MOVD   $dst,$src\n\t"
-            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed integer (4 byte) values in xmm
-instruct Repl2I_immI0(regD dst, immI0 zero) %{
-  match(Set dst (Replicate2I zero));
-  format %{ "PXOR  $dst,$dst\t! replicate2I" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_reg(regD dst, regD src) %{
-  match(Set dst (Replicate2F src));
-  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_regF(regD dst, regF src) %{
-  match(Set dst (Replicate2F src));
-  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_immF0(regD dst, immF0 zero) %{
-  match(Set dst (Replicate2F zero));
-  format %{ "PXOR  $dst,$dst\t! replicate2F" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
 
 // =======================================================================
 // fast clearing of an array
--- a/src/cpu/zero/vm/cppInterpreter_zero.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/zero/vm/cppInterpreter_zero.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -646,16 +646,15 @@
   oop method_type = (oop) p;
 
   // The MethodHandle is in the slot after the arguments
-  oop form = java_lang_invoke_MethodType::form(method_type);
-  int num_vmslots = java_lang_invoke_MethodTypeForm::vmslots(form);
-  assert(argument_slots == num_vmslots + 1, "should be");
+  int num_vmslots = argument_slots - 1;
   oop method_handle = VMSLOTS_OBJECT(num_vmslots);
 
   // InvokeGeneric requires some extra shuffling
   oop mhtype = java_lang_invoke_MethodHandle::type(method_handle);
   bool is_exact = mhtype == method_type;
   if (!is_exact) {
-    if (method->intrinsic_id() == vmIntrinsics::_invokeExact) {
+    if (true || // FIXME
+        method->intrinsic_id() == vmIntrinsics::_invokeExact) {
       CALL_VM_NOCHECK_NOFIX(
         SharedRuntime::throw_WrongMethodTypeException(
           thread, method_type, mhtype));
@@ -670,8 +669,8 @@
     // NB the x86 code for this (in methodHandles_x86.cpp, search for
     // "genericInvoker") is really really odd.  I'm hoping it's trying
     // to accommodate odd VM/class library combinations I can ignore.
-    oop adapter = java_lang_invoke_MethodTypeForm::genericInvoker(form);
-    if (adapter == NULL) {
+    oop adapter = NULL; //FIXME: load the adapter from the CP cache
+    if (adapter == NULL) {
       CALL_VM_NOCHECK_NOFIX(
         SharedRuntime::throw_WrongMethodTypeException(
           thread, method_type, mhtype));
@@ -761,7 +760,7 @@
           return;
       }
       if (entry_kind != MethodHandles::_invokespecial_mh) {
-        int index = java_lang_invoke_DirectMethodHandle::vmindex(method_handle);
+        intptr_t index = java_lang_invoke_DirectMethodHandle::vmindex(method_handle);
         instanceKlass* rcvrKlass =
           (instanceKlass *) receiver->klass()->klass_part();
         if (entry_kind == MethodHandles::_invokevirtual_mh) {
@@ -1179,8 +1178,7 @@
 intptr_t* CppInterpreter::calculate_unwind_sp(ZeroStack* stack,
                                               oop method_handle) {
   oop method_type = java_lang_invoke_MethodHandle::type(method_handle);
-  oop form = java_lang_invoke_MethodType::form(method_type);
-  int argument_slots = java_lang_invoke_MethodTypeForm::vmslots(form);
+  int argument_slots = java_lang_invoke_MethodType::ptype_slot_count(method_type);
 
   return stack->sp() + argument_slots;
 }
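
The rewritten calculate_unwind_sp no longer chases MethodTypeForm; the slot count comes straight from the MethodType's parameter types. In JVM terms a long or double parameter occupies two argument slots and everything else one, which is what a ptype_slot_count-style helper computes. A standalone sketch under invented types, not the VM's:

#include <vector>

enum class BasicType { Boolean, Byte, Char, Short, Int, Float, Object, Long, Double };

// Java argument slots: long and double are two-slot types, the rest one.
static int ptype_slot_count(const std::vector<BasicType>& ptypes) {
  int slots = 0;
  for (BasicType t : ptypes) {
    slots += (t == BasicType::Long || t == BasicType::Double) ? 2 : 1;
  }
  return slots;
}
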
--- a/src/cpu/zero/vm/globals_zero.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/zero/vm/globals_zero.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2007, 2008, 2009, 2010, 2011 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -60,4 +60,7 @@
 
 // GC Ergo Flags
 define_pd_global(intx, CMSYoungGenPerWorker, 16*M);  // default max size of CMS young gen, per GC worker thread
+
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct)
+
 #endif // CPU_ZERO_VM_GLOBALS_ZERO_HPP
--- a/src/cpu/zero/vm/interpreterGenerator_zero.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/zero/vm/interpreterGenerator_zero.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -38,6 +38,5 @@
   address generate_empty_entry();
   address generate_accessor_entry();
   address generate_Reference_get_entry();
-  address generate_method_handle_entry();
 
 #endif // CPU_ZERO_VM_INTERPRETERGENERATOR_ZERO_HPP
--- a/src/cpu/zero/vm/interpreter_zero.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/cpu/zero/vm/interpreter_zero.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -70,14 +70,6 @@
   return generate_entry((address) ShouldNotCallThisEntry());
 }
 
-address InterpreterGenerator::generate_method_handle_entry() {
-#ifdef CC_INTERP
-  return generate_entry((address) CppInterpreter::method_handle_entry);
-#else
-  return generate_entry((address) ShouldNotCallThisEntry());
-#endif // CC_INTERP
-}
-
 bool AbstractInterpreter::can_be_compiled(methodHandle m) {
   return true;
 }
--- a/src/os/bsd/vm/decoder_machO.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/bsd/vm/decoder_machO.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -26,6 +26,139 @@
 
 #ifdef __APPLE__
 #include "decoder_machO.hpp"
+
+#include <cxxabi.h>
+#include <mach-o/loader.h>
+#include <mach-o/nlist.h>
+
+
+bool MachODecoder::demangle(const char* symbol, char *buf, int buflen) {
+  int   status;
+  char* result;
+  size_t size = (size_t)buflen;
+  // Don't pass buf to __cxa_demangle. If 'buf' is too small,
+  // __cxa_demangle will call system "realloc" for additional memory, which
+  // may use a different malloc/realloc mechanism than the one that allocated 'buf'.
+  if ((result = abi::__cxa_demangle(symbol, NULL, NULL, &status)) != NULL) {
+    jio_snprintf(buf, buflen, "%s", result);
+    // call the C library's free
+    ::free(result);
+    return true;
+  }
+  return false;
+}
+
+bool MachODecoder::decode(address addr, char *buf,
+      int buflen, int *offset, const void *mach_base) {
+  struct symtab_command * symt = (struct symtab_command *)
+    mach_find_command((struct mach_header_64 *)mach_base, LC_SYMTAB);
+  if (symt == NULL) {
+    DEBUG_ONLY(tty->print_cr("no symtab in mach file at 0x%lx", mach_base));
+    return false;
+  }
+  uint32_t off = symt->symoff;          /* symbol table offset (within this mach file) */
+  uint32_t nsyms = symt->nsyms;         /* number of symbol table entries */
+  uint32_t stroff = symt->stroff;       /* string table offset */
+  uint32_t strsize = symt->strsize;     /* string table size in bytes */
+
+  // iterate through symbol table trying to match our offset
+
+  uint32_t addr_relative = (uintptr_t) addr - (uintptr_t) mach_base; // offset we seek in the symtab
+  void * symtab_addr = (void*) ((uintptr_t) mach_base + off);
+  struct nlist_64 *cur_nlist = (struct nlist_64 *) symtab_addr;
+  struct nlist_64 *last_nlist = cur_nlist;  // no size stored in an entry, so keep previously seen nlist
+
+  int32_t found_strx = 0;
+  int32_t found_symval = 0;
+
+  for (uint32_t i=0; i < nsyms; i++) {
+    uint32_t this_value = cur_nlist->n_value;
+
+    if (addr_relative == this_value) {
+      found_strx =  cur_nlist->n_un.n_strx;
+      found_symval = this_value;
+      break;
+    } else if (addr_relative > this_value) {
+      // gone past it, use previously seen nlist:
+      found_strx = last_nlist->n_un.n_strx;
+      found_symval = last_nlist->n_value;
+      break;
+    }
+    last_nlist = cur_nlist;
+    cur_nlist++;  // nlist_64* arithmetic is already element-wise
+  }
+  if (found_strx == 0) {
+    return false;
+  }
+  // write the offset:
+  *offset = addr_relative - found_symval;
+
+  // lookup found_strx in the string table
+  char * symname = mach_find_in_stringtable((char*) ((uintptr_t)mach_base + stroff), strsize, found_strx);
+  if (symname) {
+      strncpy(buf, symname, buflen);
+      return true;
+  }
+  DEBUG_ONLY(tty->print_cr("no string or null string found."));
+  return false;
+}
+
+void* MachODecoder::mach_find_command(struct mach_header_64 * mach_base, uint32_t command_wanted) {
+  // possibly verify it is a mach_header, use magic number.
+  // commands begin immediately after the header.
+  struct load_command *pos = (struct load_command *) ((uintptr_t) mach_base + sizeof(struct mach_header_64));
+  for (uint32_t i = 0; i < mach_base->ncmds; i++) {
+    struct load_command *this_cmd = (struct load_command *) pos;
+    if (this_cmd->cmd == command_wanted) {
+       return pos;
+    }
+    int cmdsize = this_cmd->cmdsize;
+    pos = (struct load_command *) ((uintptr_t) pos + cmdsize);  // cmdsize counts bytes
+  }
+  return NULL;
+}
+
+char* MachODecoder::mach_find_in_stringtable(char *strtab, uint32_t tablesize, int strx_wanted) {
+
+  if (strx_wanted == 0) {
+    return NULL;
+  }
+  char *strtab_end = strtab + tablesize;
+
+  // find the first string, skip over the space char
+  // (or the four zero bytes we see e.g. in libclient)
+  if (*strtab == ' ') {
+      strtab++;
+      if (*strtab != 0) {
+          DEBUG_ONLY(tty->print_cr("string table has leading space but no following zero."));
+          return NULL;
+      }
+      strtab++;
+  } else {
+      if ((uint32_t) *strtab != 0) {
+          DEBUG_ONLY(tty->print_cr("string table without leading space or leading int of zero."));
+          return NULL;
+      }
+      strtab+=4;
+  }
+  // read the real strings starting at index 1
+  int cur_strx = 1;
+  while (strtab < strtab_end) {
+    if (cur_strx == strx_wanted) {
+        return strtab;
+    }
+    // find start of next string
+    while (*strtab != 0) {
+        strtab++;
+    }
+    strtab++; // skip the terminating zero
+    cur_strx++;
+  }
+  DEBUG_ONLY(tty->print_cr("string number %d not found.", strx_wanted));
+  return NULL;
+}
+
+
 #endif
 
 
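The new MachODecoder::demangle leans on the Itanium C++ ABI demangler. A self-contained sketch of that contract, for reference (the mangled name is just an example input; letting __cxa_demangle allocate and then releasing with the C library's free is exactly the point the comment above makes):

    #include <cxxabi.h>
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int status = 0;
      // Pass NULL for the output buffer so the demangler allocates with its
      // own malloc; the result must then be released with the matching ::free.
      char* s = abi::__cxa_demangle("_ZNSt6vectorIiSaIiEE9push_backERKi",
                                    NULL, NULL, &status);
      if (s != NULL && status == 0) {
        printf("%s\n", s);   // std::vector<int, ...>::push_back(int const&)
        ::free(s);
      }
      return 0;
    }
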
--- a/src/os/bsd/vm/decoder_machO.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/bsd/vm/decoder_machO.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -31,10 +31,25 @@
 
 // Just a placeholder for now; a real implementation should derive
 // from AbstractDecoder
-class MachODecoder : public NullDecoder {
-public:
+class MachODecoder : public AbstractDecoder {
+ public:
   MachODecoder() { }
   ~MachODecoder() { }
+  virtual bool can_decode_C_frame_in_vm() const {
+    return true;
+  }
+  virtual bool demangle(const char* symbol, char* buf, int buflen);
+  virtual bool decode(address pc, char* buf, int buflen, int* offset,
+                      const void* base);
+  virtual bool decode(address pc, char* buf, int buflen, int* offset,
+                      const char* module_path = NULL) {
+    ShouldNotReachHere();
+    return false;
+  }
+
+ private:
+  void * mach_find_command(struct mach_header_64 * mach_base, uint32_t command_wanted);
+  char * mach_find_in_stringtable(char *strtab, uint32_t tablesize, int strx_wanted);
 };
 
 #endif
--- a/src/os/bsd/vm/osThread_bsd.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/bsd/vm/osThread_bsd.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -42,26 +42,19 @@
 #ifdef _ALLBSD_SOURCE
 
 #ifdef __APPLE__
-  thread_t  _thread_id;
+  typedef thread_t thread_id_t;
 #else
-  pthread_t _thread_id;
+  typedef pthread_t thread_id_t;
+#endif
+
+#else
+  typedef pid_t thread_id_t;
 #endif
 
   // _pthread_id is the pthread id, which is used by library calls
   // (e.g. pthread_kill).
   pthread_t _pthread_id;
 
-#else
-  // _thread_id is kernel thread id (similar to LWP id on Solaris). Each
-  // thread has a unique thread_id (BsdThreads or NPTL). It can be used
-  // to access /proc.
-  pid_t     _thread_id;
-
-  // _pthread_id is the pthread id, which is used by library calls
-  // (e.g. pthread_kill).
-  pthread_t _pthread_id;
-#endif
-
   sigset_t _caller_sigmask; // Caller's signal mask
 
  public:
@@ -70,25 +63,11 @@
   sigset_t  caller_sigmask() const       { return _caller_sigmask; }
   void    set_caller_sigmask(sigset_t sigmask)  { _caller_sigmask = sigmask; }
 
-#ifdef _ALLBSD_SOURCE
-#ifdef __APPLE__
-  thread_t thread_id() const {
-    return _thread_id;
-  }
-#else
-  pthread_t thread_id() const {
-    return _thread_id;
-  }
-#endif
-#else
-  pid_t thread_id() const {
-    return _thread_id;
-  }
-#endif
 #ifndef PRODUCT
   // Used for debugging, return a unique integer for each thread.
   intptr_t thread_identifier() const   { return (intptr_t)_pthread_id; }
 #endif
+
 #ifdef ASSERT
   // We expect no reposition failures so kill vm if we get one.
   //
@@ -96,21 +75,7 @@
     return false;
   }
 #endif // ASSERT
-#ifdef _ALLBSD_SOURCE
-#ifdef __APPLE__
-  void set_thread_id(thread_t id) {
-    _thread_id = id;
-  }
-#else
-  void set_thread_id(pthread_t id) {
-    _thread_id = id;
-  }
-#endif
-#else
-  void set_thread_id(pid_t id) {
-    _thread_id = id;
-  }
-#endif
+
   pthread_t pthread_id() const {
     return _pthread_id;
   }
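
The rewritten osThread_bsd.hpp replaces three #ifdef'd copies of the _thread_id field and its accessors with a single platform typedef, so the shared OSThread code can declare the field and accessors once. A standalone model of the pattern (hypothetical class name, not the HotSpot header):

    #include <pthread.h>

    class OSThreadModel {
     public:
    #ifdef __APPLE__
      typedef unsigned int thread_id_t;   // stands in for Mach's thread_t
    #else
      typedef pthread_t thread_id_t;
    #endif

     private:
      thread_id_t _thread_id;             // declared once, for every platform

     public:
      thread_id_t thread_id() const      { return _thread_id; }
      void set_thread_id(thread_id_t id) { _thread_id = id; }
    };

    int main() {
      OSThreadModel t;
      t.set_thread_id(OSThreadModel::thread_id_t());
      (void)t.thread_id();
      return 0;
    }
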
--- a/src/os/bsd/vm/os_bsd.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/bsd/vm/os_bsd.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -440,7 +440,7 @@
   // code needs to be changed accordingly.
 
   // The next few definitions allow the code to be verbatim:
-#define malloc(n) (char*)NEW_C_HEAP_ARRAY(char, (n))
+#define malloc(n) (char*)NEW_C_HEAP_ARRAY(char, (n), mtInternal)
 #define getenv(n) ::getenv(n)
 
 /*
@@ -1913,11 +1913,11 @@
     // release the storage
     for (int i = 0 ; i < n ; i++) {
       if (pelements[i] != NULL) {
-        FREE_C_HEAP_ARRAY(char, pelements[i]);
+        FREE_C_HEAP_ARRAY(char, pelements[i], mtInternal);
       }
     }
     if (pelements != NULL) {
-      FREE_C_HEAP_ARRAY(char*, pelements);
+      FREE_C_HEAP_ARRAY(char*, pelements, mtInternal);
     }
   } else {
     snprintf(buffer, buflen, "%s/" JNI_LIB_PREFIX "%s" JNI_LIB_SUFFIX, pname, fname);
@@ -1946,10 +1946,16 @@
   return false;
 }
 
+
+#define MACH_MAXSYMLEN 256
+
 bool os::dll_address_to_function_name(address addr, char *buf,
                                       int buflen, int *offset) {
   Dl_info dlinfo;
-
+  char localbuf[MACH_MAXSYMLEN];
+
+  // dladdr will find names of dynamic functions only, but does it
+  // set dli_fbase to the mach_header address when it "fails"?
   if (dladdr((void*)addr, &dlinfo) && dlinfo.dli_sname != NULL) {
     if (buf != NULL) {
       if(!Decoder::demangle(dlinfo.dli_sname, buf, buflen)) {
@@ -1965,6 +1971,14 @@
     }
   }
 
+  // Handle non-dynamic manually:
+  if (dlinfo.dli_fbase != NULL &&
+      Decoder::decode(addr, localbuf, MACH_MAXSYMLEN, offset, dlinfo.dli_fbase)) {
+    if(!Decoder::demangle(localbuf, buf, buflen)) {
+      jio_snprintf(buf, buflen, "%s", localbuf);
+    }
+    return true;
+  }
   if (buf != NULL) buf[0] = '\0';
   if (offset != NULL) *offset = -1;
   return false;
@@ -2766,7 +2780,7 @@
 //       All it does is to check if there are enough free pages
 //       left at the time of mmap(). This could be a potential
 //       problem.
-bool os::commit_memory(char* addr, size_t size, bool exec) {
+bool os::pd_commit_memory(char* addr, size_t size, bool exec) {
   int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
 #ifdef __OpenBSD__
   // XXX: Work-around mmap/MAP_FIXED bug temporarily on OpenBSD
@@ -2790,7 +2804,7 @@
 #endif
 #endif
 
-bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
+bool os::pd_commit_memory(char* addr, size_t size, size_t alignment_hint,
                        bool exec) {
 #ifndef _ALLBSD_SOURCE
   if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
@@ -2806,7 +2820,7 @@
   return commit_memory(addr, size, exec);
 }
 
-void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
+void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
 #ifndef _ALLBSD_SOURCE
   if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
     // We don't check the return value: madvise(MADV_HUGEPAGE) may not
@@ -2816,7 +2830,7 @@
 #endif
 }
 
-void os::free_memory(char *addr, size_t bytes, size_t alignment_hint) {
+void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) {
   ::madvise(addr, bytes, MADV_DONTNEED);
 }
 
@@ -2958,7 +2972,7 @@
 unsigned long* os::Bsd::_numa_all_nodes;
 #endif
 
-bool os::uncommit_memory(char* addr, size_t size) {
+bool os::pd_uncommit_memory(char* addr, size_t size) {
 #ifdef __OpenBSD__
   // XXX: Work-around mmap/MAP_FIXED bug temporarily on OpenBSD
   return ::mprotect(addr, size, PROT_NONE) == 0;
@@ -2969,7 +2983,7 @@
 #endif
 }
 
-bool os::create_stack_guard_pages(char* addr, size_t size) {
+bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
   return os::commit_memory(addr, size);
 }
 
@@ -3023,12 +3037,12 @@
   return ::munmap(addr, size) == 0;
 }
 
-char* os::reserve_memory(size_t bytes, char* requested_addr,
+char* os::pd_reserve_memory(size_t bytes, char* requested_addr,
                          size_t alignment_hint) {
   return anon_mmap(requested_addr, bytes, (requested_addr != NULL));
 }
 
-bool os::release_memory(char* addr, size_t size) {
+bool os::pd_release_memory(char* addr, size_t size) {
   return anon_munmap(addr, size);
 }
 
@@ -3331,7 +3345,7 @@
 // Reserve memory at an arbitrary address, only if that area is
 // available (and not reserved for something else).
 
-char* os::attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
+char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
   const int max_tries = 10;
   char* base[max_tries];
   size_t size[max_tries];
@@ -4987,7 +5001,7 @@
 }
 
 // Map a block of memory.
-char* os::map_memory(int fd, const char* file_name, size_t file_offset,
+char* os::pd_map_memory(int fd, const char* file_name, size_t file_offset,
                      char *addr, size_t bytes, bool read_only,
                      bool allow_exec) {
   int prot;
@@ -5019,7 +5033,7 @@
 
 
 // Remap a block of memory.
-char* os::remap_memory(int fd, const char* file_name, size_t file_offset,
+char* os::pd_remap_memory(int fd, const char* file_name, size_t file_offset,
                        char *addr, size_t bytes, bool read_only,
                        bool allow_exec) {
   // same as map_memory() on this OS
@@ -5029,7 +5043,7 @@
 
 
 // Unmap a block of memory.
-bool os::unmap_memory(char* addr, size_t bytes) {
+bool os::pd_unmap_memory(char* addr, size_t bytes) {
   return munmap(addr, bytes) == 0;
 }
 
@@ -5801,3 +5815,14 @@
 
     return true;
 }
+
+// Get the default path to the core file
+// Returns the length of the string
+int os::get_core_path(char* buffer, size_t bufferSize) {
+  int n = jio_snprintf(buffer, bufferSize, "/cores");
+
+  // Truncate if theoretical string was longer than bufferSize
+  n = MIN2(n, (int)bufferSize);
+
+  return n;
+}
--- a/src/os/bsd/vm/os_bsd.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/bsd/vm/os_bsd.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -202,7 +202,7 @@
   static void fast_thread_clock_init(void);
 #endif
 
-  static bool supports_monotonic_clock() {
+  static inline bool supports_monotonic_clock() {
     return _clock_gettime != NULL;
   }
 
@@ -312,7 +312,7 @@
 };
 
 
-class PlatformEvent : public CHeapObj {
+class PlatformEvent : public CHeapObj<mtInternal> {
   private:
     double CachePad [4] ;   // increase odds that _mutex is sole occupant of cache line
     volatile int _Event ;
@@ -347,7 +347,7 @@
     void SetAssociation (Thread * a) { _Assoc = a ; }
 } ;
 
-class PlatformParker : public CHeapObj {
+class PlatformParker : public CHeapObj<mtInternal> {
   protected:
     pthread_mutex_t _mutex [1] ;
     pthread_cond_t  _cond  [1] ;
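
PlatformEvent and PlatformParker now inherit from CHeapObj<mtInternal>, so their heap allocations are charged to a compile-time memory-type tag for Native Memory Tracking. A simplified standalone model of what that template parameter buys (the tag enum and counters are illustrative, not the HotSpot allocator):

    #include <cstddef>
    #include <cstdlib>

    enum MemTag { tagInternal, tagThread, tagCount };
    static size_t g_tagged_bytes[tagCount];        // per-tag byte counters

    template <MemTag T>
    class TaggedHeapObj {
     public:
      void* operator new(size_t size) {
        g_tagged_bytes[T] += size;                 // charge the tag at allocation
        return ::malloc(size);
      }
      void operator delete(void* p, size_t size) {
        g_tagged_bytes[T] -= size;                 // and credit it on free
        ::free(p);
      }
    };

    class PlatformEventModel : public TaggedHeapObj<tagInternal> {
      volatile int _event;
    };

    int main() {
      PlatformEventModel* e = new PlatformEventModel();
      delete e;                                    // counter returns to zero
      return (int)g_tagged_bytes[tagInternal];
    }
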
--- a/src/os/bsd/vm/os_bsd.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/bsd/vm/os_bsd.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -95,7 +95,7 @@
 
 
 // On Bsd, reservations are made on a page by page basis, nothing to do.
-inline void os::split_reserved_memory(char *base, size_t size,
+inline void os::pd_split_reserved_memory(char *base, size_t size,
                                       size_t split, bool realloc) {
 }
 
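The os::commit_memory-to-os::pd_commit_memory renames across these files all follow one pattern: the shared os layer keeps the public entry point and its bookkeeping, and each platform now supplies only the pd_-prefixed worker. A standalone sketch of that split (g_committed_bytes stands in for the real memory tracker; names are hypothetical):

    #include <cstddef>
    #include <cstdio>

    static size_t g_committed_bytes = 0;               // stand-in for MemTracker

    static bool pd_commit_memory(char* addr, size_t size, bool exec) {
      (void)addr; (void)exec;                          // platform mmap/mprotect goes here
      return true;
    }

    static bool commit_memory(char* addr, size_t size, bool exec) {
      bool res = pd_commit_memory(addr, size, exec);   // platform-dependent work
      if (res) {
        g_committed_bytes += size;                     // shared, platform-neutral record
      }
      return res;
    }

    int main() {
      char region[4096];
      commit_memory(region, sizeof(region), false);
      printf("committed: %zu bytes\n", g_committed_bytes);
      return 0;
    }
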
--- a/src/os/bsd/vm/perfMemory_bsd.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/bsd/vm/perfMemory_bsd.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -126,7 +126,7 @@
       }
     }
   }
-  FREE_C_HEAP_ARRAY(char, destfile);
+  FREE_C_HEAP_ARRAY(char, destfile, mtInternal);
 }
 
 
@@ -153,7 +153,7 @@
   const char* tmpdir = os::get_temp_directory();
   const char* perfdir = PERFDATA_NAME;
   size_t nbytes = strlen(tmpdir) + strlen(perfdir) + strlen(user) + 3;
-  char* dirname = NEW_C_HEAP_ARRAY(char, nbytes);
+  char* dirname = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
 
   // construct the path name to user specific tmp directory
   snprintf(dirname, nbytes, "%s/%s_%s", tmpdir, perfdir, user);
@@ -246,7 +246,7 @@
   if (bufsize == -1)
     bufsize = 1024;
 
-  char* pwbuf = NEW_C_HEAP_ARRAY(char, bufsize);
+  char* pwbuf = NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);
 
   // POSIX interface to getpwuid_r is used on LINUX
   struct passwd* p;
@@ -278,14 +278,14 @@
                                      "pw_name zero length");
       }
     }
-    FREE_C_HEAP_ARRAY(char, pwbuf);
+    FREE_C_HEAP_ARRAY(char, pwbuf, mtInternal);
     return NULL;
   }
 
-  char* user_name = NEW_C_HEAP_ARRAY(char, strlen(p->pw_name) + 1);
+  char* user_name = NEW_C_HEAP_ARRAY(char, strlen(p->pw_name) + 1, mtInternal);
   strcpy(user_name, p->pw_name);
 
-  FREE_C_HEAP_ARRAY(char, pwbuf);
+  FREE_C_HEAP_ARRAY(char, pwbuf, mtInternal);
   return user_name;
 }
 
@@ -328,7 +328,7 @@
   // to determine the user name for the process id.
   //
   struct dirent* dentry;
-  char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname));
+  char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname), mtInternal);
   errno = 0;
   while ((dentry = os::readdir(tmpdirp, (struct dirent *)tdbuf)) != NULL) {
 
@@ -338,7 +338,7 @@
     }
 
     char* usrdir_name = NEW_C_HEAP_ARRAY(char,
-                              strlen(tmpdirname) + strlen(dentry->d_name) + 2);
+                 strlen(tmpdirname) + strlen(dentry->d_name) + 2, mtInternal);
     strcpy(usrdir_name, tmpdirname);
     strcat(usrdir_name, "/");
     strcat(usrdir_name, dentry->d_name);
@@ -346,7 +346,7 @@
     DIR* subdirp = os::opendir(usrdir_name);
 
     if (subdirp == NULL) {
-      FREE_C_HEAP_ARRAY(char, usrdir_name);
+      FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
       continue;
     }
 
@@ -357,13 +357,13 @@
     // symlink can be exploited.
     //
     if (!is_directory_secure(usrdir_name)) {
-      FREE_C_HEAP_ARRAY(char, usrdir_name);
+      FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
       os::closedir(subdirp);
       continue;
     }
 
     struct dirent* udentry;
-    char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name));
+    char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name), mtInternal);
     errno = 0;
     while ((udentry = os::readdir(subdirp, (struct dirent *)udbuf)) != NULL) {
 
@@ -372,7 +372,7 @@
         int result;
 
         char* filename = NEW_C_HEAP_ARRAY(char,
-                            strlen(usrdir_name) + strlen(udentry->d_name) + 2);
+                 strlen(usrdir_name) + strlen(udentry->d_name) + 2, mtInternal);
 
         strcpy(filename, usrdir_name);
         strcat(filename, "/");
@@ -381,13 +381,13 @@
         // don't follow symbolic links for the file
         RESTARTABLE(::lstat(filename, &statbuf), result);
         if (result == OS_ERR) {
-           FREE_C_HEAP_ARRAY(char, filename);
+           FREE_C_HEAP_ARRAY(char, filename, mtInternal);
            continue;
         }
 
         // skip over files that are not regular files.
         if (!S_ISREG(statbuf.st_mode)) {
-          FREE_C_HEAP_ARRAY(char, filename);
+          FREE_C_HEAP_ARRAY(char, filename, mtInternal);
           continue;
         }
 
@@ -397,23 +397,23 @@
           if (statbuf.st_ctime > oldest_ctime) {
             char* user = strchr(dentry->d_name, '_') + 1;
 
-            if (oldest_user != NULL) FREE_C_HEAP_ARRAY(char, oldest_user);
-            oldest_user = NEW_C_HEAP_ARRAY(char, strlen(user)+1);
+            if (oldest_user != NULL) FREE_C_HEAP_ARRAY(char, oldest_user, mtInternal);
+            oldest_user = NEW_C_HEAP_ARRAY(char, strlen(user)+1, mtInternal);
 
             strcpy(oldest_user, user);
             oldest_ctime = statbuf.st_ctime;
           }
         }
 
-        FREE_C_HEAP_ARRAY(char, filename);
+        FREE_C_HEAP_ARRAY(char, filename, mtInternal);
       }
     }
     os::closedir(subdirp);
-    FREE_C_HEAP_ARRAY(char, udbuf);
-    FREE_C_HEAP_ARRAY(char, usrdir_name);
+    FREE_C_HEAP_ARRAY(char, udbuf, mtInternal);
+    FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
   }
   os::closedir(tmpdirp);
-  FREE_C_HEAP_ARRAY(char, tdbuf);
+  FREE_C_HEAP_ARRAY(char, tdbuf, mtInternal);
 
   return(oldest_user);
 }
@@ -434,7 +434,7 @@
   // add 2 for the file separator and a null terminator.
   size_t nbytes = strlen(dirname) + UINT_CHARS + 2;
 
-  char* name = NEW_C_HEAP_ARRAY(char, nbytes);
+  char* name = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
   snprintf(name, nbytes, "%s/%d", dirname, vmid);
 
   return name;
@@ -472,7 +472,7 @@
 static void remove_file(const char* dirname, const char* filename) {
 
   size_t nbytes = strlen(dirname) + strlen(filename) + 2;
-  char* path = NEW_C_HEAP_ARRAY(char, nbytes);
+  char* path = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
 
   strcpy(path, dirname);
   strcat(path, "/");
@@ -480,7 +480,7 @@
 
   remove_file(path);
 
-  FREE_C_HEAP_ARRAY(char, path);
+  FREE_C_HEAP_ARRAY(char, path, mtInternal);
 }
 
 
@@ -517,7 +517,7 @@
   // opendir/readdir.
   //
   struct dirent* entry;
-  char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname));
+  char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname), mtInternal);
   errno = 0;
   while ((entry = os::readdir(dirp, (struct dirent *)dbuf)) != NULL) {
 
@@ -556,7 +556,7 @@
     errno = 0;
   }
   os::closedir(dirp);
-  FREE_C_HEAP_ARRAY(char, dbuf);
+  FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
 }
 
 // make the user specific temporary directory. Returns true if
@@ -723,11 +723,11 @@
 
   fd = create_sharedmem_resources(dirname, filename, size);
 
-  FREE_C_HEAP_ARRAY(char, user_name);
-  FREE_C_HEAP_ARRAY(char, dirname);
+  FREE_C_HEAP_ARRAY(char, user_name, mtInternal);
+  FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
 
   if (fd == -1) {
-    FREE_C_HEAP_ARRAY(char, filename);
+    FREE_C_HEAP_ARRAY(char, filename, mtInternal);
     return NULL;
   }
 
@@ -743,7 +743,7 @@
       warning("mmap failed -  %s\n", strerror(errno));
     }
     remove_file(filename);
-    FREE_C_HEAP_ARRAY(char, filename);
+    FREE_C_HEAP_ARRAY(char, filename, mtInternal);
     return NULL;
   }
 
@@ -869,7 +869,7 @@
   // store file, we don't follow them when attaching either.
   //
   if (!is_directory_secure(dirname)) {
-    FREE_C_HEAP_ARRAY(char, dirname);
+    FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
               "Process not found");
   }
@@ -884,9 +884,9 @@
   strcpy(rfilename, filename);
 
   // free the c heap resources that are no longer needed
-  if (luser != user) FREE_C_HEAP_ARRAY(char, luser);
-  FREE_C_HEAP_ARRAY(char, dirname);
-  FREE_C_HEAP_ARRAY(char, filename);
+  if (luser != user) FREE_C_HEAP_ARRAY(char, luser, mtInternal);
+  FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
+  FREE_C_HEAP_ARRAY(char, filename, mtInternal);
 
   // open the shared memory file for the given vmid
   fd = open_sharedmem_file(rfilename, file_flags, CHECK);
--- a/src/os/linux/vm/osThread_linux.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/linux/vm/osThread_linux.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,8 @@
 
 #ifndef OS_LINUX_VM_OSTHREAD_LINUX_HPP
 #define OS_LINUX_VM_OSTHREAD_LINUX_HPP
+ public:
+  typedef pid_t thread_id_t;
 
  private:
   int _thread_type;
@@ -37,13 +39,6 @@
     _thread_type = type;
   }
 
- private:
-
-  // _thread_id is kernel thread id (similar to LWP id on Solaris). Each
-  // thread has a unique thread_id (LinuxThreads or NPTL). It can be used
-  // to access /proc.
-  pid_t     _thread_id;
-
   // _pthread_id is the pthread id, which is used by library calls
   // (e.g. pthread_kill).
   pthread_t _pthread_id;
@@ -56,9 +51,6 @@
   sigset_t  caller_sigmask() const       { return _caller_sigmask; }
   void    set_caller_sigmask(sigset_t sigmask)  { _caller_sigmask = sigmask; }
 
-  pid_t thread_id() const {
-    return _thread_id;
-  }
 #ifndef PRODUCT
   // Used for debugging, return a unique integer for each thread.
   int thread_identifier() const   { return _thread_id; }
@@ -70,9 +62,6 @@
     return false;
   }
 #endif // ASSERT
-  void set_thread_id(pid_t id) {
-    _thread_id = id;
-  }
   pthread_t pthread_id() const {
     return _pthread_id;
   }
--- a/src/os/linux/vm/os_linux.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/linux/vm/os_linux.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -371,7 +371,7 @@
   // code needs to be changed accordingly.
 
   // The next few definitions allow the code to be verbatim:
-#define malloc(n) (char*)NEW_C_HEAP_ARRAY(char, (n))
+#define malloc(n) (char*)NEW_C_HEAP_ARRAY(char, (n), mtInternal)
 #define getenv(n) ::getenv(n)
 
 /*
@@ -639,7 +639,7 @@
 
   size_t n = confstr(_CS_GNU_LIBC_VERSION, NULL, 0);
   if (n > 0) {
-     char *str = (char *)malloc(n);
+     char *str = (char *)malloc(n, mtInternal);
      confstr(_CS_GNU_LIBC_VERSION, str, n);
      os::Linux::set_glibc_version(str);
   } else {
@@ -652,7 +652,7 @@
 
   n = confstr(_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);
   if (n > 0) {
-     char *str = (char *)malloc(n);
+     char *str = (char *)malloc(n, mtInternal);
      confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n);
      // Vanilla RH-9 (glibc 2.3.2) has a bug that confstr() always tells
      // us "NPTL-0.29" even we are running with LinuxThreads. Check if this
@@ -1685,11 +1685,11 @@
     // release the storage
     for (int i = 0 ; i < n ; i++) {
       if (pelements[i] != NULL) {
-        FREE_C_HEAP_ARRAY(char, pelements[i]);
+        FREE_C_HEAP_ARRAY(char, pelements[i], mtInternal);
       }
     }
     if (pelements != NULL) {
-      FREE_C_HEAP_ARRAY(char*, pelements);
+      FREE_C_HEAP_ARRAY(char*, pelements, mtInternal);
     }
   } else {
     snprintf(buffer, buflen, "%s/lib%s.so", pname, fname);
@@ -2469,7 +2469,7 @@
 //       All it does is to check if there are enough free pages
 //       left at the time of mmap(). This could be a potential
 //       problem.
-bool os::commit_memory(char* addr, size_t size, bool exec) {
+bool os::pd_commit_memory(char* addr, size_t size, bool exec) {
   int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
   uintptr_t res = (uintptr_t) ::mmap(addr, size, prot,
                                    MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
@@ -2492,7 +2492,7 @@
 #define MADV_HUGEPAGE 14
 #endif
 
-bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
+bool os::pd_commit_memory(char* addr, size_t size, size_t alignment_hint,
                        bool exec) {
   if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
     int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
@@ -2516,7 +2516,7 @@
   return false;
 }
 
-void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
+void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
   if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
     // We don't check the return value: madvise(MADV_HUGEPAGE) may not
     // be supported or the memory may already be backed by huge pages.
@@ -2524,8 +2524,15 @@
   }
 }
 
-void os::free_memory(char *addr, size_t bytes, size_t alignment_hint) {
-  commit_memory(addr, bytes, alignment_hint, false);
+void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) {
+  // This method works by doing an mmap over an existing mapping and effectively discarding
+  // the existing pages. However, it won't work for SHM-based large pages that cannot be
+  // uncommitted at all. We don't do anything in this case to avoid creating a segment with
+  // small pages on top of the SHM segment. This method always works for small pages, so we
+  // allow that in any case.
+  if (alignment_hint <= (size_t)os::vm_page_size() || !UseSHM) {
+    commit_memory(addr, bytes, alignment_hint, false);
+  }
 }
 
 void os::numa_make_global(char *addr, size_t bytes) {
@@ -2639,7 +2646,7 @@
       if (numa_available() != -1) {
         set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
         // Create a cpu -> node mapping
-        _cpu_to_node = new (ResourceObj::C_HEAP) GrowableArray<int>(0, true);
+        _cpu_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true);
         rebuild_cpu_to_node_map();
         return true;
       }
@@ -2669,7 +2676,7 @@
   cpu_to_node()->at_grow(cpu_num - 1);
   size_t node_num = numa_get_groups_num();
 
-  unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size);
+  unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size, mtInternal);
   for (size_t i = 0; i < node_num; i++) {
     if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
       for (size_t j = 0; j < cpu_map_valid_size; j++) {
@@ -2683,7 +2690,7 @@
       }
     }
   }
-  FREE_C_HEAP_ARRAY(unsigned long, cpu_map);
+  FREE_C_HEAP_ARRAY(unsigned long, cpu_map, mtInternal);
 }
 
 int os::Linux::get_node_by_cpu(int cpu_id) {
@@ -2702,7 +2709,7 @@
 os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory;
 unsigned long* os::Linux::_numa_all_nodes;
 
-bool os::uncommit_memory(char* addr, size_t size) {
+bool os::pd_uncommit_memory(char* addr, size_t size) {
   uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE,
                 MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE|MAP_ANONYMOUS, -1, 0);
   return res  != (uintptr_t) MAP_FAILED;
@@ -2767,7 +2774,7 @@
 // munmap() the guard pages we don't leave a hole in the stack
 // mapping. This only affects the main/initial thread, but guard
 // against future OS changes
-bool os::create_stack_guard_pages(char* addr, size_t size) {
+bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
   uintptr_t stack_extent, stack_base;
   bool chk_bounds = NOT_DEBUG(os::Linux::is_initial_thread()) DEBUG_ONLY(true);
   if (chk_bounds && get_stack_bounds(&stack_extent, &stack_base)) {
@@ -2840,12 +2847,12 @@
   return ::munmap(addr, size) == 0;
 }
 
-char* os::reserve_memory(size_t bytes, char* requested_addr,
+char* os::pd_reserve_memory(size_t bytes, char* requested_addr,
                          size_t alignment_hint) {
   return anon_mmap(requested_addr, bytes, (requested_addr != NULL));
 }
 
-bool os::release_memory(char* addr, size_t size) {
+bool os::pd_release_memory(char* addr, size_t size) {
   return anon_munmap(addr, size);
 }
 
@@ -3142,7 +3149,7 @@
 // Reserve memory at an arbitrary address, only if that area is
 // available (and not reserved for something else).
 
-char* os::attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
+char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
   const int max_tries = 10;
   char* base[max_tries];
   size_t size[max_tries];
@@ -4664,7 +4671,7 @@
 }
 
 // Map a block of memory.
-char* os::map_memory(int fd, const char* file_name, size_t file_offset,
+char* os::pd_map_memory(int fd, const char* file_name, size_t file_offset,
                      char *addr, size_t bytes, bool read_only,
                      bool allow_exec) {
   int prot;
@@ -4694,7 +4701,7 @@
 
 
 // Remap a block of memory.
-char* os::remap_memory(int fd, const char* file_name, size_t file_offset,
+char* os::pd_remap_memory(int fd, const char* file_name, size_t file_offset,
                        char *addr, size_t bytes, bool read_only,
                        bool allow_exec) {
   // same as map_memory() on this OS
@@ -4704,7 +4711,7 @@
 
 
 // Unmap a block of memory.
-bool os::unmap_memory(char* addr, size_t bytes) {
+bool os::pd_unmap_memory(char* addr, size_t bytes) {
   return munmap(addr, bytes) == 0;
 }
 
@@ -5440,6 +5447,18 @@
     return true;
 }
 
+// Get the default path to the core file
+// Returns the length of the string
+int os::get_core_path(char* buffer, size_t bufferSize) {
+  const char* p = get_current_directory(buffer, bufferSize);
+
+  if (p == NULL) {
+    assert(p != NULL, "failed to get current directory");
+    return 0;
+  }
+
+  return strlen(buffer);
+}
 
 #ifdef JAVASE_EMBEDDED
 //
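
The new pd_free_memory comment describes discarding pages by mapping anonymous memory over an existing mapping with MAP_FIXED: the address range survives, but its contents are dropped and the physical pages become reclaimable. A self-contained demonstration of that trick:

    #include <sys/mman.h>
    #include <cstdio>
    #include <cstring>

    int main() {
      const size_t len = 4096;
      char* p = (char*)mmap(NULL, len, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (p == MAP_FAILED) return 1;
      memset(p, 0xAB, len);                       // dirty the page

      // Discard: same range, MAP_FIXED, fresh anonymous backing.
      void* q = mmap(p, len, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
      if (q == MAP_FAILED) return 1;

      printf("after discard: first byte = %d\n", p[0]);  // 0: pages were dropped
      munmap(p, len);
      return 0;
    }
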
--- a/src/os/linux/vm/os_linux.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/linux/vm/os_linux.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -178,7 +178,7 @@
   // fast POSIX clocks support
   static void fast_thread_clock_init(void);
 
-  static bool supports_monotonic_clock() {
+  static inline bool supports_monotonic_clock() {
     return _clock_gettime != NULL;
   }
 
@@ -287,7 +287,7 @@
 };
 
 
-class PlatformEvent : public CHeapObj {
+class PlatformEvent : public CHeapObj<mtInternal> {
   private:
     double CachePad [4] ;   // increase odds that _mutex is sole occupant of cache line
     volatile int _Event ;
@@ -322,7 +322,7 @@
     void SetAssociation (Thread * a) { _Assoc = a ; }
 } ;
 
-class PlatformParker : public CHeapObj {
+class PlatformParker : public CHeapObj<mtInternal> {
   protected:
     pthread_mutex_t _mutex [1] ;
     pthread_cond_t  _cond  [1] ;
--- a/src/os/linux/vm/os_linux.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/linux/vm/os_linux.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -99,7 +99,7 @@
 
 
 // On Linux, reservations are made on a page by page basis, nothing to do.
-inline void os::split_reserved_memory(char *base, size_t size,
+inline void os::pd_split_reserved_memory(char *base, size_t size,
                                       size_t split, bool realloc) {
 }
 
--- a/src/os/linux/vm/perfMemory_linux.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/linux/vm/perfMemory_linux.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -126,7 +126,7 @@
       }
     }
   }
-  FREE_C_HEAP_ARRAY(char, destfile);
+  FREE_C_HEAP_ARRAY(char, destfile, mtInternal);
 }
 
 
@@ -153,7 +153,7 @@
   const char* tmpdir = os::get_temp_directory();
   const char* perfdir = PERFDATA_NAME;
   size_t nbytes = strlen(tmpdir) + strlen(perfdir) + strlen(user) + 3;
-  char* dirname = NEW_C_HEAP_ARRAY(char, nbytes);
+  char* dirname = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
 
   // construct the path name to user specific tmp directory
   snprintf(dirname, nbytes, "%s/%s_%s", tmpdir, perfdir, user);
@@ -246,7 +246,7 @@
   if (bufsize == -1)
     bufsize = 1024;
 
-  char* pwbuf = NEW_C_HEAP_ARRAY(char, bufsize);
+  char* pwbuf = NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);
 
   // POSIX interface to getpwuid_r is used on LINUX
   struct passwd* p;
@@ -278,14 +278,14 @@
                                      "pw_name zero length");
       }
     }
-    FREE_C_HEAP_ARRAY(char, pwbuf);
+    FREE_C_HEAP_ARRAY(char, pwbuf, mtInternal);
     return NULL;
   }
 
-  char* user_name = NEW_C_HEAP_ARRAY(char, strlen(p->pw_name) + 1);
+  char* user_name = NEW_C_HEAP_ARRAY(char, strlen(p->pw_name) + 1, mtInternal);
   strcpy(user_name, p->pw_name);
 
-  FREE_C_HEAP_ARRAY(char, pwbuf);
+  FREE_C_HEAP_ARRAY(char, pwbuf, mtInternal);
   return user_name;
 }
 
@@ -328,7 +328,7 @@
   // to determine the user name for the process id.
   //
   struct dirent* dentry;
-  char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname));
+  char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname), mtInternal);
   errno = 0;
   while ((dentry = os::readdir(tmpdirp, (struct dirent *)tdbuf)) != NULL) {
 
@@ -338,7 +338,7 @@
     }
 
     char* usrdir_name = NEW_C_HEAP_ARRAY(char,
-                              strlen(tmpdirname) + strlen(dentry->d_name) + 2);
+                     strlen(tmpdirname) + strlen(dentry->d_name) + 2, mtInternal);
     strcpy(usrdir_name, tmpdirname);
     strcat(usrdir_name, "/");
     strcat(usrdir_name, dentry->d_name);
@@ -346,7 +346,7 @@
     DIR* subdirp = os::opendir(usrdir_name);
 
     if (subdirp == NULL) {
-      FREE_C_HEAP_ARRAY(char, usrdir_name);
+      FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
       continue;
     }
 
@@ -357,13 +357,13 @@
     // symlink can be exploited.
     //
     if (!is_directory_secure(usrdir_name)) {
-      FREE_C_HEAP_ARRAY(char, usrdir_name);
+      FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
       os::closedir(subdirp);
       continue;
     }
 
     struct dirent* udentry;
-    char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name));
+    char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name), mtInternal);
     errno = 0;
     while ((udentry = os::readdir(subdirp, (struct dirent *)udbuf)) != NULL) {
 
@@ -372,7 +372,7 @@
         int result;
 
         char* filename = NEW_C_HEAP_ARRAY(char,
-                            strlen(usrdir_name) + strlen(udentry->d_name) + 2);
+                   strlen(usrdir_name) + strlen(udentry->d_name) + 2, mtInternal);
 
         strcpy(filename, usrdir_name);
         strcat(filename, "/");
@@ -381,13 +381,13 @@
         // don't follow symbolic links for the file
         RESTARTABLE(::lstat(filename, &statbuf), result);
         if (result == OS_ERR) {
-           FREE_C_HEAP_ARRAY(char, filename);
+           FREE_C_HEAP_ARRAY(char, filename, mtInternal);
            continue;
         }
 
         // skip over files that are not regular files.
         if (!S_ISREG(statbuf.st_mode)) {
-          FREE_C_HEAP_ARRAY(char, filename);
+          FREE_C_HEAP_ARRAY(char, filename, mtInternal);
           continue;
         }
 
@@ -397,23 +397,23 @@
           if (statbuf.st_ctime > oldest_ctime) {
             char* user = strchr(dentry->d_name, '_') + 1;
 
-            if (oldest_user != NULL) FREE_C_HEAP_ARRAY(char, oldest_user);
-            oldest_user = NEW_C_HEAP_ARRAY(char, strlen(user)+1);
+            if (oldest_user != NULL) FREE_C_HEAP_ARRAY(char, oldest_user, mtInternal);
+            oldest_user = NEW_C_HEAP_ARRAY(char, strlen(user)+1, mtInternal);
 
             strcpy(oldest_user, user);
             oldest_ctime = statbuf.st_ctime;
           }
         }
 
-        FREE_C_HEAP_ARRAY(char, filename);
+        FREE_C_HEAP_ARRAY(char, filename, mtInternal);
       }
     }
     os::closedir(subdirp);
-    FREE_C_HEAP_ARRAY(char, udbuf);
-    FREE_C_HEAP_ARRAY(char, usrdir_name);
+    FREE_C_HEAP_ARRAY(char, udbuf, mtInternal);
+    FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
   }
   os::closedir(tmpdirp);
-  FREE_C_HEAP_ARRAY(char, tdbuf);
+  FREE_C_HEAP_ARRAY(char, tdbuf, mtInternal);
 
   return(oldest_user);
 }
@@ -434,7 +434,7 @@
   // add 2 for the file separator and a null terminator.
   size_t nbytes = strlen(dirname) + UINT_CHARS + 2;
 
-  char* name = NEW_C_HEAP_ARRAY(char, nbytes);
+  char* name = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
   snprintf(name, nbytes, "%s/%d", dirname, vmid);
 
   return name;
@@ -472,7 +472,7 @@
 static void remove_file(const char* dirname, const char* filename) {
 
   size_t nbytes = strlen(dirname) + strlen(filename) + 2;
-  char* path = NEW_C_HEAP_ARRAY(char, nbytes);
+  char* path = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
 
   strcpy(path, dirname);
   strcat(path, "/");
@@ -480,7 +480,7 @@
 
   remove_file(path);
 
-  FREE_C_HEAP_ARRAY(char, path);
+  FREE_C_HEAP_ARRAY(char, path, mtInternal);
 }
 
 
@@ -517,7 +517,7 @@
   // opendir/readdir.
   //
   struct dirent* entry;
-  char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname));
+  char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname), mtInternal);
   errno = 0;
   while ((entry = os::readdir(dirp, (struct dirent *)dbuf)) != NULL) {
 
@@ -556,7 +556,7 @@
     errno = 0;
   }
   os::closedir(dirp);
-  FREE_C_HEAP_ARRAY(char, dbuf);
+  FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
 }
 
 // make the user specific temporary directory. Returns true if
@@ -723,11 +723,11 @@
 
   fd = create_sharedmem_resources(dirname, filename, size);
 
-  FREE_C_HEAP_ARRAY(char, user_name);
-  FREE_C_HEAP_ARRAY(char, dirname);
+  FREE_C_HEAP_ARRAY(char, user_name, mtInternal);
+  FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
 
   if (fd == -1) {
-    FREE_C_HEAP_ARRAY(char, filename);
+    FREE_C_HEAP_ARRAY(char, filename, mtInternal);
     return NULL;
   }
 
@@ -743,7 +743,7 @@
       warning("mmap failed -  %s\n", strerror(errno));
     }
     remove_file(filename);
-    FREE_C_HEAP_ARRAY(char, filename);
+    FREE_C_HEAP_ARRAY(char, filename, mtInternal);
     return NULL;
   }
 
@@ -869,7 +869,7 @@
   // store file, we don't follow them when attaching either.
   //
   if (!is_directory_secure(dirname)) {
-    FREE_C_HEAP_ARRAY(char, dirname);
+    FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
               "Process not found");
   }
@@ -884,9 +884,9 @@
   strcpy(rfilename, filename);
 
   // free the c heap resources that are no longer needed
-  if (luser != user) FREE_C_HEAP_ARRAY(char, luser);
-  FREE_C_HEAP_ARRAY(char, dirname);
-  FREE_C_HEAP_ARRAY(char, filename);
+  if (luser != user) FREE_C_HEAP_ARRAY(char, luser, mtInternal);
+  FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
+  FREE_C_HEAP_ARRAY(char, filename, mtInternal);
 
   // open the shared memory file for the given vmid
   fd = open_sharedmem_file(rfilename, file_flags, CHECK);
--- a/src/os/posix/launcher/launcher.script	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/posix/launcher/launcher.script	Sat Oct 20 00:08:35 2012 -0700
@@ -29,7 +29,7 @@
 # inside Emacs".
 #
 # If the first parameter is "-dbx", HotSpot will be launched inside dbx.
-# 
+#
 # If the first parameter is "-valgrind", HotSpot will be launched
 # inside Valgrind (http://valgrind.kde.org) using the Memcheck skin,
 # and with memory leak detection enabled.  This currently (2005jan19)
@@ -45,19 +45,19 @@
 
 # This is the name of the gdb binary to use
 if [ ! "$GDB" ]
-then 
+then
     GDB=gdb
 fi
 
 # This is the name of the gdb binary to use
 if [ ! "$DBX" ]
-then 
+then
     DBX=dbx
 fi
 
 # This is the name of the Valgrind binary to use
 if [ ! "$VALGRIND" ]
-then 
+then
     VALGRIND=valgrind
 fi
 
@@ -98,7 +98,7 @@
 JDK=
 if [ "${ALT_JAVA_HOME}" = "" ]; then
     . ${MYDIR}/jdkpath.sh
-else 
+else
     JDK=${ALT_JAVA_HOME%%/jre};
 fi
 
@@ -114,22 +114,34 @@
 # any.
 JRE=$JDK/jre
 JAVA_HOME=$JDK
+export JAVA_HOME
+
 ARCH=@@LIBARCH@@
-
 SBP=${MYDIR}:${JRE}/lib/${ARCH}
 
-# Set up a suitable LD_LIBRARY_PATH
 
-if [ -z "$LD_LIBRARY_PATH" ]
+# Set up a suitable LD_LIBRARY_PATH or DYLD_LIBRARY_PATH
+OS=`uname -s`
+if [ "${OS}" = "Darwin" ]
 then
-    LD_LIBRARY_PATH="$SBP"
+    if [ -z "$DYLD_LIBRARY_PATH" ]
+    then
+        DYLD_LIBRARY_PATH="$SBP"
+    else
+        DYLD_LIBRARY_PATH="$SBP:$DYLD_LIBRARY_PATH"
+    fi
+    export DYLD_LIBRARY_PATH
 else
-    LD_LIBRARY_PATH="$SBP:$LD_LIBRARY_PATH"
+    # not 'Darwin'
+    if [ -z "$LD_LIBRARY_PATH" ]
+    then
+        LD_LIBRARY_PATH="$SBP"
+    else
+        LD_LIBRARY_PATH="$SBP:$LD_LIBRARY_PATH"
+    fi
+    export LD_LIBRARY_PATH
 fi
 
-export LD_LIBRARY_PATH
-export JAVA_HOME
-
 JPARMS="$@ $JAVA_ARGS";
 
 # Locate the gamma development launcher
--- a/src/os/posix/vm/os_posix.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/posix/vm/os_posix.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -23,6 +23,7 @@
 */
 
 #include "prims/jvm.h"
+#include "runtime/frame.inline.hpp"
 #include "runtime/os.hpp"
 #include "utilities/vmError.hpp"
 
@@ -33,19 +34,19 @@
 
 // Check core dump limit and report possible place where core can be found
 void os::check_or_create_dump(void* exceptionRecord, void* contextRecord, char* buffer, size_t bufferSize) {
+  int n;
   struct rlimit rlim;
-  static char cwd[O_BUFLEN];
   bool success;
 
-  get_current_directory(cwd, sizeof(cwd));
+  n = get_core_path(buffer, bufferSize);
 
   if (getrlimit(RLIMIT_CORE, &rlim) != 0) {
-    jio_snprintf(buffer, bufferSize, "%s/core or core.%d (may not exist)", cwd, current_process_id());
+    jio_snprintf(buffer + n, bufferSize - n, "/core or core.%d (may not exist)", current_process_id());
     success = true;
   } else {
     switch(rlim.rlim_cur) {
       case RLIM_INFINITY:
-        jio_snprintf(buffer, bufferSize, "%s/core or core.%d", cwd, current_process_id());
+        jio_snprintf(buffer + n, bufferSize - n, "/core or core.%d", current_process_id());
         success = true;
         break;
       case 0:
@@ -53,7 +54,7 @@
         success = false;
         break;
       default:
-        jio_snprintf(buffer, bufferSize, "%s/core or core.%d (max size %lu kB). To ensure a full core dump, try \"ulimit -c unlimited\" before starting Java again", cwd, current_process_id(), (unsigned long)(rlim.rlim_cur >> 10));
+        jio_snprintf(buffer + n, bufferSize - n, "/core or core.%d (max size %lu kB). To ensure a full core dump, try \"ulimit -c unlimited\" before starting Java again", current_process_id(), (unsigned long)(rlim.rlim_cur >> 10));
         success = true;
         break;
     }
@@ -61,6 +62,23 @@
   VMError::report_coredump_status(buffer, success);
 }
 
+address os::get_caller_pc(int n) {
+#ifdef _NMT_NOINLINE_
+  n ++;
+#endif
+  frame fr = os::current_frame();
+  while (n > 0 && fr.pc() &&
+    !os::is_first_C_frame(&fr) && fr.sender_pc()) {
+    fr = os::get_sender_for_C_frame(&fr);
+    n --;
+  }
+  if (n == 0) {
+    return fr.pc();
+  } else {
+    return NULL;
+  }
+}
+
 int os::get_last_error() {
   return errno;
 }
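
check_or_create_dump now composes its message in two steps: get_core_path fills in the platform-specific prefix and returns its length n, and the suffix is then printed at buffer + n so it never overwrites the prefix. A standalone sketch of the pattern (get_core_path_model mimics the BSD "/cores" variant above; names are illustrative):

    #include <cstdio>

    static int get_core_path_model(char* buffer, size_t bufferSize) {
      int n = snprintf(buffer, bufferSize, "/cores");     // platform-specific prefix
      return n < (int)bufferSize ? n : (int)bufferSize;   // clamp like MIN2 above
    }

    int main() {
      char buffer[128];
      int n = get_core_path_model(buffer, sizeof(buffer));
      int pid = 1234;                                     // stand-in for current_process_id()
      snprintf(buffer + n, sizeof(buffer) - n, "/core or core.%d", pid);
      printf("%s\n", buffer);                             // "/cores/core or core.1234"
      return 0;
    }
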
--- a/src/os/solaris/dtrace/generateJvmOffsets.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/solaris/dtrace/generateJvmOffsets.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -220,10 +220,10 @@
   printf("\n");
 
   GEN_OFFS(methodOopDesc, _constMethod);
-  GEN_OFFS(methodOopDesc, _constants);
   GEN_OFFS(methodOopDesc, _access_flags);
   printf("\n");
 
+  GEN_OFFS(constMethodOopDesc, _constants);
   GEN_OFFS(constMethodOopDesc, _flags);
   GEN_OFFS(constMethodOopDesc, _code_size);
   GEN_OFFS(constMethodOopDesc, _name_index);
--- a/src/os/solaris/dtrace/hs_private.d	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/solaris/dtrace/hs_private.d	Sat Oct 20 00:08:35 2012 -0700
@@ -23,7 +23,6 @@
  */
 
 provider hs_private {
-  probe hashtable__new_entry(void*, uintptr_t, void*); 
   probe safepoint__begin();
   probe safepoint__end();
   probe cms__initmark__begin();
--- a/src/os/solaris/dtrace/jhelper.d	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/solaris/dtrace/jhelper.d	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -118,7 +118,7 @@
   copyin_offset(OFFSET_Symbol_body);
 
   copyin_offset(OFFSET_methodOopDesc_constMethod);
-  copyin_offset(OFFSET_methodOopDesc_constants);
+  copyin_offset(OFFSET_constMethodOopDesc_constants);
   copyin_offset(OFFSET_constMethodOopDesc_name_index);
   copyin_offset(OFFSET_constMethodOopDesc_signature_index);
 
@@ -359,8 +359,8 @@
   this->signatureIndex = copyin_uint16(this->constMethod +
       OFFSET_constMethodOopDesc_signature_index);
 
-  this->constantPool = copyin_ptr(this->methodOopPtr +
-      OFFSET_methodOopDesc_constants);
+  this->constantPool = copyin_ptr(this->constMethod +
+      OFFSET_constMethodOopDesc_constants);
 
   this->nameSymbol = copyin_ptr(this->constantPool +
       this->nameIndex * sizeof (pointer) + SIZE_constantPoolOopDesc);
--- a/src/os/solaris/dtrace/libjvm_db.c	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/solaris/dtrace/libjvm_db.c	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -514,9 +514,9 @@
   char * signatureString = NULL;
   int err;
 
-  err = read_pointer(J, methodOopPtr + OFFSET_methodOopDesc_constants, &constantPool);
+  err = read_pointer(J, methodOopPtr + OFFSET_methodOopDesc_constMethod, &constMethod);
   CHECK_FAIL(err);
-  err = read_pointer(J, methodOopPtr + OFFSET_methodOopDesc_constMethod, &constMethod);
+  err = read_pointer(J, constMethod + OFFSET_constMethodOopDesc_constants, &constantPool);
   CHECK_FAIL(err);
 
   /* To get name string */
--- a/src/os/solaris/vm/attachListener_solaris.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/solaris/vm/attachListener_solaris.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -336,7 +336,9 @@
     // Return 0 (success) + file descriptor, or non-0 (error)
     if (res == 0) {
       door_desc_t desc;
-      desc.d_attributes = DOOR_DESCRIPTOR;
+      // DOOR_RELEASE flag makes sure fd is closed after passing it to
+      // the client.  See door_return(3DOOR) man page.
+      desc.d_attributes = DOOR_DESCRIPTOR | DOOR_RELEASE;
       desc.d_data.d_desc.d_descriptor = return_fd;
       door_return((char*)&res, sizeof(res), &desc, 1);
     } else {
--- a/src/os/solaris/vm/dtraceJSDT_solaris.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/solaris/vm/dtraceJSDT_solaris.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -626,45 +626,6 @@
   }
 }
 
-/**
- * This prints out hex data in a 'windbg' or 'xxd' form, where each line is:
- *   <hex-address>: 8 * <hex-halfword> <ascii translation>
- * example:
- * 0000000: 7f44 4f46 0102 0102 0000 0000 0000 0000  .DOF............
- * 0000010: 0000 0000 0000 0040 0000 0020 0000 0005  .......@... ....
- * 0000020: 0000 0000 0000 0040 0000 0000 0000 015d  .......@.......]
- * ...
- */
-static void printDOFRawData(void* dof) {
-  size_t size = ((dof_hdr_t*)dof)->dofh_loadsz;
-  size_t limit = (size + 16) / 16 * 16;
-  for (size_t i = 0; i < limit; ++i) {
-    if (i % 16 == 0) {
-      tty->print("%07x:", i);
-    }
-    if (i % 2 == 0) {
-      tty->print(" ");
-    }
-    if (i < size) {
-      tty->print("%02x", ((unsigned char*)dof)[i]);
-    } else {
-      tty->print("  ");
-    }
-    if ((i + 1) % 16 == 0) {
-      tty->print("  ");
-      for (size_t j = 0; j < 16; ++j) {
-        size_t idx = i + j - 15;
-        char c = ((char*)dof)[idx];
-        if (idx < size) {
-          tty->print("%c", c >= 32 && c <= 126 ? c : '.');
-        }
-      }
-      tty->print_cr("");
-    }
-  }
-  tty->print_cr("");
-}
-
 static void printDOFHelper(dof_helper_t* helper) {
   tty->print_cr("// dof_helper_t {");
   tty->print_cr("//   dofhp_mod = \"%s\"", helper->dofhp_mod);
@@ -672,7 +633,8 @@
   tty->print_cr("//   dofhp_dof = 0x%016llx", helper->dofhp_dof);
   printDOF((void*)helper->dofhp_dof);
   tty->print_cr("// }");
-  printDOFRawData((void*)helper->dofhp_dof);
+  size_t len = ((dof_hdr_t*)helper->dofhp_dof)->dofh_loadsz;
+  tty->print_data((void*)helper->dofhp_dof, len, true);
 }
 
 #else // ndef HAVE_DTRACE_H
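
The deleted printDOFRawData is superseded by the tty->print_data utility; the removed comment documents the 'windbg'/'xxd' layout it produced. For reference, a self-contained version of that layout (assuming print_data emits something equivalent): address, eight hex halfwords, then the ASCII translation.

    #include <cstdio>
    #include <cstring>

    static void hexdump(const void* data, size_t size) {
      const unsigned char* p = (const unsigned char*)data;
      for (size_t i = 0; i < size; i += 16) {
        printf("%07zx:", i);
        for (size_t j = 0; j < 16; j++) {
          if (j % 2 == 0) printf(" ");              // group bytes into halfwords
          if (i + j < size) printf("%02x", p[i + j]); else printf("  ");
        }
        printf("  ");
        for (size_t j = 0; j < 16 && i + j < size; j++) {
          unsigned char c = p[i + j];
          putchar(c >= 32 && c <= 126 ? c : '.');   // ASCII translation column
        }
        printf("\n");
      }
    }

    int main() {
      const char* dof = "\x7f" "DOF\x01\x02\x01\x02 sample header bytes";
      hexdump(dof, strlen(dof) + 1);
      return 0;
    }
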
--- a/src/os/solaris/vm/osThread_solaris.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/solaris/vm/osThread_solaris.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -26,9 +26,10 @@
 #define OS_SOLARIS_VM_OSTHREAD_SOLARIS_HPP
 
 // This is embedded via include into the class OSThread
+ public:
+  typedef thread_t thread_id_t;
 
  private:
-  thread_t _thread_id;         // Solaris thread id
   uint     _lwp_id;            // lwp ID, only used with bound threads
   int      _native_priority;   // Saved native priority when starting
                                // a bound thread
@@ -36,7 +37,6 @@
   bool     _vm_created_thread; // true if the VM created this thread,
                                // false if primary thread or attached thread
  public:
-  thread_t thread_id() const       { return _thread_id; }
   uint     lwp_id() const          { return _lwp_id; }
   int      native_priority() const { return _native_priority; }
 
@@ -62,7 +62,6 @@
     return true;
   }
 #endif
-  void set_thread_id(thread_t id)    { _thread_id = id; }
   void set_lwp_id(uint id)           { _lwp_id = id; }
   void set_native_priority(int prio) { _native_priority = prio; }
 
--- a/src/os/solaris/vm/os_solaris.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/solaris/vm/os_solaris.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -546,7 +546,7 @@
   // Find the number of processors in the processor set.
   if (pset_info(pset, NULL, id_length, NULL) == 0) {
     // Make up an array to hold their ids.
-    *id_array = NEW_C_HEAP_ARRAY(processorid_t, *id_length);
+    *id_array = NEW_C_HEAP_ARRAY(processorid_t, *id_length, mtInternal);
     // Fill in the array with their processor ids.
     if (pset_info(pset, NULL, id_length, *id_array) == 0) {
       result = true;
@@ -577,7 +577,7 @@
   // Find the number of processors online.
   *id_length = sysconf(_SC_NPROCESSORS_ONLN);
   // Make up an array to hold their ids.
-  *id_array = NEW_C_HEAP_ARRAY(processorid_t, *id_length);
+  *id_array = NEW_C_HEAP_ARRAY(processorid_t, *id_length, mtInternal);
   // Processors need not be numbered consecutively.
   long found = 0;
   processorid_t next = 0;
@@ -629,7 +629,7 @@
   // The next id, to limit loops.
   const processorid_t limit_id = max_id + 1;
   // Make up markers for available processors.
-  bool* available_id = NEW_C_HEAP_ARRAY(bool, limit_id);
+  bool* available_id = NEW_C_HEAP_ARRAY(bool, limit_id, mtInternal);
   for (uint c = 0; c < limit_id; c += 1) {
     available_id[c] = false;
   }
@@ -666,7 +666,7 @@
     }
   }
   if (available_id != NULL) {
-    FREE_C_HEAP_ARRAY(bool, available_id);
+    FREE_C_HEAP_ARRAY(bool, available_id, mtInternal);
   }
   return true;
 }
@@ -698,7 +698,7 @@
     }
   }
   if (id_array != NULL) {
-    FREE_C_HEAP_ARRAY(processorid_t, id_array);
+    FREE_C_HEAP_ARRAY(processorid_t, id_array, mtInternal);
   }
   return result;
 }
@@ -771,8 +771,8 @@
   // code needs to be changed accordingly.
 
   // The next few definitions allow the code to be verbatim:
-#define malloc(n) (char*)NEW_C_HEAP_ARRAY(char, (n))
-#define free(p) FREE_C_HEAP_ARRAY(char, p)
+#define malloc(n) (char*)NEW_C_HEAP_ARRAY(char, (n), mtInternal)
+#define free(p) FREE_C_HEAP_ARRAY(char, p, mtInternal)
 #define getenv(n) ::getenv(n)
 
 #define EXTENSIONS_DIR  "/lib/ext"
@@ -1013,15 +1013,6 @@
   // use debugger to set breakpoint here
 }
 
-// Returns an estimate of the current stack pointer. Result must be guaranteed to
-// point into the calling threads stack, and be no lower than the current stack
-// pointer.
-address os::current_stack_pointer() {
-  volatile int dummy;
-  address sp = (address)&dummy + 8;     // %%%% need to confirm if this is right
-  return sp;
-}
-
 static thread_t main_thread;
 
 // Thread start routine for all new Java threads
@@ -1936,11 +1927,11 @@
     // release the storage
     for (int i = 0 ; i < n ; i++) {
       if (pelements[i] != NULL) {
-        FREE_C_HEAP_ARRAY(char, pelements[i]);
+        FREE_C_HEAP_ARRAY(char, pelements[i], mtInternal);
       }
     }
     if (pelements != NULL) {
-      FREE_C_HEAP_ARRAY(char*, pelements);
+      FREE_C_HEAP_ARRAY(char*, pelements, mtInternal);
     }
   } else {
     snprintf(buffer, buflen, "%s/lib%s.so", pname, fname);
@@ -2671,17 +2662,17 @@
 
   // pending_signals has one int per signal
   // The additional signal is for SIGEXIT - exit signal to signal_thread
-  pending_signals = (jint *)os::malloc(sizeof(jint) * (Sigexit+1));
+  pending_signals = (jint *)os::malloc(sizeof(jint) * (Sigexit+1), mtInternal);
   memset(pending_signals, 0, (sizeof(jint) * (Sigexit+1)));
 
   if (UseSignalChaining) {
      chainedsigactions = (struct sigaction *)malloc(sizeof(struct sigaction)
-       * (Maxsignum + 1));
+       * (Maxsignum + 1), mtInternal);
      memset(chainedsigactions, 0, (sizeof(struct sigaction) * (Maxsignum + 1)));
-     preinstalled_sigs = (int *)os::malloc(sizeof(int) * (Maxsignum + 1));
+     preinstalled_sigs = (int *)os::malloc(sizeof(int) * (Maxsignum + 1), mtInternal);
      memset(preinstalled_sigs, 0, (sizeof(int) * (Maxsignum + 1)));
   }
-  ourSigFlags = (int*)malloc(sizeof(int) * (Maxsignum + 1 ));
+  ourSigFlags = (int*)malloc(sizeof(int) * (Maxsignum + 1 ), mtInternal);
   memset(ourSigFlags, 0, sizeof(int) * (Maxsignum + 1));
 }
 
@@ -2769,7 +2760,7 @@
   return page_size;
 }
 
-bool os::commit_memory(char* addr, size_t bytes, bool exec) {
+bool os::pd_commit_memory(char* addr, size_t bytes, bool exec) {
   int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
   size_t size = bytes;
   char *res = Solaris::mmap_chunk(addr, size, MAP_PRIVATE|MAP_FIXED, prot);
@@ -2782,7 +2773,7 @@
   return false;
 }
 
-bool os::commit_memory(char* addr, size_t bytes, size_t alignment_hint,
+bool os::pd_commit_memory(char* addr, size_t bytes, size_t alignment_hint,
                        bool exec) {
   if (commit_memory(addr, bytes, exec)) {
     if (UseMPSS && alignment_hint > (size_t)vm_page_size()) {
@@ -2812,14 +2803,14 @@
 }
 
 // Uncommit the pages in a specified region.
-void os::free_memory(char* addr, size_t bytes, size_t alignment_hint) {
+void os::pd_free_memory(char* addr, size_t bytes, size_t alignment_hint) {
   if (madvise(addr, bytes, MADV_FREE) < 0) {
     debug_only(warning("MADV_FREE failed."));
     return;
   }
 }
 
-bool os::create_stack_guard_pages(char* addr, size_t size) {
+bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
   return os::commit_memory(addr, size);
 }
 
@@ -2828,7 +2819,7 @@
 }
 
 // Change the page size in a given range.
-void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
+void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
   assert((intptr_t)addr % alignment_hint == 0, "Address should be aligned.");
   assert((intptr_t)(addr + bytes) % alignment_hint == 0, "End should be aligned.");
   if (UseLargePages && UseMPSS) {
@@ -3015,7 +3006,7 @@
   return end;
 }
 
-bool os::uncommit_memory(char* addr, size_t bytes) {
+bool os::pd_uncommit_memory(char* addr, size_t bytes) {
   size_t size = bytes;
   // Map uncommitted pages PROT_NONE so we fail early if we touch an
   // uncommitted page. Otherwise, the read/write might succeed if we
@@ -3054,7 +3045,7 @@
   return mmap_chunk(addr, bytes, flags, PROT_NONE);
 }
 
-char* os::reserve_memory(size_t bytes, char* requested_addr, size_t alignment_hint) {
+char* os::pd_reserve_memory(size_t bytes, char* requested_addr, size_t alignment_hint) {
   char* addr = Solaris::anon_mmap(requested_addr, bytes, alignment_hint, (requested_addr != NULL));
 
   guarantee(requested_addr == NULL || requested_addr == addr,
@@ -3065,7 +3056,7 @@
 // Reserve memory at an arbitrary address, only if that area is
 // available (and not reserved for something else).
 
-char* os::attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
+char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
   const int max_tries = 10;
   char* base[max_tries];
   size_t size[max_tries];
@@ -3187,7 +3178,7 @@
   return (i < max_tries) ? requested_addr : NULL;
 }
 
-bool os::release_memory(char* addr, size_t bytes) {
+bool os::pd_release_memory(char* addr, size_t bytes) {
   size_t size = bytes;
   return munmap(addr, size) == 0;
 }
@@ -4801,7 +4792,7 @@
   lwpSize = 16*1024;
   for (;;) {
     ::lseek64 (lwpFile, 0, SEEK_SET);
-    lwpArray = (prheader_t *)NEW_C_HEAP_ARRAY(char, lwpSize);
+    lwpArray = (prheader_t *)NEW_C_HEAP_ARRAY(char, lwpSize, mtInternal);
     if (::read(lwpFile, lwpArray, lwpSize) < 0) {
       if (ThreadPriorityVerbose) warning("Error reading /proc/self/lstatus\n");
       break;
@@ -4819,10 +4810,10 @@
       break;
     }
     lwpSize = lwpArray->pr_nent * lwpArray->pr_entsize;
-    FREE_C_HEAP_ARRAY(char, lwpArray);  // retry.
-  }
-
-  FREE_C_HEAP_ARRAY(char, lwpArray);
+    FREE_C_HEAP_ARRAY(char, lwpArray, mtInternal);  // retry.
+  }
+
+  FREE_C_HEAP_ARRAY(char, lwpArray, mtInternal);
   ::close (lwpFile);
   if (ThreadPriorityVerbose) {
     if (isT2) tty->print_cr("We are running with a T2 libthread\n");
@@ -5146,9 +5137,9 @@
       UseNUMA = false;
     } else {
       size_t lgrp_limit = os::numa_get_groups_num();
-      int *lgrp_ids = NEW_C_HEAP_ARRAY(int, lgrp_limit);
+      int *lgrp_ids = NEW_C_HEAP_ARRAY(int, lgrp_limit, mtInternal);
       size_t lgrp_num = os::numa_get_leaf_groups(lgrp_ids, lgrp_limit);
-      FREE_C_HEAP_ARRAY(int, lgrp_ids);
+      FREE_C_HEAP_ARRAY(int, lgrp_ids, mtInternal);
       if (lgrp_num < 2) {
         // There's only one locality group, disable NUMA.
         UseNUMA = false;
@@ -5494,7 +5485,7 @@
 }
 
 // Map a block of memory.
-char* os::map_memory(int fd, const char* file_name, size_t file_offset,
+char* os::pd_map_memory(int fd, const char* file_name, size_t file_offset,
                      char *addr, size_t bytes, bool read_only,
                      bool allow_exec) {
   int prot;
@@ -5526,7 +5517,7 @@
 
 
 // Remap a block of memory.
-char* os::remap_memory(int fd, const char* file_name, size_t file_offset,
+char* os::pd_remap_memory(int fd, const char* file_name, size_t file_offset,
                        char *addr, size_t bytes, bool read_only,
                        bool allow_exec) {
   // same as map_memory() on this OS
@@ -5536,7 +5527,7 @@
 
 
 // Unmap a block of memory.
-bool os::unmap_memory(char* addr, size_t bytes) {
+bool os::pd_unmap_memory(char* addr, size_t bytes) {
   return munmap(addr, bytes) == 0;
 }
 
@@ -6546,3 +6537,16 @@
    INTERRUPTIBLE_RETURN_INT_NORESTART(::bind(fd, him, len),\
                                       os::Solaris::clear_interrupted);
 }
+
+// Get the default path to the core file
+// Returns the length of the string
+int os::get_core_path(char* buffer, size_t bufferSize) {
+  const char* p = get_current_directory(buffer, bufferSize);
+
+  if (p == NULL) {
+    assert(p != NULL, "failed to get current directory");
+    return 0;
+  }
+
+  return strlen(buffer);
+}
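
Two themes run through the os_solaris.cpp hunks above: every NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY and os::malloc call gains an mtInternal memory-type tag for Native Memory Tracking, and the memory primitives are renamed with a pd_ prefix. The tag-forwarding macros themselves are defined in allocation.hpp rather than in this hunk; a compilable sketch of the general shape (illustrative only, the real macros also thread through caller PCs and out-of-memory handling):

    // Illustrative tag-forwarding allocation macros (not the allocation.hpp
    // definitions); a real tracker would record bytes, tag and caller pc.
    #include <cstdio>
    #include <cstdlib>

    enum MEMFLAGS { mtNone, mtInternal, mtThread };

    static void* tagged_malloc(size_t bytes, MEMFLAGS flags) {
      std::printf("alloc %zu bytes, tag %d\n", bytes, (int)flags);
      return std::malloc(bytes);
    }

    static void tagged_free(void* p, MEMFLAGS flags) {
      std::printf("free, tag %d\n", (int)flags);
      std::free(p);
    }

    #define NEW_C_HEAP_ARRAY(type, size, memflags) \
      (type*)tagged_malloc((size) * sizeof(type), memflags)
    #define FREE_C_HEAP_ARRAY(type, old, memflags) \
      tagged_free((void*)(old), memflags)

    int main() {
      char* buf = NEW_C_HEAP_ARRAY(char, 64, mtInternal);
      FREE_C_HEAP_ARRAY(char, buf, mtInternal);
      return 0;
    }
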
--- a/src/os/solaris/vm/os_solaris.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/solaris/vm/os_solaris.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -346,7 +346,7 @@
 
 };
 
-class PlatformEvent : public CHeapObj {
+class PlatformEvent : public CHeapObj<mtInternal> {
   private:
     double CachePad [4] ;   // increase odds that _mutex is sole occupant of cache line
     volatile int _Event ;
@@ -383,7 +383,7 @@
     void unpark () ;
 } ;
 
-class PlatformParker : public CHeapObj {
+class PlatformParker : public CHeapObj<mtInternal> {
   protected:
     mutex_t _mutex [1] ;
     cond_t  _cond  [1] ;
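
PlatformEvent and PlatformParker now inherit from the memory-type-parameterized CHeapObj, so heap allocations of these objects carry a tag through operator new. A simplified sketch of such a templated base class (the real CHeapObj also reports to NMT and supports different allocation-failure strategies):

    // Simplified memory-type-parameterized base class.
    #include <cstddef>
    #include <cstdlib>

    enum MEMFLAGS { mtNone, mtInternal };

    template <MEMFLAGS F>
    class CHeapObj {
     public:
      void* operator new(size_t size) {
        // A real version would record (size, F) with the memory tracker.
        return std::malloc(size);
      }
      void operator delete(void* p) { std::free(p); }
    };

    class PlatformEvent : public CHeapObj<mtInternal> {
      volatile int _event;
     public:
      PlatformEvent() : _event(0) {}
    };

    int main() {
      PlatformEvent* e = new PlatformEvent();  // tagged mtInternal via base
      delete e;
      return 0;
    }
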
--- a/src/os/solaris/vm/os_solaris.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/solaris/vm/os_solaris.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -71,7 +71,7 @@
 
 
 // On Solaris, reservations are made on a page by page basis, nothing to do.
-inline void os::split_reserved_memory(char *base, size_t size,
+inline void os::pd_split_reserved_memory(char *base, size_t size,
                                       size_t split, bool realloc) {
 }
 
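The pd_ renames above suggest the shared, platform-independent os:: entry points (declared in os.hpp, not shown in this changeset excerpt) now wrap the platform-dependent pd_ implementations, giving a single place to hook accounting such as NMT. A hedged sketch of that wrapper pattern; record_commit is a hypothetical stand-in for the real tracker call:

    // Hedged sketch: a shared os:: wrapper in front of the platform pd_
    // implementation. record_commit is a hypothetical tracker hook.
    #include <cstdio>

    static void record_commit(char* addr, size_t bytes) {
      std::printf("commit %p (%zu bytes)\n", (void*)addr, bytes);
    }

    class os {
     public:
      static bool pd_commit_memory(char* addr, size_t bytes, bool exec);
      static bool commit_memory(char* addr, size_t bytes, bool exec) {
        bool res = pd_commit_memory(addr, bytes, exec);
        if (res) record_commit(addr, bytes);   // single accounting point
        return res;
      }
    };

    // Stub "platform" implementation so the sketch links and runs.
    bool os::pd_commit_memory(char* addr, size_t bytes, bool exec) {
      (void)addr; (void)bytes; (void)exec;
      return true;
    }

    int main() {
      char buf[16];
      os::commit_memory(buf, sizeof(buf), false);
      return 0;
    }
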
--- a/src/os/solaris/vm/perfMemory_solaris.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/solaris/vm/perfMemory_solaris.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -128,7 +128,7 @@
       }
     }
   }
-  FREE_C_HEAP_ARRAY(char, destfile);
+  FREE_C_HEAP_ARRAY(char, destfile, mtInternal);
 }
 
 
@@ -155,7 +155,7 @@
   const char* tmpdir = os::get_temp_directory();
   const char* perfdir = PERFDATA_NAME;
   size_t nbytes = strlen(tmpdir) + strlen(perfdir) + strlen(user) + 3;
-  char* dirname = NEW_C_HEAP_ARRAY(char, nbytes);
+  char* dirname = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
 
   // construct the path name to user specific tmp directory
   snprintf(dirname, nbytes, "%s/%s_%s", tmpdir, perfdir, user);
@@ -248,7 +248,7 @@
   if (bufsize == -1)
     bufsize = 1024;
 
-  char* pwbuf = NEW_C_HEAP_ARRAY(char, bufsize);
+  char* pwbuf = NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);
 
 #ifdef _GNU_SOURCE
   struct passwd* p = NULL;
@@ -269,14 +269,14 @@
                                      "pw_name zero length");
       }
     }
-    FREE_C_HEAP_ARRAY(char, pwbuf);
+    FREE_C_HEAP_ARRAY(char, pwbuf, mtInternal);
     return NULL;
   }
 
-  char* user_name = NEW_C_HEAP_ARRAY(char, strlen(p->pw_name) + 1);
+  char* user_name = NEW_C_HEAP_ARRAY(char, strlen(p->pw_name) + 1, mtInternal);
   strcpy(user_name, p->pw_name);
 
-  FREE_C_HEAP_ARRAY(char, pwbuf);
+  FREE_C_HEAP_ARRAY(char, pwbuf, mtInternal);
   return user_name;
 }
 
@@ -319,7 +319,7 @@
   // to determine the user name for the process id.
   //
   struct dirent* dentry;
-  char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname));
+  char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname), mtInternal);
   errno = 0;
   while ((dentry = os::readdir(tmpdirp, (struct dirent *)tdbuf)) != NULL) {
 
@@ -329,7 +329,7 @@
     }
 
     char* usrdir_name = NEW_C_HEAP_ARRAY(char,
-                              strlen(tmpdirname) + strlen(dentry->d_name) + 2);
+                  strlen(tmpdirname) + strlen(dentry->d_name) + 2, mtInternal);
     strcpy(usrdir_name, tmpdirname);
     strcat(usrdir_name, "/");
     strcat(usrdir_name, dentry->d_name);
@@ -337,7 +337,7 @@
     DIR* subdirp = os::opendir(usrdir_name);
 
     if (subdirp == NULL) {
-      FREE_C_HEAP_ARRAY(char, usrdir_name);
+      FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
       continue;
     }
 
@@ -348,13 +348,13 @@
     // symlink can be exploited.
     //
     if (!is_directory_secure(usrdir_name)) {
-      FREE_C_HEAP_ARRAY(char, usrdir_name);
+      FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
       os::closedir(subdirp);
       continue;
     }
 
     struct dirent* udentry;
-    char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name));
+    char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name), mtInternal);
     errno = 0;
     while ((udentry = os::readdir(subdirp, (struct dirent *)udbuf)) != NULL) {
 
@@ -363,7 +363,7 @@
         int result;
 
         char* filename = NEW_C_HEAP_ARRAY(char,
-                            strlen(usrdir_name) + strlen(udentry->d_name) + 2);
+                 strlen(usrdir_name) + strlen(udentry->d_name) + 2, mtInternal);
 
         strcpy(filename, usrdir_name);
         strcat(filename, "/");
@@ -372,13 +372,13 @@
         // don't follow symbolic links for the file
         RESTARTABLE(::lstat(filename, &statbuf), result);
         if (result == OS_ERR) {
-           FREE_C_HEAP_ARRAY(char, filename);
+           FREE_C_HEAP_ARRAY(char, filename, mtInternal);
            continue;
         }
 
         // skip over files that are not regular files.
         if (!S_ISREG(statbuf.st_mode)) {
-          FREE_C_HEAP_ARRAY(char, filename);
+          FREE_C_HEAP_ARRAY(char, filename, mtInternal);
           continue;
         }
 
@@ -388,23 +388,23 @@
           if (statbuf.st_ctime > oldest_ctime) {
             char* user = strchr(dentry->d_name, '_') + 1;
 
-            if (oldest_user != NULL) FREE_C_HEAP_ARRAY(char, oldest_user);
-            oldest_user = NEW_C_HEAP_ARRAY(char, strlen(user)+1);
+            if (oldest_user != NULL) FREE_C_HEAP_ARRAY(char, oldest_user, mtInternal);
+            oldest_user = NEW_C_HEAP_ARRAY(char, strlen(user)+1, mtInternal);
 
             strcpy(oldest_user, user);
             oldest_ctime = statbuf.st_ctime;
           }
         }
 
-        FREE_C_HEAP_ARRAY(char, filename);
+        FREE_C_HEAP_ARRAY(char, filename, mtInternal);
       }
     }
     os::closedir(subdirp);
-    FREE_C_HEAP_ARRAY(char, udbuf);
-    FREE_C_HEAP_ARRAY(char, usrdir_name);
+    FREE_C_HEAP_ARRAY(char, udbuf, mtInternal);
+    FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
   }
   os::closedir(tmpdirp);
-  FREE_C_HEAP_ARRAY(char, tdbuf);
+  FREE_C_HEAP_ARRAY(char, tdbuf, mtInternal);
 
   return(oldest_user);
 }
@@ -471,7 +471,7 @@
   // add 2 for the file separator and a NULL terminator.
   size_t nbytes = strlen(dirname) + UINT_CHARS + 2;
 
-  char* name = NEW_C_HEAP_ARRAY(char, nbytes);
+  char* name = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
   snprintf(name, nbytes, "%s/%d", dirname, vmid);
 
   return name;
@@ -509,7 +509,7 @@
 static void remove_file(const char* dirname, const char* filename) {
 
   size_t nbytes = strlen(dirname) + strlen(filename) + 2;
-  char* path = NEW_C_HEAP_ARRAY(char, nbytes);
+  char* path = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
 
   strcpy(path, dirname);
   strcat(path, "/");
@@ -517,7 +517,7 @@
 
   remove_file(path);
 
-  FREE_C_HEAP_ARRAY(char, path);
+  FREE_C_HEAP_ARRAY(char, path, mtInternal);
 }
 
 
@@ -554,7 +554,7 @@
   // opendir/readdir.
   //
   struct dirent* entry;
-  char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname));
+  char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname), mtInternal);
   errno = 0;
   while ((entry = os::readdir(dirp, (struct dirent *)dbuf)) != NULL) {
 
@@ -593,7 +593,7 @@
     errno = 0;
   }
   os::closedir(dirp);
-  FREE_C_HEAP_ARRAY(char, dbuf);
+  FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
 }
 
 // make the user specific temporary directory. Returns true if
@@ -738,11 +738,11 @@
 
   fd = create_sharedmem_resources(dirname, filename, size);
 
-  FREE_C_HEAP_ARRAY(char, user_name);
-  FREE_C_HEAP_ARRAY(char, dirname);
+  FREE_C_HEAP_ARRAY(char, user_name, mtInternal);
+  FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
 
   if (fd == -1) {
-    FREE_C_HEAP_ARRAY(char, filename);
+    FREE_C_HEAP_ARRAY(char, filename, mtInternal);
     return NULL;
   }
 
@@ -758,7 +758,7 @@
       warning("mmap failed -  %s\n", strerror(errno));
     }
     remove_file(filename);
-    FREE_C_HEAP_ARRAY(char, filename);
+    FREE_C_HEAP_ARRAY(char, filename, mtInternal);
     return NULL;
   }
 
@@ -884,7 +884,7 @@
   // store file, we don't follow them when attaching either.
   //
   if (!is_directory_secure(dirname)) {
-    FREE_C_HEAP_ARRAY(char, dirname);
+    FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
               "Process not found");
   }
@@ -899,9 +899,9 @@
   strcpy(rfilename, filename);
 
   // free the c heap resources that are no longer needed
-  if (luser != user) FREE_C_HEAP_ARRAY(char, luser);
-  FREE_C_HEAP_ARRAY(char, dirname);
-  FREE_C_HEAP_ARRAY(char, filename);
+  if (luser != user) FREE_C_HEAP_ARRAY(char, luser, mtInternal);
+  FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
+  FREE_C_HEAP_ARRAY(char, filename, mtInternal);
 
   // open the shared memory file for the given vmid
   fd = open_sharedmem_file(rfilename, file_flags, CHECK);
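
The scans above walk the temp directory tree with os::opendir/os::readdir, allocating a dirent buffer sized by os::readdir_buf_size() and building dirname/entry paths by hand. Outside HotSpot the same walk can be written with plain POSIX calls; a minimal sketch assuming a POSIX platform (/tmp is a stand-in for the hsperfdata directory):

    // Minimal POSIX sketch of the scan pattern used above: iterate a temp
    // directory, skip "." and "..", and build "dir/entry" path names.
    #include <dirent.h>
    #include <cstdio>
    #include <cstring>

    int main() {
      const char* dirname = "/tmp";          // stand-in for the perf dir
      DIR* dirp = opendir(dirname);
      if (dirp == NULL) return 1;

      struct dirent* entry;
      while ((entry = readdir(dirp)) != NULL) {
        if (strcmp(entry->d_name, ".") == 0 ||
            strcmp(entry->d_name, "..") == 0) {
          continue;
        }
        char path[4096];
        snprintf(path, sizeof(path), "%s/%s", dirname, entry->d_name);
        printf("%s\n", path);
      }
      closedir(dirp);
      return 0;
    }
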
--- a/src/os/windows/vm/decoder_windows.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/windows/vm/decoder_windows.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -72,10 +72,10 @@
 
      // find out if jvm.dll contains private symbols, by decoding
      // current function and comparing the result
-     address addr = (address)Decoder::decode;
+     address addr = (address)Decoder::demangle;
      char buf[MAX_PATH];
      if (decode(addr, buf, sizeof(buf), NULL)) {
-       _can_decode_in_vm = !strcmp(buf, "Decoder::decode");
+       _can_decode_in_vm = !strcmp(buf, "Decoder::demangle");
      }
   }
 }
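
The probe above decodes the address of a function known to live in jvm.dll and string-compares the decoded name to decide whether private symbols are available; the fix keeps the probe target and the expected string in sync by switching both to Decoder::demangle. Symbol decoding on Windows is typically built on DbgHelp; a standalone sketch of the same self-probe idea using SymFromAddr (assumed setup, error handling trimmed; link against dbghelp):

    // Self-probe sketch with DbgHelp: decode the address of a known local
    // function and compare the name. Assumed setup; link with dbghelp.
    #include <windows.h>
    #include <dbghelp.h>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static void probe() {}   // the function whose name we try to decode

    int main() {
      HANDLE proc = GetCurrentProcess();
      if (!SymInitialize(proc, NULL, TRUE)) return 1;

      char buf[sizeof(SYMBOL_INFO) + 256];
      memset(buf, 0, sizeof(buf));
      SYMBOL_INFO* sym  = (SYMBOL_INFO*)buf;
      sym->SizeOfStruct = sizeof(SYMBOL_INFO);
      sym->MaxNameLen   = 255;

      DWORD64 displacement = 0;
      if (SymFromAddr(proc, (DWORD64)(uintptr_t)&probe, &displacement, sym)) {
        // With symbols present the decoded name matches the probe function.
        printf("decoded: %s, can_decode: %d\n",
               sym->Name, strcmp(sym->Name, "probe") == 0);
      }
      SymCleanup(proc);
      return 0;
    }
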
--- a/src/os/windows/vm/decoder_windows.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/windows/vm/decoder_windows.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -45,6 +45,10 @@
   bool can_decode_C_frame_in_vm() const;
   bool demangle(const char* symbol, char *buf, int buflen);
   bool decode(address addr, char *buf, int buflen, int* offset, const char* modulepath = NULL);
+  bool decode(address addr, char *buf, int buflen, int* offset, const void* base) {
+    ShouldNotReachHere();
+    return false;
+  }
 
 private:
   void initialize();
--- a/src/os/windows/vm/jvm_windows.h	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/windows/vm/jvm_windows.h	Sat Oct 20 00:08:35 2012 -0700
@@ -59,7 +59,7 @@
 
 #include <Tlhelp32.h>
 
-typedef unsigned int socklen_t;
+typedef int socklen_t;
 
 // #include "jni.h"
 
--- a/src/os/windows/vm/osThread_windows.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/windows/vm/osThread_windows.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,12 +25,13 @@
 #ifndef OS_WINDOWS_VM_OSTHREAD_WINDOWS_HPP
 #define OS_WINDOWS_VM_OSTHREAD_WINDOWS_HPP
 
-typedef void* HANDLE;
+  typedef void* HANDLE;
+ public:
+  typedef unsigned long thread_id_t;
 
  private:
   // Win32-specific thread information
   HANDLE _thread_handle;        // Win32 thread handle
-  unsigned long _thread_id;     // Win32 thread id
   HANDLE _interrupt_event;      // Event signalled on thread interrupt
   ThreadState _last_state;
 
@@ -42,7 +43,6 @@
   HANDLE interrupt_event() const                   { return _interrupt_event; }
   void set_interrupt_event(HANDLE interrupt_event) { _interrupt_event = interrupt_event; }
 
-  unsigned long thread_id() const                  { return _thread_id; }
 #ifndef PRODUCT
   // Used for debugging, return a unique integer for each thread.
   int thread_identifier() const                    { return _thread_id; }
@@ -54,8 +54,6 @@
     return false;
   }
 #endif // ASSERT
-  void set_thread_id(unsigned long thread_id)      { _thread_id = thread_id; }
-
   bool is_try_mutex_enter()                        { return false; }
 
   // This is a temporary fix for the thread states during
--- a/src/os/windows/vm/os_windows.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/windows/vm/os_windows.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -96,7 +96,6 @@
 #include <io.h>
 #include <process.h>              // For _beginthreadex(), _endthreadex()
 #include <imagehlp.h>             // For os::dll_address_to_function_name
-
 /* for enumerating dll libraries */
 #include <vdmdbg.h>
 
@@ -214,13 +213,13 @@
           }
       }
 
-      home_path = NEW_C_HEAP_ARRAY(char, strlen(home_dir) + 1);
+      home_path = NEW_C_HEAP_ARRAY(char, strlen(home_dir) + 1, mtInternal);
       if (home_path == NULL)
           return;
       strcpy(home_path, home_dir);
       Arguments::set_java_home(home_path);
 
-      dll_path = NEW_C_HEAP_ARRAY(char, strlen(home_dir) + strlen(bin) + 1);
+      dll_path = NEW_C_HEAP_ARRAY(char, strlen(home_dir) + strlen(bin) + 1, mtInternal);
       if (dll_path == NULL)
           return;
       strcpy(dll_path, home_dir);
@@ -251,7 +250,7 @@
     char *path_str = ::getenv("PATH");
 
     library_path = NEW_C_HEAP_ARRAY(char, MAX_PATH * 5 + sizeof(PACKAGE_DIR) +
-        sizeof(BIN_DIR) + (path_str ? strlen(path_str) : 0) + 10);
+        sizeof(BIN_DIR) + (path_str ? strlen(path_str) : 0) + 10, mtInternal);
 
     library_path[0] = '\0';
 
@@ -280,7 +279,7 @@
     strcat(library_path, ";.");
 
     Arguments::set_library_path(library_path);
-    FREE_C_HEAP_ARRAY(char, library_path);
+    FREE_C_HEAP_ARRAY(char, library_path, mtInternal);
   }
 
   /* Default extensions directory */
@@ -300,7 +299,7 @@
   {
     #define ENDORSED_DIR "\\lib\\endorsed"
     size_t len = strlen(Arguments::get_java_home()) + sizeof(ENDORSED_DIR);
-    char * buf = NEW_C_HEAP_ARRAY(char, len);
+    char * buf = NEW_C_HEAP_ARRAY(char, len, mtInternal);
     sprintf(buf, "%s%s", Arguments::get_java_home(), ENDORSED_DIR);
     Arguments::set_endorsed_dirs(buf);
     #undef ENDORSED_DIR
@@ -324,15 +323,22 @@
   os::breakpoint();
 }
 
-// Returns an estimate of the current stack pointer. Result must be guaranteed
-// to point into the calling threads stack, and be no lower than the current
-// stack pointer.
-
-address os::current_stack_pointer() {
-  int dummy;
-  address sp = (address)&dummy;
-  return sp;
-}
+/*
+ * The RtlCaptureStackBackTrace Windows API may not exist prior to Windows XP.
+ * So far, this method is only used by Native Memory Tracking, which is
+ * only supported on Windows XP or later.
+ */
+address os::get_caller_pc(int n) {
+#ifdef _NMT_NOINLINE_
+  n ++;
+#endif
+  address pc;
+  if (os::Kernel32Dll::RtlCaptureStackBackTrace(n + 1, 1, (PVOID*)&pc, NULL) == 1) {
+    return pc;
+  }
+  return NULL;
+}
+
 
 // os::current_stack_base()
 //
@@ -1024,7 +1030,7 @@
 os::opendir(const char *dirname)
 {
     assert(dirname != NULL, "just checking");   // hotspot change
-    DIR *dirp = (DIR *)malloc(sizeof(DIR));
+    DIR *dirp = (DIR *)malloc(sizeof(DIR), mtInternal);
     DWORD fattr;                                // hotspot change
     char alt_dirname[4] = { 0, 0, 0, 0 };
 
@@ -1046,9 +1052,9 @@
         dirname = alt_dirname;
     }
 
-    dirp->path = (char *)malloc(strlen(dirname) + 5);
+    dirp->path = (char *)malloc(strlen(dirname) + 5, mtInternal);
     if (dirp->path == 0) {
-        free(dirp);
+        free(dirp, mtInternal);
         errno = ENOMEM;
         return 0;
     }
@@ -1056,13 +1062,13 @@
 
     fattr = GetFileAttributes(dirp->path);
     if (fattr == 0xffffffff) {
-        free(dirp->path);
-        free(dirp);
+        free(dirp->path, mtInternal);
+        free(dirp, mtInternal);
         errno = ENOENT;
         return 0;
     } else if ((fattr & FILE_ATTRIBUTE_DIRECTORY) == 0) {
-        free(dirp->path);
-        free(dirp);
+        free(dirp->path, mtInternal);
+        free(dirp, mtInternal);
         errno = ENOTDIR;
         return 0;
     }
@@ -1080,8 +1086,8 @@
     dirp->handle = FindFirstFile(dirp->path, &dirp->find_data);
     if (dirp->handle == INVALID_HANDLE_VALUE) {
         if (GetLastError() != ERROR_FILE_NOT_FOUND) {
-            free(dirp->path);
-            free(dirp);
+            free(dirp->path, mtInternal);
+            free(dirp, mtInternal);
             errno = EACCES;
             return 0;
         }
@@ -1124,8 +1130,8 @@
         }
         dirp->handle = INVALID_HANDLE_VALUE;
     }
-    free(dirp->path);
-    free(dirp);
+    free(dirp->path, mtInternal);
+    free(dirp, mtInternal);
     return 0;
 }
 
@@ -1186,11 +1192,11 @@
     // release the storage
     for (int i = 0 ; i < n ; i++) {
       if (pelements[i] != NULL) {
-        FREE_C_HEAP_ARRAY(char, pelements[i]);
+        FREE_C_HEAP_ARRAY(char, pelements[i], mtInternal);
       }
     }
     if (pelements != NULL) {
-      FREE_C_HEAP_ARRAY(char*, pelements);
+      FREE_C_HEAP_ARRAY(char*, pelements, mtInternal);
     }
   } else {
     jio_snprintf(buffer, buflen, "%s\\%s.dll", pname, fname);
@@ -2647,7 +2653,7 @@
 
   void free_node_list() {
     if (_numa_used_node_list != NULL) {
-      FREE_C_HEAP_ARRAY(int, _numa_used_node_list);
+      FREE_C_HEAP_ARRAY(int, _numa_used_node_list, mtInternal);
     }
   }
 
@@ -2669,7 +2675,7 @@
     ULONG highest_node_number;
     if (!os::Kernel32Dll::GetNumaHighestNodeNumber(&highest_node_number)) return false;
     free_node_list();
-    _numa_used_node_list = NEW_C_HEAP_ARRAY(int, highest_node_number + 1);
+    _numa_used_node_list = NEW_C_HEAP_ARRAY(int, highest_node_number + 1, mtInternal);
     for (unsigned int i = 0; i <= highest_node_number; i++) {
       ULONGLONG proc_mask_numa_node;
       if (!os::Kernel32Dll::GetNumaNodeProcessorMask(i, &proc_mask_numa_node)) return false;
@@ -2928,7 +2934,7 @@
 // On win32, one cannot release just a part of reserved memory, it's an
 // all or nothing deal.  When we split a reservation, we must break the
 // reservation into two reservations.
-void os::split_reserved_memory(char *base, size_t size, size_t split,
+void os::pd_split_reserved_memory(char *base, size_t size, size_t split,
                               bool realloc) {
   if (size > 0) {
     release_memory(base, size);
@@ -2941,7 +2947,7 @@
   }
 }
 
-char* os::reserve_memory(size_t bytes, char* addr, size_t alignment_hint) {
+char* os::pd_reserve_memory(size_t bytes, char* addr, size_t alignment_hint) {
   assert((size_t)addr % os::vm_allocation_granularity() == 0,
          "reserve alignment");
   assert(bytes % os::vm_allocation_granularity() == 0, "reserve block size");
@@ -2974,7 +2980,7 @@
 
 // Reserve memory at an arbitrary address, only if that area is
 // available (and not reserved for something else).
-char* os::attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
+char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
   // Windows os::reserve_memory() fails if the requested address range is
   // not available.
   return reserve_memory(bytes, requested_addr);
@@ -3037,7 +3043,7 @@
 void os::print_statistics() {
 }
 
-bool os::commit_memory(char* addr, size_t bytes, bool exec) {
+bool os::pd_commit_memory(char* addr, size_t bytes, bool exec) {
   if (bytes == 0) {
     // Don't bother the OS with noops.
     return true;
@@ -3085,26 +3091,26 @@
   return true;
 }
 
-bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
+bool os::pd_commit_memory(char* addr, size_t size, size_t alignment_hint,
                        bool exec) {
   return commit_memory(addr, size, exec);
 }
 
-bool os::uncommit_memory(char* addr, size_t bytes) {
+bool os::pd_uncommit_memory(char* addr, size_t bytes) {
   if (bytes == 0) {
     // Don't bother the OS with noops.
     return true;
   }
   assert((size_t) addr % os::vm_page_size() == 0, "uncommit on page boundaries");
   assert(bytes % os::vm_page_size() == 0, "uncommit in page-sized chunks");
-  return VirtualFree(addr, bytes, MEM_DECOMMIT) != 0;
-}
-
-bool os::release_memory(char* addr, size_t bytes) {
+  return (VirtualFree(addr, bytes, MEM_DECOMMIT) != 0);
+}
+
+bool os::pd_release_memory(char* addr, size_t bytes) {
   return VirtualFree(addr, 0, MEM_RELEASE) != 0;
 }
 
-bool os::create_stack_guard_pages(char* addr, size_t size) {
+bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
   return os::commit_memory(addr, size);
 }
 
@@ -3151,8 +3157,8 @@
   return VirtualProtect(addr, bytes, PAGE_READWRITE, &old_status) != 0;
 }
 
-void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
-void os::free_memory(char *addr, size_t bytes, size_t alignment_hint)    { }
+void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
+void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) { }
 void os::numa_make_global(char *addr, size_t bytes)    { }
 void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint)    { }
 bool os::numa_topology_changed()                       { return false; }
@@ -4286,14 +4292,14 @@
     numEvents = MAX_INPUT_EVENTS;
   }
 
-  lpBuffer = (INPUT_RECORD *)os::malloc(numEvents * sizeof(INPUT_RECORD));
+  lpBuffer = (INPUT_RECORD *)os::malloc(numEvents * sizeof(INPUT_RECORD), mtInternal);
   if (lpBuffer == NULL) {
     return FALSE;
   }
 
   error = ::PeekConsoleInput(han, lpBuffer, numEvents, &numEventsRead);
   if (error == 0) {
-    os::free(lpBuffer);
+    os::free(lpBuffer, mtInternal);
     return FALSE;
   }
 
@@ -4314,7 +4320,7 @@
   }
 
   if(lpBuffer != NULL) {
-    os::free(lpBuffer);
+    os::free(lpBuffer, mtInternal);
   }
 
   *pbytes = (long) actualLength;
@@ -4322,7 +4328,7 @@
 }
 
 // Map a block of memory.
-char* os::map_memory(int fd, const char* file_name, size_t file_offset,
+char* os::pd_map_memory(int fd, const char* file_name, size_t file_offset,
                      char *addr, size_t bytes, bool read_only,
                      bool allow_exec) {
   HANDLE hFile;
@@ -4442,7 +4448,7 @@
 
 
 // Remap a block of memory.
-char* os::remap_memory(int fd, const char* file_name, size_t file_offset,
+char* os::pd_remap_memory(int fd, const char* file_name, size_t file_offset,
                        char *addr, size_t bytes, bool read_only,
                        bool allow_exec) {
   // This OS does not allow existing memory maps to be remapped so we
@@ -4455,15 +4461,15 @@
   // call above and the map_memory() call below where a thread in native
   // code may be able to access an address that is no longer mapped.
 
-  return os::map_memory(fd, file_name, file_offset, addr, bytes, read_only,
-                        allow_exec);
+  return os::map_memory(fd, file_name, file_offset, addr, bytes,
+           read_only, allow_exec);
 }
 
 
 // Unmap a block of memory.
 // Returns true=success, otherwise false.
 
-bool os::unmap_memory(char* addr, size_t bytes) {
+bool os::pd_unmap_memory(char* addr, size_t bytes) {
   BOOL result = UnmapViewOfFile(addr);
   if (result == 0) {
     if (PrintMiscellaneous && Verbose) {
@@ -4847,99 +4853,92 @@
   return (struct hostent*)os::WinSock2Dll::gethostbyname(name);
 }
 
-
 int os::socket_close(int fd) {
-  ShouldNotReachHere();
-  return 0;
+  return ::closesocket(fd);
 }
 
 int os::socket_available(int fd, jint *pbytes) {
-  ShouldNotReachHere();
-  return 0;
+  int ret = ::ioctlsocket(fd, FIONREAD, (u_long*)pbytes);
+  return (ret < 0) ? 0 : 1;
 }
 
 int os::socket(int domain, int type, int protocol) {
-  ShouldNotReachHere();
-  return 0;
+  return ::socket(domain, type, protocol);
 }
 
 int os::listen(int fd, int count) {
-  ShouldNotReachHere();
-  return 0;
+  return ::listen(fd, count);
 }
 
 int os::connect(int fd, struct sockaddr* him, socklen_t len) {
-  ShouldNotReachHere();
-  return 0;
+  return ::connect(fd, him, len);
 }
 
 int os::accept(int fd, struct sockaddr* him, socklen_t* len) {
-  ShouldNotReachHere();
-  return 0;
+  return ::accept(fd, him, len);
 }
 
 int os::sendto(int fd, char* buf, size_t len, uint flags,
                struct sockaddr* to, socklen_t tolen) {
-  ShouldNotReachHere();
-  return 0;
+
+  return ::sendto(fd, buf, (int)len, flags, to, tolen);
 }
 
 int os::recvfrom(int fd, char *buf, size_t nBytes, uint flags,
                  sockaddr* from, socklen_t* fromlen) {
-  ShouldNotReachHere();
-  return 0;
+
+  return ::recvfrom(fd, buf, (int)nBytes, flags, from, fromlen);
 }
 
 int os::recv(int fd, char* buf, size_t nBytes, uint flags) {
-  ShouldNotReachHere();
-  return 0;
+  return ::recv(fd, buf, (int)nBytes, flags);
 }
 
 int os::send(int fd, char* buf, size_t nBytes, uint flags) {
-  ShouldNotReachHere();
-  return 0;
+  return ::send(fd, buf, (int)nBytes, flags);
 }
 
 int os::raw_send(int fd, char* buf, size_t nBytes, uint flags) {
-  ShouldNotReachHere();
-  return 0;
+  return ::send(fd, buf, (int)nBytes, flags);
 }
 
 int os::timeout(int fd, long timeout) {
-  ShouldNotReachHere();
-  return 0;
+  fd_set tbl;
+  struct timeval t;
+
+  t.tv_sec  = timeout / 1000;
+  t.tv_usec = (timeout % 1000) * 1000;
+
+  tbl.fd_count    = 1;
+  tbl.fd_array[0] = fd;
+
+  return ::select(1, &tbl, 0, 0, &t);
 }
 
 int os::get_host_name(char* name, int namelen) {
-  ShouldNotReachHere();
-  return 0;
+  return ::gethostname(name, namelen);
 }
 
 int os::socket_shutdown(int fd, int howto) {
-  ShouldNotReachHere();
-  return 0;
+  return ::shutdown(fd, howto);
 }
 
 int os::bind(int fd, struct sockaddr* him, socklen_t len) {
-  ShouldNotReachHere();
-  return 0;
+  return ::bind(fd, him, len);
 }
 
 int os::get_sock_name(int fd, struct sockaddr* him, socklen_t* len) {
-  ShouldNotReachHere();
-  return 0;
+  return ::getsockname(fd, him, len);
 }
 
 int os::get_sock_opt(int fd, int level, int optname,
                      char* optval, socklen_t* optlen) {
-  ShouldNotReachHere();
-  return 0;
+  return ::getsockopt(fd, level, optname, optval, optlen);
 }
 
 int os::set_sock_opt(int fd, int level, int optname,
                      const char* optval, socklen_t optlen) {
-  ShouldNotReachHere();
-  return 0;
+  return ::setsockopt(fd, level, optname, optval, optlen);
 }
 
 
@@ -4948,11 +4947,15 @@
 typedef LPVOID (WINAPI *VirtualAllocExNuma_Fn) (HANDLE, LPVOID, SIZE_T, DWORD, DWORD, DWORD);
 typedef BOOL (WINAPI *GetNumaHighestNodeNumber_Fn) (PULONG);
 typedef BOOL (WINAPI *GetNumaNodeProcessorMask_Fn) (UCHAR, PULONGLONG);
+typedef USHORT (WINAPI* RtlCaptureStackBackTrace_Fn)(ULONG, ULONG, PVOID*, PULONG);
 
 GetLargePageMinimum_Fn      os::Kernel32Dll::_GetLargePageMinimum = NULL;
 VirtualAllocExNuma_Fn       os::Kernel32Dll::_VirtualAllocExNuma = NULL;
 GetNumaHighestNodeNumber_Fn os::Kernel32Dll::_GetNumaHighestNodeNumber = NULL;
 GetNumaNodeProcessorMask_Fn os::Kernel32Dll::_GetNumaNodeProcessorMask = NULL;
+RtlCaptureStackBackTrace_Fn os::Kernel32Dll::_RtlCaptureStackBackTrace = NULL;
+
+
 BOOL                        os::Kernel32Dll::initialized = FALSE;
 SIZE_T os::Kernel32Dll::GetLargePageMinimum() {
   assert(initialized && _GetLargePageMinimum != NULL,
@@ -4995,6 +4998,19 @@
   return _GetNumaNodeProcessorMask(node, proc_mask);
 }
 
+USHORT os::Kernel32Dll::RtlCaptureStackBackTrace(ULONG FrameToSkip,
+  ULONG FrameToCapture, PVOID* BackTrace, PULONG BackTraceHash) {
+    if (!initialized) {
+      initialize();
+    }
+
+    if (_RtlCaptureStackBackTrace != NULL) {
+      return _RtlCaptureStackBackTrace(FrameToSkip, FrameToCapture,
+        BackTrace, BackTraceHash);
+    } else {
+      return 0;
+    }
+}
 
 void os::Kernel32Dll::initializeCommon() {
   if (!initialized) {
@@ -5004,6 +5020,7 @@
     _VirtualAllocExNuma = (VirtualAllocExNuma_Fn)::GetProcAddress(handle, "VirtualAllocExNuma");
     _GetNumaHighestNodeNumber = (GetNumaHighestNodeNumber_Fn)::GetProcAddress(handle, "GetNumaHighestNodeNumber");
     _GetNumaNodeProcessorMask = (GetNumaNodeProcessorMask_Fn)::GetProcAddress(handle, "GetNumaNodeProcessorMask");
+    _RtlCaptureStackBackTrace = (RtlCaptureStackBackTrace_Fn)::GetProcAddress(handle, "RtlCaptureStackBackTrace");
     initialized = TRUE;
   }
 }
@@ -5118,7 +5135,6 @@
 Module32Next_Fn             os::Kernel32Dll::_Module32Next = NULL;
 GetNativeSystemInfo_Fn      os::Kernel32Dll::_GetNativeSystemInfo = NULL;
 
-
 void os::Kernel32Dll::initialize() {
   if (!initialized) {
     HMODULE handle = ::GetModuleHandle("Kernel32.dll");
@@ -5196,8 +5212,6 @@
   _GetNativeSystemInfo(lpSystemInfo);
 }
 
-
-
 // PSAPI API
 
 
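The os_windows.cpp hunks above replace the ShouldNotReachHere() socket stubs with direct Winsock calls, and implement os::timeout() as a single-descriptor select() wait, filling fd_count/fd_array by hand (equivalent on Winsock to FD_ZERO/FD_SET). A standalone sketch of that select()-based wait, assuming Winsock 2 (link against ws2_32):

    // Standalone sketch of the select()-based wait in os::timeout() above:
    // wait up to 'millis' ms for 'fd' to become readable.
    #include <winsock2.h>
    #include <cstdio>

    static int wait_readable(SOCKET fd, long millis) {
      fd_set readers;
      struct timeval t;
      t.tv_sec  = millis / 1000;
      t.tv_usec = (millis % 1000) * 1000;

      FD_ZERO(&readers);
      FD_SET(fd, &readers);
      // On Winsock the first select() argument is ignored.
      return select(0, &readers, NULL, NULL, &t);
    }

    int main() {
      WSADATA wsa;
      if (WSAStartup(MAKEWORD(2, 2), &wsa) != 0) return 1;
      SOCKET s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
      printf("select returned %d\n", wait_readable(s, 100));  // 0 = timeout
      closesocket(s);
      WSACleanup();
      return 0;
    }
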
--- a/src/os/windows/vm/os_windows.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/windows/vm/os_windows.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -98,7 +98,7 @@
   static LONG WINAPI serialize_fault_filter(struct _EXCEPTION_POINTERS* e);
 };
 
-class PlatformEvent : public CHeapObj {
+class PlatformEvent : public CHeapObj<mtInternal> {
   private:
     double CachePad [4] ;   // increase odds that _Event is sole occupant of cache line
     volatile int _Event ;
@@ -124,7 +124,7 @@
 
 
 
-class PlatformParker : public CHeapObj {
+class PlatformParker : public CHeapObj<mtInternal> {
   protected:
     HANDLE _ParkEvent ;
 
@@ -182,6 +182,9 @@
   static BOOL GetNumaHighestNodeNumber(PULONG);
   static BOOL GetNumaNodeProcessorMask(UCHAR, PULONGLONG);
 
+  // Stack walking
+  static USHORT RtlCaptureStackBackTrace(ULONG, ULONG, PVOID*, PULONG);
+
 private:
   // GetLargePageMinimum available on Windows Vista/Windows Server 2003
   // and later
@@ -191,6 +194,7 @@
   static LPVOID (WINAPI *_VirtualAllocExNuma) (HANDLE, LPVOID, SIZE_T, DWORD, DWORD, DWORD);
   static BOOL (WINAPI *_GetNumaHighestNodeNumber) (PULONG);
   static BOOL (WINAPI *_GetNumaNodeProcessorMask) (UCHAR, PULONGLONG);
+  static USHORT (WINAPI *_RtlCaptureStackBackTrace)(ULONG, ULONG, PVOID*, PULONG);
   static BOOL initialized;
 
   static void initialize();
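
RtlCaptureStackBackTrace is bound lazily via GetProcAddress, as the declarations above show, so the VM degrades gracefully on kernels that lack the export; os::get_caller_pc() simply returns NULL when the pointer is missing. A minimal standalone sketch of that optional-binding pattern:

    // Optional-binding sketch: resolve RtlCaptureStackBackTrace at runtime
    // and fall back when the export is missing.
    #include <windows.h>
    #include <cstdio>

    typedef USHORT (WINAPI* CaptureFn)(ULONG, ULONG, PVOID*, PULONG);

    int main() {
      HMODULE k32 = GetModuleHandleA("kernel32.dll");
      CaptureFn capture =
          (CaptureFn)GetProcAddress(k32, "RtlCaptureStackBackTrace");

      PVOID pc = NULL;
      if (capture != NULL && capture(0, 1, &pc, NULL) == 1) {
        printf("captured pc: %p\n", pc);       // one frame of our own stack
      } else {
        printf("API unavailable; using fallback\n");
      }
      return 0;
    }
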
--- a/src/os/windows/vm/perfMemory_windows.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os/windows/vm/perfMemory_windows.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -120,7 +120,7 @@
     }
   }
 
-  FREE_C_HEAP_ARRAY(char, destfile);
+  FREE_C_HEAP_ARRAY(char, destfile, mtInternal);
 }
 
 // Shared Memory Implementation Details
@@ -157,7 +157,7 @@
   const char* tmpdir = os::get_temp_directory();
   const char* perfdir = PERFDATA_NAME;
   size_t nbytes = strlen(tmpdir) + strlen(perfdir) + strlen(user) + 3;
-  char* dirname = NEW_C_HEAP_ARRAY(char, nbytes);
+  char* dirname = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
 
   // construct the path name to user specific tmp directory
   _snprintf(dirname, nbytes, "%s\\%s_%s", tmpdir, perfdir, user);
@@ -281,7 +281,7 @@
     }
   }
 
-  char* user_name = NEW_C_HEAP_ARRAY(char, strlen(user)+1);
+  char* user_name = NEW_C_HEAP_ARRAY(char, strlen(user)+1, mtInternal);
   strcpy(user_name, user);
 
   return user_name;
@@ -315,7 +315,7 @@
   // to determine the user name for the process id.
   //
   struct dirent* dentry;
-  char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname));
+  char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname), mtInternal);
   errno = 0;
   while ((dentry = os::readdir(tmpdirp, (struct dirent *)tdbuf)) != NULL) {
 
@@ -325,7 +325,7 @@
     }
 
     char* usrdir_name = NEW_C_HEAP_ARRAY(char,
-                              strlen(tmpdirname) + strlen(dentry->d_name) + 2);
+        strlen(tmpdirname) + strlen(dentry->d_name) + 2, mtInternal);
     strcpy(usrdir_name, tmpdirname);
     strcat(usrdir_name, "\\");
     strcat(usrdir_name, dentry->d_name);
@@ -333,7 +333,7 @@
     DIR* subdirp = os::opendir(usrdir_name);
 
     if (subdirp == NULL) {
-      FREE_C_HEAP_ARRAY(char, usrdir_name);
+      FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
       continue;
     }
 
@@ -344,13 +344,13 @@
     // symlink can be exploited.
     //
     if (!is_directory_secure(usrdir_name)) {
-      FREE_C_HEAP_ARRAY(char, usrdir_name);
+      FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
       os::closedir(subdirp);
       continue;
     }
 
     struct dirent* udentry;
-    char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name));
+    char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name), mtInternal);
     errno = 0;
     while ((udentry = os::readdir(subdirp, (struct dirent *)udbuf)) != NULL) {
 
@@ -358,20 +358,20 @@
         struct stat statbuf;
 
         char* filename = NEW_C_HEAP_ARRAY(char,
-                            strlen(usrdir_name) + strlen(udentry->d_name) + 2);
+           strlen(usrdir_name) + strlen(udentry->d_name) + 2, mtInternal);
 
         strcpy(filename, usrdir_name);
         strcat(filename, "\\");
         strcat(filename, udentry->d_name);
 
         if (::stat(filename, &statbuf) == OS_ERR) {
-           FREE_C_HEAP_ARRAY(char, filename);
+           FREE_C_HEAP_ARRAY(char, filename, mtInternal);
            continue;
         }
 
         // skip over files that are not regular files.
         if ((statbuf.st_mode & S_IFMT) != S_IFREG) {
-          FREE_C_HEAP_ARRAY(char, filename);
+          FREE_C_HEAP_ARRAY(char, filename, mtInternal);
           continue;
         }
 
@@ -393,22 +393,22 @@
         if (statbuf.st_ctime > latest_ctime) {
           char* user = strchr(dentry->d_name, '_') + 1;
 
-          if (latest_user != NULL) FREE_C_HEAP_ARRAY(char, latest_user);
-          latest_user = NEW_C_HEAP_ARRAY(char, strlen(user)+1);
+          if (latest_user != NULL) FREE_C_HEAP_ARRAY(char, latest_user, mtInternal);
+          latest_user = NEW_C_HEAP_ARRAY(char, strlen(user)+1, mtInternal);
 
           strcpy(latest_user, user);
           latest_ctime = statbuf.st_ctime;
         }
 
-        FREE_C_HEAP_ARRAY(char, filename);
+        FREE_C_HEAP_ARRAY(char, filename, mtInternal);
       }
     }
     os::closedir(subdirp);
-    FREE_C_HEAP_ARRAY(char, udbuf);
-    FREE_C_HEAP_ARRAY(char, usrdir_name);
+    FREE_C_HEAP_ARRAY(char, udbuf, mtInternal);
+    FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
   }
   os::closedir(tmpdirp);
-  FREE_C_HEAP_ARRAY(char, tdbuf);
+  FREE_C_HEAP_ARRAY(char, tdbuf, mtInternal);
 
   return(latest_user);
 }
@@ -453,7 +453,7 @@
   // about a name containing a '-' character.
   //
   nbytes += UINT_CHARS;
-  char* name = NEW_C_HEAP_ARRAY(char, nbytes);
+  char* name = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
   _snprintf(name, nbytes, "%s_%s_%u", PERFDATA_NAME, user, vmid);
 
   return name;
@@ -469,7 +469,7 @@
   // add 2 for the file separator and a null terminator.
   size_t nbytes = strlen(dirname) + UINT_CHARS + 2;
 
-  char* name = NEW_C_HEAP_ARRAY(char, nbytes);
+  char* name = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
   _snprintf(name, nbytes, "%s\\%d", dirname, vmid);
 
   return name;
@@ -485,7 +485,7 @@
 static void remove_file(const char* dirname, const char* filename) {
 
   size_t nbytes = strlen(dirname) + strlen(filename) + 2;
-  char* path = NEW_C_HEAP_ARRAY(char, nbytes);
+  char* path = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
 
   strcpy(path, dirname);
   strcat(path, "\\");
@@ -500,7 +500,7 @@
     }
   }
 
-  FREE_C_HEAP_ARRAY(char, path);
+  FREE_C_HEAP_ARRAY(char, path, mtInternal);
 }
 
 // returns true if the process represented by pid is alive, otherwise
@@ -638,7 +638,7 @@
   // opendir/readdir.
   //
   struct dirent* entry;
-  char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname));
+  char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname), mtInternal);
   errno = 0;
   while ((entry = os::readdir(dirp, (struct dirent *)dbuf)) != NULL) {
 
@@ -681,7 +681,7 @@
     errno = 0;
   }
   os::closedir(dirp);
-  FREE_C_HEAP_ARRAY(char, dbuf);
+  FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
 }
 
 // create a file mapping object with the requested name, and size
@@ -747,11 +747,11 @@
     // be an ACL we enlisted. free the resources.
     //
     if (success && exists && pACL != NULL && !isdefault) {
-      FREE_C_HEAP_ARRAY(char, pACL);
+      FREE_C_HEAP_ARRAY(char, pACL, mtInternal);
     }
 
     // free the security descriptor
-    FREE_C_HEAP_ARRAY(char, pSD);
+    FREE_C_HEAP_ARRAY(char, pSD, mtInternal);
   }
 }
 
@@ -766,7 +766,7 @@
     lpSA->lpSecurityDescriptor = NULL;
 
     // free the security attributes structure
-    FREE_C_HEAP_ARRAY(char, lpSA);
+    FREE_C_HEAP_ARRAY(char, lpSA, mtInternal);
   }
 }
 
@@ -805,7 +805,7 @@
     }
   }
 
-  token_buf = (PTOKEN_USER) NEW_C_HEAP_ARRAY(char, rsize);
+  token_buf = (PTOKEN_USER) NEW_C_HEAP_ARRAY(char, rsize, mtInternal);
 
   // get the user token information
   if (!GetTokenInformation(hAccessToken, TokenUser, token_buf, rsize, &rsize)) {
@@ -813,28 +813,28 @@
       warning("GetTokenInformation failure: lasterror = %d,"
               " rsize = %d\n", GetLastError(), rsize);
     }
-    FREE_C_HEAP_ARRAY(char, token_buf);
+    FREE_C_HEAP_ARRAY(char, token_buf, mtInternal);
     CloseHandle(hAccessToken);
     return NULL;
   }
 
   DWORD nbytes = GetLengthSid(token_buf->User.Sid);
-  PSID pSID = NEW_C_HEAP_ARRAY(char, nbytes);
+  PSID pSID = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
 
   if (!CopySid(nbytes, pSID, token_buf->User.Sid)) {
     if (PrintMiscellaneous && Verbose) {
       warning("GetTokenInformation failure: lasterror = %d,"
               " rsize = %d\n", GetLastError(), rsize);
     }
-    FREE_C_HEAP_ARRAY(char, token_buf);
-    FREE_C_HEAP_ARRAY(char, pSID);
+    FREE_C_HEAP_ARRAY(char, token_buf, mtInternal);
+    FREE_C_HEAP_ARRAY(char, pSID, mtInternal);
     CloseHandle(hAccessToken);
     return NULL;
   }
 
   // close the access token.
   CloseHandle(hAccessToken);
-  FREE_C_HEAP_ARRAY(char, token_buf);
+  FREE_C_HEAP_ARRAY(char, token_buf, mtInternal);
 
   return pSID;
 }
@@ -912,13 +912,13 @@
   }
 
   // create the new ACL
-  newACL = (PACL) NEW_C_HEAP_ARRAY(char, newACLsize);
+  newACL = (PACL) NEW_C_HEAP_ARRAY(char, newACLsize, mtInternal);
 
   if (!InitializeAcl(newACL, newACLsize, ACL_REVISION)) {
     if (PrintMiscellaneous && Verbose) {
       warning("InitializeAcl failure: lasterror = %d \n", GetLastError());
     }
-    FREE_C_HEAP_ARRAY(char, newACL);
+    FREE_C_HEAP_ARRAY(char, newACL, mtInternal);
     return false;
   }
 
@@ -931,7 +931,7 @@
         if (PrintMiscellaneous && Verbose) {
           warning("InitializeAcl failure: lasterror = %d \n", GetLastError());
         }
-        FREE_C_HEAP_ARRAY(char, newACL);
+        FREE_C_HEAP_ARRAY(char, newACL, mtInternal);
         return false;
       }
       if (((ACCESS_ALLOWED_ACE *)ace)->Header.AceFlags && INHERITED_ACE) {
@@ -958,7 +958,7 @@
           if (PrintMiscellaneous && Verbose) {
             warning("AddAce failure: lasterror = %d \n", GetLastError());
           }
-          FREE_C_HEAP_ARRAY(char, newACL);
+          FREE_C_HEAP_ARRAY(char, newACL, mtInternal);
           return false;
         }
       }
@@ -974,7 +974,7 @@
         warning("AddAccessAllowedAce failure: lasterror = %d \n",
                 GetLastError());
       }
-      FREE_C_HEAP_ARRAY(char, newACL);
+      FREE_C_HEAP_ARRAY(char, newACL, mtInternal);
       return false;
     }
   }
@@ -989,7 +989,7 @@
         if (PrintMiscellaneous && Verbose) {
           warning("InitializeAcl failure: lasterror = %d \n", GetLastError());
         }
-        FREE_C_HEAP_ARRAY(char, newACL);
+        FREE_C_HEAP_ARRAY(char, newACL, mtInternal);
         return false;
       }
       if (!AddAce(newACL, ACL_REVISION, MAXDWORD, ace,
@@ -997,7 +997,7 @@
         if (PrintMiscellaneous && Verbose) {
           warning("AddAce failure: lasterror = %d \n", GetLastError());
         }
-        FREE_C_HEAP_ARRAY(char, newACL);
+        FREE_C_HEAP_ARRAY(char, newACL, mtInternal);
         return false;
       }
       ace_index++;
@@ -1010,7 +1010,7 @@
       warning("SetSecurityDescriptorDacl failure:"
               " lasterror = %d \n", GetLastError());
     }
-    FREE_C_HEAP_ARRAY(char, newACL);
+    FREE_C_HEAP_ARRAY(char, newACL, mtInternal);
     return false;
   }
 
@@ -1030,7 +1030,7 @@
         warning("SetSecurityDescriptorControl failure:"
                 " lasterror = %d \n", GetLastError());
       }
-      FREE_C_HEAP_ARRAY(char, newACL);
+      FREE_C_HEAP_ARRAY(char, newACL, mtInternal);
       return false;
     }
   }
@@ -1054,7 +1054,7 @@
 
   // allocate space for a security descriptor
   PSECURITY_DESCRIPTOR pSD = (PSECURITY_DESCRIPTOR)
-                         NEW_C_HEAP_ARRAY(char, SECURITY_DESCRIPTOR_MIN_LENGTH);
+     NEW_C_HEAP_ARRAY(char, SECURITY_DESCRIPTOR_MIN_LENGTH, mtInternal);
 
   // initialize the security descriptor
   if (!InitializeSecurityDescriptor(pSD, SECURITY_DESCRIPTOR_REVISION)) {
@@ -1076,7 +1076,7 @@
   // return it to the caller.
   //
   LPSECURITY_ATTRIBUTES lpSA = (LPSECURITY_ATTRIBUTES)
-                            NEW_C_HEAP_ARRAY(char, sizeof(SECURITY_ATTRIBUTES));
+    NEW_C_HEAP_ARRAY(char, sizeof(SECURITY_ATTRIBUTES), mtInternal);
   lpSA->nLength = sizeof(SECURITY_ATTRIBUTES);
   lpSA->lpSecurityDescriptor = pSD;
   lpSA->bInheritHandle = FALSE;
@@ -1147,7 +1147,7 @@
   // create a security attributes structure with access control
   // entries as initialized above.
   LPSECURITY_ATTRIBUTES lpSA = make_security_attr(aces, 3);
-  FREE_C_HEAP_ARRAY(char, aces[0].pSid);
+  FREE_C_HEAP_ARRAY(char, aces[0].pSid, mtInternal);
   FreeSid(everybodySid);
   FreeSid(administratorsSid);
   return(lpSA);
@@ -1462,15 +1462,15 @@
   assert(((size != 0) && (size % os::vm_page_size() == 0)),
          "unexpected PerfMemry region size");
 
-  FREE_C_HEAP_ARRAY(char, user);
+  FREE_C_HEAP_ARRAY(char, user, mtInternal);
 
   // create the shared memory resources
   sharedmem_fileMapHandle =
                create_sharedmem_resources(dirname, filename, objectname, size);
 
-  FREE_C_HEAP_ARRAY(char, filename);
-  FREE_C_HEAP_ARRAY(char, objectname);
-  FREE_C_HEAP_ARRAY(char, dirname);
+  FREE_C_HEAP_ARRAY(char, filename, mtInternal);
+  FREE_C_HEAP_ARRAY(char, objectname, mtInternal);
+  FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
 
   if (sharedmem_fileMapHandle == NULL) {
     return NULL;
@@ -1621,7 +1621,7 @@
   // store file, we also don't follow them when attaching
   //
   if (!is_directory_secure(dirname)) {
-    FREE_C_HEAP_ARRAY(char, dirname);
+    FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
               "Process not found");
   }
@@ -1640,10 +1640,10 @@
   strcpy(robjectname, objectname);
 
   // free the c heap resources that are no longer needed
-  if (luser != user) FREE_C_HEAP_ARRAY(char, luser);
-  FREE_C_HEAP_ARRAY(char, dirname);
-  FREE_C_HEAP_ARRAY(char, filename);
-  FREE_C_HEAP_ARRAY(char, objectname);
+  if (luser != user) FREE_C_HEAP_ARRAY(char, luser, mtInternal);
+  FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
+  FREE_C_HEAP_ARRAY(char, filename, mtInternal);
+  FREE_C_HEAP_ARRAY(char, objectname, mtInternal);
 
   if (*sizep == 0) {
     size = sharedmem_filesize(rfilename, CHECK);
--- a/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1132,3 +1132,8 @@
                       : "r" (fpu_cntrl) : "memory");
 #endif // !AMD64
 }
+
+#ifndef PRODUCT
+void os::verify_stack_alignment() {
+}
+#endif
--- a/src/os_cpu/bsd_x86/vm/vmStructs_bsd_x86.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/bsd_x86/vm/vmStructs_bsd_x86.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -29,18 +29,12 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
-#ifdef __APPLE__
-#define OS_THREAD_ID_TYPE thread_t
-#else
-#define OS_THREAD_ID_TYPE pthread_t
-#endif
-
 #define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
                                                                                                                                      \
   /******************************/                                                                                                   \
   /* Threads (NOTE: incomplete) */                                                                                                   \
   /******************************/                                                                                                   \
-  nonstatic_field(OSThread,                      _thread_id,                                      OS_THREAD_ID_TYPE)                 \
+  nonstatic_field(OSThread,                      _thread_id,                                      OSThread::thread_id_t)             \
   nonstatic_field(OSThread,                      _pthread_id,                                     pthread_t)                         \
   /* This must be the last entry, and must be present */                                                                             \
   last_entry()
@@ -52,7 +46,7 @@
   /* Posix Thread IDs   */                                                \
   /**********************/                                                \
                                                                           \
-  declare_unsigned_integer_type(thread_t)                                 \
+  declare_unsigned_integer_type(OSThread::thread_id_t)                    \
   declare_unsigned_integer_type(pthread_t)                                \
                                                                           \
   /* This must be the last entry, and must be present */                  \
--- a/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -562,3 +562,8 @@
   }
 };
 #endif // !_LP64
+
+#ifndef PRODUCT
+void os::verify_stack_alignment() {
+}
+#endif
--- a/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -756,3 +756,8 @@
   // guard page, only enable glibc guard page for non-Java threads.
   return (thr_type == java_thread ? 0 : page_size());
 }
+
+#ifndef PRODUCT
+void os::verify_stack_alignment() {
+}
+#endif
--- a/src/os_cpu/linux_sparc/vm/vmStructs_linux_sparc.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/linux_sparc/vm/vmStructs_linux_sparc.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,7 +36,7 @@
   /******************************/                                                                                                   \
                                                                                                                                      \
   nonstatic_field(JavaThread,                  _base_of_stack_pointer,                        intptr_t*)                             \
-  nonstatic_field(OSThread,                    _thread_id,                                    pid_t)                                 \
+  nonstatic_field(OSThread,                    _thread_id,                                    OSThread::thread_id_t)                 \
   nonstatic_field(OSThread,                    _pthread_id,                                   pthread_t)                             \
   /* This must be the last entry, and must be present */                                                                             \
   last_entry()
@@ -48,7 +48,7 @@
   /* POSIX Thread IDs */                                                  \
   /**********************/                                                \
                                                                           \
-  declare_integer_type(pid_t)                                             \
+  declare_integer_type(OSThread::thread_id_t)                             \
   declare_unsigned_integer_type(pthread_t)                                \
                                                                           \
   /* This must be the last entry, and must be present */                  \
--- a/src/os_cpu/linux_x86/vm/os_linux_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/linux_x86/vm/os_linux_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -862,3 +862,11 @@
                       : "r" (fpu_cntrl) : "memory");
 #endif // !AMD64
 }
+
+#ifndef PRODUCT
+void os::verify_stack_alignment() {
+#ifdef AMD64
+  assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
+#endif
+}
+#endif
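
os::verify_stack_alignment() is added as an empty non-product hook on most platforms; only linux_x86 on AMD64 asserts that the stack pointer is StackAlignmentInBytes-aligned, as shown above. A tiny standalone sketch of the check (the address of a local is only a conservative stand-in for os::current_stack_pointer(), and need not itself be 16-byte aligned, so this prints rather than asserts):

    // Standalone illustration of the alignment check; 16 stands in for
    // StackAlignmentInBytes on AMD64.
    #include <cstdint>
    #include <cstdio>

    static const intptr_t kStackAlignment = 16;

    // Conservative stand-in for os::current_stack_pointer(): the address
    // of a local certainly lies in the caller's stack.
    static intptr_t current_sp_estimate() {
      volatile int dummy = 0;
      return (intptr_t)&dummy;
    }

    int main() {
      intptr_t sp = current_sp_estimate();
      std::printf("sp misalignment: %ld bytes\n",
                  (long)(sp & (kStackAlignment - 1)));
      return 0;
    }
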
--- a/src/os_cpu/linux_x86/vm/vmStructs_linux_x86.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/linux_x86/vm/vmStructs_linux_x86.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@
   /******************************/                                                                                                   \
   /* Threads (NOTE: incomplete) */                                                                                                   \
   /******************************/                                                                                                   \
-  nonstatic_field(OSThread,                      _thread_id,                                      pid_t)                             \
+  nonstatic_field(OSThread,                      _thread_id,                                      OSThread::thread_id_t)             \
   nonstatic_field(OSThread,                      _pthread_id,                                     pthread_t)                         \
   /* This must be the last entry, and must be present */                                                                             \
   last_entry()
@@ -46,7 +46,7 @@
   /* Posix Thread IDs   */                                                \
   /**********************/                                                \
                                                                           \
-  declare_integer_type(pid_t)                                             \
+  declare_integer_type(OSThread::thread_id_t)                             \
   declare_unsigned_integer_type(pthread_t)                                \
                                                                           \
   /* This must be the last entry, and must be present */                  \
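
These vmStructs tables are X-macro lists consumed by the Serviceability Agent; replacing the concrete pid_t with the OSThread::thread_id_t typedef keeps every expansion in sync with the field's declared type. A toy sketch of the pattern (hypothetical names, not the real VMStructs machinery):

    #include <cstdio>

    // Each table row is a macro invocation; the consumer supplies the macro
    // body, so changing a row's declared type (e.g. pid_t ->
    // OSThread::thread_id_t) updates every expansion at once.
    struct OSThread {
      typedef int thread_id_t;   // stands in for the platform typedef
      thread_id_t _thread_id;
    };

    #define TOY_VM_STRUCTS(nonstatic_field)                        \
      nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t)

    #define PRINT_FIELD(klass, field, type)                        \
      printf("%-10s %-12s %s\n", #klass, #field, #type);

    int main() {
      TOY_VM_STRUCTS(PRINT_FIELD)   // one printf per table row
      return 0;
    }
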
--- a/src/os_cpu/linux_zero/vm/os_linux_zero.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/linux_zero/vm/os_linux_zero.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -506,3 +506,8 @@
   }
 };
 #endif // !_LP64
+
+#ifndef PRODUCT
+void os::verify_stack_alignment() {
+}
+#endif
--- a/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -251,6 +251,15 @@
   return frame(fr->sender_sp(), frame::unpatchable, fr->sender_pc());
 }
 
+// Returns an estimate of the current stack pointer. Result must be guaranteed to
+// point into the calling thread's stack, and be no lower than the current stack
+// pointer.
+address os::current_stack_pointer() {
+  volatile int dummy;
+  address sp = (address)&dummy + 8;     // %%%% need to confirm if this is right
+  return sp;
+}
+
 frame os::current_frame() {
   intptr_t* sp = StubRoutines::Sparc::flush_callers_register_windows_func()();
   frame myframe(sp, frame::unpatchable,
@@ -815,3 +824,8 @@
    __asm__ __volatile__ ("wr %%g0, 0, %%fprs \n\t" : : :);
   }
 #endif //defined(__sparc) && defined(COMPILER2)
+
+#ifndef PRODUCT
+void os::verify_stack_alignment() {
+}
+#endif
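
Where only an estimate is needed, taking the address of a local suffices: on a downward-growing stack a local's address lies within the current frame, hence at or above the real stack pointer. A rough sketch of the idea (the +8 in the SPARC version above is an extra safety margin that the source's own comment flags as unconfirmed):

    #include <cstdio>

    // Not HotSpot code: the address of a local variable is a conservative
    // stand-in for the stack pointer when the contract is only "inside this
    // thread's stack, and no lower than the real SP".
    static void show_sp_estimate() {
      volatile int dummy = 0;
      printf("approximate sp: %p\n", (void*)&dummy);
    }

    int main() {
      show_sp_estimate();
      return 0;
    }
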
--- a/src/os_cpu/solaris_sparc/vm/vmStructs_solaris_sparc.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/solaris_sparc/vm/vmStructs_solaris_sparc.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,7 +36,7 @@
   /******************************/                                                                                                   \
                                                                                                                                      \
   nonstatic_field(JavaThread,                  _base_of_stack_pointer,                        intptr_t*)                             \
-  nonstatic_field(OSThread,                    _thread_id,                                    thread_t)                              \
+  nonstatic_field(OSThread,                    _thread_id,                                    OSThread::thread_id_t)                 \
   /* This must be the last entry, and must be present */                                                                             \
   last_entry()
 
@@ -47,7 +47,7 @@
   /* Solaris Thread IDs */                                                \
   /**********************/                                                \
                                                                           \
-  declare_unsigned_integer_type(thread_t)                                 \
+  declare_unsigned_integer_type(OSThread::thread_id_t)                    \
                                                                           \
   /* This must be the last entry, and must be present */                  \
   last_entry()
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -201,13 +201,23 @@
               impl[i] = (char)toupper((uint)impl[i]);
             if (strstr(impl, "SPARC64") != NULL) {
               features |= sparc64_family_m;
+            } else if (strstr(impl, "SPARC-M") != NULL) {
+              // M-series SPARC is based on T-series.
+              features |= (M_family_m | T_family_m);
             } else if (strstr(impl, "SPARC-T") != NULL) {
               features |= T_family_m;
               if (strstr(impl, "SPARC-T1") != NULL) {
                 features |= T1_model_m;
               }
             } else {
-              assert(strstr(impl, "SPARC") != NULL, "should be sparc");
+              if (strstr(impl, "SPARC") == NULL) {
+#ifndef PRODUCT
+                // kstat on Solaris 8 virtual machines (branded zones)
+                // returns "(unsupported)" implementation.
+                warning("kstat cpu_info implementation = '%s', should contain SPARC", impl);
+#endif
+                implementation = "SPARC";
+              }
             }
             free((void*)impl);
             break;
--- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -237,6 +237,12 @@
   return frame(fr->sender_sp(), fr->link(), fr->sender_pc());
 }
 
+extern "C" intptr_t *_get_current_sp();  // in .il file
+
+address os::current_stack_pointer() {
+  return (address)_get_current_sp();
+}
+
 extern "C" intptr_t *_get_current_fp();  // in .il file
 
 frame os::current_frame() {
@@ -954,3 +960,11 @@
   _solaris_raw_setup_fpu(fpu_cntrl);
 }
 #endif // AMD64
+
+#ifndef PRODUCT
+void os::verify_stack_alignment() {
+#ifdef AMD64
+  assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
+#endif
+}
+#endif
--- a/src/os_cpu/solaris_x86/vm/solaris_x86_32.il	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/solaris_x86/vm/solaris_x86_32.il	Sat Oct 20 00:08:35 2012 -0700
@@ -37,6 +37,12 @@
       movl     %gs:0, %eax 
       .end
 
+  // Get current sp
+      .inline _get_current_sp,0
+      .volatile
+      movl     %esp, %eax 
+      .end
+
   // Get current fp
       .inline _get_current_fp,0
       .volatile
--- a/src/os_cpu/solaris_x86/vm/solaris_x86_64.il	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/solaris_x86/vm/solaris_x86_64.il	Sat Oct 20 00:08:35 2012 -0700
@@ -30,6 +30,12 @@
       movq     %fs:0, %rax 
       .end
 
+  // Get current sp
+      .inline _get_current_sp,0
+      .volatile
+      movq     %rsp, %rax 
+      .end
+
   // Get current fp
       .inline _get_current_fp,0
       .volatile
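
The .il files are Solaris Studio inline-assembly templates. For illustration only, the same "copy the stack pointer into the return register" idea in GCC/Clang syntax (x86-64; an assumption for comparison, not part of this patch, which uses the .il mechanism):

    #include <cstdint>
    #include <cstdio>

    // GNU-syntax equivalent of the _get_current_sp template above:
    // move %rsp into an output register.
    static inline intptr_t* get_current_sp() {
      intptr_t* sp;
      __asm__ volatile("movq %%rsp, %0" : "=r"(sp));
      return sp;
    }

    int main() {
      printf("sp = %p\n", (void*)get_current_sp());
      return 0;
    }
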
--- a/src/os_cpu/solaris_x86/vm/vmStructs_solaris_x86.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/solaris_x86/vm/vmStructs_solaris_x86.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,7 +35,7 @@
   /* Threads (NOTE: incomplete) */                                                                                                   \
   /******************************/                                                                                                   \
                                                                                                                                      \
-  nonstatic_field(OSThread,                      _thread_id,                                    thread_t)                              \
+  nonstatic_field(OSThread,                      _thread_id,                                    OSThread::thread_id_t)               \
                                                                                                                                      \
   /* This must be the last entry, and must be present */                                                                             \
   last_entry()
@@ -46,7 +46,7 @@
   /* Solaris Thread IDs */                                                \
   /**********************/                                                \
                                                                           \
-  declare_unsigned_integer_type(thread_t)                                 \
+  declare_unsigned_integer_type(OSThread::thread_id_t)                    \
                                                                           \
   /* This must be the last entry, and must be present */                  \
   last_entry()
--- a/src/os_cpu/windows_x86/vm/os_windows_x86.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/windows_x86/vm/os_windows_x86.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -370,6 +370,26 @@
   return frame(fr->sender_sp(), fr->link(), fr->sender_pc());
 }
 
+#ifndef AMD64
+// Returns an estimate of the current stack pointer. Result must be guaranteed
+// to point into the calling thread's stack, and be no lower than the current
+// stack pointer.
+address os::current_stack_pointer() {
+  int dummy;
+  address sp = (address)&dummy;
+  return sp;
+}
+#else
+// Returns the current stack pointer. Accurate value needed for
+// os::verify_stack_alignment().
+address os::current_stack_pointer() {
+  typedef address get_sp_func();
+  get_sp_func* func = CAST_TO_FN_PTR(get_sp_func*,
+                                     StubRoutines::x86::get_previous_sp_entry());
+  return (*func)();
+}
+#endif
+
 
 #ifndef AMD64
 intptr_t* _get_previous_fp() {
@@ -546,3 +566,11 @@
   __asm fldcw fpu_cntrl_word;
 #endif // !AMD64
 }
+
+#ifndef PRODUCT
+void os::verify_stack_alignment() {
+#ifdef AMD64
+  assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
+#endif
+}
+#endif
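
On AMD64 an exact value is required, so the lookup above goes through a runtime-generated stub reached via a cast function pointer (the CAST_TO_FN_PTR pattern). A toy sketch of that cast, with an ordinary function standing in for the generated stub:

    #include <cstdio>

    // A raw code address (here a toy function, standing in for a stub entry)
    // is reinterpreted as a callable with a known signature and invoked.
    typedef const char* entry_func();

    static const char* toy_stub() { return "called through cast pointer"; }

    int main() {
      void* raw_entry = (void*)&toy_stub;        // what the stub table hands back
      entry_func* f = (entry_func*)raw_entry;    // cast to the known signature
      printf("%s\n", (*f)());
      return 0;
    }
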
--- a/src/os_cpu/windows_x86/vm/vmStructs_windows_x86.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/os_cpu/windows_x86/vm/vmStructs_windows_x86.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,7 +35,7 @@
   /* Threads (NOTE: incomplete) */                                                                                                   \
   /******************************/                                                                                                   \
                                                                                                                                      \
-  nonstatic_field(OSThread,                    _thread_id,                                    unsigned long)                         \
+  nonstatic_field(OSThread,                    _thread_id,                                    OSThread::thread_id_t)                 \
   unchecked_nonstatic_field(OSThread,          _thread_handle,                                sizeof(HANDLE)) /* NOTE: no type */    \
                                                                                                                                      \
   /* This must be the last entry, and must be present */                                                                             \
@@ -43,6 +43,7 @@
 
 #define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry) \
                                                                           \
+  declare_unsigned_integer_type(OSThread::thread_id_t)                    \
   /* This must be the last entry, and must be present */                  \
   last_entry()
 
--- a/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Compilation.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Compilation.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,7 @@
     private boolean osr;
     private Method method;
     private CallSite call = new CallSite();
+    private CallSite lateInlineCall = new CallSite();
     private int osrBci;
     private String icount;
     private String bcount;
@@ -80,6 +81,13 @@
             sb.append(site);
             sb.append("\n");
         }
+        if (getLateInlineCall().getCalls() != null) {
+            sb.append("late inline:\n");
+            for (CallSite site : getLateInlineCall().getCalls()) {
+                sb.append(site);
+                sb.append("\n");
+            }
+        }
         return sb.toString();
     }
 
@@ -115,6 +123,12 @@
                     site.print(stream, indent + 2);
                 }
             }
+            if (printInlining && lateInlineCall.getCalls() != null) {
+                stream.println("late inline:");
+                for (CallSite site : lateInlineCall.getCalls()) {
+                    site.print(stream, indent + 2);
+                }
+            }
         }
     }
 
@@ -215,7 +229,11 @@
     }
 
     public void setMethod(Method method) {
-        this.method = method;
+        // Don't change method if it is already set to avoid changing
+        // it by post parse inlining info.
+        if (getMethod() == null) {
+            this.method = method;
+        }
     }
 
     public CallSite getCall() {
@@ -226,6 +244,10 @@
         this.call = call;
     }
 
+    public CallSite getLateInlineCall() {
+        return lateInlineCall;
+    }
+
     public double getElapsedTime() {
         return end - start;
     }
--- a/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogParser.java	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogParser.java	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -146,6 +146,7 @@
     private CallSite site;
     private Stack<Phase> phaseStack = new Stack<Phase>();
     private UncommonTrapEvent currentTrap;
+    private Stack<CallSite> late_inline_scope;
 
     long parseLong(String l) {
         try {
@@ -302,6 +303,7 @@
             }
             events.add(compile);
             compiles.put(makeId(atts), compile);
+            site = compile.getCall();
         } else if (qname.equals("type")) {
             type(search(atts, "id"), search(atts, "name"));
         } else if (qname.equals("bc")) {
@@ -360,12 +362,22 @@
                 // uncommon trap inserted during parsing.
                 // ignore for now
             }
+        } else if (qname.equals("late_inline")) {
+            late_inline_scope = new Stack<CallSite>();
+            site = new CallSite(-999, method(search(atts, "method")));
+            late_inline_scope.push(site);
         } else if (qname.equals("jvms")) {
             // <jvms bci='4' method='java/io/DataInputStream readChar ()C' bytes='40' count='5815' iicount='20815'/>
             if (currentTrap != null) {
                 currentTrap.addJVMS(atts.getValue("method"), Integer.parseInt(atts.getValue("bci")));
+            } else if (late_inline_scope != null) {
+                bci = Integer.parseInt(search(atts, "bci"));
+                site = new CallSite(bci, method(search(atts, "method")));
+                late_inline_scope.push(site);
             } else {
-                // Ignore <eliminate_allocation type='667'> and <eliminate_lock lock='1'>
+                // Ignore <eliminate_allocation type='667'>,
+                //        <eliminate_lock lock='1'>,
+                //        <replace_string_concat arguments='2' string_alloc='0' multiple='0'>
             }
         } else if (qname.equals("nmethod")) {
             String id = makeId(atts);
@@ -379,7 +391,7 @@
             Method m = method(search(atts, "method"));
             if (scopes.size() == 0) {
                 compile.setMethod(m);
-                scopes.push(compile.getCall());
+                scopes.push(site);
             } else {
                 if (site.getMethod() == m) {
                     scopes.push(site);
@@ -393,7 +405,7 @@
             }
         } else if (qname.equals("parse_done")) {
             CallSite call = scopes.pop();
-            call.setEndNodes(Integer.parseInt(search(atts, "nodes")));
+            call.setEndNodes(Integer.parseInt(search(atts, "nodes", "1")));
             call.setTimeStamp(Double.parseDouble(search(atts, "stamp")));
             scopes.push(call);
         }
@@ -408,6 +420,43 @@
             scopes.pop();
         } else if (qname.equals("uncommon_trap")) {
             currentTrap = null;
+        } else if (qname.equals("late_inline")) {
+            // Populate late inlining info.
+
+            // late_inline scopes are specified in reverse order:
+            // compiled method should be on top of stack.
+            CallSite caller = late_inline_scope.pop();
+            Method m = compile.getMethod();
+            if (m != caller.getMethod()) {
+                System.out.println(m);
+                System.out.println(caller.getMethod() + " bci: " + bci);
+                throw new InternalError("call site and late_inline info don't match");
+            }
+
+            // late_inline contains caller+bci info, convert it
+            // to bci+callee info used by LogCompilation.
+            site = compile.getLateInlineCall();
+            do {
+                bci = caller.getBci();
+                // Next inlined call.
+                caller = late_inline_scope.pop();
+                CallSite callee =  new CallSite(bci, caller.getMethod());
+                site.add(callee);
+                site = callee;
+            } while (!late_inline_scope.empty());
+
+            if (caller.getBci() != -999) {
+                System.out.println(caller.getMethod());
+                throw new InternalError("broken late_inline info");
+            }
+            if (site.getMethod() != caller.getMethod()) {
+                System.out.println(site.getMethod());
+                System.out.println(caller.getMethod());
+                throw new InternalError("call site and late_inline info don't match");
+            }
+            // late_inline is followed by parse with scopes.size() == 0,
+            // 'site' will be pushed to scopes.
+            late_inline_scope = null;
         } else if (qname.equals("task")) {
             types.clear();
             methods.clear();
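
The late_inline end-element handler above re-threads the recorded scopes: the log stores frames as "method plus the bci of the call made from it", compiled method on top and a bci of -999 marking the sentinel, while the display tree wants "bci plus inlined callee" links. A toy sketch of the same conversion (stand-in types, not the LogCompilation classes):

    #include <cstdio>
    #include <stack>
    #include <string>

    struct Frame { std::string method; int bci; };

    int main() {
      std::stack<Frame> scopes;
      scopes.push({"leaf()", -999});  // sentinel: the late-inlined callee itself
      scopes.push({"mid()",     3});  // mid() calls leaf() at bci 3
      scopes.push({"root()",    7});  // compiled method; calls mid() at bci 7

      Frame caller = scopes.top(); scopes.pop();   // compiled method on top
      while (!scopes.empty()) {
        int bci = caller.bci;                      // call site in current frame
        caller = scopes.top(); scopes.pop();       // descend to the callee
        printf("at bci %d inline %s\n", bci, caller.method.c_str());
      }
      // caller.bci is now the -999 sentinel, confirming a consistent chain.
      return 0;
    }
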
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/tools/whitebox/sun/hotspot/WhiteBox.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.hotspot;
+import java.security.BasicPermission;
+import sun.hotspot.parser.DiagnosticCommand;
+
+public class WhiteBox {
+
+  @SuppressWarnings("serial")
+  public static class WhiteBoxPermission extends BasicPermission {
+    public WhiteBoxPermission(String s) {
+      super(s);
+    }
+  }
+
+  private WhiteBox() {}
+  private static final WhiteBox instance = new WhiteBox();
+  private static native void registerNatives();
+
+  /**
+   * Returns the singleton WhiteBox instance.
+   *
+   * The returned WhiteBox object should be carefully guarded
+   * by the caller, since it can be used to read and write data
+   * at arbitrary memory addresses. It must never be passed to
+   * untrusted code.
+   */
+  public synchronized static WhiteBox getWhiteBox() {
+    SecurityManager sm = System.getSecurityManager();
+    if (sm != null) {
+      sm.checkPermission(new WhiteBoxPermission("getInstance"));
+    }
+    return instance;
+  }
+
+  static {
+    registerNatives();
+  }
+
+  // Memory
+  public native long getObjectAddress(Object o);
+  public native int  getHeapOopSize();
+
+  // G1
+  public native boolean g1InConcurrentMark();
+  public native boolean g1IsHumongous(Object o);
+  public native long    g1NumFreeRegions();
+  public native int     g1RegionSize();
+  public native Object[]    parseCommandLine(String commandline, DiagnosticCommand[] args);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/tools/whitebox/sun/hotspot/parser/DiagnosticCommand.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,43 @@
+package sun.hotspot.parser;
+
+public class DiagnosticCommand {
+
+    public enum DiagnosticArgumentType {
+        JLONG, BOOLEAN, STRING, NANOTIME, STRINGARRAY, MEMORYSIZE
+    }
+
+    private String name;
+    private String desc;
+    private DiagnosticArgumentType type;
+    private boolean mandatory;
+    private String defaultValue;
+
+    public DiagnosticCommand(String name, String desc, DiagnosticArgumentType type,
+            boolean mandatory, String defaultValue) {
+        this.name = name;
+        this.desc = desc;
+        this.type = type;
+        this.mandatory = mandatory;
+        this.defaultValue = defaultValue;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public String getDesc() {
+        return desc;
+    }
+
+    public DiagnosticArgumentType getType() {
+        return type;
+    }
+
+    public boolean isMandatory() {
+        return mandatory;
+    }
+
+    public String getDefaultValue() {
+        return defaultValue;
+    }
+}
--- a/src/share/vm/adlc/adlparse.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/adlc/adlparse.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -115,6 +115,12 @@
       parse_err(SYNERR, "expected one of - instruct, operand, ins_attrib, op_attrib, source, register, pipeline, encode\n     Found %s",ident);
     }
   }
+  // Add reg_class spill_regs after parsing.
+  RegisterForm *regBlock = _AD.get_registers();
+  if (regBlock == NULL) {
+    parse_err(SEMERR, "Did not declare 'register' definitions");
+  }
+  regBlock->addSpillRegClass();
 
   // Done with parsing, check consistency.
 
@@ -768,11 +774,12 @@
 
 //------------------------------reg_parse--------------------------------------
 void ADLParser::reg_parse(void) {
-
-  // Create the RegisterForm for the architecture description.
-  RegisterForm *regBlock = new RegisterForm();    // Build new Source object
-  regBlock->_linenum = linenum();
-  _AD.addForm(regBlock);
+  RegisterForm *regBlock = _AD.get_registers(); // Information about registers encoding
+  if (regBlock == NULL) {
+    // Create the RegisterForm for the architecture description.
+    regBlock = new RegisterForm();    // Build new Source object
+    _AD.addForm(regBlock);
+  }
 
   skipws();                       // Skip leading whitespace
   if (_curchar == '%' && *(_ptr+1) == '{') {
@@ -796,15 +803,11 @@
     parse_err(SYNERR, "Missing %c{ ... %c} block after register keyword.\n",'%','%');
     return;
   }
-
-  // Add reg_class spill_regs
-  regBlock->addSpillRegClass();
 }
 
 //------------------------------encode_parse-----------------------------------
 void ADLParser::encode_parse(void) {
   EncodeForm *encBlock;         // Information about instruction/operand encoding
-  char       *desc = NULL;      // String representation of encode rule
 
   _AD.getForm(&encBlock);
   if ( encBlock == NULL) {
--- a/src/share/vm/adlc/archDesc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/adlc/archDesc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -911,12 +911,24 @@
   // Find last character in idealOp, it specifies the type
   char  last_char = 0;
   const char *ptr = idealOp;
-  for( ; *ptr != '\0'; ++ptr) {
+  for (; *ptr != '\0'; ++ptr) {
     last_char = *ptr;
   }
 
+  // Match Vector types.
+  if (strncmp(idealOp, "Vec",3)==0) {
+    switch(last_char) {
+    case 'S':  return "TypeVect::VECTS";
+    case 'D':  return "TypeVect::VECTD";
+    case 'X':  return "TypeVect::VECTX";
+    case 'Y':  return "TypeVect::VECTY";
+    default:
+      internal_err("Vector type %s with unrecognized type\n",idealOp);
+    }
+  }
+
   // !!!!!
-  switch( last_char ) {
+  switch(last_char) {
   case 'I':    return "TypeInt::INT";
   case 'P':    return "TypePtr::BOTTOM";
   case 'N':    return "TypeNarrowOop::BOTTOM";
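
The new branch keys off the Vec prefix and then the final character, which in this scheme encodes the vector width (S/D/X/Y for 4/8/16/32 bytes). A sketch of the same dispatch in isolation:

    #include <cstdio>
    #include <cstring>

    // Standalone sketch of the suffix dispatch above (returns the type-name
    // string that adlc would emit into the generated code).
    static const char* vect_type(const char* idealOp) {
      if (strncmp(idealOp, "Vec", 3) != 0) return "not a vector";
      switch (idealOp[strlen(idealOp) - 1]) {
        case 'S': return "TypeVect::VECTS";   //  4-byte vector
        case 'D': return "TypeVect::VECTD";   //  8-byte vector
        case 'X': return "TypeVect::VECTX";   // 16-byte vector
        case 'Y': return "TypeVect::VECTY";   // 32-byte vector
        default:  return "unrecognized";      // real code calls internal_err()
      }
    }

    int main() {
      printf("%s\n", vect_type("VecX"));   // TypeVect::VECTX
      return 0;
    }
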
--- a/src/share/vm/adlc/forms.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/adlc/forms.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -261,52 +261,26 @@
   if( strcmp(opType,"LoadL")==0 )  return Form::idealL;
   if( strcmp(opType,"LoadL_unaligned")==0 )  return Form::idealL;
   if( strcmp(opType,"LoadPLocked")==0 )  return Form::idealP;
-  if( strcmp(opType,"LoadLLocked")==0 )  return Form::idealL;
   if( strcmp(opType,"LoadP")==0 )  return Form::idealP;
   if( strcmp(opType,"LoadN")==0 )  return Form::idealN;
   if( strcmp(opType,"LoadRange")==0 )  return Form::idealI;
   if( strcmp(opType,"LoadS")==0 )  return Form::idealS;
-  if( strcmp(opType,"Load16B")==0 )  return Form::idealB;
-  if( strcmp(opType,"Load8B")==0 )  return Form::idealB;
-  if( strcmp(opType,"Load4B")==0 )  return Form::idealB;
-  if( strcmp(opType,"Load8C")==0 )  return Form::idealC;
-  if( strcmp(opType,"Load4C")==0 )  return Form::idealC;
-  if( strcmp(opType,"Load2C")==0 )  return Form::idealC;
-  if( strcmp(opType,"Load8S")==0 )  return Form::idealS;
-  if( strcmp(opType,"Load4S")==0 )  return Form::idealS;
-  if( strcmp(opType,"Load2S")==0 )  return Form::idealS;
-  if( strcmp(opType,"Load2D")==0 )  return Form::idealD;
-  if( strcmp(opType,"Load4F")==0 )  return Form::idealF;
-  if( strcmp(opType,"Load2F")==0 )  return Form::idealF;
-  if( strcmp(opType,"Load4I")==0 )  return Form::idealI;
-  if( strcmp(opType,"Load2I")==0 )  return Form::idealI;
-  if( strcmp(opType,"Load2L")==0 )  return Form::idealL;
+  if( strcmp(opType,"LoadVector")==0 )  return Form::idealV;
   assert( strcmp(opType,"Load") != 0, "Must type Loads" );
   return Form::none;
 }
 
 Form::DataType Form::is_store_to_memory(const char *opType) const {
   if( strcmp(opType,"StoreB")==0)  return Form::idealB;
-  if( strcmp(opType,"StoreCM")==0)  return Form::idealB;
+  if( strcmp(opType,"StoreCM")==0) return Form::idealB;
   if( strcmp(opType,"StoreC")==0)  return Form::idealC;
   if( strcmp(opType,"StoreD")==0)  return Form::idealD;
   if( strcmp(opType,"StoreF")==0)  return Form::idealF;
   if( strcmp(opType,"StoreI")==0)  return Form::idealI;
   if( strcmp(opType,"StoreL")==0)  return Form::idealL;
   if( strcmp(opType,"StoreP")==0)  return Form::idealP;
-  if( strcmp(opType,"StoreN")==0) return Form::idealN;
-  if( strcmp(opType,"Store16B")==0)  return Form::idealB;
-  if( strcmp(opType,"Store8B")==0)  return Form::idealB;
-  if( strcmp(opType,"Store4B")==0)  return Form::idealB;
-  if( strcmp(opType,"Store8C")==0)  return Form::idealC;
-  if( strcmp(opType,"Store4C")==0)  return Form::idealC;
-  if( strcmp(opType,"Store2C")==0)  return Form::idealC;
-  if( strcmp(opType,"Store2D")==0)  return Form::idealD;
-  if( strcmp(opType,"Store4F")==0)  return Form::idealF;
-  if( strcmp(opType,"Store2F")==0)  return Form::idealF;
-  if( strcmp(opType,"Store4I")==0)  return Form::idealI;
-  if( strcmp(opType,"Store2I")==0)  return Form::idealI;
-  if( strcmp(opType,"Store2L")==0)  return Form::idealL;
+  if( strcmp(opType,"StoreN")==0)  return Form::idealN;
+  if( strcmp(opType,"StoreVector")==0 )  return Form::idealV;
   assert( strcmp(opType,"Store") != 0, "Must type Stores" );
   return Form::none;
 }
--- a/src/share/vm/adlc/forms.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/adlc/forms.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -172,7 +172,8 @@
     idealB      =  6,  // Byte    type
     idealC      =  7,  // Char    type
     idealS      =  8,  // String  type
-    idealN      =  9   // Narrow oop types
+    idealN      =  9,  // Narrow oop types
+    idealV      = 10   // Vector  type
   };
   // Convert ideal name to a DataType, return DataType::none if not a 'ConX'
   Form::DataType  ideal_to_const_type(const char *ideal_type_name) const;
--- a/src/share/vm/adlc/formsopt.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/adlc/formsopt.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -66,7 +66,7 @@
 // for spill-slots/regs.
 void RegisterForm::addSpillRegClass() {
   // Stack slots start at the next available even register number.
-  _reg_ctr = (_reg_ctr+1) & ~1;
+  _reg_ctr = (_reg_ctr+7) & ~7;
   const char *rc_name   = "stack_slots";
   RegClass   *reg_class = new RegClass(rc_name);
   reg_class->_stack_or_reg = true;
@@ -150,9 +150,14 @@
 int RegisterForm::RegMask_Size() {
   // Need at least this many words
   int words_for_regs = (_reg_ctr + 31)>>5;
-  // Add a few for incoming & outgoing arguments to calls.
+  // The array of Register Mask bits should be large enough to cover
+  // all the machine registers and all parameters that need to be passed
+  // on the stack (stack registers) up to some interesting limit.  Methods
+  // that need more parameters will NOT be compiled.  On Intel, the limit
+  // is something like 90+ parameters.
+  // Add a few (3 words == 96 bits) for incoming & outgoing arguments to calls.
   // Round up to the next doubleword size.
-  return (words_for_regs + 2 + 1) & ~1;
+  return (words_for_regs + 3 + 1) & ~1;
 }
 
 void RegisterForm::dump() {                  // Debug printer
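
Both expressions use the standard round-up idiom: for a power-of-two N, (x + N-1) & ~(N-1) rounds x up to the next multiple of N, and (x + 31) >> 5 is the matching ceiling division by 32. In isolation:

    #include <cstdio>

    // (x + n-1) & ~(n-1) rounds x up to a multiple of n (n a power of two).
    static int round_up(int x, int n) { return (x + n - 1) & ~(n - 1); }

    // Ceiling division by 32: how many 32-bit words cover reg_ctr bits.
    static int words_for_regs(int reg_ctr) { return (reg_ctr + 31) >> 5; }

    int main() {
      printf("%d\n", round_up(13, 8));      // 16, as in (_reg_ctr+7) & ~7
      printf("%d\n", words_for_regs(90));   // 3 words for 90 register bits
      printf("%d\n", (words_for_regs(90) + 3 + 1) & ~1);  // 6: +3 for args, rounded even
      return 0;
    }
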
--- a/src/share/vm/adlc/formssel.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/adlc/formssel.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -432,6 +432,14 @@
   return  _matrule->is_ideal_store();
 }
 
+// Return 'true' if this instruction matches an ideal vector node
+bool InstructForm::is_vector() const {
+  if( _matrule == NULL ) return false;
+
+  return _matrule->is_vector();
+}
+
+
 // Return the input register that must match the output register
 // If this is not required, return 0
 uint InstructForm::two_address(FormDict &globals) {
@@ -751,6 +759,9 @@
 
   if (needs_base_oop_edge(globals)) return true;
 
+  if (is_vector()) return true;
+  if (is_mach_constant()) return true;
+
   return  false;
 }
 
@@ -3381,13 +3392,10 @@
     "StoreI","StoreL","StoreP","StoreN","StoreD","StoreF" ,
     "StoreB","StoreC","Store" ,"StoreFP",
     "LoadI", "LoadUI2L", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF"  ,
-    "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load"   ,
-    "Store4I","Store2I","Store2L","Store2D","Store4F","Store2F","Store16B",
-    "Store8B","Store4B","Store8C","Store4C","Store2C",
-    "Load4I" ,"Load2I" ,"Load2L" ,"Load2D" ,"Load4F" ,"Load2F" ,"Load16B" ,
-    "Load8B" ,"Load4B" ,"Load8C" ,"Load4C" ,"Load2C" ,"Load8S", "Load4S","Load2S",
+    "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" ,
+    "StoreVector", "LoadVector",
     "LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned",
-    "LoadPLocked", "LoadLLocked",
+    "LoadPLocked",
     "StorePConditional", "StoreIConditional", "StoreLConditional",
     "CompareAndSwapI", "CompareAndSwapL", "CompareAndSwapP", "CompareAndSwapN",
     "StoreCM",
@@ -3822,6 +3830,10 @@
          strcmp(opType,"RegL")==0 ||
          strcmp(opType,"RegF")==0 ||
          strcmp(opType,"RegD")==0 ||
+         strcmp(opType,"VecS")==0 ||
+         strcmp(opType,"VecD")==0 ||
+         strcmp(opType,"VecX")==0 ||
+         strcmp(opType,"VecY")==0 ||
          strcmp(opType,"Reg" )==0) ) {
       return 1;
     }
@@ -3938,19 +3950,12 @@
         strcmp(opType,"ReverseBytesL")==0 ||
         strcmp(opType,"ReverseBytesUS")==0 ||
         strcmp(opType,"ReverseBytesS")==0 ||
-        strcmp(opType,"Replicate16B")==0 ||
-        strcmp(opType,"Replicate8B")==0 ||
-        strcmp(opType,"Replicate4B")==0 ||
-        strcmp(opType,"Replicate8C")==0 ||
-        strcmp(opType,"Replicate4C")==0 ||
-        strcmp(opType,"Replicate8S")==0 ||
-        strcmp(opType,"Replicate4S")==0 ||
-        strcmp(opType,"Replicate4I")==0 ||
-        strcmp(opType,"Replicate2I")==0 ||
-        strcmp(opType,"Replicate2L")==0 ||
-        strcmp(opType,"Replicate4F")==0 ||
-        strcmp(opType,"Replicate2F")==0 ||
-        strcmp(opType,"Replicate2D")==0 ||
+        strcmp(opType,"ReplicateB")==0 ||
+        strcmp(opType,"ReplicateS")==0 ||
+        strcmp(opType,"ReplicateI")==0 ||
+        strcmp(opType,"ReplicateL")==0 ||
+        strcmp(opType,"ReplicateF")==0 ||
+        strcmp(opType,"ReplicateD")==0 ||
         0 /* 0 to line up columns nicely */ )
       return 1;
   }
@@ -4034,6 +4039,23 @@
   return ideal_load;
 }
 
+bool MatchRule::is_vector() const {
+  if( _rChild ) {
+    const char  *opType = _rChild->_opType;
+    if( strcmp(opType,"ReplicateB")==0 ||
+        strcmp(opType,"ReplicateS")==0 ||
+        strcmp(opType,"ReplicateI")==0 ||
+        strcmp(opType,"ReplicateL")==0 ||
+        strcmp(opType,"ReplicateF")==0 ||
+        strcmp(opType,"ReplicateD")==0 ||
+        strcmp(opType,"LoadVector")==0 ||
+        strcmp(opType,"StoreVector")==0 ||
+        0 /* 0 to line up columns nicely */ )
+      return true;
+  }
+  return false;
+}
+
 
 bool MatchRule::skip_antidep_check() const {
   // Some loads operate on what is effectively immutable memory so we
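
The new is_vector() predicate is a plain membership test over the vector op names; the trailing "0 /* ... */" operand only keeps the || columns aligned. The same test, table-driven:

    #include <cstdio>
    #include <cstring>

    // Behaviorally equivalent sketch of the chained strcmp test above,
    // written as a loop over a NULL-terminated table.
    static bool is_vector_op(const char* op) {
      static const char* ops[] = {
        "ReplicateB", "ReplicateS", "ReplicateI", "ReplicateL",
        "ReplicateF", "ReplicateD", "LoadVector", "StoreVector", NULL
      };
      for (const char** p = ops; *p != NULL; ++p)
        if (strcmp(op, *p) == 0) return true;
      return false;
    }

    int main() {
      printf("%d %d\n", is_vector_op("LoadVector"), is_vector_op("LoadI"));  // 1 0
      return 0;
    }
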
--- a/src/share/vm/adlc/formssel.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/adlc/formssel.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -160,6 +160,7 @@
   virtual bool        is_ideal_safepoint() const; // node matches 'SafePoint'
   virtual bool        is_ideal_nop() const;     // node matches 'Nop'
   virtual bool        is_ideal_control() const; // control node
+  virtual bool        is_vector() const;        // vector instruction
 
   virtual Form::CallType is_ideal_call() const; // matches ideal 'Call'
   virtual Form::DataType is_ideal_load() const; // node matches ideal 'LoadXNode'
@@ -1011,6 +1012,7 @@
   bool       is_ideal_goto() const;    // node matches ideal 'Goto'
   bool       is_ideal_loopEnd() const; // node matches ideal 'LoopEnd'
   bool       is_ideal_bool() const;    // node matches ideal 'Bool'
+  bool       is_vector() const;        // vector instruction
   Form::DataType is_ideal_load() const;// node matches ideal 'LoadXNode'
   // Should antidep checks be disabled for this rule
   // See definition of MatchRule::skip_antidep_check
--- a/src/share/vm/adlc/main.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/adlc/main.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -250,6 +250,7 @@
   AD.addInclude(AD._HPP_file, "opto/node.hpp");
   AD.addInclude(AD._HPP_file, "opto/regalloc.hpp");
   AD.addInclude(AD._HPP_file, "opto/subnode.hpp");
+  AD.addInclude(AD._HPP_file, "opto/vectornode.hpp");
   AD.addInclude(AD._CPP_CLONE_file, "precompiled.hpp");
   AD.addInclude(AD._CPP_CLONE_file, "adfiles", get_basename(AD._HPP_file._name));
   AD.addInclude(AD._CPP_EXPAND_file, "precompiled.hpp");
--- a/src/share/vm/adlc/output_h.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/adlc/output_h.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -674,16 +674,19 @@
   else if( inst.is_ideal_mem() ) {
     // Print out the field name if available to improve readability
     fprintf(fp,  "    if (ra->C->alias_type(adr_type())->field() != NULL) {\n");
-    fprintf(fp,  "      st->print(\" ! Field \");\n");
-    fprintf(fp,  "      if( ra->C->alias_type(adr_type())->is_volatile() )\n");
-    fprintf(fp,  "        st->print(\" Volatile\");\n");
-    fprintf(fp,  "      ra->C->alias_type(adr_type())->field()->holder()->name()->print_symbol_on(st);\n");
+    fprintf(fp,  "      ciField* f = ra->C->alias_type(adr_type())->field();\n");
+    fprintf(fp,  "      st->print(\" ! Field: \");\n");
+    fprintf(fp,  "      if (f->is_volatile())\n");
+    fprintf(fp,  "        st->print(\"volatile \");\n");
+    fprintf(fp,  "      f->holder()->name()->print_symbol_on(st);\n");
     fprintf(fp,  "      st->print(\".\");\n");
-    fprintf(fp,  "      ra->C->alias_type(adr_type())->field()->name()->print_symbol_on(st);\n");
+    fprintf(fp,  "      f->name()->print_symbol_on(st);\n");
+    fprintf(fp,  "      if (f->is_constant())\n");
+    fprintf(fp,  "        st->print(\" (constant)\");\n");
     fprintf(fp,  "    } else\n");
     // Make sure 'Volatile' gets printed out
-    fprintf(fp,  "    if( ra->C->alias_type(adr_type())->is_volatile() )\n");
-    fprintf(fp,  "      st->print(\" Volatile!\");\n");
+    fprintf(fp,  "    if (ra->C->alias_type(adr_type())->is_volatile())\n");
+    fprintf(fp,  "      st->print(\" volatile!\");\n");
   }
 
   // Complete the definition of the format function
--- a/src/share/vm/asm/assembler.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/asm/assembler.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -318,6 +318,16 @@
   }
 }
 
+RegisterOrConstant AbstractAssembler::delayed_value(int(*value_fn)(), Register tmp, int offset) {
+  intptr_t val = (intptr_t) (*value_fn)();
+  if (val != 0)  return val + offset;
+  return delayed_value_impl(delayed_value_addr(value_fn), tmp, offset);
+}
+RegisterOrConstant AbstractAssembler::delayed_value(address(*value_fn)(), Register tmp, int offset) {
+  intptr_t val = (intptr_t) (*value_fn)();
+  if (val != 0)  return val + offset;
+  return delayed_value_impl(delayed_value_addr(value_fn), tmp, offset);
+}
 intptr_t* AbstractAssembler::delayed_value_addr(int(*value_fn)()) {
   DelayedConstant* dcon = DelayedConstant::add(T_INT, (DelayedConstant::value_fn_t) value_fn);
   return &dcon->value;
--- a/src/share/vm/asm/assembler.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/asm/assembler.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -406,12 +406,8 @@
   // offsets in code which must be generated before the object class is loaded.
   // Field offsets are never zero, since an object's header (mark word)
   // is located at offset zero.
-  RegisterOrConstant delayed_value(int(*value_fn)(), Register tmp, int offset = 0) {
-    return delayed_value_impl(delayed_value_addr(value_fn), tmp, offset);
-  }
-  RegisterOrConstant delayed_value(address(*value_fn)(), Register tmp, int offset = 0) {
-    return delayed_value_impl(delayed_value_addr(value_fn), tmp, offset);
-  }
+  RegisterOrConstant delayed_value(int(*value_fn)(), Register tmp, int offset = 0);
+  RegisterOrConstant delayed_value(address(*value_fn)(), Register tmp, int offset = 0);
   virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset) = 0;
   // Last overloading is platform-dependent; look in assembler_<arch>.cpp.
   static intptr_t* delayed_value_addr(int(*constant_fn)());
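
Moving these bodies out of line lets them short-circuit: when the value function can already produce its (non-zero) result, it is folded to a constant at once, and only otherwise does the request go through delayed_value_impl for later patching. A toy sketch of the control flow (stand-in types, not RegisterOrConstant):

    #include <cstdio>

    static long fold_or_defer(long (*value_fn)(), int offset) {
      long val = (*value_fn)();
      if (val != 0) return val + offset;   // constant known now: fold it
      return -1;                           // sentinel: defer to patch machinery
    }

    static long ready()   { return 100; }  // value already computed
    static long not_yet() { return 0;   }  // not available until class loading

    int main() {
      printf("%ld\n", fold_or_defer(ready, 8));    // 108 (folded)
      printf("%ld\n", fold_or_defer(not_yet, 8));  // -1  (deferred)
      return 0;
    }
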
--- a/src/share/vm/asm/codeBuffer.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/asm/codeBuffer.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -261,7 +261,7 @@
 
 GrowableArray<int>* CodeBuffer::create_patch_overflow() {
   if (_overflow_arena == NULL) {
-    _overflow_arena = new Arena();
+    _overflow_arena = new (mtCode) Arena();
   }
   return new (_overflow_arena) GrowableArray<int>(_overflow_arena, 8, 0, 0);
 }
@@ -910,7 +910,7 @@
   _comments.add_comment(offset, comment);
 }
 
-class CodeComment: public CHeapObj {
+class CodeComment: public CHeapObj<mtCode> {
  private:
   friend class CodeComments;
   intptr_t     _offset;
@@ -919,13 +919,13 @@
 
   ~CodeComment() {
     assert(_next == NULL, "wrong interface for freeing list");
-    os::free((void*)_comment);
+    os::free((void*)_comment, mtCode);
   }
 
  public:
   CodeComment(intptr_t offset, const char * comment) {
     _offset = offset;
-    _comment = os::strdup(comment);
+    _comment = os::strdup(comment, mtCode);
     _next = NULL;
   }
 
--- a/src/share/vm/asm/register.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/asm/register.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -103,7 +103,8 @@
 ) {
   assert(
     a != b,
-    "registers must be different"
+    err_msg("registers must be different: a=%d, b=%d",
+            a, b)
   );
 }
 
@@ -116,7 +117,8 @@
   assert(
     a != b && a != c
            && b != c,
-    "registers must be different"
+    err_msg("registers must be different: a=%d, b=%d, c=%d",
+            a, b, c)
   );
 }
 
@@ -131,7 +133,8 @@
     a != b && a != c && a != d
            && b != c && b != d
                      && c != d,
-    "registers must be different"
+    err_msg("registers must be different: a=%d, b=%d, c=%d, d=%d",
+            a, b, c, d)
   );
 }
 
@@ -148,7 +151,8 @@
            && b != c && b != d && b != e
                      && c != d && c != e
                                && d != e,
-    "registers must be different"
+    err_msg("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d",
+            a, b, c, d, e)
   );
 }
 
@@ -167,7 +171,8 @@
                      && c != d && c != e && c != f
                                && d != e && d != f
                                          && e != f,
-    "registers must be different"
+    err_msg("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d, f=%d",
+            a, b, c, d, e, f)
   );
 }
 
@@ -188,7 +193,8 @@
                                && d != e && d != f && d != g
                                          && e != f && e != g
                                                    && f != g,
-    "registers must be different"
+    err_msg("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d, f=%d, g=%d",
+            a, b, c, d, e, f, g)
   );
 }
 
@@ -211,7 +217,34 @@
                                          && e != f && e != g && e != h
                                                    && f != g && f != h
                                                              && g != h,
-    "registers must be different"
+    err_msg("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d, f=%d, g=%d, h=%d",
+            a, b, c, d, e, f, g, h)
+  );
+}
+
+
+inline void assert_different_registers(
+  AbstractRegister a,
+  AbstractRegister b,
+  AbstractRegister c,
+  AbstractRegister d,
+  AbstractRegister e,
+  AbstractRegister f,
+  AbstractRegister g,
+  AbstractRegister h,
+  AbstractRegister i
+) {
+  assert(
+    a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i
+           && b != c && b != d && b != e && b != f && b != g && b != h && b != i
+                     && c != d && c != e && c != f && c != g && c != h && c != i
+                               && d != e && d != f && d != g && d != h && d != i
+                                         && e != f && e != g && e != h && e != i
+                                                   && f != g && f != h && f != i
+                                                             && g != h && g != i
+                                                                       && h != i,
+    err_msg("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d, f=%d, g=%d, h=%d, i=%d",
+            a, b, c, d, e, f, g, h, i)
   );
 }
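
The hand-unrolled comparison chains assert that all C(n,2) pairs differ (36 comparisons for the new nine-register overload), and err_msg now reports the offending values. The same invariant as a loop, with toy ints standing in for AbstractRegister:

    #include <cassert>
    #include <cstdio>

    static void assert_different_registers(const int* regs, int n) {
      for (int i = 0; i < n; ++i)
        for (int j = i + 1; j < n; ++j)
          assert(regs[i] != regs[j] && "registers must be different");
    }

    int main() {
      int regs[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};  // 9 registers -> 36 pair checks
      assert_different_registers(regs, 9);
      printf("all 9 registers distinct\n");
      return 0;
    }
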
 
--- a/src/share/vm/c1/c1_CFGPrinter.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_CFGPrinter.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -33,7 +33,7 @@
 #ifndef PRODUCT
 
 
-class CFGPrinterOutput : public CHeapObj {
+class CFGPrinterOutput : public CHeapObj<mtCompiler> {
  private:
   outputStream* _output;
 
@@ -106,7 +106,7 @@
 
 
 CFGPrinterOutput::CFGPrinterOutput()
- : _output(new(ResourceObj::C_HEAP) fileStream("output.cfg"))
+ : _output(new(ResourceObj::C_HEAP, mtCompiler) fileStream("output.cfg"))
 {
 }
 
--- a/src/share/vm/c1/c1_Canonicalizer.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_Canonicalizer.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -42,6 +42,11 @@
   // the instruction stream (because the instruction list is embedded
   // in the instructions).
   if (canonical() != x) {
+#ifndef PRODUCT
+    if (!x->has_printable_bci()) {
+      x->set_printable_bci(bci());
+    }
+#endif
     if (PrintCanonicalization) {
       PrintValueVisitor do_print_value;
       canonical()->input_values_do(&do_print_value);
@@ -451,6 +456,28 @@
     }
     break;
   }
+  case vmIntrinsics::_isInstance          : {
+    assert(x->number_of_arguments() == 2, "wrong type");
+
+    InstanceConstant* c = x->argument_at(0)->type()->as_InstanceConstant();
+    if (c != NULL && !c->value()->is_null_object()) {
+      // ciInstance::java_mirror_type() returns non-NULL only for Java mirrors
+      ciType* t = c->value()->as_instance()->java_mirror_type();
+      if (t->is_klass()) {
+        // substitute cls.isInstance(obj) of a constant Class into
+        // an InstantOf instruction
+        InstanceOf* i = new InstanceOf(t->as_klass(), x->argument_at(1), x->state_before());
+        set_canonical(i);
+        // and try to canonicalize even further
+        do_InstanceOf(i);
+      } else {
+        assert(t->is_primitive_type(), "should be a primitive type");
+        // cls.isInstance(obj) always returns false for primitive classes
+        set_constant(0);
+      }
+    }
+    break;
+  }
   }
 }
 
@@ -540,6 +567,7 @@
   }
 }
 
+void Canonicalizer::do_TypeCast       (TypeCast*        x) {}
 void Canonicalizer::do_Invoke         (Invoke*          x) {}
 void Canonicalizer::do_NewInstance    (NewInstance*     x) {}
 void Canonicalizer::do_NewTypeArray   (NewTypeArray*    x) {}
@@ -677,8 +705,8 @@
                 return;
             }
           }
+          set_bci(cmp->state_before()->bci());
           set_canonical(canon);
-          set_bci(cmp->state_before()->bci());
         }
       }
     } else if (l->as_InstanceOf() != NULL) {
--- a/src/share/vm/c1/c1_Canonicalizer.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_Canonicalizer.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -74,6 +74,7 @@
   virtual void do_IfInstanceOf   (IfInstanceOf*    x);
   virtual void do_Convert        (Convert*         x);
   virtual void do_NullCheck      (NullCheck*       x);
+  virtual void do_TypeCast       (TypeCast*        x);
   virtual void do_Invoke         (Invoke*          x);
   virtual void do_NewInstance    (NewInstance*     x);
   virtual void do_NewTypeArray   (NewTypeArray*    x);
--- a/src/share/vm/c1/c1_CodeStubs.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_CodeStubs.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -574,71 +574,6 @@
 #endif // PRODUCT
 };
 
-// This G1 barrier code stub is used in Unsafe.getObject.
-// It generates a sequence of guards around the SATB
-// barrier code that are used to detect when we have
-// the referent field of a Reference object.
-// The first check is assumed to have been generated
-// in the code generated for Unsafe.getObject().
-
-class G1UnsafeGetObjSATBBarrierStub: public CodeStub {
- private:
-  LIR_Opr _val;
-  LIR_Opr _src;
-
-  LIR_Opr _tmp;
-  LIR_Opr _thread;
-
-  bool _gen_src_check;
-
- public:
-  // A G1 barrier that is guarded by generated guards that determine whether
-  // val (which is the result of Unsafe.getObject() should be recorded in an
-  // SATB log buffer. We could be reading the referent field of a Reference object
-  // using Unsafe.getObject() and we need to record the referent.
-  //
-  // * val is the operand returned by the unsafe.getObject routine.
-  // * src is the base object
-  // * tmp is a temp used to load the klass of src, and then reference type
-  // * thread is the thread object.
-
-  G1UnsafeGetObjSATBBarrierStub(LIR_Opr val, LIR_Opr src,
-                                LIR_Opr tmp, LIR_Opr thread,
-                                bool gen_src_check) :
-    _val(val), _src(src),
-    _tmp(tmp), _thread(thread),
-    _gen_src_check(gen_src_check)
-  {
-    assert(_val->is_register(), "should have already been loaded");
-    assert(_src->is_register(), "should have already been loaded");
-
-    assert(_tmp->is_register(), "should be a temporary register");
-  }
-
-  LIR_Opr val() const { return _val; }
-  LIR_Opr src() const { return _src; }
-
-  LIR_Opr tmp() const { return _tmp; }
-  LIR_Opr thread() const { return _thread; }
-
-  bool gen_src_check() const { return _gen_src_check; }
-
-  virtual void emit_code(LIR_Assembler* e);
-
-  virtual void visit(LIR_OpVisitState* visitor) {
-    visitor->do_slow_case();
-    visitor->do_input(_val);
-    visitor->do_input(_src);
-    visitor->do_input(_thread);
-
-    visitor->do_temp(_tmp);
-  }
-
-#ifndef PRODUCT
-  virtual void print_name(outputStream* out) const { out->print("G1UnsafeGetObjSATBBarrierStub"); }
-#endif // PRODUCT
-};
-
 class G1PostBarrierStub: public CodeStub {
  private:
   LIR_Opr _addr;
--- a/src/share/vm/c1/c1_Compilation.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_Compilation.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -523,7 +523,7 @@
   assert(msg != NULL, "bailout message must exist");
   if (!bailed_out()) {
     // keep first bailout message
-    if (PrintBailouts) tty->print_cr("compilation bailout: %s", msg);
+    if (PrintCompilation || PrintBailouts) tty->print_cr("compilation bailout: %s", msg);
     _bailout_msg = msg;
   }
 }
--- a/src/share/vm/c1/c1_Compiler.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_Compiler.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -55,7 +55,7 @@
 
 void Compiler::initialize_all() {
   BufferBlob* buffer_blob = CompilerThread::current()->get_buffer_blob();
-  Arena* arena = new Arena();
+  Arena* arena = new (mtCompiler) Arena();
   Runtime1::initialize(buffer_blob);
   FrameMap::initialize();
   // initialize data structures
--- a/src/share/vm/c1/c1_FrameMap.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_FrameMap.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -92,7 +92,6 @@
   for (i = 0; i < sizeargs;) {
     BasicType t = sig_bt[i];
     assert(t != T_VOID, "should be skipping these");
-
     LIR_Opr opr = map_to_opr(t, regs + i, outgoing);
     args->append(opr);
     if (opr->is_address()) {
--- a/src/share/vm/c1/c1_FrameMap.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_FrameMap.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -181,8 +181,8 @@
 
   // for outgoing calls, these also update the reserved area to
   // include space for arguments and any ABI area.
-  CallingConvention* c_calling_convention (const BasicTypeArray* signature);
-  CallingConvention* java_calling_convention (const BasicTypeArray* signature, bool outgoing);
+  CallingConvention* c_calling_convention(const BasicTypeArray* signature);
+  CallingConvention* java_calling_convention(const BasicTypeArray* signature, bool outgoing);
 
   // deopt support
   ByteSize sp_offset_for_orig_pc() { return sp_offset_for_monitor_base(_num_monitors); }
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -31,7 +31,7 @@
 #include "ci/ciCallSite.hpp"
 #include "ci/ciField.hpp"
 #include "ci/ciKlass.hpp"
-#include "ci/ciMethodHandle.hpp"
+#include "ci/ciMemberName.hpp"
 #include "compiler/compileBroker.hpp"
 #include "interpreter/bytecode.hpp"
 #include "runtime/sharedRuntime.hpp"
@@ -914,11 +914,11 @@
 
 void GraphBuilder::store_local(ValueType* type, int index) {
   Value x = pop(type);
-  store_local(state(), x, type, index);
+  store_local(state(), x, index);
 }
 
 
-void GraphBuilder::store_local(ValueStack* state, Value x, ValueType* type, int index) {
+void GraphBuilder::store_local(ValueStack* state, Value x, int index) {
   if (parsing_jsr()) {
     // We need to do additional tracking of the location of the return
     // address for jsrs since we don't handle arbitrary jsr/ret
@@ -1306,6 +1306,7 @@
       if (sw.dest_offset_at(i) < 0) has_bb = true;
     }
     // add default successor
+    if (sw.default_offset() < 0) has_bb = true;
     sux->at_put(i, block_at(bci() + sw.default_offset()));
     ValueStack* state_before = has_bb ? copy_state_before() : NULL;
     Instruction* res = append(new TableSwitch(ipop(), sux, sw.low_key(), state_before, has_bb));
@@ -1350,6 +1351,7 @@
       keys->at_put(i, pair.match());
     }
     // add default successor
+    if (sw.default_offset() < 0) has_bb = true;
     sux->at_put(i, block_at(bci() + sw.default_offset()));
     ValueStack* state_before = has_bb ? copy_state_before() : NULL;
     Instruction* res = append(new LookupSwitch(ipop(), sux, keys, state_before, has_bb));
@@ -1533,7 +1535,7 @@
         case T_ARRAY:
         case T_OBJECT:
           if (field_val.as_object()->should_be_constant()) {
-            constant =  new Constant(as_ValueType(field_val));
+            constant = new Constant(as_ValueType(field_val));
           }
           break;
 
@@ -1560,12 +1562,53 @@
         append(new StoreField(append(obj), offset, field, val, true, state_before, needs_patching));
       }
       break;
-    case Bytecodes::_getfield :
-      {
+    case Bytecodes::_getfield: {
+      // Check for compile-time constants, i.e., trusted final non-static fields.
+      Instruction* constant = NULL;
+      obj = apop();
+      ObjectType* obj_type = obj->type()->as_ObjectType();
+      if (obj_type->is_constant() && !PatchALot) {
+        ciObject* const_oop = obj_type->constant_value();
+        if (!const_oop->is_null_object()) {
+          if (field->is_constant()) {
+            ciConstant field_val = field->constant_value_of(const_oop);
+            BasicType field_type = field_val.basic_type();
+            switch (field_type) {
+            case T_ARRAY:
+            case T_OBJECT:
+              if (field_val.as_object()->should_be_constant()) {
+                constant = new Constant(as_ValueType(field_val));
+              }
+              break;
+            default:
+              constant = new Constant(as_ValueType(field_val));
+            }
+          } else {
+            // For CallSite objects treat the target field as a compile-time constant.
+            if (const_oop->is_call_site()) {
+              ciCallSite* call_site = const_oop->as_call_site();
+              if (field->is_call_site_target()) {
+                ciMethodHandle* target = call_site->get_target();
+                if (target != NULL) {  // just in case
+                  ciConstant field_val(T_OBJECT, target);
+                  constant = new Constant(as_ValueType(field_val));
+                  // Add a dependence for invalidation of the optimization.
+                  if (!call_site->is_constant_call_site()) {
+                    dependency_recorder()->assert_call_site_target_value(call_site, target);
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      if (constant != NULL) {
+        push(type, append(constant));
+      } else {
         if (state_before == NULL) {
           state_before = copy_state_for_exception();
         }
-        LoadField* load = new LoadField(apop(), offset, field, false, state_before, needs_patching);
+        LoadField* load = new LoadField(obj, offset, field, false, state_before, needs_patching);
         Value replacement = !needs_patching ? _memory->load(load) : load;
         if (replacement != load) {
           assert(replacement->is_linked() || !replacement->can_be_linked(), "should already by linked");
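
The rewritten _getfield case above folds loads in two situations: an ordinary constant (trusted final) field of a constant non-null receiver, and the target field of a CallSite, which additionally records an assert_call_site_target_value dependency so the compiled code is invalidated if the site is retargeted. A reduced sketch of that decision tree, using simplified stand-in types (hypothetical, not the real ci* API):

#include <cstdio>

// Simplified stand-ins for the ci* wrappers used by the real code.
struct Field    { bool is_constant; bool is_call_site_target; };
struct Receiver { bool is_constant; bool is_null; bool is_call_site; bool constant_call_site; };

enum FoldResult { no_fold, fold, fold_with_dependency };

// Fold plain constant fields directly; fold CallSite.target only while
// registering a dependency that invalidates the code on retargeting.
static FoldResult fold_getfield(const Receiver& obj, const Field& field) {
  if (!obj.is_constant || obj.is_null) return no_fold;
  if (field.is_constant)               return fold;
  if (obj.is_call_site && field.is_call_site_target) {
    return obj.constant_call_site ? fold : fold_with_dependency;
  }
  return no_fold;
}

int main() {
  Receiver call_site = { true, false, true, false };
  Field    target    = { false, true };
  std::printf("%d\n", fold_getfield(call_site, target));  // 2: fold + dependency
  return 0;
}
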
@@ -1573,22 +1616,23 @@
         } else {
           push(type, append(load));
         }
-        break;
-      }
-
-    case Bytecodes::_putfield :
-      { Value val = pop(type);
-        if (state_before == NULL) {
-          state_before = copy_state_for_exception();
-        }
-        StoreField* store = new StoreField(apop(), offset, field, val, false, state_before, needs_patching);
-        if (!needs_patching) store = _memory->store(store);
-        if (store != NULL) {
-          append(store);
-        }
       }
       break;
-    default                   :
+    }
+    case Bytecodes::_putfield: {
+      Value val = pop(type);
+      obj = apop();
+      if (state_before == NULL) {
+        state_before = copy_state_for_exception();
+      }
+      StoreField* store = new StoreField(obj, offset, field, val, false, state_before, needs_patching);
+      if (!needs_patching) store = _memory->store(store);
+      if (store != NULL) {
+        append(store);
+      }
+      break;
+    }
+    default:
       ShouldNotReachHere();
       break;
   }
@@ -1603,37 +1647,72 @@
 
 void GraphBuilder::invoke(Bytecodes::Code code) {
   bool will_link;
-  ciMethod* target = stream()->get_method(will_link);
+  ciSignature* declared_signature = NULL;
+  ciMethod*             target = stream()->get_method(will_link, &declared_signature);
+  ciKlass*              holder = stream()->get_declared_method_holder();
+  const Bytecodes::Code bc_raw = stream()->cur_bc_raw();
+  assert(declared_signature != NULL, "cannot be null");
+
+  // FIXME bail out for now
+  if (Bytecodes::has_optional_appendix(bc_raw) && !will_link) {
+    BAILOUT("unlinked call site (FIXME needs patching or recompile support)");
+  }
+
   // we have to make sure the argument size (incl. the receiver)
   // is correct for compilation (the call would fail later during
   // linkage anyway) - was bug (gri 7/28/99)
-  if (target->is_loaded() && target->is_static() != (code == Bytecodes::_invokestatic)) BAILOUT("will cause link error");
+  {
+    // Use raw to get rewritten bytecode.
+    const bool is_invokestatic = bc_raw == Bytecodes::_invokestatic;
+    const bool allow_static =
+          is_invokestatic ||
+          bc_raw == Bytecodes::_invokehandle ||
+          bc_raw == Bytecodes::_invokedynamic;
+    if (target->is_loaded()) {
+      if (( target->is_static() && !allow_static) ||
+          (!target->is_static() &&  is_invokestatic)) {
+        BAILOUT("will cause link error");
+      }
+    }
+  }
   ciInstanceKlass* klass = target->holder();
 
   // check if CHA possible: if so, change the code to invoke_special
   ciInstanceKlass* calling_klass = method()->holder();
-  ciKlass* holder = stream()->get_declared_method_holder();
   ciInstanceKlass* callee_holder = ciEnv::get_instance_klass_for_declared_method_holder(holder);
   ciInstanceKlass* actual_recv = callee_holder;
 
-  // some methods are obviously bindable without any type checks so
-  // convert them directly to an invokespecial.
-  if (target->is_loaded() && !target->is_abstract() &&
-      target->can_be_statically_bound() && code == Bytecodes::_invokevirtual) {
-    code = Bytecodes::_invokespecial;
+  // Some methods are obviously bindable without any type checks so
+  // convert them directly to an invokespecial or invokestatic.
+  if (target->is_loaded() && !target->is_abstract() && target->can_be_statically_bound()) {
+    switch (bc_raw) {
+    case Bytecodes::_invokevirtual:
+      code = Bytecodes::_invokespecial;
+      break;
+    case Bytecodes::_invokehandle:
+      code = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokespecial;
+      break;
+    }
   }
 
-  bool is_invokedynamic = code == Bytecodes::_invokedynamic;
+  // Push the appendix argument (MethodType, CallSite, etc.), if there is one.
+  if (stream()->has_appendix()) {
+    ciObject* appendix = stream()->get_appendix();
+    Value arg = append(new Constant(new ObjectConstant(appendix)));
+    apush(arg);
+  }
 
   // NEEDS_CLEANUP
-  // I've added the target-is_loaded() test below but I don't really understand
+  // I've added the target->is_loaded() test below but I don't really understand
   // how klass->is_loaded() can be true and yet target->is_loaded() is false.
   // this happened while running the JCK invokevirtual tests under doit.  TKR
   ciMethod* cha_monomorphic_target = NULL;
   ciMethod* exact_target = NULL;
   Value better_receiver = NULL;
   if (UseCHA && DeoptC1 && klass->is_loaded() && target->is_loaded() &&
-      !target->is_method_handle_invoke()) {
+      !(// %%% FIXME: Are both of these relevant?
+        target->is_method_handle_intrinsic() ||
+        target->is_compiled_lambda_form())) {
     Value receiver = NULL;
     ciInstanceKlass* receiver_klass = NULL;
     bool type_is_exact = false;
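
invoke() now keys several decisions off the raw (possibly rewritten) bytecode: invokehandle and invokedynamic may legitimately hit static targets, statically bindable virtual calls are strength-reduced to invokespecial or invokestatic, and any appendix registered for the call site is pushed as a trailing constant argument. A sketch of just the strength-reduction switch (hypothetical names, not HotSpot code):

#include <cstdio>

enum Bc { bc_invokevirtual, bc_invokespecial, bc_invokestatic,
          bc_invokehandle, bc_invokedynamic };

// A loaded, non-abstract, statically bindable target lets the builder pick
// the cheapest dispatch for the raw bytecode, as in the new switch above.
static Bc devirtualize(Bc bc_raw, bool loaded, bool is_abstract,
                       bool bindable, bool is_static) {
  if (!loaded || is_abstract || !bindable) return bc_raw;
  switch (bc_raw) {
    case bc_invokevirtual: return bc_invokespecial;
    case bc_invokehandle:  return is_static ? bc_invokestatic : bc_invokespecial;
    default:               return bc_raw;  // other codes are left alone
  }
}

int main() {
  std::printf("%d\n", devirtualize(bc_invokehandle, true, false, true, true));
  return 0;  // prints 2 (bc_invokestatic)
}
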
@@ -1692,7 +1771,9 @@
       // they are roughly equivalent to Object.
       ciInstanceKlass* singleton = NULL;
       if (target->holder()->nof_implementors() == 1) {
-        singleton = target->holder()->implementor(0);
+        singleton = target->holder()->implementor();
+        assert(singleton != NULL && singleton != target->holder(),
+               "just checking");
 
         assert(holder->is_interface(), "invokeinterface to non interface?");
         ciInstanceKlass* decl_interface = (ciInstanceKlass*)holder;
@@ -1757,23 +1838,15 @@
         code == Bytecodes::_invokedynamic) {
       ciMethod* inline_target = (cha_monomorphic_target != NULL) ? cha_monomorphic_target : target;
       bool success = false;
-      if (target->is_method_handle_invoke()) {
+      if (target->is_method_handle_intrinsic()) {
         // method handle invokes
-        success = !is_invokedynamic ? for_method_handle_inline(target) : for_invokedynamic_inline(target);
-      }
-      if (!success) {
+        success = try_method_handle_inline(target);
+      } else {
         // static binding => check if callee is ok
-        success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), better_receiver);
+        success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), code, better_receiver);
       }
       CHECK_BAILOUT();
 
-#ifndef PRODUCT
-      // printing
-      if (PrintInlining && !success) {
-        // if it was successfully inlined, then it was already printed.
-        print_inline_result(inline_target, success);
-      }
-#endif
       clear_inline_bailout();
       if (success) {
         // Register dependence if JVMTI has either breakpoint
@@ -1784,8 +1857,13 @@
         }
         return;
       }
+    } else {
+      print_inlining(target, "no static binding", /*success*/ false);
     }
+  } else {
+    print_inlining(target, "not inlineable", /*success*/ false);
   }
+
   // If we attempted an inline which did not succeed because of a
   // bailout during construction of the callee graph, the entire
   // compilation has to be aborted. This is fairly rare and currently
@@ -1799,16 +1877,14 @@
 
   // inlining not successful => standard invoke
   bool is_loaded = target->is_loaded();
-  bool has_receiver =
+  ValueType* result_type = as_ValueType(declared_signature->return_type());
+  ValueStack* state_before = copy_state_exhandling();
+
+  // The bytecode (code) might change in this method, so we check this very late.
+  const bool has_receiver =
     code == Bytecodes::_invokespecial   ||
     code == Bytecodes::_invokevirtual   ||
     code == Bytecodes::_invokeinterface;
-  ValueType* result_type = as_ValueType(target->return_type());
-
-  // We require the debug info to be the "state before" because
-  // invokedynamics may deoptimize.
-  ValueStack* state_before = is_invokedynamic ? copy_state_before() : copy_state_exhandling();
-
   Values* args = state()->pop_arguments(target->arg_size_no_receiver());
   Value recv = has_receiver ? apop() : NULL;
   int vtable_index = methodOopDesc::invalid_vtable_index;
@@ -1851,7 +1927,7 @@
       } else if (exact_target != NULL) {
         target_klass = exact_target->holder();
       }
-      profile_call(recv, target_klass);
+      profile_call(target, recv, target_klass);
     }
   }
 
@@ -2945,6 +3021,8 @@
   case vmIntrinsics::_dtan          : // fall through
   case vmIntrinsics::_dlog          : // fall through
   case vmIntrinsics::_dlog10        : // fall through
+  case vmIntrinsics::_dexp          : // fall through
+  case vmIntrinsics::_dpow          : // fall through
     {
       // Compiles where the root method is an intrinsic need a special
       // compilation environment because the bytecodes for the method
@@ -2965,6 +3043,9 @@
       _state = start_block->state()->copy_for_parsing();
       _last  = start_block;
       load_local(doubleType, 0);
+      if (scope->method()->intrinsic_id() == vmIntrinsics::_dpow) {
+        load_local(doubleType, 2);
+      }
 
       // Emit the intrinsic node.
       bool result = try_inline_intrinsics(scope->method());
@@ -2979,7 +3060,7 @@
 
   case vmIntrinsics::_Reference_get:
     {
-      if (UseG1GC) {
+      {
         // With java.lang.ref.reference.get() we must go through the
         // intrinsic - when G1 is enabled - even when get() is the root
         // method of the compile so that, if necessary, the value in
@@ -2991,6 +3072,9 @@
         // object removed from the list of discovered references during
         // reference processing.
 
+        // We also need the intrinsic to prevent commoning reads from this
+        // field across safepoints, since the GC can change its value.
+
         // Set up a stream so that appending instructions works properly.
         ciBytecodeStream s(scope->method());
         s.reset_to_bci(0);
@@ -3088,35 +3172,65 @@
 }
 
 
-bool GraphBuilder::try_inline(ciMethod* callee, bool holder_known, Value receiver) {
-  // Clear out any existing inline bailout condition
+bool GraphBuilder::try_inline(ciMethod* callee, bool holder_known, Bytecodes::Code bc, Value receiver) {
+  const char* msg = NULL;
+
+  // clear out any existing inline bailout condition
   clear_inline_bailout();
 
-  if (callee->should_exclude()) {
-    // callee is excluded
-    INLINE_BAILOUT("excluded by CompilerOracle")
-  } else if (callee->should_not_inline()) {
-    // callee is excluded
-    INLINE_BAILOUT("disallowed by CompilerOracle")
-  } else if (!callee->can_be_compiled()) {
-    // callee is not compilable (prob. has breakpoints)
-    INLINE_BAILOUT("not compilable (disabled)")
-  } else if (callee->intrinsic_id() != vmIntrinsics::_none && try_inline_intrinsics(callee)) {
-    // intrinsics can be native or not
+  // exclude methods we don't want to inline
+  msg = should_not_inline(callee);
+  if (msg != NULL) {
+    print_inlining(callee, msg, /*success*/ false);
+    return false;
+  }
+
+  // handle intrinsics
+  if (callee->intrinsic_id() != vmIntrinsics::_none) {
+    if (try_inline_intrinsics(callee)) {
+      print_inlining(callee, "intrinsic");
+      return true;
+    }
+    // try normal inlining
+  }
+
+  // certain methods cannot be parsed at all
+  msg = check_can_parse(callee);
+  if (msg != NULL) {
+    print_inlining(callee, msg, /*success*/ false);
+    return false;
+  }
+
+  // If the bytecode is not set, use the current one.
+  if (bc == Bytecodes::_illegal) {
+    bc = code();
+  }
+  if (try_inline_full(callee, holder_known, bc, receiver))
     return true;
-  } else if (callee->is_native()) {
-    // non-intrinsic natives cannot be inlined
-    INLINE_BAILOUT("non-intrinsic native")
-  } else if (callee->is_abstract()) {
-    INLINE_BAILOUT("abstract")
-  } else {
-    return try_inline_full(callee, holder_known, NULL, receiver);
-  }
+  print_inlining(callee, _inline_bailout_msg, /*success*/ false);
+  return false;
+}
+
+
+const char* GraphBuilder::check_can_parse(ciMethod* callee) const {
+  // Certain methods cannot be parsed at all:
+  if ( callee->is_native())            return "native method";
+  if ( callee->is_abstract())          return "abstract method";
+  if (!callee->can_be_compiled())      return "not compilable (disabled)";
+  return NULL;
+}
+
+
+// Negative filter: should the callee NOT be inlined?  Returns NULL (ok to inline) or a rejection msg.
+const char* GraphBuilder::should_not_inline(ciMethod* callee) const {
+  if ( callee->should_exclude())       return "excluded by CompilerOracle";
+  if ( callee->should_not_inline())    return "disallowed by CompilerOracle";
+  if ( callee->dont_inline())          return "don't inline by annotation";
+  return NULL;
 }
 
 
 bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
-  if (!InlineNatives           ) INLINE_BAILOUT("intrinsic method inlining disabled");
   if (callee->is_synchronized()) {
     // We don't currently support any synchronized intrinsics
     return false;
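
try_inline() is restructured around helpers that return NULL on success or a human-readable rejection reason, which feeds print_inlining() directly and replaces the old PRODUCT-guarded print_inline_result() pass. A standalone sketch of that reason-string pattern (all names hypothetical):

#include <cstdio>

struct Callee { bool excluded; bool dont_inline; };

// NULL means "ok to inline"; otherwise the string explains the rejection
// and doubles as the message handed to the inlining printer.
static const char* should_not_inline(const Callee& c) {
  if (c.excluded)    return "excluded by CompilerOracle";
  if (c.dont_inline) return "don't inline by annotation";
  return NULL;
}

static bool try_inline(const Callee& c) {
  if (const char* msg = should_not_inline(c)) {
    std::printf("inlining failed: %s\n", msg);  // single printing path
    return false;
  }
  std::printf("inlined\n");
  return true;
}

int main() {
  Callee c = { false, true };
  return try_inline(c) ? 0 : 1;
}
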
@@ -3124,14 +3238,31 @@
 
   // callee seems like a good candidate
   // determine id
+  vmIntrinsics::ID id = callee->intrinsic_id();
+  if (!InlineNatives && id != vmIntrinsics::_Reference_get) {
+    // InlineNatives does not control Reference.get
+    INLINE_BAILOUT("intrinsic method inlining disabled");
+  }
   bool preserves_state = false;
   bool cantrap = true;
-  vmIntrinsics::ID id = callee->intrinsic_id();
   switch (id) {
-    case vmIntrinsics::_arraycopy     :
+    case vmIntrinsics::_arraycopy:
       if (!InlineArrayCopy) return false;
       break;
 
+#ifdef TRACE_HAVE_INTRINSICS
+    case vmIntrinsics::_classID:
+    case vmIntrinsics::_threadID:
+      preserves_state = true;
+      cantrap = true;
+      break;
+
+    case vmIntrinsics::_counterTime:
+      preserves_state = true;
+      cantrap = false;
+      break;
+#endif
+
     case vmIntrinsics::_currentTimeMillis:
     case vmIntrinsics::_nanoTime:
       preserves_state = true;
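
Each intrinsic is classified by two flags: preserves_state (the debug state need not be recomputed) and cantrap (the operation may raise an exception), and the hunk above adds the TRACE intrinsics with their particular combinations. A compact sketch of such a flag table, modeled on the visible cases (hypothetical enum, not HotSpot code):

#include <cstdio>

enum IntrinsicId { class_id, counter_time, array_copy };

struct IntrinsicFlags { bool preserves_state; bool cantrap; };

// Mirrors the visible cases: _classID/_threadID preserve state and can trap,
// _counterTime preserves state and cannot trap.
static IntrinsicFlags classify(IntrinsicId id) {
  switch (id) {
    case class_id:     return { true,  true  };
    case counter_time: return { true,  false };
    default:           return { false, true  };  // conservative default
  }
}

int main() {
  IntrinsicFlags f = classify(counter_time);
  std::printf("preserves_state=%d cantrap=%d\n", f.preserves_state, f.cantrap);
  return 0;
}
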
@@ -3148,6 +3279,7 @@
       break;
 
     case vmIntrinsics::_getClass      :
+    case vmIntrinsics::_isInstance    :
       if (!InlineClassNatives) return false;
       preserves_state = true;
       break;
@@ -3165,18 +3297,13 @@
     case vmIntrinsics::_dtan          : // fall through
     case vmIntrinsics::_dlog          : // fall through
     case vmIntrinsics::_dlog10        : // fall through
+    case vmIntrinsics::_dexp          : // fall through
+    case vmIntrinsics::_dpow          : // fall through
       if (!InlineMathNatives) return false;
       cantrap = false;
       preserves_state = true;
       break;
 
-    // sun/misc/AtomicLong.attemptUpdate
-    case vmIntrinsics::_attemptUpdate :
-      if (!VM_Version::supports_cx8()) return false;
-      if (!InlineAtomicLong) return false;
-      preserves_state = true;
-      break;
-
     // Use special nodes for Unsafe instructions so we can more easily
     // perform an address-mode optimization on the raw variants
     case vmIntrinsics::_getObject : return append_unsafe_get_obj(callee, T_OBJECT,  false);
@@ -3257,11 +3384,10 @@
       return true;
 
     case vmIntrinsics::_Reference_get:
-      // It is only when G1 is enabled that we absolutely
-      // need to use the intrinsic version of Reference.get()
-      // so that the value in the referent field, if necessary,
-      // can be registered by the pre-barrier code.
-      if (!UseG1GC) return false;
+      // Use the intrinsic version of Reference.get() so that the value in
+      // the referent field can be registered by the G1 pre-barrier code.
+      // Also to prevent commoning reads from this field across safepoints,
+      // since the GC can change its value.
       preserves_state = true;
       break;
 
@@ -3286,7 +3412,7 @@
           recv = args->at(0);
           null_check(recv);
         }
-        profile_call(recv, NULL);
+        profile_call(callee, recv, NULL);
       }
     }
   }
@@ -3297,13 +3423,6 @@
   Value value = append_split(result);
   if (result_type != voidType) push(result_type, value);
 
-#ifndef PRODUCT
-  // printing
-  if (PrintInlining) {
-    print_inline_result(callee, true);
-  }
-#endif
-
   // done
   return true;
 }
@@ -3459,7 +3578,7 @@
 }
 
 
-bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, BlockBegin* cont_block, Value receiver) {
+bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, Bytecodes::Code bc, Value receiver) {
   assert(!callee->is_native(), "callee must not be native");
   if (CompilationPolicy::policy()->should_not_inline(compilation()->env(), callee)) {
     INLINE_BAILOUT("inlining prohibited by policy");
@@ -3487,11 +3606,13 @@
   }
 
   // now perform tests that are based on flag settings
-  if (callee->should_inline()) {
+  if (callee->force_inline() || callee->should_inline()) {
     // ignore heuristic controls on inlining
+    if (callee->force_inline())
+      print_inlining(callee, "force inline by annotation");
   } else {
-    if (inline_level() > MaxInlineLevel                         ) INLINE_BAILOUT("too-deep inlining");
-    if (recursive_inline_level(callee) > MaxRecursiveInlineLevel) INLINE_BAILOUT("too-deep recursive inlining");
+    if (inline_level() > MaxInlineLevel                         ) INLINE_BAILOUT("inlining too deep");
+    if (recursive_inline_level(callee) > MaxRecursiveInlineLevel) INLINE_BAILOUT("recursive inlining too deep");
     if (callee->code_size_for_inlining() > max_inline_size()    ) INLINE_BAILOUT("callee is too large");
 
     // don't inline throwable methods unless the inlining tree is rooted in a throwable class
@@ -3510,28 +3631,25 @@
     if (compilation()->env()->num_inlined_bytecodes() > DesiredMethodLimit) {
       INLINE_BAILOUT("total inlining greater than DesiredMethodLimit");
     }
+    // printing
+    print_inlining(callee, "");
   }
 
-#ifndef PRODUCT
-      // printing
-  if (PrintInlining) {
-    print_inline_result(callee, true);
-  }
-#endif
-
   // NOTE: Bailouts from this point on, which occur at the
   // GraphBuilder level, do not cause bailout just of the inlining but
   // in fact of the entire compilation.
 
   BlockBegin* orig_block = block();
 
+  const bool is_invokedynamic = bc == Bytecodes::_invokedynamic;
+  const bool has_receiver = (bc != Bytecodes::_invokestatic && !is_invokedynamic);
+
   const int args_base = state()->stack_size() - callee->arg_size();
   assert(args_base >= 0, "stack underflow during inlining");
 
   // Insert null check if necessary
   Value recv = NULL;
-  if (code() != Bytecodes::_invokestatic &&
-      code() != Bytecodes::_invokedynamic) {
+  if (has_receiver) {
     // note: null check must happen even if first instruction of callee does
     //       an implicit null check since the callee is in a different scope
     //       and we must make sure exception handling does the right thing
@@ -3547,7 +3665,7 @@
     compilation()->set_would_profile(true);
 
     if (profile_calls()) {
-      profile_call(recv, holder_known ? callee->holder() : NULL);
+      profile_call(callee, recv, holder_known ? callee->holder() : NULL);
     }
   }
 
@@ -3556,7 +3674,7 @@
   // fall-through of control flow, all return instructions of the
   // callee will need to be replaced by Goto's pointing to this
   // continuation point.
-  BlockBegin* cont = cont_block != NULL ? cont_block : block_at(next_bci());
+  BlockBegin* cont = block_at(next_bci());
   bool continuation_existed = true;
   if (cont == NULL) {
     cont = new BlockBegin(next_bci());
@@ -3589,17 +3707,10 @@
   // note: this will also ensure that all arguments are computed before being passed
   ValueStack* callee_state = state();
   ValueStack* caller_state = state()->caller_state();
-  { int i = args_base;
-    while (i < caller_state->stack_size()) {
-      const int par_no = i - args_base;
-      Value  arg = caller_state->stack_at_inc(i);
-      // NOTE: take base() of arg->type() to avoid problems storing
-      // constants
-      if (receiver != NULL && par_no == 0) {
-        arg = receiver;
-      }
-      store_local(callee_state, arg, arg->type()->base(), par_no);
-    }
+  for (int i = args_base; i < caller_state->stack_size(); ) {
+    const int arg_no = i - args_base;
+    Value arg = caller_state->stack_at_inc(i);
+    store_local(callee_state, arg, arg_no);
   }
 
   // Remove args from stack.
@@ -3675,29 +3786,27 @@
   // block merging. This allows load elimination and CSE to take place
   // across multiple callee scopes if they are relatively simple, and
   // is currently essential to making inlining profitable.
-  if (cont_block == NULL) {
-    if (num_returns() == 1
-        && block() == orig_block
-        && block() == inline_cleanup_block()) {
-      _last  = inline_cleanup_return_prev();
-      _state = inline_cleanup_state();
-    } else if (continuation_preds == cont->number_of_preds()) {
-      // Inlining caused that the instructions after the invoke in the
-      // caller are not reachable any more. So skip filling this block
-      // with instructions!
-      assert(cont == continuation(), "");
+  if (num_returns() == 1
+      && block() == orig_block
+      && block() == inline_cleanup_block()) {
+    _last  = inline_cleanup_return_prev();
+    _state = inline_cleanup_state();
+  } else if (continuation_preds == cont->number_of_preds()) {
+    // Inlining caused the instructions after the invoke in the
+    // caller to become unreachable. So skip filling this block
+    // with instructions!
+    assert(cont == continuation(), "");
+    assert(_last && _last->as_BlockEnd(), "");
+    _skip_block = true;
+  } else {
+    // Resume parsing in continuation block unless it was already parsed.
+    // Note that if we don't change _last here, iteration in
+    // iterate_bytecodes_for_block will stop when we return.
+    if (!continuation()->is_set(BlockBegin::was_visited_flag)) {
+      // add continuation to work list instead of parsing it immediately
       assert(_last && _last->as_BlockEnd(), "");
+      scope_data()->parent()->add_to_work_list(continuation());
       _skip_block = true;
-    } else {
-      // Resume parsing in continuation block unless it was already parsed.
-      // Note that if we don't change _last here, iteration in
-      // iterate_bytecodes_for_block will stop when we return.
-      if (!continuation()->is_set(BlockBegin::was_visited_flag)) {
-        // add continuation to work list instead of parsing it immediately
-        assert(_last && _last->as_BlockEnd(), "");
-        scope_data()->parent()->add_to_work_list(continuation());
-        _skip_block = true;
-      }
     }
   }
 
@@ -3714,115 +3823,89 @@
 }
 
 
-bool GraphBuilder::for_method_handle_inline(ciMethod* callee) {
-  assert(!callee->is_static(), "change next line");
-  int index = state()->stack_size() - (callee->arg_size_no_receiver() + 1);
-  Value receiver = state()->stack_at(index);
-
-  if (receiver->type()->is_constant()) {
-    ciMethodHandle* method_handle = receiver->type()->as_ObjectType()->constant_value()->as_method_handle();
-
-    // Set the callee to have access to the class and signature in
-    // the MethodHandleCompiler.
-    method_handle->set_callee(callee);
-    method_handle->set_caller(method());
-
-    // Get an adapter for the MethodHandle.
-    ciMethod* method_handle_adapter = method_handle->get_method_handle_adapter();
-    if (method_handle_adapter != NULL) {
-      return try_inline(method_handle_adapter, /*holder_known=*/ true);
-    }
-  } else if (receiver->as_CheckCast()) {
-    // Match MethodHandle.selectAlternative idiom
-    Phi* phi = receiver->as_CheckCast()->obj()->as_Phi();
-
-    if (phi != NULL && phi->operand_count() == 2) {
-      // Get the two MethodHandle inputs from the Phi.
-      Value op1 = phi->operand_at(0);
-      Value op2 = phi->operand_at(1);
-      ObjectType* op1type = op1->type()->as_ObjectType();
-      ObjectType* op2type = op2->type()->as_ObjectType();
-
-      if (op1type->is_constant() && op2type->is_constant()) {
-        ciMethodHandle* mh1 = op1type->constant_value()->as_method_handle();
-        ciMethodHandle* mh2 = op2type->constant_value()->as_method_handle();
-
-        // Set the callee to have access to the class and signature in
-        // the MethodHandleCompiler.
-        mh1->set_callee(callee);
-        mh1->set_caller(method());
-        mh2->set_callee(callee);
-        mh2->set_caller(method());
-
-        // Get adapters for the MethodHandles.
-        ciMethod* mh1_adapter = mh1->get_method_handle_adapter();
-        ciMethod* mh2_adapter = mh2->get_method_handle_adapter();
-
-        if (mh1_adapter != NULL && mh2_adapter != NULL) {
-          set_inline_cleanup_info();
-
-          // Build the If guard
-          BlockBegin* one = new BlockBegin(next_bci());
-          BlockBegin* two = new BlockBegin(next_bci());
-          BlockBegin* end = new BlockBegin(next_bci());
-          Instruction* iff = append(new If(phi, If::eql, false, op1, one, two, NULL, false));
-          block()->set_end(iff->as_BlockEnd());
-
-          // Connect up the states
-          one->merge(block()->end()->state());
-          two->merge(block()->end()->state());
-
-          // Save the state for the second inlinee
-          ValueStack* state_before = copy_state_before();
-
-          // Parse first adapter
-          _last = _block = one;
-          if (!try_inline_full(mh1_adapter, /*holder_known=*/ true, end, NULL)) {
-            restore_inline_cleanup_info();
-            block()->clear_end();  // remove appended iff
-            return false;
-          }
-
-          // Parse second adapter
-          _last = _block = two;
-          _state = state_before;
-          if (!try_inline_full(mh2_adapter, /*holder_known=*/ true, end, NULL)) {
-            restore_inline_cleanup_info();
-            block()->clear_end();  // remove appended iff
-            return false;
-          }
-
-          connect_to_end(end);
+bool GraphBuilder::try_method_handle_inline(ciMethod* callee) {
+  ValueStack* state_before = state()->copy_for_parsing();
+  vmIntrinsics::ID iid = callee->intrinsic_id();
+  switch (iid) {
+  case vmIntrinsics::_invokeBasic:
+    {
+      // get MethodHandle receiver
+      const int args_base = state()->stack_size() - callee->arg_size();
+      ValueType* type = state()->stack_at(args_base)->type();
+      if (type->is_constant()) {
+        ciMethod* target = type->as_ObjectType()->constant_value()->as_method_handle()->get_vmtarget();
+        guarantee(!target->is_method_handle_intrinsic(), "should not happen");  // XXX remove
+        Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
+        if (try_inline(target, /*holder_known*/ true, bc)) {
           return true;
         }
+      } else {
+        print_inlining(callee, "receiver not constant", /*success*/ false);
       }
     }
+    break;
+
+  case vmIntrinsics::_linkToVirtual:
+  case vmIntrinsics::_linkToStatic:
+  case vmIntrinsics::_linkToSpecial:
+  case vmIntrinsics::_linkToInterface:
+    {
+      // pop MemberName argument
+      const int args_base = state()->stack_size() - callee->arg_size();
+      ValueType* type = apop()->type();
+      if (type->is_constant()) {
+        ciMethod* target = type->as_ObjectType()->constant_value()->as_member_name()->get_vmtarget();
+        // If the target is another method handle invoke, try recursively
+        // to get a better target.
+        if (target->is_method_handle_intrinsic()) {
+          if (try_method_handle_inline(target)) {
+            return true;
+          }
+        } else {
+          ciSignature* signature = target->signature();
+          const int receiver_skip = target->is_static() ? 0 : 1;
+          // Cast receiver to its type.
+          if (!target->is_static()) {
+            ciKlass* tk = signature->accessing_klass();
+            Value obj = state()->stack_at(args_base);
+            if (obj->exact_type() == NULL &&
+                obj->declared_type() != tk && tk != compilation()->env()->Object_klass()) {
+              TypeCast* c = new TypeCast(tk, obj, state_before);
+              append(c);
+              state()->stack_at_put(args_base, c);
+            }
+          }
+          // Cast reference arguments to their types.
+          for (int i = 0, j = 0; i < signature->count(); i++) {
+            ciType* t = signature->type_at(i);
+            if (t->is_klass()) {
+              ciKlass* tk = t->as_klass();
+              Value obj = state()->stack_at(args_base + receiver_skip + j);
+              if (obj->exact_type() == NULL &&
+                  obj->declared_type() != tk && tk != compilation()->env()->Object_klass()) {
+                TypeCast* c = new TypeCast(t, obj, state_before);
+                append(c);
+                state()->stack_at_put(args_base + receiver_skip + j, c);
+              }
+            }
+            j += t->size();  // long and double take two slots
+          }
+          Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
+          if (try_inline(target, /*holder_known*/ true, bc)) {
+            return true;
+          }
+        }
+      } else {
+        print_inlining(callee, "MemberName not constant", /*success*/ false);
+      }
+    }
+    break;
+
+  default:
+    fatal(err_msg("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
+    break;
   }
-  return false;
-}
-
-
-bool GraphBuilder::for_invokedynamic_inline(ciMethod* callee) {
-  // Get the MethodHandle from the CallSite.
-  ciCallSite*     call_site     = stream()->get_call_site();
-  ciMethodHandle* method_handle = call_site->get_target();
-
-  // Set the callee to have access to the class and signature in the
-  // MethodHandleCompiler.
-  method_handle->set_callee(callee);
-  method_handle->set_caller(method());
-
-  // Get an adapter for the MethodHandle.
-  ciMethod* method_handle_adapter = method_handle->get_invokedynamic_adapter();
-  if (method_handle_adapter != NULL) {
-    if (try_inline(method_handle_adapter, /*holder_known=*/ true)) {
-      // Add a dependence for invalidation of the optimization.
-      if (!call_site->is_constant_call_site()) {
-        dependency_recorder()->assert_call_site_target_value(call_site, method_handle);
-      }
-      return true;
-    }
-  }
+  set_state(state_before);
   return false;
 }
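
For the linkTo* intrinsics the new code walks the signature and inserts a TypeCast for each reference argument whose type is not already exact, advancing the slot cursor by t->size() so that long and double arguments keep the indices aligned. A standalone sketch of that slot walk (hypothetical types, not the real ciSignature API):

#include <cstdio>

struct ArgType { bool is_reference; int size; };  // size is 1, or 2 for long/double

int main() {
  // Signature (Object, long, Object): the reference args sit at slots 0 and 3.
  ArgType signature[] = { { true, 1 }, { false, 2 }, { true, 1 } };
  const int args_base = 10;  // stack index of the first argument
  int j = 0;
  for (const ArgType& t : signature) {
    if (t.is_reference) {
      std::printf("would insert TypeCast at stack slot %d\n", args_base + j);
    }
    j += t.size;  // long and double take two slots
  }
  return 0;
}
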
 
@@ -4014,22 +4097,24 @@
 }
 
 
-#ifndef PRODUCT
-void GraphBuilder::print_inline_result(ciMethod* callee, bool res) {
-  CompileTask::print_inlining(callee, scope()->level(), bci(), _inline_bailout_msg);
-  if (res && CIPrintMethodCodes) {
+void GraphBuilder::print_inlining(ciMethod* callee, const char* msg, bool success) {
+  if (!PrintInlining)  return;
+  assert(msg != NULL, "must be");
+  CompileTask::print_inlining(callee, scope()->level(), bci(), msg);
+  if (success && CIPrintMethodCodes) {
     callee->print_codes();
   }
 }
 
 
+#ifndef PRODUCT
 void GraphBuilder::print_stats() {
   vmap()->print();
 }
 #endif // PRODUCT
 
-void GraphBuilder::profile_call(Value recv, ciKlass* known_holder) {
-  append(new ProfileCall(method(), bci(), recv, known_holder));
+void GraphBuilder::profile_call(ciMethod* callee, Value recv, ciKlass* known_holder) {
+  append(new ProfileCall(method(), bci(), callee, recv, known_holder));
 }
 
 void GraphBuilder::profile_invocation(ciMethod* callee, ValueStack* state) {
--- a/src/share/vm/c1/c1_GraphBuilder.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -225,7 +225,7 @@
   void load_constant();
   void load_local(ValueType* type, int index);
   void store_local(ValueType* type, int index);
-  void store_local(ValueStack* state, Value value, ValueType* type, int index);
+  void store_local(ValueStack* state, Value value, int index);
   void load_indexed (BasicType type);
   void store_indexed(BasicType type);
   void stack_op(Bytecodes::Code code);
@@ -337,14 +337,16 @@
   void fill_sync_handler(Value lock, BlockBegin* sync_handler, bool default_handler = false);
 
   // inliners
-  bool try_inline(           ciMethod* callee, bool holder_known, Value receiver = NULL);
+  bool try_inline(           ciMethod* callee, bool holder_known, Bytecodes::Code bc = Bytecodes::_illegal, Value receiver = NULL);
   bool try_inline_intrinsics(ciMethod* callee);
-  bool try_inline_full(      ciMethod* callee, bool holder_known, BlockBegin* cont_block, Value receiver);
+  bool try_inline_full(      ciMethod* callee, bool holder_known, Bytecodes::Code bc = Bytecodes::_illegal, Value receiver = NULL);
   bool try_inline_jsr(int jsr_dest_bci);
 
+  const char* check_can_parse(ciMethod* callee) const;
+  const char* should_not_inline(ciMethod* callee) const;
+
   // JSR 292 support
-  bool for_method_handle_inline(ciMethod* callee);
-  bool for_invokedynamic_inline(ciMethod* callee);
+  bool try_method_handle_inline(ciMethod* callee);
 
   // helpers
   void inline_bailout(const char* msg);
@@ -366,9 +368,9 @@
   bool append_unsafe_prefetch(ciMethod* callee, bool is_store, bool is_static);
   void append_unsafe_CAS(ciMethod* callee);
 
-  NOT_PRODUCT(void print_inline_result(ciMethod* callee, bool res);)
+  void print_inlining(ciMethod* callee, const char* msg, bool success = true);
 
-  void profile_call(Value recv, ciKlass* predicted_holder);
+  void profile_call(ciMethod* callee, Value recv, ciKlass* predicted_holder);
   void profile_invocation(ciMethod* inlinee, ValueStack* state);
 
   // Shortcuts to profiling control.
--- a/src/share/vm/c1/c1_Instruction.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_Instruction.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -161,6 +161,12 @@
   return NULL;
 }
 
+ciType* Constant::exact_type() const {
+  if (type()->is_object()) {
+    return type()->as_ObjectType()->exact_type();
+  }
+  return NULL;
+}
 
 ciType* LoadIndexed::exact_type() const {
   ciType* array_type = array()->exact_type();
@@ -363,9 +369,6 @@
   _signature = new BasicTypeList(number_of_arguments() + (has_receiver() ? 1 : 0));
   if (has_receiver()) {
     _signature->append(as_BasicType(receiver()->type()));
-  } else if (is_invokedynamic()) {
-    // Add the synthetic MethodHandle argument to the signature.
-    _signature->append(T_OBJECT);
   }
   for (int i = 0; i < number_of_arguments(); i++) {
     ValueType* t = argument_at(i)->type();
--- a/src/share/vm/c1/c1_Instruction.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_Instruction.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -66,6 +66,7 @@
 class     IfOp;
 class   Convert;
 class   NullCheck;
+class   TypeCast;
 class   OsrEntry;
 class   ExceptionObject;
 class   StateSplit;
@@ -174,6 +175,7 @@
   virtual void do_IfOp           (IfOp*            x) = 0;
   virtual void do_Convert        (Convert*         x) = 0;
   virtual void do_NullCheck      (NullCheck*       x) = 0;
+  virtual void do_TypeCast       (TypeCast*        x) = 0;
   virtual void do_Invoke         (Invoke*          x) = 0;
   virtual void do_NewInstance    (NewInstance*     x) = 0;
   virtual void do_NewTypeArray   (NewTypeArray*    x) = 0;
@@ -302,9 +304,8 @@
 
   void update_exception_state(ValueStack* state);
 
-  bool has_printable_bci() const                 { return NOT_PRODUCT(_printable_bci != -99) PRODUCT_ONLY(false); }
-
- protected:
+ //protected:
+ public:
   void set_type(ValueType* type) {
     assert(type != NULL, "type must exist");
     _type = type;
@@ -392,8 +393,9 @@
   // accessors
   int id() const                                 { return _id; }
 #ifndef PRODUCT
+  bool has_printable_bci() const                 { return _printable_bci != -99; }
   int printable_bci() const                      { assert(has_printable_bci(), "_printable_bci should have been set"); return _printable_bci; }
-  void set_printable_bci(int bci)                { NOT_PRODUCT(_printable_bci = bci;) }
+  void set_printable_bci(int bci)                { _printable_bci = bci; }
 #endif
   int use_count() const                          { return _use_count; }
   int pin_state() const                          { return _pin_state; }
@@ -486,6 +488,7 @@
   virtual TypeCheck*        as_TypeCheck()       { return NULL; }
   virtual CheckCast*        as_CheckCast()       { return NULL; }
   virtual InstanceOf*       as_InstanceOf()      { return NULL; }
+  virtual TypeCast*         as_TypeCast()        { return NULL; }
   virtual AccessMonitor*    as_AccessMonitor()   { return NULL; }
   virtual MonitorEnter*     as_MonitorEnter()    { return NULL; }
   virtual MonitorExit*      as_MonitorExit()     { return NULL; }
@@ -576,6 +579,7 @@
   , _block(b)
   , _index(index)
   {
+    NOT_PRODUCT(set_printable_bci(Value(b)->printable_bci()));
     if (type->is_illegal()) {
       make_illegal();
     }
@@ -631,13 +635,15 @@
     : Instruction(type)
     , _java_index(index)
     , _declared_type(declared)
-  {}
+  {
+    NOT_PRODUCT(set_printable_bci(-1));
+  }
 
   // accessors
   int java_index() const                         { return _java_index; }
 
-  ciType* declared_type() const                  { return _declared_type; }
-  ciType* exact_type() const;
+  virtual ciType* declared_type() const          { return _declared_type; }
+  virtual ciType* exact_type() const;
 
   // generic
   virtual void input_values_do(ValueVisitor* f)   { /* no values */ }
@@ -648,13 +654,13 @@
  public:
   // creation
   Constant(ValueType* type):
-      Instruction(type, NULL, true)
+      Instruction(type, NULL, /*type_is_constant*/ true)
   {
     assert(type->is_constant(), "must be a constant");
   }
 
   Constant(ValueType* type, ValueStack* state_before):
-    Instruction(type, state_before, true)
+    Instruction(type, state_before, /*type_is_constant*/ true)
   {
     assert(state_before != NULL, "only used for constants which need patching");
     assert(type->is_constant(), "must be a constant");
@@ -668,6 +674,7 @@
   virtual intx hash() const;
   virtual bool is_equal(Value v) const;
 
+  virtual ciType* exact_type() const;
 
   enum CompareResult { not_comparable = -1, cond_false, cond_true };
 
@@ -1101,6 +1108,29 @@
 };
 
 
+// This node casts the type of another node to a more precise declared type.
+LEAF(TypeCast, Instruction)
+ private:
+  ciType* _declared_type;
+  Value   _obj;
+
+ public:
+  // The type of this node is the same as the object's type (and it might be constant).
+  TypeCast(ciType* type, Value obj, ValueStack* state_before)
+  : Instruction(obj->type(), state_before, obj->type()->is_constant()),
+    _declared_type(type),
+    _obj(obj) {}
+
+  // accessors
+  ciType* declared_type() const                  { return _declared_type; }
+  Value   obj() const                            { return _obj; }
+
+  // generic
+  virtual void input_values_do(ValueVisitor* f)  { f->visit(&_obj); }
+};
+
+
 BASE(StateSplit, Instruction)
  private:
   ValueStack* _state;
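
do_TypeCast is declared pure virtual in the instruction visitor, so every concrete visitor in C1 (printer, canonicalizer, LIR generator, value numbering, ...) must implement it before the change compiles; new HIR nodes therefore cannot be silently ignored. A heavily reduced sketch of that shape (hypothetical, not the real class hierarchy):

#include <cstdio>

class TypeCast;

class InstructionVisitor {
 public:
  virtual ~InstructionVisitor() {}
  // Pure virtual: adding a node forces every visitor to handle it.
  virtual void do_TypeCast(TypeCast* x) = 0;
};

class TypeCast {
 public:
  void visit(InstructionVisitor* v) { v->do_TypeCast(this); }
};

class Printer : public InstructionVisitor {
 public:
  virtual void do_TypeCast(TypeCast* /*x*/) { std::printf("type_cast(...)\n"); }
};

int main() {
  TypeCast tc;
  Printer p;
  tc.visit(&p);
  return 0;
}
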
@@ -1164,6 +1194,7 @@
 
   // JSR 292 support
   bool is_invokedynamic() const                  { return code() == Bytecodes::_invokedynamic; }
+  bool is_method_handle_intrinsic() const        { return target()->is_method_handle_intrinsic(); }
 
   virtual bool needs_exception_state() const     { return false; }
 
@@ -2275,14 +2306,16 @@
  private:
   ciMethod* _method;
   int       _bci_of_invoke;
+  ciMethod* _callee;         // the method that is called at the given bci
   Value     _recv;
   ciKlass*  _known_holder;
 
  public:
-  ProfileCall(ciMethod* method, int bci, Value recv, ciKlass* known_holder)
+  ProfileCall(ciMethod* method, int bci, ciMethod* callee, Value recv, ciKlass* known_holder)
     : Instruction(voidType)
     , _method(method)
     , _bci_of_invoke(bci)
+    , _callee(callee)
     , _recv(recv)
     , _known_holder(known_holder)
   {
@@ -2292,6 +2325,7 @@
 
   ciMethod* method()      { return _method; }
   int bci_of_invoke()     { return _bci_of_invoke; }
+  ciMethod* callee()      { return _callee; }
   Value recv()            { return _recv; }
   ciKlass* known_holder() { return _known_holder; }
 
--- a/src/share/vm/c1/c1_InstructionPrinter.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_InstructionPrinter.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -137,12 +137,16 @@
       ciMethod* m = (ciMethod*)value;
       output()->print("<method %s.%s>", m->holder()->name()->as_utf8(), m->name()->as_utf8());
     } else {
-      output()->print("<object " PTR_FORMAT ">", value->constant_encoding());
+      output()->print("<object " PTR_FORMAT " klass=", value->constant_encoding());
+      print_klass(value->klass());
+      output()->print(">");
     }
   } else if (type->as_InstanceConstant() != NULL) {
     ciInstance* value = type->as_InstanceConstant()->value();
     if (value->is_loaded()) {
-      output()->print("<instance " PTR_FORMAT ">", value->constant_encoding());
+      output()->print("<instance " PTR_FORMAT " klass=", value->constant_encoding());
+      print_klass(value->klass());
+      output()->print(">");
     } else {
       output()->print("<unloaded instance " PTR_FORMAT ">", value);
     }
@@ -453,6 +457,14 @@
 }
 
 
+void InstructionPrinter::do_TypeCast(TypeCast* x) {
+  output()->print("type_cast(");
+  print_value(x->obj());
+  output()->print(") ");
+  print_klass(x->declared_type()->klass());
+}
+
+
 void InstructionPrinter::do_Invoke(Invoke* x) {
   if (x->receiver() != NULL) {
     print_value(x->receiver());
--- a/src/share/vm/c1/c1_InstructionPrinter.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_InstructionPrinter.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -101,6 +101,7 @@
   virtual void do_IfOp           (IfOp*            x);
   virtual void do_Convert        (Convert*         x);
   virtual void do_NullCheck      (NullCheck*       x);
+  virtual void do_TypeCast       (TypeCast*        x);
   virtual void do_Invoke         (Invoke*          x);
   virtual void do_NewInstance    (NewInstance*     x);
   virtual void do_NewTypeArray   (NewTypeArray*    x);
--- a/src/share/vm/c1/c1_LIR.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_LIR.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -624,11 +624,13 @@
     {
       assert(op->as_Op2() != NULL, "must be");
       LIR_Op2* op2 = (LIR_Op2*)op;
+      assert(op2->_tmp2->is_illegal() && op2->_tmp3->is_illegal() &&
+             op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used");
 
       if (op2->_info)                     do_info(op2->_info);
       if (op2->_opr1->is_valid())         do_input(op2->_opr1);
       if (op2->_opr2->is_valid())         do_input(op2->_opr2);
-      if (op2->_tmp->is_valid())          do_temp(op2->_tmp);
+      if (op2->_tmp1->is_valid())         do_temp(op2->_tmp1);
       if (op2->_result->is_valid())       do_output(op2->_result);
 
       break;
@@ -641,7 +643,8 @@
       assert(op->as_Op2() != NULL, "must be");
       LIR_Op2* op2 = (LIR_Op2*)op;
 
-      assert(op2->_info == NULL && op2->_tmp->is_illegal(), "not used");
+      assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() &&
+             op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used");
       assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used");
 
       do_input(op2->_opr1);
@@ -665,10 +668,12 @@
       assert(op2->_opr1->is_valid(), "used");
       assert(op2->_opr2->is_valid(), "used");
       assert(op2->_result->is_valid(), "used");
+      assert(op2->_tmp2->is_illegal() && op2->_tmp3->is_illegal() &&
+             op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used");
 
       do_input(op2->_opr1); do_temp(op2->_opr1);
       do_input(op2->_opr2); do_temp(op2->_opr2);
-      if (op2->_tmp->is_valid()) do_temp(op2->_tmp);
+      if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1);
       do_output(op2->_result);
 
       break;
@@ -682,6 +687,8 @@
       if (op2->_opr1->is_valid())         do_temp(op2->_opr1);
       if (op2->_opr2->is_valid())         do_input(op2->_opr2); // exception object is input parameter
       assert(op2->_result->is_illegal(), "no result");
+      assert(op2->_tmp2->is_illegal() && op2->_tmp3->is_illegal() &&
+             op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used");
 
       break;
     }
@@ -702,7 +709,8 @@
     case lir_sin:
     case lir_cos:
     case lir_log:
-    case lir_log10: {
+    case lir_log10:
+    case lir_exp: {
       assert(op->as_Op2() != NULL, "must be");
       LIR_Op2* op2 = (LIR_Op2*)op;
 
@@ -711,16 +719,47 @@
       // Register input operand as temp to guarantee that it doesn't
       // overlap with the input.
       assert(op2->_info == NULL, "not used");
+      assert(op2->_tmp5->is_illegal(), "not used");
+      assert(op2->_tmp2->is_valid() == (op->code() == lir_exp), "not used");
+      assert(op2->_tmp3->is_valid() == (op->code() == lir_exp), "not used");
+      assert(op2->_tmp4->is_valid() == (op->code() == lir_exp), "not used");
       assert(op2->_opr1->is_valid(), "used");
       do_input(op2->_opr1); do_temp(op2->_opr1);
 
       if (op2->_opr2->is_valid())         do_temp(op2->_opr2);
-      if (op2->_tmp->is_valid())          do_temp(op2->_tmp);
+      if (op2->_tmp1->is_valid())         do_temp(op2->_tmp1);
+      if (op2->_tmp2->is_valid())         do_temp(op2->_tmp2);
+      if (op2->_tmp3->is_valid())         do_temp(op2->_tmp3);
+      if (op2->_tmp4->is_valid())         do_temp(op2->_tmp4);
       if (op2->_result->is_valid())       do_output(op2->_result);
 
       break;
     }
 
+    case lir_pow: {
+      assert(op->as_Op2() != NULL, "must be");
+      LIR_Op2* op2 = (LIR_Op2*)op;
+
+      // On x86 pow needs two temporary fpu stack slots: tmp1 and
+      // tmp2. Register the input operands as temps to guarantee that
+      // they don't overlap with the temporary slots.
+      assert(op2->_info == NULL, "not used");
+      assert(op2->_opr1->is_valid() && op2->_opr2->is_valid(), "used");
+      assert(op2->_tmp1->is_valid() && op2->_tmp2->is_valid() && op2->_tmp3->is_valid()
+             && op2->_tmp4->is_valid() && op2->_tmp5->is_valid(), "used");
+      assert(op2->_result->is_valid(), "used");
+
+      do_input(op2->_opr1); do_temp(op2->_opr1);
+      do_input(op2->_opr2); do_temp(op2->_opr2);
+      do_temp(op2->_tmp1);
+      do_temp(op2->_tmp2);
+      do_temp(op2->_tmp3);
+      do_temp(op2->_tmp4);
+      do_temp(op2->_tmp5);
+      do_output(op2->_result);
+
+      break;
+    }
 
 // LIR_Op3
     case lir_idiv:
@@ -1670,6 +1709,8 @@
      case lir_tan:                   s = "tan";           break;
      case lir_log:                   s = "log";           break;
      case lir_log10:                 s = "log10";         break;
+     case lir_exp:                   s = "exp";           break;
+     case lir_pow:                   s = "pow";           break;
      case lir_logic_and:             s = "logic_and";     break;
      case lir_logic_or:              s = "logic_or";      break;
      case lir_logic_xor:             s = "logic_xor";     break;
@@ -1892,7 +1933,11 @@
   }
   in_opr1()->print(out);    out->print(" ");
   in_opr2()->print(out);    out->print(" ");
-  if (tmp_opr()->is_valid()) { tmp_opr()->print(out);    out->print(" "); }
+  if (tmp1_opr()->is_valid()) { tmp1_opr()->print(out);    out->print(" "); }
+  if (tmp2_opr()->is_valid()) { tmp2_opr()->print(out);    out->print(" "); }
+  if (tmp3_opr()->is_valid()) { tmp3_opr()->print(out);    out->print(" "); }
+  if (tmp4_opr()->is_valid()) { tmp4_opr()->print(out);    out->print(" "); }
+  if (tmp5_opr()->is_valid()) { tmp5_opr()->print(out);    out->print(" "); }
   result_opr()->print(out);
 }
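
LIR_Op2 grows from one temp operand to five so that exp and pow can reserve the FPU stack slots and scratch registers they need, and visitors and the printer then touch only the temps that are valid. A reduced sketch of that visit-only-valid-operands pattern (hypothetical types):

#include <cstdio>

struct Opr {
  int reg;  // -1 encodes the illegal operand
  bool is_valid() const { return reg >= 0; }
};

struct Op2 {
  Opr tmp[5];
  // Hand each valid temp to the allocator/printer; illegal slots are skipped.
  template <typename F> void for_each_temp(F f) const {
    for (const Opr& t : tmp) {
      if (t.is_valid()) f(t);
    }
  }
};

int main() {
  Op2 pow_op = { { { 0 }, { 1 }, { 2 }, { 3 }, { 4 } } };       // pow uses all five
  Op2 add_op = { { { -1 }, { -1 }, { -1 }, { -1 }, { -1 } } };  // add uses none
  int visited = 0;
  pow_op.for_each_temp([&](const Opr&) { visited++; });
  add_op.for_each_temp([&](const Opr&) { visited++; });
  std::printf("temps visited: %d\n", visited);  // 5
  return 0;
}
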
 
--- a/src/share/vm/c1/c1_LIR.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_LIR.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 #define SHARE_VM_C1_C1_LIR_HPP
 
 #include "c1/c1_ValueType.hpp"
+#include "oops/methodOop.hpp"
 
 class BlockBegin;
 class BlockList;
@@ -916,6 +917,8 @@
       , lir_tan
       , lir_log
       , lir_log10
+      , lir_exp
+      , lir_pow
       , lir_logic_and
       , lir_logic_or
       , lir_logic_xor
@@ -1160,8 +1163,9 @@
     return
       is_invokedynamic()  // An invokedynamic is always a MethodHandle call site.
       ||
-      (method()->holder()->name() == ciSymbol::java_lang_invoke_MethodHandle() &&
-       methodOopDesc::is_method_handle_invoke_name(method()->name()->sid()));
+      method()->is_compiled_lambda_form()  // Java-generated adapter
+      ||
+      method()->is_method_handle_intrinsic();  // JVM-generated MH intrinsic
   }
 
   intptr_t vtable_offset() const {
@@ -1560,7 +1564,11 @@
   LIR_Opr   _opr1;
   LIR_Opr   _opr2;
   BasicType _type;
-  LIR_Opr   _tmp;
+  LIR_Opr   _tmp1;
+  LIR_Opr   _tmp2;
+  LIR_Opr   _tmp3;
+  LIR_Opr   _tmp4;
+  LIR_Opr   _tmp5;
   LIR_Condition _condition;
 
   void verify() const;
@@ -1573,7 +1581,11 @@
     , _type(T_ILLEGAL)
     , _condition(condition)
     , _fpu_stack_size(0)
-    , _tmp(LIR_OprFact::illegalOpr) {
+    , _tmp1(LIR_OprFact::illegalOpr)
+    , _tmp2(LIR_OprFact::illegalOpr)
+    , _tmp3(LIR_OprFact::illegalOpr)
+    , _tmp4(LIR_OprFact::illegalOpr)
+    , _tmp5(LIR_OprFact::illegalOpr) {
     assert(code == lir_cmp, "code check");
   }
 
@@ -1584,7 +1596,11 @@
     , _type(type)
     , _condition(condition)
     , _fpu_stack_size(0)
-    , _tmp(LIR_OprFact::illegalOpr) {
+    , _tmp1(LIR_OprFact::illegalOpr)
+    , _tmp2(LIR_OprFact::illegalOpr)
+    , _tmp3(LIR_OprFact::illegalOpr)
+    , _tmp4(LIR_OprFact::illegalOpr)
+    , _tmp5(LIR_OprFact::illegalOpr) {
     assert(code == lir_cmove, "code check");
     assert(type != T_ILLEGAL, "cmove should have type");
   }
@@ -1597,25 +1613,38 @@
     , _type(type)
     , _condition(lir_cond_unknown)
     , _fpu_stack_size(0)
-    , _tmp(LIR_OprFact::illegalOpr) {
+    , _tmp1(LIR_OprFact::illegalOpr)
+    , _tmp2(LIR_OprFact::illegalOpr)
+    , _tmp3(LIR_OprFact::illegalOpr)
+    , _tmp4(LIR_OprFact::illegalOpr)
+    , _tmp5(LIR_OprFact::illegalOpr) {
     assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check");
   }
 
-  LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp)
+  LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr,
+          LIR_Opr tmp3 = LIR_OprFact::illegalOpr, LIR_Opr tmp4 = LIR_OprFact::illegalOpr, LIR_Opr tmp5 = LIR_OprFact::illegalOpr)
     : LIR_Op(code, result, NULL)
     , _opr1(opr1)
     , _opr2(opr2)
     , _type(T_ILLEGAL)
     , _condition(lir_cond_unknown)
     , _fpu_stack_size(0)
-    , _tmp(tmp) {
+    , _tmp1(tmp1)
+    , _tmp2(tmp2)
+    , _tmp3(tmp3)
+    , _tmp4(tmp4)
+    , _tmp5(tmp5) {
     assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check");
   }
 
   LIR_Opr in_opr1() const                        { return _opr1; }
   LIR_Opr in_opr2() const                        { return _opr2; }
   BasicType type()  const                        { return _type; }
-  LIR_Opr tmp_opr() const                        { return _tmp; }
+  LIR_Opr tmp1_opr() const                       { return _tmp1; }
+  LIR_Opr tmp2_opr() const                       { return _tmp2; }
+  LIR_Opr tmp3_opr() const                       { return _tmp3; }
+  LIR_Opr tmp4_opr() const                       { return _tmp4; }
+  LIR_Opr tmp5_opr() const                       { return _tmp5; }
   LIR_Condition condition() const  {
     assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); return _condition;
   }
@@ -1796,18 +1825,20 @@
 
  private:
   ciMethod* _profiled_method;
-  int _profiled_bci;
-  LIR_Opr _mdo;
-  LIR_Opr _recv;
-  LIR_Opr _tmp1;
-  ciKlass* _known_holder;
+  int       _profiled_bci;
+  ciMethod* _profiled_callee;
+  LIR_Opr   _mdo;
+  LIR_Opr   _recv;
+  LIR_Opr   _tmp1;
+  ciKlass*  _known_holder;
 
  public:
   // Destroys recv
-  LIR_OpProfileCall(LIR_Code code, ciMethod* profiled_method, int profiled_bci, LIR_Opr mdo, LIR_Opr recv, LIR_Opr t1, ciKlass* known_holder)
+  LIR_OpProfileCall(LIR_Code code, ciMethod* profiled_method, int profiled_bci, ciMethod* profiled_callee, LIR_Opr mdo, LIR_Opr recv, LIR_Opr t1, ciKlass* known_holder)
     : LIR_Op(code, LIR_OprFact::illegalOpr, NULL)  // no result, no info
     , _profiled_method(profiled_method)
     , _profiled_bci(profiled_bci)
+    , _profiled_callee(profiled_callee)
     , _mdo(mdo)
     , _recv(recv)
     , _tmp1(t1)
@@ -1815,6 +1846,7 @@
 
   ciMethod* profiled_method() const              { return _profiled_method;  }
   int       profiled_bci()    const              { return _profiled_bci;     }
+  ciMethod* profiled_callee() const              { return _profiled_callee;  }
   LIR_Opr   mdo()             const              { return _mdo;              }
   LIR_Opr   recv()            const              { return _recv;             }
   LIR_Opr   tmp1()            const              { return _tmp1;             }
@@ -2025,6 +2057,8 @@
   void sin (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_sin , from, tmp1, to, tmp2)); }
   void cos (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_cos , from, tmp1, to, tmp2)); }
   void tan (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_tan , from, tmp1, to, tmp2)); }
+  void exp (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5)                { append(new LIR_Op2(lir_exp , from, tmp1, to, tmp2, tmp3, tmp4, tmp5)); }
+  void pow (LIR_Opr arg1, LIR_Opr arg2, LIR_Opr res, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) { append(new LIR_Op2(lir_pow, arg1, arg2, res, tmp1, tmp2, tmp3, tmp4, tmp5)); }
 
   void add (LIR_Opr left, LIR_Opr right, LIR_Opr res)      { append(new LIR_Op2(lir_add, left, right, res)); }
   void sub (LIR_Opr left, LIR_Opr right, LIR_Opr res, CodeEmitInfo* info = NULL) { append(new LIR_Op2(lir_sub, left, right, res, info)); }
@@ -2116,8 +2150,8 @@
                   CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, CodeStub* stub,
                   ciMethod* profiled_method, int profiled_bci);
   // methodDataOop profiling
-  void profile_call(ciMethod* method, int bci, LIR_Opr mdo, LIR_Opr recv, LIR_Opr t1, ciKlass* cha_klass) {
-    append(new LIR_OpProfileCall(lir_profile_call, method, bci, mdo, recv, t1, cha_klass));
+  void profile_call(ciMethod* method, int bci, ciMethod* callee, LIR_Opr mdo, LIR_Opr recv, LIR_Opr t1, ciKlass* cha_klass) {
+    append(new LIR_OpProfileCall(lir_profile_call, method, bci, callee, mdo, recv, t1, cha_klass));
   }
 };
 
--- a/src/share/vm/c1/c1_LIRAssembler.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_LIRAssembler.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -448,10 +448,10 @@
 
   switch (op->code()) {
   case lir_static_call:
+  case lir_dynamic_call:
     call(op, relocInfo::static_call_type);
     break;
   case lir_optvirtual_call:
-  case lir_dynamic_call:
     call(op, relocInfo::opt_virtual_call_type);
     break;
   case lir_icvirtual_call:
@@ -460,7 +460,9 @@
   case lir_virtual_call:
     vtable_call(op);
     break;
-  default: ShouldNotReachHere();
+  default:
+    fatal(err_msg_res("unexpected op code: %s", op->name()));
+    break;
   }
 
   // JSR 292
@@ -718,7 +720,7 @@
       if (op->in_opr2()->is_constant()) {
         shift_op(op->code(), op->in_opr1(), op->in_opr2()->as_constant_ptr()->as_jint(), op->result_opr());
       } else {
-        shift_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp_opr());
+        shift_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr());
       }
       break;
 
@@ -746,6 +748,8 @@
     case lir_cos:
     case lir_log:
     case lir_log10:
+    case lir_exp:
+    case lir_pow:
       intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
       break;
 
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -920,7 +920,8 @@
 
 
 LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) {
-  assert(type2size[t] == type2size[value->type()], "size mismatch");
+  assert(type2size[t] == type2size[value->type()],
+         err_msg_res("size mismatch: t=%s, value->type()=%s", type2name(t), type2name(value->type())));
   if (!value->is_register()) {
     // force into a register
     LIR_Opr r = new_register(value->type());
@@ -1242,6 +1243,36 @@
               NULL   /* info */);
 }
 
+// Example: clazz.isInstance(object)
+void LIRGenerator::do_isInstance(Intrinsic* x) {
+  assert(x->number_of_arguments() == 2, "wrong type");
+
+  // TODO could try to substitute this node with an equivalent InstanceOf
+  // if clazz is known to be a constant Class. This will pick up newly found
+  // constants after HIR construction. I'll leave this to a future change.
+
+  // As a first cut, make a simple leaf call to the runtime to stay platform
+  // independent; a future change could follow the aastore example instead.
+
+  LIRItem clazz(x->argument_at(0), this);
+  LIRItem object(x->argument_at(1), this);
+  clazz.load_item();
+  object.load_item();
+  LIR_Opr result = rlock_result(x);
+
+  // need to perform null check on clazz
+  if (x->needs_null_check()) {
+    CodeEmitInfo* info = state_for(x);
+    __ null_check(clazz.result(), info);
+  }
+
+  LIR_Opr call_result = call_runtime(clazz.value(), object.value(),
+                                     CAST_FROM_FN_PTR(address, Runtime1::is_instance_of),
+                                     x->type(),
+                                     NULL); // NULL CodeEmitInfo results in a leaf call
+  __ move(call_result, result);
+}
+
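For reference, a minimal standalone sketch of the semantics that do_isInstance lowers to: the explicit null check on clazz followed by a leaf call returning int. The Klass/Oop structs and is_instance_of_sketch below are hypothetical stand-ins, not HotSpot types.

    #include <cstddef>
    #include <stdexcept>

    // Hypothetical stand-ins for klassOop/oopDesc; not HotSpot types.
    struct Klass { Klass* super; };
    struct Oop   { Klass* klass; };

    static bool is_subtype_of(const Klass* sub, const Klass* super) {
      for (const Klass* k = sub; k != NULL; k = k->super)
        if (k == super) return true;
      return false;
    }

    // Mirrors the lowered sequence: null-check clazz (the CodeEmitInfo-carrying
    // __ null_check above), then a leaf call whose int result (1/0) is moved
    // into the result register.
    int is_instance_of_sketch(const Klass* clazz, const Oop* obj) {
      if (clazz == NULL) throw std::runtime_error("NullPointerException");
      return (obj != NULL && is_subtype_of(obj->klass, clazz)) ? 1 : 0;
    }
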
 // Example: object.getClass ()
 void LIRGenerator::do_getClass(Intrinsic* x) {
   assert(x->number_of_arguments() == 1, "wrong type");
@@ -1910,6 +1941,14 @@
 }
 
 
+void LIRGenerator::do_TypeCast(TypeCast* x) {
+  LIRItem value(x->obj(), this);
+  value.load_item();
+  // the result is the same as from the node we are casting
+  set_result(x, value.result());
+}
+
+
 void LIRGenerator::do_Throw(Throw* x) {
   LIRItem exception(x->exception(), this);
   exception.load_item();
@@ -2138,9 +2177,9 @@
   off.load_item();
   src.load_item();
 
-  LIR_Opr reg = rlock_result(x, x->basic_type());
-
-  get_Object_unsafe(reg, src.result(), off.result(), type, x->is_volatile());
+  LIR_Opr value = rlock_result(x, x->basic_type());
+
+  get_Object_unsafe(value, src.result(), off.result(), type, x->is_volatile());
 
 #ifndef SERIALGC
   // We might be reading the value of the referent field of a
@@ -2153,19 +2192,16 @@
   // if (offset == java_lang_ref_Reference::referent_offset) {
   //   if (src != NULL) {
   //     if (klass(src)->reference_type() != REF_NONE) {
-  //       pre_barrier(..., reg, ...);
+  //       pre_barrier(..., value, ...);
   //     }
   //   }
   // }
-  //
-  // The first non-constant check of either the offset or
-  // the src operand will be done here; the remainder
-  // will take place in the generated code stub.
 
   if (UseG1GC && type == T_OBJECT) {
-    bool gen_code_stub = true;       // Assume we need to generate the slow code stub.
-    bool gen_offset_check = true;       // Assume the code stub has to generate the offset guard.
-    bool gen_source_check = true;       // Assume the code stub has to check the src object for null.
+    bool gen_pre_barrier = true;     // Assume we need to generate pre_barrier.
+    bool gen_offset_check = true;    // Assume we need to generate the offset guard.
+    bool gen_source_check = true;    // Assume we need to check the src object for null.
+    bool gen_type_check = true;      // Assume we need to check the reference_type.
 
     if (off.is_constant()) {
       jlong off_con = (off.type()->is_int() ?
@@ -2177,7 +2213,7 @@
         // The constant offset is something other than referent_offset.
         // We can skip generating/checking the remaining guards and
         // skip generation of the code stub.
-        gen_code_stub = false;
+        gen_pre_barrier = false;
       } else {
         // The constant offset is the same as referent_offset -
         // we do not need to generate a runtime offset check.
@@ -2186,11 +2222,11 @@
     }
 
     // We don't need to generate stub if the source object is an array
-    if (gen_code_stub && src.type()->is_array()) {
-      gen_code_stub = false;
+    if (gen_pre_barrier && src.type()->is_array()) {
+      gen_pre_barrier = false;
     }
 
-    if (gen_code_stub) {
+    if (gen_pre_barrier) {
       // We still need to continue with the checks.
       if (src.is_constant()) {
         ciObject* src_con = src.get_jobject_constant();
@@ -2198,7 +2234,7 @@
         if (src_con->is_null_object()) {
           // The constant src object is null - We can skip
           // generating the code stub.
-          gen_code_stub = false;
+          gen_pre_barrier = false;
         } else {
           // Non-null constant source object. We still have to generate
           // the slow stub - but we don't need to generate the runtime
@@ -2207,20 +2243,28 @@
         }
       }
     }
-
-    if (gen_code_stub) {
-      // Temoraries.
-      LIR_Opr src_klass = new_register(T_OBJECT);
-
-      // Get the thread pointer for the pre-barrier
-      LIR_Opr thread = getThreadPointer();
-
-      CodeStub* stub;
+    if (gen_pre_barrier && !PatchALot) {
+      // Can the klass of object be statically determined to be
+      // a sub-class of Reference?
+      ciType* type = src.value()->declared_type();
+      if ((type != NULL) && type->is_loaded()) {
+        if (type->is_subtype_of(compilation()->env()->Reference_klass())) {
+          gen_type_check = false;
+        } else if (type->is_klass() &&
+                   !compilation()->env()->Object_klass()->is_subtype_of(type->as_klass())) {
+          // Not Reference and not Object klass.
+          gen_pre_barrier = false;
+        }
+      }
+    }
+
+    if (gen_pre_barrier) {
+      LabelObj* Lcont = new LabelObj();
 
      // We may generate more than one runtime check here. Let's start with
      // the offset check.
       if (gen_offset_check) {
-        // if (offset == referent_offset) -> slow code stub
+        // if (offset != referent_offset) -> continue
         // If offset is an int then we can do the comparison with the
         // referent_offset constant; otherwise we need to move
         // referent_offset into a temporary register and generate
@@ -2235,43 +2279,36 @@
           referent_off = new_register(T_LONG);
           __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off);
         }
-
-        __ cmp(lir_cond_equal, off.result(), referent_off);
-
-        // Optionally generate "src == null" check.
-        stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
-                                                    src_klass, thread,
-                                                    gen_source_check);
-
-        __ branch(lir_cond_equal, as_BasicType(off.type()), stub);
-      } else {
-        if (gen_source_check) {
-          // offset is a const and equals referent offset
-          // if (source != null) -> slow code stub
-          __ cmp(lir_cond_notEqual, src.result(), LIR_OprFact::oopConst(NULL));
-
-          // Since we are generating the "if src == null" guard here,
-          // there is no need to generate the "src == null" check again.
-          stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
-                                                    src_klass, thread,
-                                                    false);
-
-          __ branch(lir_cond_notEqual, T_OBJECT, stub);
-        } else {
-          // We have statically determined that offset == referent_offset
-          // && src != null so we unconditionally branch to code stub
-          // to perform the guards and record reg in the SATB log buffer.
-
-          stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
-                                                    src_klass, thread,
-                                                    false);
-
-          __ branch(lir_cond_always, T_ILLEGAL, stub);
-        }
+        __ cmp(lir_cond_notEqual, off.result(), referent_off);
+        __ branch(lir_cond_notEqual, as_BasicType(off.type()), Lcont->label());
+      }
+      if (gen_source_check) {
+        // offset is a const and equals referent offset
+        // if (source == null) -> continue
+        __ cmp(lir_cond_equal, src.result(), LIR_OprFact::oopConst(NULL));
+        __ branch(lir_cond_equal, T_OBJECT, Lcont->label());
       }
-
-      // Continuation point
-      __ branch_destination(stub->continuation());
+      LIR_Opr src_klass = new_register(T_OBJECT);
+      if (gen_type_check) {
+        // We have determined that offset == referent_offset && src != null.
+        // if (src->_klass->_reference_type == REF_NONE) -> continue
+        __ move(new LIR_Address(src.result(), oopDesc::klass_offset_in_bytes(), T_OBJECT), src_klass);
+        LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(instanceKlass::reference_type_offset()), T_BYTE);
+        LIR_Opr reference_type = new_register(T_INT);
+        __ move(reference_type_addr, reference_type);
+        __ cmp(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE));
+        __ branch(lir_cond_equal, T_INT, Lcont->label());
+      }
+      {
+        // We have determined that src->_klass->_reference_type != REF_NONE
+        // so register the value in the referent field with the pre-barrier.
+        pre_barrier(LIR_OprFact::illegalOpr /* addr_opr */,
+                    value  /* pre_val */,
+                    false  /* do_load */,
+                    false  /* patch */,
+                    NULL   /* info */);
+      }
+      __ branch_destination(Lcont->label());
     }
   }
 #endif // SERIALGC
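
Taken together, the rewritten block emits an inline guard chain ending at Lcont instead of branching into a G1UnsafeGetObjSATBBarrierStub. A hedged sketch of that control flow (function and parameter names here are illustrative only):

    #include <cstddef>

    typedef void (*PreBarrierFn)(const void* value);

    // Each guard falls through (returns) when the pre-barrier is not needed;
    // only the gen_* checks that could not be resolved statically are emitted.
    void referent_get_guards_sketch(long offset, long referent_offset,
                                    const void* src, int reference_type,
                                    PreBarrierFn pre_barrier, const void* value) {
      if (offset != referent_offset) return;           // gen_offset_check
      if (src == NULL) return;                         // gen_source_check
      if (reference_type == 0 /* REF_NONE */) return;  // gen_type_check
      pre_barrier(value);   // record the referent value in the SATB log
    }                       // each return corresponds to a branch to Lcont
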
@@ -2626,8 +2663,9 @@
 
 
 void LIRGenerator::invoke_load_arguments(Invoke* x, LIRItemList* args, const LIR_OprList* arg_list) {
-  int i = (x->has_receiver() || x->is_invokedynamic()) ? 1 : 0;
-  for (; i < args->length(); i++) {
+  assert(args->length() == arg_list->length(),
+         err_msg_res("args=%d, arg_list=%d", args->length(), arg_list->length()));
+  for (int i = x->has_receiver() ? 1 : 0; i < args->length(); i++) {
     LIRItem* param = args->at(i);
     LIR_Opr loc = arg_list->at(i);
     if (loc->is_register()) {
@@ -2667,15 +2705,9 @@
     LIRItem* receiver = new LIRItem(x->receiver(), this);
     argument_items->append(receiver);
   }
-  if (x->is_invokedynamic()) {
-    // Insert a dummy for the synthetic MethodHandle argument.
-    argument_items->append(NULL);
-  }
-  int idx = x->has_receiver() ? 1 : 0;
   for (int i = 0; i < x->number_of_arguments(); i++) {
     LIRItem* param = new LIRItem(x->argument_at(i), this);
     argument_items->append(param);
-    idx += (param->type()->is_double_word() ? 2 : 1);
   }
   return argument_items;
 }
@@ -2720,9 +2752,6 @@
 
   CodeEmitInfo* info = state_for(x, x->state());
 
-  // invokedynamics can deoptimize.
-  CodeEmitInfo* deopt_info = x->is_invokedynamic() ? state_for(x, x->state_before()) : NULL;
-
   invoke_load_arguments(x, args, arg_list);
 
   if (x->has_receiver()) {
@@ -2737,7 +2766,10 @@
   // JSR 292
   // Preserve the SP over MethodHandle call sites.
   ciMethod* target = x->target();
-  if (target->is_method_handle_invoke()) {
+  bool is_method_handle_invoke = (// %%% FIXME: Are both of these relevant?
+                                  target->is_method_handle_intrinsic() ||
+                                  target->is_compiled_lambda_form());
+  if (is_method_handle_invoke) {
     info->set_is_method_handle_invoke(true);
     __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr());
   }
@@ -2768,41 +2800,8 @@
       }
       break;
     case Bytecodes::_invokedynamic: {
-      ciBytecodeStream bcs(x->scope()->method());
-      bcs.force_bci(x->state()->bci());
-      assert(bcs.cur_bc() == Bytecodes::_invokedynamic, "wrong stream");
-      ciCPCache* cpcache = bcs.get_cpcache();
-
-      // Get CallSite offset from constant pool cache pointer.
-      int index = bcs.get_method_index();
-      size_t call_site_offset = cpcache->get_f1_offset(index);
-
-      // Load CallSite object from constant pool cache.
-      LIR_Opr call_site = new_register(objectType);
-      __ oop2reg(cpcache->constant_encoding(), call_site);
-      __ move_wide(new LIR_Address(call_site, call_site_offset, T_OBJECT), call_site);
-
-      // If this invokedynamic call site hasn't been executed yet in
-      // the interpreter, the CallSite object in the constant pool
-      // cache is still null and we need to deoptimize.
-      if (cpcache->is_f1_null_at(index)) {
-        // Only deoptimize if the CallSite object is still null; we don't
-        // recompile methods in C1 after deoptimization so this call site
-        // might be resolved the next time we execute it after OSR.
-        DeoptimizeStub* deopt_stub = new DeoptimizeStub(deopt_info);
-        __ cmp(lir_cond_equal, call_site, LIR_OprFact::oopConst(NULL));
-        __ branch(lir_cond_equal, T_OBJECT, deopt_stub);
-      }
-
-      // Use the receiver register for the synthetic MethodHandle
-      // argument.
-      receiver = LIR_Assembler::receiverOpr();
-
-      // Load target MethodHandle from CallSite object.
-      __ load(new LIR_Address(call_site, java_lang_invoke_CallSite::target_offset_in_bytes(), T_OBJECT), receiver);
-
       __ call_dynamic(target, receiver, result_register,
-                      SharedRuntime::get_resolve_opt_virtual_call_stub(),
+                      SharedRuntime::get_resolve_static_call_stub(),
                       arg_list, info);
       break;
     }
@@ -2813,7 +2812,7 @@
 
   // JSR 292
   // Restore the SP after MethodHandle call sites.
-  if (target->is_method_handle_invoke()) {
+  if (is_method_handle_invoke) {
     __ move(FrameMap::method_handle_invoke_SP_save_opr(), FrameMap::stack_pointer());
   }
 
@@ -2877,6 +2876,50 @@
   __ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type()));
 }
 
+void LIRGenerator::do_RuntimeCall(address routine, int expected_arguments, Intrinsic* x) {
+    assert(x->number_of_arguments() == expected_arguments, "wrong type");
+    LIR_Opr reg = result_register_for(x->type());
+    __ call_runtime_leaf(routine, getThreadTemp(),
+                         reg, new LIR_OprList());
+    LIR_Opr result = rlock_result(x);
+    __ move(reg, result);
+}
+
+#ifdef TRACE_HAVE_INTRINSICS
+void LIRGenerator::do_ThreadIDIntrinsic(Intrinsic* x) {
+    LIR_Opr thread = getThreadPointer();
+    LIR_Opr osthread = new_pointer_register();
+    __ move(new LIR_Address(thread, in_bytes(JavaThread::osthread_offset()), osthread->type()), osthread);
+    size_t thread_id_size = OSThread::thread_id_size();
+    if (thread_id_size == (size_t) BytesPerLong) {
+      LIR_Opr id = new_register(T_LONG);
+      __ move(new LIR_Address(osthread, in_bytes(OSThread::thread_id_offset()), T_LONG), id);
+      __ convert(Bytecodes::_l2i, id, rlock_result(x));
+    } else if (thread_id_size == (size_t) BytesPerInt) {
+      __ move(new LIR_Address(osthread, in_bytes(OSThread::thread_id_offset()), T_INT), rlock_result(x));
+    } else {
+      ShouldNotReachHere();
+    }
+}
+
+void LIRGenerator::do_ClassIDIntrinsic(Intrinsic* x) {
+    CodeEmitInfo* info = state_for(x);
+    CodeEmitInfo* info2 = new CodeEmitInfo(info); // Clone for the second null check
+    assert(info != NULL, "must have info");
+    LIRItem arg(x->argument_at(1), this);
+    arg.load_item();
+    LIR_Opr klass = new_register(T_OBJECT);
+    __ move(new LIR_Address(arg.result(), java_lang_Class::klass_offset_in_bytes(), T_OBJECT), klass, info);
+    LIR_Opr id = new_register(T_LONG);
+    ByteSize offset = TRACE_ID_OFFSET;
+    LIR_Address* trace_id_addr = new LIR_Address(klass, in_bytes(offset), T_LONG);
+    __ move(trace_id_addr, id);
+    __ logical_or(id, LIR_OprFact::longConst(0x01l), id);
+    __ store(id, trace_id_addr);
+    __ logical_and(id, LIR_OprFact::longConst(~0x3l), id);
+    __ move(id, rlock_result(x));
+}
+#endif
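
The or/and pair in do_ClassIDIntrinsic reads naturally as "mark the trace id as used, then return it with its metadata bits stripped". A sketch under that assumption (the meaning of bits 0..1 is inferred from the constants, not stated in this changeset):

    #include <cstdint>

    // Matches the emitted LIR: load the 64-bit trace id, set the low bit and
    // store it back, then return the id with the two low bits masked off.
    int64_t class_id_sketch(int64_t* trace_id_slot) {
      int64_t id = *trace_id_slot;
      id |= 0x01;                    // logical_or(id, 0x01, id): mark as used
      *trace_id_slot = id;           // store(id, trace_id_addr)
      return id & ~(int64_t) 0x3;    // logical_and(id, ~0x3, id): strip flags
    }
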
 
 void LIRGenerator::do_Intrinsic(Intrinsic* x) {
   switch (x->id()) {
@@ -2888,27 +2931,24 @@
     break;
   }
 
-  case vmIntrinsics::_currentTimeMillis: {
-    assert(x->number_of_arguments() == 0, "wrong type");
-    LIR_Opr reg = result_register_for(x->type());
-    __ call_runtime_leaf(CAST_FROM_FN_PTR(address, os::javaTimeMillis), getThreadTemp(),
-                         reg, new LIR_OprList());
-    LIR_Opr result = rlock_result(x);
-    __ move(reg, result);
+#ifdef TRACE_HAVE_INTRINSICS
+  case vmIntrinsics::_threadID: do_ThreadIDIntrinsic(x); break;
+  case vmIntrinsics::_classID: do_ClassIDIntrinsic(x); break;
+  case vmIntrinsics::_counterTime:
+    do_RuntimeCall(CAST_FROM_FN_PTR(address, TRACE_TIME_METHOD), 0, x);
     break;
-  }
-
-  case vmIntrinsics::_nanoTime: {
-    assert(x->number_of_arguments() == 0, "wrong type");
-    LIR_Opr reg = result_register_for(x->type());
-    __ call_runtime_leaf(CAST_FROM_FN_PTR(address, os::javaTimeNanos), getThreadTemp(),
-                         reg, new LIR_OprList());
-    LIR_Opr result = rlock_result(x);
-    __ move(reg, result);
+#endif
+
+  case vmIntrinsics::_currentTimeMillis:
+    do_RuntimeCall(CAST_FROM_FN_PTR(address, os::javaTimeMillis), 0, x);
     break;
-  }
+
+  case vmIntrinsics::_nanoTime:
+    do_RuntimeCall(CAST_FROM_FN_PTR(address, os::javaTimeNanos), 0, x);
+    break;
 
   case vmIntrinsics::_Object_init:    do_RegisterFinalizer(x); break;
+  case vmIntrinsics::_isInstance:     do_isInstance(x);    break;
   case vmIntrinsics::_getClass:       do_getClass(x);      break;
   case vmIntrinsics::_currentThread:  do_currentThread(x); break;
 
@@ -2918,7 +2958,9 @@
   case vmIntrinsics::_dsqrt:          // fall through
   case vmIntrinsics::_dtan:           // fall through
   case vmIntrinsics::_dsin :          // fall through
-  case vmIntrinsics::_dcos :          do_MathIntrinsic(x); break;
+  case vmIntrinsics::_dcos :          // fall through
+  case vmIntrinsics::_dexp :          // fall through
+  case vmIntrinsics::_dpow :          do_MathIntrinsic(x); break;
   case vmIntrinsics::_arraycopy:      do_ArrayCopy(x);     break;
 
   // java.nio.Buffer.checkIndex
@@ -2934,11 +2976,6 @@
     do_CompareAndSwap(x, longType);
     break;
 
-    // sun.misc.AtomicLongCSImpl.attemptUpdate
-  case vmIntrinsics::_attemptUpdate:
-    do_AttemptUpdate(x);
-    break;
-
   case vmIntrinsics::_Reference_get:
     do_Reference_get(x);
     break;
@@ -2959,7 +2996,7 @@
     recv = new_register(T_OBJECT);
     __ move(value.result(), recv);
   }
-  __ profile_call(x->method(), x->bci_of_invoke(), mdo, recv, tmp, x->known_holder());
+  __ profile_call(x->method(), x->bci_of_invoke(), x->callee(), mdo, recv, tmp, x->known_holder());
 }
 
 void LIRGenerator::do_ProfileInvoke(ProfileInvoke* x) {
@@ -3179,4 +3216,3 @@
     }
   }
 }
-
--- a/src/share/vm/c1/c1_LIRGenerator.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_LIRGenerator.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -238,12 +238,12 @@
   LIR_Opr getThreadPointer();
 
   void do_RegisterFinalizer(Intrinsic* x);
+  void do_isInstance(Intrinsic* x);
   void do_getClass(Intrinsic* x);
   void do_currentThread(Intrinsic* x);
   void do_MathIntrinsic(Intrinsic* x);
   void do_ArrayCopy(Intrinsic* x);
   void do_CompareAndSwap(Intrinsic* x, ValueType* type);
-  void do_AttemptUpdate(Intrinsic* x);
   void do_NIOCheckIndex(Intrinsic* x);
   void do_FPIntrinsics(Intrinsic* x);
   void do_Reference_get(Intrinsic* x);
@@ -426,6 +426,12 @@
   SwitchRangeArray* create_lookup_ranges(LookupSwitch* x);
   void do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegin* default_sux);
 
+  void do_RuntimeCall(address routine, int expected_arguments, Intrinsic* x);
+#ifdef TRACE_HAVE_INTRINSICS
+  void do_ThreadIDIntrinsic(Intrinsic* x);
+  void do_ClassIDIntrinsic(Intrinsic* x);
+#endif
+
  public:
   Compilation*  compilation() const              { return _compilation; }
   FrameMap*     frame_map() const                { return _compilation->frame_map(); }
@@ -494,6 +500,7 @@
   virtual void do_IfOp           (IfOp*            x);
   virtual void do_Convert        (Convert*         x);
   virtual void do_NullCheck      (NullCheck*       x);
+  virtual void do_TypeCast       (TypeCast*        x);
   virtual void do_Invoke         (Invoke*          x);
   virtual void do_NewInstance    (NewInstance*     x);
   virtual void do_NewTypeArray   (NewTypeArray*    x);
--- a/src/share/vm/c1/c1_LinearScan.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_LinearScan.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1884,7 +1884,7 @@
 
   if (move_resolver.has_mappings()) {
     // insert moves after first instruction
-    move_resolver.set_insert_position(block->lir(), 1);
+    move_resolver.set_insert_position(block->lir(), 0);
     move_resolver.resolve_and_append_moves();
   }
 }
@@ -2467,12 +2467,12 @@
 // Allocate them with new so they are never destroyed (otherwise, a
 // forced exit could destroy these objects while they are still in
 // use).
-ConstantOopWriteValue* LinearScan::_oop_null_scope_value = new (ResourceObj::C_HEAP) ConstantOopWriteValue(NULL);
-ConstantIntValue*      LinearScan::_int_m1_scope_value = new (ResourceObj::C_HEAP) ConstantIntValue(-1);
-ConstantIntValue*      LinearScan::_int_0_scope_value =  new (ResourceObj::C_HEAP) ConstantIntValue(0);
-ConstantIntValue*      LinearScan::_int_1_scope_value =  new (ResourceObj::C_HEAP) ConstantIntValue(1);
-ConstantIntValue*      LinearScan::_int_2_scope_value =  new (ResourceObj::C_HEAP) ConstantIntValue(2);
-LocationValue*         _illegal_value = new (ResourceObj::C_HEAP) LocationValue(Location());
+ConstantOopWriteValue* LinearScan::_oop_null_scope_value = new (ResourceObj::C_HEAP, mtCompiler) ConstantOopWriteValue(NULL);
+ConstantIntValue*      LinearScan::_int_m1_scope_value = new (ResourceObj::C_HEAP, mtCompiler) ConstantIntValue(-1);
+ConstantIntValue*      LinearScan::_int_0_scope_value =  new (ResourceObj::C_HEAP, mtCompiler) ConstantIntValue(0);
+ConstantIntValue*      LinearScan::_int_1_scope_value =  new (ResourceObj::C_HEAP, mtCompiler) ConstantIntValue(1);
+ConstantIntValue*      LinearScan::_int_2_scope_value =  new (ResourceObj::C_HEAP, mtCompiler) ConstantIntValue(2);
+LocationValue*         _illegal_value = new (ResourceObj::C_HEAP, mtCompiler) LocationValue(Location());
 
 void LinearScan::init_compute_debug_info() {
   // cache for frequently used scope values
@@ -6579,6 +6579,8 @@
         case lir_abs:
         case lir_log10:
         case lir_log:
+        case lir_pow:
+        case lir_exp:
         case lir_logic_and:
         case lir_logic_or:
         case lir_logic_xor:
--- a/src/share/vm/c1/c1_Optimizer.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_Optimizer.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -478,6 +478,7 @@
   void do_IfOp           (IfOp*            x);
   void do_Convert        (Convert*         x);
   void do_NullCheck      (NullCheck*       x);
+  void do_TypeCast       (TypeCast*        x);
   void do_Invoke         (Invoke*          x);
   void do_NewInstance    (NewInstance*     x);
   void do_NewTypeArray   (NewTypeArray*    x);
@@ -648,6 +649,7 @@
 void NullCheckVisitor::do_IfOp           (IfOp*            x) {}
 void NullCheckVisitor::do_Convert        (Convert*         x) {}
 void NullCheckVisitor::do_NullCheck      (NullCheck*       x) { nce()->handle_NullCheck(x); }
+void NullCheckVisitor::do_TypeCast       (TypeCast*        x) {}
 void NullCheckVisitor::do_Invoke         (Invoke*          x) { nce()->handle_Invoke(x); }
 void NullCheckVisitor::do_NewInstance    (NewInstance*     x) { nce()->handle_NewInstance(x); }
 void NullCheckVisitor::do_NewTypeArray   (NewTypeArray*    x) { nce()->handle_NewArray(x); }
--- a/src/share/vm/c1/c1_Runtime1.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -294,7 +294,11 @@
   FUNCTION_CASE(entry, SharedRuntime::lrem);
   FUNCTION_CASE(entry, SharedRuntime::dtrace_method_entry);
   FUNCTION_CASE(entry, SharedRuntime::dtrace_method_exit);
+  FUNCTION_CASE(entry, is_instance_of);
   FUNCTION_CASE(entry, trace_block_entry);
+#ifdef TRACE_HAVE_INTRINSICS
+  FUNCTION_CASE(entry, TRACE_TIME_METHOD);
+#endif
 
 #undef FUNCTION_CASE
 
@@ -1267,6 +1271,19 @@
 JRT_END
 
 
+JRT_LEAF(int, Runtime1::is_instance_of(oopDesc* mirror, oopDesc* obj))
+  // This has to return int instead of bool; otherwise there may be a mismatch
+  // between the C calling convention and the Java one.
+  // E.g., on x86, GCC may clear only %al when returning a bool false, but
+  // the JVM reads the whole %eax as the return value, so the leftover high
+  // bits could be misinterpreted as a boolean true.
+
+  assert(mirror != NULL, "should null-check on mirror before calling");
+  klassOop k = java_lang_Class::as_klassOop(mirror);
+  return (k != NULL && obj != NULL && obj->is_a(k)) ? 1 : 0;
+JRT_END
+
+
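
The comment above can be illustrated with a small, hedged example of the width mismatch; actual register contents are compiler- and ABI-specific, so this only shows why the int-wide contract is the safe one:

    #include <cstdio>

    // On x86 a C++ bool return defines only the low byte of the return
    // register (%al); an int return defines all 32 bits (%eax), which is
    // the width the compiled Java caller reads. Hence the int return type.
    bool as_bool(bool v) { return v; }          // only the low byte is specified
    int  as_int (bool v) { return v ? 1 : 0; }  // the full register is specified

    int main() {
      std::printf("bool-width: %d\n", (int) as_bool(false));
      std::printf("int-width:  %d\n", as_int(false));
      return 0;
    }
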
 #ifndef PRODUCT
 void Runtime1::print_statistics() {
   tty->print_cr("C1 Runtime statistics:");
--- a/src/share/vm/c1/c1_Runtime1.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_Runtime1.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -186,6 +186,7 @@
   static int  arraycopy(oopDesc* src, int src_pos, oopDesc* dst, int dst_pos, int length);
   static void primitive_arraycopy(HeapWord* src, HeapWord* dst, int length);
   static void oop_arraycopy(HeapWord* src, HeapWord* dst, int length);
+  static int  is_instance_of(oopDesc* mirror, oopDesc* obj);
 
   static void print_statistics()                 PRODUCT_RETURN;
 };
--- a/src/share/vm/c1/c1_ValueMap.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_ValueMap.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -178,6 +178,7 @@
   void do_IfOp           (IfOp*            x) { /* nothing to do */ }
   void do_Convert        (Convert*         x) { /* nothing to do */ }
   void do_NullCheck      (NullCheck*       x) { /* nothing to do */ }
+  void do_TypeCast       (TypeCast*        x) { /* nothing to do */ }
   void do_NewInstance    (NewInstance*     x) { /* nothing to do */ }
   void do_NewTypeArray   (NewTypeArray*    x) { /* nothing to do */ }
   void do_NewObjectArray (NewObjectArray*  x) { /* nothing to do */ }
--- a/src/share/vm/c1/c1_ValueStack.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_ValueStack.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -195,6 +195,7 @@
 
 void ValueStack::print() {
   scope()->method()->print_name();
+  tty->cr();
   if (stack_is_empty()) {
     tty->print_cr("empty stack");
   } else {
--- a/src/share/vm/c1/c1_ValueStack.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_ValueStack.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -142,6 +142,10 @@
     return x;
   }
 
+  void stack_at_put(int i, Value x) {
+    _stack.at_put(i, x);
+  }
+
   // pinning support
   void pin_stack_for_linear_scan();
 
--- a/src/share/vm/c1/c1_ValueType.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_ValueType.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -101,6 +101,23 @@
 ciObject* InstanceConstant::constant_value() const                 { return _value; }
 ciObject* ClassConstant::constant_value() const                    { return _value; }
 
+ciType* ObjectConstant::exact_type() const {
+  ciObject* c = constant_value();
+  return (c != NULL && !c->is_null_object()) ? c->klass() : NULL;
+}
+ciType* ArrayConstant::exact_type() const {
+  ciObject* c = constant_value();
+  return (c != NULL && !c->is_null_object()) ? c->klass() : NULL;
+}
+ciType* InstanceConstant::exact_type() const {
+  ciObject* c = constant_value();
+  return (c != NULL && !c->is_null_object()) ? c->klass() : NULL;
+}
+ciType* ClassConstant::exact_type() const {
+  ciObject* c = constant_value();
+  return (c != NULL && !c->is_null_object()) ? c->klass() : NULL;
+}
+
 
 ValueType* as_ValueType(BasicType type) {
   switch (type) {
--- a/src/share/vm/c1/c1_ValueType.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_ValueType.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -297,7 +297,8 @@
   virtual const char tchar() const               { return 'a'; }
   virtual const char* name() const               { return "object"; }
   virtual ObjectType* as_ObjectType()            { return this; }
-  virtual ciObject* constant_value() const       { ShouldNotReachHere(); return NULL;  }
+  virtual ciObject* constant_value() const       { ShouldNotReachHere(); return NULL; }
+  virtual ciType* exact_type() const             { return NULL; }
   bool is_loaded() const;
   jobject encoding() const;
 };
@@ -315,6 +316,7 @@
   virtual bool is_constant() const               { return true; }
   virtual ObjectConstant* as_ObjectConstant()    { return this; }
   virtual ciObject* constant_value() const;
+  virtual ciType* exact_type() const;
 };
 
 
@@ -334,9 +336,9 @@
   ciArray* value() const                         { return _value; }
 
   virtual bool is_constant() const               { return true; }
-
   virtual ArrayConstant* as_ArrayConstant()      { return this; }
   virtual ciObject* constant_value() const;
+  virtual ciType* exact_type() const;
 };
 
 
@@ -356,9 +358,9 @@
   ciInstance* value() const                      { return _value; }
 
   virtual bool is_constant() const               { return true; }
-
   virtual InstanceConstant* as_InstanceConstant(){ return this; }
   virtual ciObject* constant_value() const;
+  virtual ciType* exact_type() const;
 };
 
 
@@ -378,9 +380,9 @@
   ciInstanceKlass* value() const                 { return _value; }
 
   virtual bool is_constant() const               { return true; }
-
   virtual ClassConstant* as_ClassConstant()      { return this; }
   virtual ciObject* constant_value() const;
+  virtual ciType* exact_type() const;
 };
 
 
--- a/src/share/vm/c1/c1_globals.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/c1/c1_globals.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -156,18 +156,12 @@
   develop(bool, CanonicalizeNodes, true,                                    \
           "Canonicalize graph nodes")                                       \
                                                                             \
-  develop(bool, CanonicalizeExperimental, false,                            \
-          "Canonicalize graph nodes, experimental code")                    \
-                                                                            \
   develop(bool, PrintCanonicalization, false,                               \
           "Print graph node canonicalization")                              \
                                                                             \
   develop(bool, UseTableRanges, true,                                       \
           "Faster versions of lookup table using ranges")                   \
                                                                             \
-  develop(bool, UseFastExceptionHandling, true,                             \
-          "Faster handling of exceptions")                                  \
-                                                                            \
   develop_pd(bool, RoundFPResults,                                          \
           "Indicates whether rounding is needed for floating point results")\
                                                                             \
@@ -224,9 +218,6 @@
   develop(bool, PinAllInstructions, false,                                  \
           "All instructions are pinned")                                    \
                                                                             \
-  develop(bool, ValueStackPinStackAll, true,                                \
-          "Pinning in ValueStack pin everything")                           \
-                                                                            \
   develop(bool, UseFastNewInstance, true,                                   \
           "Use fast inlined instance allocation")                           \
                                                                             \
--- a/src/share/vm/ci/bcEscapeAnalyzer.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/bcEscapeAnalyzer.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -236,11 +236,17 @@
   ciInstanceKlass* callee_holder = ciEnv::get_instance_klass_for_declared_method_holder(holder);
   ciInstanceKlass* actual_recv = callee_holder;
 
-  // some methods are obviously bindable without any type checks so
-  // convert them directly to an invokespecial.
-  if (target->is_loaded() && !target->is_abstract() &&
-      target->can_be_statically_bound() && code == Bytecodes::_invokevirtual) {
-    code = Bytecodes::_invokespecial;
+  // Some methods are obviously bindable without any type checks so
+  // convert them directly to an invokespecial or invokestatic.
+  if (target->is_loaded() && !target->is_abstract() && target->can_be_statically_bound()) {
+    switch (code) {
+    case Bytecodes::_invokevirtual:
+      code = Bytecodes::_invokespecial;
+      break;
+    case Bytecodes::_invokehandle:
+      code = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokespecial;
+      break;
+    }
   }
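
The rebinding rule added above is small enough to state as a standalone function; this sketch uses stand-in enum values rather than the real Bytecodes constants:

    enum Code { _invokevirtual, _invokespecial, _invokestatic, _invokehandle };

    // Statically bindable targets are re-labelled so the escape analyzer can
    // treat them as directly bound calls; invokehandle chooses static vs.
    // special depending on whether the adapter method is static.
    Code rebind_sketch(Code code, bool is_loaded, bool is_abstract,
                       bool statically_bound, bool is_static) {
      if (!is_loaded || is_abstract || !statically_bound) return code;
      switch (code) {
      case _invokevirtual: return _invokespecial;
      case _invokehandle:  return is_static ? _invokestatic : _invokespecial;
      default:             return code;
      }
    }
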
 
   // compute size of arguments
@@ -824,8 +830,8 @@
         break;
       case Bytecodes::_getstatic:
       case Bytecodes::_getfield:
-        { bool will_link;
-          ciField* field = s.get_field(will_link);
+        { bool ignored_will_link;
+          ciField* field = s.get_field(ignored_will_link);
           BasicType field_type = field->type()->basic_type();
           if (s.cur_bc() != Bytecodes::_getstatic) {
             set_method_escape(state.apop());
@@ -863,11 +869,21 @@
       case Bytecodes::_invokestatic:
       case Bytecodes::_invokedynamic:
       case Bytecodes::_invokeinterface:
-        { bool will_link;
-          ciMethod* target = s.get_method(will_link);
-          ciKlass* holder = s.get_declared_method_holder();
-          invoke(state, s.cur_bc(), target, holder);
-          ciType* return_type = target->return_type();
+        { bool ignored_will_link;
+          ciSignature* declared_signature = NULL;
+          ciMethod* target = s.get_method(ignored_will_link, &declared_signature);
+          ciKlass*  holder = s.get_declared_method_holder();
+          assert(declared_signature != NULL, "cannot be null");
+          // Push the appendix argument, if there is one.
+          if (s.has_appendix()) {
+            state.apush(unknown_obj);
+          }
+          // Pass in raw bytecode because we need to see invokehandle instructions.
+          invoke(state, s.cur_bc_raw(), target, holder);
+          // We use the return type of the declared signature here because it
+          // might be a more concrete type than the one from the target
+          // (e.g., for invokedynamic and invokehandle).
+          ciType* return_type = declared_signature->return_type();
           if (!return_type->is_primitive_type()) {
             state.apush(unknown_obj);
           } else if (return_type->is_one_word()) {
--- a/src/share/vm/ci/ciClassList.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciClassList.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -47,6 +47,7 @@
 class   ciNullObject;
 class   ciInstance;
 class     ciCallSite;
+class     ciMemberName;
 class     ciMethodHandle;
 class   ciMethod;
 class   ciMethodData;
@@ -100,6 +101,7 @@
 friend class ciObject;                 \
 friend class ciNullObject;             \
 friend class ciInstance;               \
+friend class ciMemberName;             \
 friend class ciMethod;                 \
 friend class ciMethodData;             \
 friend class ciMethodHandle;           \
--- a/src/share/vm/ci/ciEnv.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciEnv.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -50,7 +50,6 @@
 #include "oops/oop.inline.hpp"
 #include "oops/oop.inline2.hpp"
 #include "prims/jvmtiExport.hpp"
-#include "prims/methodHandleWalk.hpp"
 #include "runtime/init.hpp"
 #include "runtime/reflection.hpp"
 #include "runtime/sharedRuntime.hpp"
@@ -582,7 +581,7 @@
     assert(index < 0, "only one kind of index at a time");
     ConstantPoolCacheEntry* cpc_entry = cpool->cache()->entry_at(cache_index);
     index = cpc_entry->constant_pool_index();
-    oop obj = cpc_entry->f1();
+    oop obj = cpc_entry->f1_as_instance();
     if (obj != NULL) {
       assert(obj->is_instance() || obj->is_array(), "must be a Java reference");
       ciObject* ciobj = get_object(obj);
@@ -739,88 +738,81 @@
 ciMethod* ciEnv::get_method_by_index_impl(constantPoolHandle cpool,
                                           int index, Bytecodes::Code bc,
                                           ciInstanceKlass* accessor) {
-  int holder_index = cpool->klass_ref_index_at(index);
-  bool holder_is_accessible;
-  ciKlass* holder = get_klass_by_index_impl(cpool, holder_index, holder_is_accessible, accessor);
-  ciInstanceKlass* declared_holder = get_instance_klass_for_declared_method_holder(holder);
+  if (bc == Bytecodes::_invokedynamic) {
+    ConstantPoolCacheEntry* secondary_entry = cpool->cache()->secondary_entry_at(index);
+    const bool is_resolved = !secondary_entry->is_f1_null();
+    // FIXME: code generation could allow for null (unlinked) call site
+    // The call site could be made patchable as follows:
+    // Load the appendix argument from the constant pool.
+    // Test the appendix argument and jump to a known deopt routine if it is null.
+    // Jump through a patchable call site, which is initially a deopt routine.
+    // Patch the call site to the nmethod entry point of the static compiled lambda form.
+    // As with other two-component call sites, both values must be independently verified.
 
-  // Get the method's name and signature.
-  Symbol* name_sym = cpool->name_ref_at(index);
-  Symbol* sig_sym  = cpool->signature_ref_at(index);
+    if (is_resolved) {
+      // Get the invoker methodOop and the extra argument from the constant pool.
+      methodOop adapter = secondary_entry->f2_as_vfinal_method();
+      return get_object(adapter)->as_method();
+    }
 
-  if (cpool->has_preresolution()
-      || (holder == ciEnv::MethodHandle_klass() &&
-          methodOopDesc::is_method_handle_invoke_name(name_sym))) {
-    // Short-circuit lookups for JSR 292-related call sites.
-    // That is, do not rely only on name-based lookups, because they may fail
-    // if the names are not resolvable in the boot class loader (7056328).
-    switch (bc) {
-    case Bytecodes::_invokevirtual:
-    case Bytecodes::_invokeinterface:
-    case Bytecodes::_invokespecial:
-    case Bytecodes::_invokestatic:
-      {
-        methodOop m = constantPoolOopDesc::method_at_if_loaded(cpool, index, bc);
-        if (m != NULL) {
-          return get_object(m)->as_method();
+    // Fake a method that is equivalent to a declared method.
+    ciInstanceKlass* holder    = get_object(SystemDictionary::MethodHandle_klass())->as_instance_klass();
+    ciSymbol*        name      = ciSymbol::invokeBasic_name();
+    ciSymbol*        signature = get_symbol(cpool->signature_ref_at(index));
+    return get_unloaded_method(holder, name, signature, accessor);
+  } else {
+    const int holder_index = cpool->klass_ref_index_at(index);
+    bool holder_is_accessible;
+    ciKlass* holder = get_klass_by_index_impl(cpool, holder_index, holder_is_accessible, accessor);
+    ciInstanceKlass* declared_holder = get_instance_klass_for_declared_method_holder(holder);
+
+    // Get the method's name and signature.
+    Symbol* name_sym = cpool->name_ref_at(index);
+    Symbol* sig_sym  = cpool->signature_ref_at(index);
+
+    if (cpool->has_preresolution()
+        || (holder == ciEnv::MethodHandle_klass() &&
+            MethodHandles::is_signature_polymorphic_name(holder->get_klassOop(), name_sym))) {
+      // Short-circuit lookups for JSR 292-related call sites.
+      // That is, do not rely only on name-based lookups, because they may fail
+      // if the names are not resolvable in the boot class loader (7056328).
+      switch (bc) {
+      case Bytecodes::_invokevirtual:
+      case Bytecodes::_invokeinterface:
+      case Bytecodes::_invokespecial:
+      case Bytecodes::_invokestatic:
+        {
+          methodOop m = constantPoolOopDesc::method_at_if_loaded(cpool, index);
+          if (m != NULL) {
+            return get_object(m)->as_method();
+          }
         }
+        break;
       }
     }
-  }
-
-  if (holder_is_accessible) { // Our declared holder is loaded.
-    instanceKlass* lookup = declared_holder->get_instanceKlass();
-    methodOop m = lookup_method(accessor->get_instanceKlass(), lookup, name_sym, sig_sym, bc);
-    if (m != NULL &&
-        (bc == Bytecodes::_invokestatic
-         ?  instanceKlass::cast(m->method_holder())->is_not_initialized()
-         : !instanceKlass::cast(m->method_holder())->is_loaded())) {
-      m = NULL;
-    }
-    if (m != NULL) {
-      // We found the method.
-      return get_object(m)->as_method();
-    }
-  }
-
-  // Either the declared holder was not loaded, or the method could
-  // not be found.  Create a dummy ciMethod to represent the failed
-  // lookup.
-  ciSymbol* name      = get_symbol(name_sym);
-  ciSymbol* signature = get_symbol(sig_sym);
-  return get_unloaded_method(declared_holder, name, signature, accessor);
-}
-
 
-// ------------------------------------------------------------------
-// ciEnv::get_fake_invokedynamic_method_impl
-ciMethod* ciEnv::get_fake_invokedynamic_method_impl(constantPoolHandle cpool,
-                                                    int index, Bytecodes::Code bc,
-                                                    ciInstanceKlass* accessor) {
-  // Compare the following logic with InterpreterRuntime::resolve_invokedynamic.
-  assert(bc == Bytecodes::_invokedynamic, "must be invokedynamic");
-
-  bool is_resolved = cpool->cache()->main_entry_at(index)->is_resolved(bc);
-  if (is_resolved && cpool->cache()->secondary_entry_at(index)->is_f1_null())
-    // FIXME: code generation could allow for null (unlinked) call site
-    is_resolved = false;
+    if (holder_is_accessible) {  // Our declared holder is loaded.
+      instanceKlass* lookup = declared_holder->get_instanceKlass();
+      methodOop m = lookup_method(accessor->get_instanceKlass(), lookup, name_sym, sig_sym, bc);
+      if (m != NULL &&
+          (bc == Bytecodes::_invokestatic
+           ?  instanceKlass::cast(m->method_holder())->is_not_initialized()
+           : !instanceKlass::cast(m->method_holder())->is_loaded())) {
+        m = NULL;
+      }
+      if (m != NULL) {
+        // We found the method.
+        return get_object(m)->as_method();
+      }
+    }
 
-  // Call site might not be resolved yet.  We could create a real invoker method from the
-  // compiler, but it is simpler to stop the code path here with an unlinked method.
-  if (!is_resolved) {
-    ciInstanceKlass* holder    = get_object(SystemDictionary::MethodHandle_klass())->as_instance_klass();
-    ciSymbol*        name      = ciSymbol::invokeExact_name();
-    ciSymbol*        signature = get_symbol(cpool->signature_ref_at(index));
-    return get_unloaded_method(holder, name, signature, accessor);
+    // Either the declared holder was not loaded, or the method could
+    // not be found.  Create a dummy ciMethod to represent the failed
+    // lookup.
+    ciSymbol* name      = get_symbol(name_sym);
+    ciSymbol* signature = get_symbol(sig_sym);
+    return get_unloaded_method(declared_holder, name, signature, accessor);
   }
-
-  // Get the invoker methodOop from the constant pool.
-  oop f1_value = cpool->cache()->main_entry_at(index)->f1();
-  methodOop signature_invoker = (methodOop) f1_value;
-  assert(signature_invoker != NULL && signature_invoker->is_method() && signature_invoker->is_method_handle_invoke(),
-         "correct result from LinkResolver::resolve_invokedynamic");
-
-  return get_object(signature_invoker)->as_method();
 }
 
 
@@ -851,11 +843,7 @@
 ciMethod* ciEnv::get_method_by_index(constantPoolHandle cpool,
                                      int index, Bytecodes::Code bc,
                                      ciInstanceKlass* accessor) {
-  if (bc == Bytecodes::_invokedynamic) {
-    GUARDED_VM_ENTRY(return get_fake_invokedynamic_method_impl(cpool, index, bc, accessor);)
-  } else {
-    GUARDED_VM_ENTRY(return get_method_by_index_impl(          cpool, index, bc, accessor);)
-  }
+  GUARDED_VM_ENTRY(return get_method_by_index_impl(cpool, index, bc, accessor);)
 }
 
 
@@ -1131,7 +1119,7 @@
 // ------------------------------------------------------------------
 // ciEnv::notice_inlined_method()
 void ciEnv::notice_inlined_method(ciMethod* method) {
-  _num_inlined_bytecodes += method->code_size();
+  _num_inlined_bytecodes += method->code_size_for_inlining();
 }
 
 // ------------------------------------------------------------------
--- a/src/share/vm/ci/ciEnv.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciEnv.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -152,9 +152,6 @@
   ciMethod*  get_method_by_index_impl(constantPoolHandle cpool,
                                       int method_index, Bytecodes::Code bc,
                                       ciInstanceKlass* loading_klass);
-  ciMethod*  get_fake_invokedynamic_method_impl(constantPoolHandle cpool,
-                                                int index, Bytecodes::Code bc,
-                                                ciInstanceKlass* accessor);
 
   // Helper methods
   bool       check_klass_accessibility(ciKlass* accessing_klass,
--- a/src/share/vm/ci/ciInstanceKlass.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciInstanceKlass.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -59,10 +59,7 @@
   _has_nonstatic_fields = ik->has_nonstatic_fields();
   _nonstatic_fields = NULL; // initialized lazily by compute_nonstatic_fields:
 
-  _nof_implementors = ik->nof_implementors();
-  for (int i = 0; i < implementors_limit; i++) {
-    _implementors[i] = NULL;  // we will fill these lazily
-  }
+  _implementor = NULL; // we will fill these lazily
 
   Thread *thread = Thread::current();
   if (ciObjectFactory::is_initialized()) {
@@ -102,7 +99,6 @@
   _nonstatic_field_size = -1;
   _has_nonstatic_fields = false;
   _nonstatic_fields = NULL;
-  _nof_implementors = -1;
   _loader = loader;
   _protection_domain = protection_domain;
   _is_shared = false;
@@ -133,17 +129,6 @@
 }
 
 // ------------------------------------------------------------------
-// ciInstanceKlass::compute_shared_nof_implementors
-int ciInstanceKlass::compute_shared_nof_implementors() {
-  // We requery this property, since it is a very old ciObject.
-  GUARDED_VM_ENTRY(
-    instanceKlass* ik = get_instanceKlass();
-    _nof_implementors = ik->nof_implementors();
-    return _nof_implementors;
-  )
-}
-
-// ------------------------------------------------------------------
 // ciInstanceKlass::loader
 oop ciInstanceKlass::loader() {
   ASSERT_IN_VM;
@@ -540,7 +525,7 @@
   if (is_shared()) {
     return is_final();  // approximately correct
   } else {
-    return !_has_subklass && (_nof_implementors == 0);
+    return !_has_subklass && (nof_implementors() == 0);
   }
 }
 
@@ -548,35 +533,31 @@
 // ciInstanceKlass::implementor
 //
 // Report an implementor of this interface.
-// Returns NULL if exact information is not available.
 // Note that there are various races here, since my cached
 // _implementor value might be out of date with respect
 // to results returned by instanceKlass::implementor.
 // This is OK, since any dependencies we decide to assert
 // will be checked later under the Compile_lock.
-ciInstanceKlass* ciInstanceKlass::implementor(int n) {
-  if (n >= implementors_limit) {
-    return NULL;
-  }
-  ciInstanceKlass* impl = _implementors[n];
+ciInstanceKlass* ciInstanceKlass::implementor() {
+  ciInstanceKlass* impl = _implementor;
   if (impl == NULL) {
-    if (_nof_implementors > implementors_limit) {
-      return NULL;
-    }
     // Go into the VM to fetch the implementor.
     {
       VM_ENTRY_MARK;
-      klassOop k = get_instanceKlass()->implementor(n);
+      klassOop k = get_instanceKlass()->implementor();
       if (k != NULL) {
-        impl = CURRENT_THREAD_ENV->get_object(k)->as_instance_klass();
+        if (k == get_instanceKlass()->as_klassOop()) {
+          // More than one implementor. Use 'this' in this case.
+          impl = this;
+        } else {
+          impl = CURRENT_THREAD_ENV->get_object(k)->as_instance_klass();
+        }
       }
     }
     // Memoize this result.
     if (!is_shared()) {
-      _implementors[n] = (impl == NULL)? this: impl;
+      _implementor = impl;
     }
-  } else if (impl == this) {
-    impl = NULL;  // memoized null result from a VM query
   }
   return impl;
 }
--- a/src/share/vm/ci/ciInstanceKlass.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciInstanceKlass.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -65,9 +65,11 @@
   ciConstantPoolCache*   _field_cache;  // cached map index->field
   GrowableArray<ciField*>* _nonstatic_fields;
 
-  enum { implementors_limit = instanceKlass::implementors_limit };
-  ciInstanceKlass*       _implementors[implementors_limit];
-  jint                   _nof_implementors;
+  // The possible values of _implementor fall into the following three cases:
+  //   NULL: no implementor.
+  //   A ciInstanceKlass other than itself: exactly one implementor.
+  //   Itself: more than one implementor.
+  ciInstanceKlass*       _implementor;
 
   GrowableArray<ciField*>* _non_static_fields;
 
@@ -97,7 +99,6 @@
 
   void compute_shared_init_state();
   bool compute_shared_has_subklass();
-  int  compute_shared_nof_implementors();
   int  compute_nonstatic_fields();
   GrowableArray<ciField*>* compute_nonstatic_fields_impl(GrowableArray<ciField*>* super_fields);
 
@@ -158,10 +159,17 @@
     assert(is_loaded(), "must be loaded");
     return _nonstatic_oop_map_size; }
   ciInstanceKlass*       super();
-  jint                   nof_implementors()  {
+  jint                   nof_implementors() {
+    ciInstanceKlass* impl;
     assert(is_loaded(), "must be loaded");
-    if (_is_shared)  return compute_shared_nof_implementors();
-    return _nof_implementors;
+    impl = implementor();
+    if (impl == NULL) {
+      return 0;
+    } else if (impl != this) {
+      return 1;
+    } else {
+      return 2;
+    }
   }
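
The three-way _implementor encoding decodes mechanically; a minimal sketch, where 2 is a saturating count meaning "two or more", exactly as nof_implementors() reports it:

    #include <cstddef>

    // _implementor encoding: NULL -> no implementor, another klass -> exactly
    // one (the stored klass), the interface itself -> more than one.
    int nof_implementors_sketch(const void* self, const void* implementor) {
      if (implementor == NULL) return 0;
      if (implementor != self) return 1;
      return 2;  // "two or more"
    }
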
 
   ciInstanceKlass* get_canonical_holder(int offset);
@@ -207,7 +215,7 @@
   // but consider adding to vmSymbols.hpp instead.
 
   bool is_leaf_type();
-  ciInstanceKlass* implementor(int n);
+  ciInstanceKlass* implementor();
 
   // Is the defining class loader of this class the default loader?
   bool uses_default_loader();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/ci/ciMemberName.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "ci/ciClassList.hpp"
+#include "ci/ciMemberName.hpp"
+#include "ci/ciUtilities.hpp"
+#include "classfile/javaClasses.hpp"
+
+// ------------------------------------------------------------------
+// ciMemberName::get_vmtarget
+//
+// Return: MN.vmtarget
+ciMethod* ciMemberName::get_vmtarget() const {
+  VM_ENTRY_MARK;
+  oop vmtarget_oop = java_lang_invoke_MemberName::vmtarget(get_oop());
+  return CURRENT_ENV->get_object(vmtarget_oop)->as_method();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/ci/ciMemberName.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_CI_CIMEMBERNAME_HPP
+#define SHARE_VM_CI_CIMEMBERNAME_HPP
+
+#include "ci/ciCallProfile.hpp"
+#include "ci/ciInstance.hpp"
+
+// ciMemberName
+//
+// The class represents a java.lang.invoke.MemberName object.
+class ciMemberName : public ciInstance {
+public:
+  ciMemberName(instanceHandle h_i) : ciInstance(h_i) {}
+
+  // What kind of ciObject is this?
+  bool is_member_name() const { return true; }
+
+  ciMethod* get_vmtarget() const;
+};
+
+#endif // SHARE_VM_CI_CIMEMBERNAME_HPP
--- a/src/share/vm/ci/ciMethod.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciMethod.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -79,7 +79,7 @@
   _max_locals         = h_m()->max_locals();
   _code_size          = h_m()->code_size();
   _intrinsic_id       = h_m()->intrinsic_id();
-  _handler_count      = h_m()->exception_table()->length() / 4;
+  _handler_count      = h_m()->exception_table_length();
   _uses_monitors      = h_m()->access_flags().has_monitor_bytecodes();
   _balanced_monitors  = !_uses_monitors || h_m()->access_flags().is_monitor_matching();
   _is_c1_compilable   = !h_m()->is_not_c1_compilable();
@@ -198,7 +198,7 @@
   }
 
   // And load the exception table.
-  typeArrayOop exc_table = me->exception_table();
+  ExceptionTable exc_table(me);
 
   // Allocate one extra spot in our list of exceptions.  This
   // last entry will be used to represent the possibility that
@@ -209,13 +209,12 @@
                                          * (_handler_count + 1));
   if (_handler_count > 0) {
     for (int i=0; i<_handler_count; i++) {
-      int base = i*4;
       _exception_handlers[i] = new (arena) ciExceptionHandler(
                                 holder(),
-            /* start    */      exc_table->int_at(base),
-            /* limit    */      exc_table->int_at(base+1),
-            /* goto pc  */      exc_table->int_at(base+2),
-            /* cp index */      exc_table->int_at(base+3));
+            /* start    */      exc_table.start_pc(i),
+            /* limit    */      exc_table.end_pc(i),
+            /* goto pc  */      exc_table.handler_pc(i),
+            /* cp index */      exc_table.catch_type_index(i));
     }
   }
 
@@ -770,39 +769,37 @@
 // invokedynamic support
 
 // ------------------------------------------------------------------
-// ciMethod::is_method_handle_invoke
+// ciMethod::is_method_handle_intrinsic
 //
-// Return true if the method is an instance of one of the two
-// signature-polymorphic MethodHandle methods, invokeExact or invokeGeneric.
-bool ciMethod::is_method_handle_invoke() const {
-  if (!is_loaded()) {
-    bool flag = (holder()->name() == ciSymbol::java_lang_invoke_MethodHandle() &&
-                 methodOopDesc::is_method_handle_invoke_name(name()->sid()));
-    return flag;
-  }
-  VM_ENTRY_MARK;
-  return get_methodOop()->is_method_handle_invoke();
+// Return true if the method is one of the JVM-generated
+// signature-polymorphic MethodHandle methods: _invokeBasic, _linkToVirtual, etc.
+bool ciMethod::is_method_handle_intrinsic() const {
+  vmIntrinsics::ID iid = _intrinsic_id;  // do not check if loaded
+  return (MethodHandles::is_signature_polymorphic(iid) &&
+          MethodHandles::is_signature_polymorphic_intrinsic(iid));
 }
 
 // ------------------------------------------------------------------
-// ciMethod::is_method_handle_adapter
+// ciMethod::is_compiled_lambda_form
 //
 // Return true if the method is a generated MethodHandle adapter.
-// These are built by MethodHandleCompiler.
-bool ciMethod::is_method_handle_adapter() const {
-  if (!is_loaded())  return false;
-  VM_ENTRY_MARK;
-  return get_methodOop()->is_method_handle_adapter();
+// These are built by Java code.
+bool ciMethod::is_compiled_lambda_form() const {
+  vmIntrinsics::ID iid = _intrinsic_id;  // do not check if loaded
+  return iid == vmIntrinsics::_compiledLambdaForm;
 }
 
-ciInstance* ciMethod::method_handle_type() {
-  check_is_loaded();
-  VM_ENTRY_MARK;
-  oop mtype = get_methodOop()->method_handle_type();
-  return CURRENT_THREAD_ENV->get_object(mtype)->as_instance();
+// ------------------------------------------------------------------
+// ciMethod::has_member_arg
+//
+// Return true if the method is a linker intrinsic like _linkToVirtual.
+// These are built by the JVM.
+bool ciMethod::has_member_arg() const {
+  vmIntrinsics::ID iid = _intrinsic_id;  // do not check if loaded
+  return (MethodHandles::is_signature_polymorphic(iid) &&
+          MethodHandles::has_member_arg(iid));
 }
 
-
 // ------------------------------------------------------------------
 // ciMethod::ensure_method_data
 //
@@ -1025,28 +1022,13 @@
 // ------------------------------------------------------------------
 // ciMethod::code_size_for_inlining
 //
-// Code size for inlining decisions.
-//
-// Don't fully count method handle adapters against inlining budgets:
-// the metric we use here is the number of call sites in the adapter
-// as they are probably the instructions which generate some code.
+// Code size for inlining decisions.  This method returns a code
+// size of 1 for methods that have the ForceInline annotation.
 int ciMethod::code_size_for_inlining() {
   check_is_loaded();
-
-  // Method handle adapters
-  if (is_method_handle_adapter()) {
-    // Count call sites
-    int call_site_count = 0;
-    ciBytecodeStream iter(this);
-    while (iter.next() != ciBytecodeStream::EOBC()) {
-      if (Bytecodes::is_invoke(iter.cur_bc())) {
-        call_site_count++;
-      }
-    }
-    return call_site_count;
+  if (get_methodOop()->force_inline()) {
+    return 1;
   }
-
-  // Normal method
   return code_size();
 }
 
@@ -1128,7 +1110,8 @@
     constantPoolHandle pool (THREAD, get_methodOop()->constants());
     methodHandle spec_method;
     KlassHandle  spec_klass;
-    LinkResolver::resolve_method(spec_method, spec_klass, pool, refinfo_index, THREAD);
+    Bytecodes::Code code = (is_static ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual);
+    LinkResolver::resolve_method_statically(spec_method, spec_klass, code, pool, refinfo_index, THREAD);
     if (HAS_PENDING_EXCEPTION) {
       CLEAR_PENDING_EXCEPTION;
       return false;
@@ -1208,8 +1191,16 @@
 //
 // Print the name of this method, without signature.
 void ciMethod::print_short_name(outputStream* st) {
-  check_is_loaded();
-  GUARDED_VM_ENTRY(get_methodOop()->print_short_name(st);)
+  if (is_loaded()) {
+    GUARDED_VM_ENTRY(get_methodOop()->print_short_name(st););
+  } else {
+    // Fall back if method is not loaded.
+    holder()->print_name_on(st);
+    st->print("::");
+    name()->print_symbol_on(st);
+    if (WizardMode)
+      signature()->as_symbol()->print_symbol_on(st);
+  }
 }
 
 // ------------------------------------------------------------------
@@ -1225,7 +1216,9 @@
   st->print(" signature=");
   signature()->as_symbol()->print_symbol_on(st);
   if (is_loaded()) {
-    st->print(" loaded=true flags=");
+    st->print(" loaded=true");
+    st->print(" arg_size=%d", arg_size());
+    st->print(" flags=");
     flags().print_member_flags(st);
   } else {
     st->print(" loaded=false");
--- a/src/share/vm/ci/ciMethod.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciMethod.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -133,16 +133,20 @@
     return _signature->size() + (_flags.is_static() ? 0 : 1);
   }
   // Report the number of elements on stack when invoking this method.
-  // This is different than the regular arg_size because invokdynamic
+  // This is different from the regular arg_size because invokedynamic
   // has an implicit receiver.
   int invoke_arg_size(Bytecodes::Code code) const {
-    int arg_size = _signature->size();
-    // Add a receiver argument, maybe:
-    if (code != Bytecodes::_invokestatic &&
-        code != Bytecodes::_invokedynamic) {
-      arg_size++;
+    if (is_loaded()) {
+      return arg_size();
+    } else {
+      int arg_size = _signature->size();
+      // Add a receiver argument, maybe:
+      if (code != Bytecodes::_invokestatic &&
+          code != Bytecodes::_invokedynamic) {
+        arg_size++;
+      }
+      return arg_size;
     }
-    return arg_size;
   }
 
 
@@ -160,6 +164,9 @@
   // Code size for inlining decisions.
   int code_size_for_inlining();
 
+  bool force_inline() { return get_methodOop()->force_inline(); }
+  bool dont_inline()  { return get_methodOop()->dont_inline();  }
+
   int comp_level();
   int highest_osr_comp_level();
 
@@ -256,9 +263,9 @@
   int scale_count(int count, float prof_factor = 1.);  // make MDO count commensurate with IIC
 
   // JSR 292 support
-  bool is_method_handle_invoke()  const;
-  bool is_method_handle_adapter() const;
-  ciInstance* method_handle_type();
+  bool is_method_handle_intrinsic()  const;
+  bool is_compiled_lambda_form() const;
+  bool has_member_arg() const;
 
   // What kind of ciObject is this?
   bool is_method()                               { return true; }
--- a/src/share/vm/ci/ciMethodHandle.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciMethodHandle.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -24,84 +24,18 @@
 
 #include "precompiled.hpp"
 #include "ci/ciClassList.hpp"
-#include "ci/ciInstance.hpp"
-#include "ci/ciMethodData.hpp"
 #include "ci/ciMethodHandle.hpp"
 #include "ci/ciUtilities.hpp"
-#include "prims/methodHandleWalk.hpp"
-#include "prims/methodHandles.hpp"
-
-// ciMethodHandle
+#include "classfile/javaClasses.hpp"
 
 // ------------------------------------------------------------------
-// ciMethodHandle::get_adapter
+// ciMethodHandle::get_vmtarget
 //
-// Return an adapter for this MethodHandle.
-ciMethod* ciMethodHandle::get_adapter_impl(bool is_invokedynamic) {
+// Return: MH.form -> LF.vmentry -> MN.vmtarget
+ciMethod* ciMethodHandle::get_vmtarget() const {
   VM_ENTRY_MARK;
-  Handle h(get_oop());
-  methodHandle callee(_callee->get_methodOop());
-  assert(callee->is_method_handle_invoke(), "");
-  oop mt1 = callee->method_handle_type();
-  oop mt2 = java_lang_invoke_MethodHandle::type(h());
-  if (!java_lang_invoke_MethodType::equals(mt1, mt2)) {
-    if (PrintMiscellaneous && (Verbose || WizardMode)) {
-      tty->print_cr("ciMethodHandle::get_adapter: types not equal");
-      mt1->print(); mt2->print();
-    }
-    return NULL;
-  }
-  // We catch all exceptions here that could happen in the method
-  // handle compiler and stop the VM.
-  MethodHandleCompiler mhc(h, callee->name(), callee->signature(), _profile.count(), is_invokedynamic, THREAD);
-  if (!HAS_PENDING_EXCEPTION) {
-    methodHandle m = mhc.compile(THREAD);
-    if (!HAS_PENDING_EXCEPTION) {
-      return CURRENT_ENV->get_object(m())->as_method();
-    }
-  }
-  if (PrintMiscellaneous && (Verbose || WizardMode)) {
-    tty->print("*** ciMethodHandle::get_adapter => ");
-    PENDING_EXCEPTION->print();
-    tty->print("*** get_adapter (%s): ", is_invokedynamic ? "indy" : "mh"); ((ciObject*)this)->print();
-  }
-  CLEAR_PENDING_EXCEPTION;
-  return NULL;
+  oop form_oop     = java_lang_invoke_MethodHandle::form(get_oop());
+  oop vmentry_oop  = java_lang_invoke_LambdaForm::vmentry(form_oop);
+  oop vmtarget_oop = java_lang_invoke_MemberName::vmtarget(vmentry_oop);
+  return CURRENT_ENV->get_object(vmtarget_oop)->as_method();
 }
-
-// ------------------------------------------------------------------
-// ciMethodHandle::get_adapter
-//
-// Return an adapter for this MethodHandle.
-ciMethod* ciMethodHandle::get_adapter(bool is_invokedynamic) {
-  ciMethod* result = get_adapter_impl(is_invokedynamic);
-  if (result) {
-    // Fake up the MDO maturity.
-    ciMethodData* mdo = result->method_data();
-    if (mdo != NULL && _caller->method_data() != NULL && _caller->method_data()->is_mature()) {
-      mdo->set_mature();
-    }
-  }
-  return result;
-}
-
-
-#ifdef ASSERT
-// ------------------------------------------------------------------
-// ciMethodHandle::print_chain_impl
-//
-// Implementation of the print method.
-void ciMethodHandle::print_chain_impl() {
-  ASSERT_IN_VM;
-  MethodHandleChain::print(get_oop());
-}
-
-
-// ------------------------------------------------------------------
-// ciMethodHandle::print_chain
-//
-// Implementation of the print_chain method.
-void ciMethodHandle::print_chain() {
-  GUARDED_VM_ENTRY(print_chain_impl(););
-}
-#endif
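
For context: get_vmtarget() replaces the old adapter compilation; the
JIT simply follows MH.form -> LF.vmentry -> MN.vmtarget to the Java
method that implements the handle's behavior. A minimal usage sketch
for a constant MethodHandle seen at a call site (the wrapper name is
illustrative):

  static ciMethod* target_of_constant_mh(ciMethodHandle* mh) {
    // All adapter logic now lives in Java LambdaForms; the compiler
    // only needs the eventual target method to inline through.
    return mh->get_vmtarget();
  }
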
--- a/src/share/vm/ci/ciMethodHandle.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciMethodHandle.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -25,61 +25,20 @@
 #ifndef SHARE_VM_CI_CIMETHODHANDLE_HPP
 #define SHARE_VM_CI_CIMETHODHANDLE_HPP
 
-#include "ci/ciCallProfile.hpp"
+#include "ci/ciClassList.hpp"
 #include "ci/ciInstance.hpp"
-#include "prims/methodHandles.hpp"
 
 // ciMethodHandle
 //
 // The class represents a java.lang.invoke.MethodHandle object.
 class ciMethodHandle : public ciInstance {
-private:
-  ciMethod*      _callee;
-  ciMethod*      _caller;
-  ciCallProfile  _profile;
-  ciMethod*      _method_handle_adapter;
-  ciMethod*      _invokedynamic_adapter;
-
-  // Return an adapter for this MethodHandle.
-  ciMethod* get_adapter_impl(bool is_invokedynamic);
-  ciMethod* get_adapter(     bool is_invokedynamic);
-
-protected:
-  void print_chain_impl() NOT_DEBUG_RETURN;
-
 public:
-  ciMethodHandle(instanceHandle h_i) :
-    ciInstance(h_i),
-    _callee(NULL),
-    _caller(NULL),
-    _method_handle_adapter(NULL),
-    _invokedynamic_adapter(NULL)
-  {}
+  ciMethodHandle(instanceHandle h_i) : ciInstance(h_i) {}
 
   // What kind of ciObject is this?
   bool is_method_handle() const { return true; }
 
-  void set_callee(ciMethod* m)                  { _callee  = m;       }
-  void set_caller(ciMethod* m)                  { _caller  = m;       }
-  void set_call_profile(ciCallProfile profile)  { _profile = profile; }
-
-  // Return an adapter for a MethodHandle call.
-  ciMethod* get_method_handle_adapter() {
-    if (_method_handle_adapter == NULL) {
-      _method_handle_adapter = get_adapter(false);
-    }
-    return _method_handle_adapter;
-  }
-
-  // Return an adapter for an invokedynamic call.
-  ciMethod* get_invokedynamic_adapter() {
-    if (_invokedynamic_adapter == NULL) {
-      _invokedynamic_adapter = get_adapter(true);
-    }
-    return _invokedynamic_adapter;
-  }
-
-  void print_chain() NOT_DEBUG_RETURN;
+  ciMethod* get_vmtarget() const;
 };
 
 #endif // SHARE_VM_CI_CIMETHODHANDLE_HPP
--- a/src/share/vm/ci/ciObject.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciObject.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -138,13 +138,14 @@
   jobject constant_encoding();
 
   // What kind of ciObject is this?
-  virtual bool is_null_object() const       { return false; }
-  virtual bool is_call_site() const         { return false; }
-  virtual bool is_cpcache() const           { return false; }
+  virtual bool is_null_object()       const { return false; }
+  virtual bool is_call_site()         const { return false; }
+  virtual bool is_cpcache()           const { return false; }
   virtual bool is_instance()                { return false; }
+  virtual bool is_member_name()       const { return false; }
   virtual bool is_method()                  { return false; }
   virtual bool is_method_data()             { return false; }
-  virtual bool is_method_handle() const     { return false; }
+  virtual bool is_method_handle()     const { return false; }
   virtual bool is_array()                   { return false; }
   virtual bool is_obj_array()               { return false; }
   virtual bool is_type_array()              { return false; }
@@ -208,6 +209,10 @@
     assert(is_instance(), "bad cast");
     return (ciInstance*)this;
   }
+  ciMemberName*            as_member_name() {
+    assert(is_member_name(), "bad cast");
+    return (ciMemberName*)this;
+  }
   ciMethod*                as_method() {
     assert(is_method(), "bad cast");
     return (ciMethod*)this;
@@ -290,7 +295,8 @@
   }
 
   // Print debugging output about this ciObject.
-  void print(outputStream* st = tty);
+  void print(outputStream* st);
+  void print() { print(tty); }  // GDB cannot handle default arguments
 
   // Print debugging output about the oop this ciObject represents.
   void print_oop(outputStream* st = tty);
--- a/src/share/vm/ci/ciObjectFactory.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciObjectFactory.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -28,6 +28,7 @@
 #include "ci/ciInstance.hpp"
 #include "ci/ciInstanceKlass.hpp"
 #include "ci/ciInstanceKlassKlass.hpp"
+#include "ci/ciMemberName.hpp"
 #include "ci/ciMethod.hpp"
 #include "ci/ciMethodData.hpp"
 #include "ci/ciMethodHandle.hpp"
@@ -111,7 +112,7 @@
   // This Arena is long lived and exists in the resource mark of the
   // compiler thread that initializes the initial ciObjectFactory which
   // creates the shared ciObjects that all later ciObjectFactories use.
-  Arena* arena = new Arena();
+  Arena* arena = new (mtCompiler) Arena();
   ciEnv initial(arena);
   ciEnv* env = ciEnv::current();
   env->_factory->init_shared_objects();
@@ -344,6 +345,8 @@
     instanceHandle h_i(THREAD, (instanceOop)o);
     if (java_lang_invoke_CallSite::is_instance(o))
       return new (arena()) ciCallSite(h_i);
+    else if (java_lang_invoke_MemberName::is_instance(o))
+      return new (arena()) ciMemberName(h_i);
     else if (java_lang_invoke_MethodHandle::is_instance(o))
       return new (arena()) ciMethodHandle(h_i);
     else
--- a/src/share/vm/ci/ciSignature.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciSignature.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -39,10 +39,11 @@
   ciKlass*  _accessing_klass;
 
   GrowableArray<ciType*>* _types;
-  int _size;
-  int _count;
+  int _size;   // number of stack slots required for arguments
+  int _count;  // number of parameter types in the signature
 
   friend class ciMethod;
+  friend class ciBytecodeStream;
   friend class ciObjectFactory;
 
   ciSignature(ciKlass* accessing_klass, constantPoolHandle cpool, ciSymbol* signature);
--- a/src/share/vm/ci/ciStreams.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciStreams.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -355,12 +355,47 @@
 // ciBytecodeStream::get_method
 //
 // If this is a method invocation bytecode, get the invoked method.
-ciMethod* ciBytecodeStream::get_method(bool& will_link) {
+// Additionally return the declared signature to get more concrete
+// type information if required (Cf. invokedynamic and invokehandle).
+ciMethod* ciBytecodeStream::get_method(bool& will_link, ciSignature* *declared_signature_result) {
+  VM_ENTRY_MARK;
+  ciEnv* env = CURRENT_ENV;
+  constantPoolHandle cpool(_method->get_methodOop()->constants());
+  ciMethod* m = env->get_method_by_index(cpool, get_method_index(), cur_bc(), _holder);
+  will_link = m->is_loaded();
+  // Get declared method signature and return it.
+  if (has_optional_appendix()) {
+    const int sig_index = get_method_signature_index();
+    Symbol* sig_sym = cpool->symbol_at(sig_index);
+    ciKlass* pool_holder = env->get_object(cpool->pool_holder())->as_klass();
+    (*declared_signature_result) = new (env->arena()) ciSignature(pool_holder, cpool, env->get_symbol(sig_sym));
+  } else {
+    (*declared_signature_result) = m->signature();
+  }
+  return m;
+}
+
+// ------------------------------------------------------------------
+// ciBytecodeStream::has_appendix
+//
+// Returns true if there is an appendix argument stored in the
+// constant pool cache at the current bci.
+bool ciBytecodeStream::has_appendix() {
   VM_ENTRY_MARK;
   constantPoolHandle cpool(_method->get_methodOop()->constants());
-  ciMethod* m = CURRENT_ENV->get_method_by_index(cpool, get_method_index(), cur_bc(), _holder);
-  will_link = m->is_loaded();
-  return m;
+  return constantPoolOopDesc::has_appendix_at_if_loaded(cpool, get_method_index());
+}
+
+// ------------------------------------------------------------------
+// ciBytecodeStream::get_appendix
+//
+// Return the appendix argument stored in the constant pool cache at
+// the current bci.
+ciObject* ciBytecodeStream::get_appendix() {
+  VM_ENTRY_MARK;
+  constantPoolHandle cpool(_method->get_methodOop()->constants());
+  oop appendix_oop = constantPoolOopDesc::appendix_at_if_loaded(cpool, get_method_index());
+  return CURRENT_ENV->get_object(appendix_oop);
 }
 
 // ------------------------------------------------------------------
@@ -378,9 +413,9 @@
   VM_ENTRY_MARK;
   constantPoolHandle cpool(_method->get_methodOop()->constants());
   bool ignore;
-  // report as InvokeDynamic for invokedynamic, which is syntactically classless
+  // report as MethodHandle for invokedynamic, which is syntactically classless
   if (cur_bc() == Bytecodes::_invokedynamic)
-    return CURRENT_ENV->get_klass_by_name(_holder, ciSymbol::java_lang_invoke_InvokeDynamic(), false);
+    return CURRENT_ENV->get_klass_by_name(_holder, ciSymbol::java_lang_invoke_MethodHandle(), false);
   return CURRENT_ENV->get_klass_by_index(cpool, get_method_holder_index(), ignore, _holder);
 }
 
@@ -402,11 +437,12 @@
 // referenced by the current bytecode.  Used for generating
 // deoptimization information.
 int ciBytecodeStream::get_method_signature_index() {
-  VM_ENTRY_MARK;
-  constantPoolOop cpool = _holder->get_instanceKlass()->constants();
-  int method_index = get_method_index();
-  int name_and_type_index = cpool->name_and_type_ref_index_at(method_index);
-  return cpool->signature_ref_index_at(name_and_type_index);
+  GUARDED_VM_ENTRY(
+    constantPoolOop cpool = _holder->get_instanceKlass()->constants();
+    const int method_index = get_method_index();
+    const int name_and_type_index = cpool->name_and_type_ref_index_at(method_index);
+    return cpool->signature_ref_index_at(name_and_type_index);
+  )
 }
 
 // ------------------------------------------------------------------
@@ -434,7 +470,7 @@
   // Get the CallSite from the constant pool cache.
   int method_index = get_method_index();
   ConstantPoolCacheEntry* cpcache_entry = cpcache->secondary_entry_at(method_index);
-  oop call_site_oop = cpcache_entry->f1();
+  oop call_site_oop = cpcache_entry->f1_as_instance();
 
   // Create a CallSite object and return it.
   return CURRENT_ENV->get_object(call_site_oop)->as_call_site();
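
For context: get_method() now has a two-result contract. A sketch of
the new calling convention, assuming 'str' is a ciBytecodeStream
positioned at an invoke bytecode (this mirrors the ciTypeFlow::do_invoke
change later in this patch):

  bool will_link;
  ciSignature* declared_signature = NULL;
  ciMethod* callee = str->get_method(will_link, &declared_signature);
  assert(declared_signature != NULL, "cannot be null");
  // Size stack effects from declared_signature, not callee->signature():
  // for invokedynamic/invokehandle the two can differ.
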
--- a/src/share/vm/ci/ciStreams.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciStreams.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -151,6 +151,8 @@
   // Does this instruction contain an index which refers into the CP cache?
   bool has_cache_index() const { return Bytecodes::uses_cp_cache(cur_bc_raw()); }
 
+  bool has_optional_appendix() { return Bytecodes::has_optional_appendix(cur_bc_raw()); }
+
   int get_index_u1() const {
     return bytecode().get_index_u1(cur_bc_raw());
   }
@@ -257,8 +259,9 @@
   int      get_field_holder_index();
   int      get_field_signature_index();
 
-  // If this is a method invocation bytecode, get the invoked method.
-  ciMethod* get_method(bool& will_link);
+  ciMethod* get_method(bool& will_link, ciSignature* *declared_signature_result);
+  bool      has_appendix();
+  ciObject* get_appendix();
   ciKlass*  get_declared_method_holder();
   int       get_method_holder_index();
   int       get_method_signature_index();
--- a/src/share/vm/ci/ciSymbol.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciSymbol.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -83,6 +83,10 @@
   GUARDED_VM_ENTRY(return get_symbol()->starts_with(prefix, len);)
 }
 
+bool ciSymbol::is_signature_polymorphic_name()  const {
+  GUARDED_VM_ENTRY(return MethodHandles::is_signature_polymorphic_name(get_symbol());)
+}
+
 // ------------------------------------------------------------------
 // ciSymbol::index_of
 //
--- a/src/share/vm/ci/ciSymbol.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciSymbol.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -107,6 +107,8 @@
 
   // Are two ciSymbols equal?
   bool equals(ciSymbol* obj) { return this->_symbol == obj->get_symbol(); }
+
+  bool is_signature_polymorphic_name() const;
 };
 
 #endif // SHARE_VM_CI_CISYMBOL_HPP
--- a/src/share/vm/ci/ciTypeFlow.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/ci/ciTypeFlow.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -645,7 +645,9 @@
 void ciTypeFlow::StateVector::do_invoke(ciBytecodeStream* str,
                                         bool has_receiver) {
   bool will_link;
-  ciMethod* method = str->get_method(will_link);
+  ciSignature* declared_signature = NULL;
+  ciMethod* callee = str->get_method(will_link, &declared_signature);
+  assert(declared_signature != NULL, "cannot be null");
   if (!will_link) {
     // We weren't able to find the method.
     if (str->cur_bc() == Bytecodes::_invokedynamic) {
@@ -654,14 +656,16 @@
            (Deoptimization::Reason_uninitialized,
             Deoptimization::Action_reinterpret));
     } else {
-      ciKlass* unloaded_holder = method->holder();
+      ciKlass* unloaded_holder = callee->holder();
       trap(str, unloaded_holder, str->get_method_holder_index());
     }
   } else {
-    ciSignature* signature = method->signature();
-    ciSignatureStream sigstr(signature);
-    int arg_size = signature->size();
-    int stack_base = stack_size() - arg_size;
+    // We are using the declared signature here because it might be
+    // different from the callee signature (Cf. invokedynamic and
+    // invokehandle).
+    ciSignatureStream sigstr(declared_signature);
+    const int arg_size = declared_signature->size();
+    const int stack_base = stack_size() - arg_size;
     int i = 0;
     for( ; !sigstr.at_return_type(); sigstr.next()) {
       ciType* type = sigstr.type();
--- a/src/share/vm/classfile/classFileParser.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/classFileParser.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -81,7 +81,7 @@
 #define JAVA_7_VERSION                    51
 
 
-void ClassFileParser::parse_constant_pool_entries(constantPoolHandle cp, int length, TRAPS) {
+void ClassFileParser::parse_constant_pool_entries(Handle class_loader, constantPoolHandle cp, int length, TRAPS) {
   // Use a local copy of ClassFileStream. It helps the C++ compiler to optimize
   // this function (_current can be allocated in a register, with scalar
   // replacement of aggregates). The _current pointer is copied back to
@@ -272,7 +272,7 @@
             indices[names_count] = index;
             hashValues[names_count++] = hash;
             if (names_count == SymbolTable::symbol_alloc_batch_size) {
-              SymbolTable::new_symbols(cp, names_count, names, lengths, indices, hashValues, CHECK);
+              SymbolTable::new_symbols(class_loader, cp, names_count, names, lengths, indices, hashValues, CHECK);
               names_count = 0;
             }
           } else {
@@ -289,7 +289,7 @@
 
   // Allocate the remaining symbols
   if (names_count > 0) {
-    SymbolTable::new_symbols(cp, names_count, names, lengths, indices, hashValues, CHECK);
+    SymbolTable::new_symbols(class_loader, cp, names_count, names, lengths, indices, hashValues, CHECK);
   }
 
   // Copy _current pointer of local copy back to stream().
@@ -318,7 +318,14 @@
 
 bool inline valid_cp_range(int index, int length) { return (index > 0 && index < length); }
 
-constantPoolHandle ClassFileParser::parse_constant_pool(TRAPS) {
+inline Symbol* check_symbol_at(constantPoolHandle cp, int index) {
+  if (valid_cp_range(index, cp->length()) && cp->tag_at(index).is_utf8())
+    return cp->symbol_at(index);
+  else
+    return NULL;
+}
+
+constantPoolHandle ClassFileParser::parse_constant_pool(Handle class_loader, TRAPS) {
   ClassFileStream* cfs = stream();
   constantPoolHandle nullHandle;
 
@@ -337,7 +344,7 @@
   ConstantPoolCleaner cp_in_error(cp); // set constant pool to be cleaned up.
 
   // parsing constant pool entries
-  parse_constant_pool_entries(cp, length, CHECK_(nullHandle));
+  parse_constant_pool_entries(class_loader, cp, length, CHECK_(nullHandle));
 
   int index = 1;  // declared outside of loops for portability
 
@@ -902,6 +909,7 @@
                                              bool* is_synthetic_addr,
                                              u2* generic_signature_index_addr,
                                              typeArrayHandle* field_annotations,
+                                             ClassFileParser::FieldAnnotationCollector* parsed_annotations,
                                              TRAPS) {
   ClassFileStream* cfs = stream();
   assert(attributes_count > 0, "length should be greater than 0");
@@ -1082,12 +1090,36 @@
 
   int num_injected = 0;
   InjectedField* injected = JavaClasses::get_injected(class_name, &num_injected);
-
-  // Tuples of shorts [access, name index, sig index, initial value index, byte offset, generic signature index]
-  typeArrayOop new_fields = oopFactory::new_permanent_shortArray((length + num_injected) * FieldInfo::field_slots, CHECK_(nullHandle));
-  typeArrayHandle fields(THREAD, new_fields);
+  int total_fields = length + num_injected;
+
+  // The field array starts with tuples of shorts
+  // [access, name index, sig index, initial value index, byte offset].
+  // A generic signature slot only exists for field with generic
+  // signature attribute. And the access flag is set with
+  // JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE for that field. The generic
+  // signature slots are at the end of the field array and after all
+  // other fields data.
+  //
+  //   f1: [access, name index, sig index, initial value index, low_offset, high_offset]
+  //   f2: [access, name index, sig index, initial value index, low_offset, high_offset]
+  //       ...
+  //   fn: [access, name index, sig index, initial value index, low_offset, high_offset]
+  //       [generic signature index]
+  //       [generic signature index]
+  //       ...
+  //
+  // Allocate a temporary resource array for field data. For each field,
+  // a slot is reserved in the temporary array for the generic signature
+  // index. After parsing all fields, the data are copied to a permanent
+  // array and any unused slots will be discarded.
+  ResourceMark rm(THREAD);
+  u2* fa = NEW_RESOURCE_ARRAY_IN_THREAD(
+             THREAD, u2, total_fields * (FieldInfo::field_slots + 1));
 
   typeArrayHandle field_annotations;
+  // The generic signature slots start after all other fields' data.
+  int generic_signature_slot = total_fields * FieldInfo::field_slots;
+  int num_generic_signature = 0;
   for (int n = 0; n < length; n++) {
     cfs->guarantee_more(8, CHECK_(nullHandle));  // access_flags, name_index, descriptor_index, attributes_count
 
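
A worked example of the temporary field-array layout above (numbers
are illustrative; FieldInfo::field_slots == 6 is inferred from the
six-short tuple shown):

  length = 3 declared fields, num_injected = 0, one generic signature:
    total_fields                  = 3
    temporary resource array size = 3 * (6 + 1) = 21 u2 slots
    generic signature slots start = 3 * 6       = 18
    permanent array length        = 3 * 6 + 1   = 19 shorts
                                    (index * field_slots + num_generic_signature)
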
@@ -1118,12 +1150,14 @@
     bool is_synthetic = false;
     u2 generic_signature_index = 0;
     bool is_static = access_flags.is_static();
+    FieldAnnotationCollector parsed_annotations;
 
     u2 attributes_count = cfs->get_u2_fast();
     if (attributes_count > 0) {
       parse_field_attributes(cp, attributes_count, is_static, signature_index,
                              &constantvalue_index, &is_synthetic,
                              &generic_signature_index, &field_annotations,
+                             &parsed_annotations,
                              CHECK_(nullHandle));
       if (field_annotations.not_null()) {
         if (fields_annotations->is_null()) {
@@ -1135,15 +1169,22 @@
       if (is_synthetic) {
         access_flags.set_is_synthetic();
       }
+      if (generic_signature_index != 0) {
+        access_flags.set_field_has_generic_signature();
+        fa[generic_signature_slot] = generic_signature_index;
+        generic_signature_slot ++;
+        num_generic_signature ++;
+      }
     }
 
-    FieldInfo* field = FieldInfo::from_field_array(fields(), n);
+    FieldInfo* field = FieldInfo::from_field_array(fa, n);
     field->initialize(access_flags.as_short(),
                       name_index,
                       signature_index,
                       constantvalue_index,
-                      generic_signature_index,
                       0);
+    if (parsed_annotations.has_any_annotations())
+      parsed_annotations.apply_to(field);
 
     BasicType type = cp->basic_type_for_signature_at(signature_index);
 
@@ -1155,8 +1196,8 @@
     field->set_offset(atype);
   }
 
+  int index = length;
   if (num_injected != 0) {
-    int index = length;
     for (int n = 0; n < num_injected; n++) {
       // Check for duplicates
       if (injected[n].may_be_java) {
@@ -1164,7 +1205,7 @@
         Symbol* signature = injected[n].signature();
         bool duplicate = false;
         for (int i = 0; i < length; i++) {
-          FieldInfo* f = FieldInfo::from_field_array(fields(), i);
+          FieldInfo* f = FieldInfo::from_field_array(fa, i);
           if (name      == cp->symbol_at(f->name_index()) &&
               signature == cp->symbol_at(f->signature_index())) {
             // Symbol is declared in Java so skip this one
@@ -1179,12 +1220,11 @@
       }
 
       // Injected field
-      FieldInfo* field = FieldInfo::from_field_array(fields(), index);
+      FieldInfo* field = FieldInfo::from_field_array(fa, index);
       field->initialize(JVM_ACC_FIELD_INTERNAL,
                         injected[n].name_index,
                         injected[n].signature_index,
                         0,
-                        0,
                         0);
 
       BasicType type = FieldType::basic_type(injected[n].signature());
@@ -1197,17 +1237,27 @@
       field->set_offset(atype);
       index++;
     }
-
-    if (index < length + num_injected) {
-      // sometimes injected fields already exist in the Java source so
-      // the fields array could be too long.  In that case trim the
-      // fields array.
-      new_fields = oopFactory::new_permanent_shortArray(index * FieldInfo::field_slots, CHECK_(nullHandle));
-      for (int i = 0; i < index * FieldInfo::field_slots; i++) {
-        new_fields->short_at_put(i, fields->short_at(i));
-      }
-      fields = new_fields;
+  }
+
+  // Now copy the fields' data from the temporary resource array.
+  // Sometimes injected fields already exist in the Java source so
+  // the fields array could be too long.  In that case the
+  // fields array is trimmed. Also unused slots that were reserved
+  // for generic signature indexes are discarded.
+  typeArrayOop new_fields = oopFactory::new_permanent_shortArray(
+    index * FieldInfo::field_slots + num_generic_signature,
+    CHECK_(nullHandle));
+  typeArrayHandle fields(THREAD, new_fields);
+  {
+    int i = 0;
+    for (; i < index * FieldInfo::field_slots; i++) {
+      new_fields->short_at_put(i, fa[i]);
     }
+    for (int j = total_fields * FieldInfo::field_slots;
+         j < generic_signature_slot; j++) {
+      new_fields->short_at_put(i++, fa[j]);
+    }
+    assert(i == new_fields->length(), "");
   }
 
   if (_need_verify && length > 1) {
@@ -1246,42 +1296,38 @@
 }
 
 
-typeArrayHandle ClassFileParser::parse_exception_table(u4 code_length,
-                                                       u4 exception_table_length,
-                                                       constantPoolHandle cp,
-                                                       TRAPS) {
+u2* ClassFileParser::parse_exception_table(u4 code_length,
+                                           u4 exception_table_length,
+                                           constantPoolHandle cp,
+                                           TRAPS) {
   ClassFileStream* cfs = stream();
-  typeArrayHandle nullHandle;
-
-  // 4-tuples of ints [start_pc, end_pc, handler_pc, catch_type index]
-  typeArrayOop eh = oopFactory::new_permanent_intArray(exception_table_length*4, CHECK_(nullHandle));
-  typeArrayHandle exception_handlers = typeArrayHandle(THREAD, eh);
-
-  int index = 0;
-  cfs->guarantee_more(8 * exception_table_length, CHECK_(nullHandle)); // start_pc, end_pc, handler_pc, catch_type_index
-  for (unsigned int i = 0; i < exception_table_length; i++) {
-    u2 start_pc = cfs->get_u2_fast();
-    u2 end_pc = cfs->get_u2_fast();
-    u2 handler_pc = cfs->get_u2_fast();
-    u2 catch_type_index = cfs->get_u2_fast();
-    // Will check legal target after parsing code array in verifier.
-    if (_need_verify) {
+
+  u2* exception_table_start = cfs->get_u2_buffer();
+  assert(exception_table_start != NULL, "null exception table");
+  cfs->guarantee_more(8 * exception_table_length, CHECK_NULL); // start_pc, end_pc, handler_pc, catch_type_index
+  // Will check legal target after parsing code array in verifier.
+  if (_need_verify) {
+    for (unsigned int i = 0; i < exception_table_length; i++) {
+      u2 start_pc = cfs->get_u2_fast();
+      u2 end_pc = cfs->get_u2_fast();
+      u2 handler_pc = cfs->get_u2_fast();
+      u2 catch_type_index = cfs->get_u2_fast();
       guarantee_property((start_pc < end_pc) && (end_pc <= code_length),
-                         "Illegal exception table range in class file %s", CHECK_(nullHandle));
+                         "Illegal exception table range in class file %s",
+                         CHECK_NULL);
       guarantee_property(handler_pc < code_length,
-                         "Illegal exception table handler in class file %s", CHECK_(nullHandle));
+                         "Illegal exception table handler in class file %s",
+                         CHECK_NULL);
       if (catch_type_index != 0) {
         guarantee_property(valid_cp_range(catch_type_index, cp->length()) &&
                            is_klass_reference(cp, catch_type_index),
-                           "Catch type in exception table has bad constant type in class file %s", CHECK_(nullHandle));
+                           "Catch type in exception table has bad constant type in class file %s", CHECK_NULL);
       }
     }
-    exception_handlers->int_at_put(index++, start_pc);
-    exception_handlers->int_at_put(index++, end_pc);
-    exception_handlers->int_at_put(index++, handler_pc);
-    exception_handlers->int_at_put(index++, catch_type_index);
+  } else {
+    cfs->skip_u2_fast(exception_table_length * 4);
   }
-  return exception_handlers;
+  return exception_table_start;
 }
 
 void ClassFileParser::parse_linenumber_table(
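
For context: the exception table is now kept as raw u2 quadruples and
copied into the methodOop later (see the "Copy exception table" hunk
below), so readers use the ExceptionTable wrapper instead of indexing
a typeArrayOop. A hedged reading sketch, following the ciMethod.cpp
hunk earlier in this patch ('me' is a methodOop, 'n' the entry count):

  ExceptionTable exc_table(me);
  for (int i = 0; i < n; i++) {
    u2 start   = exc_table.start_pc(i);         // one 4-tuple per entry:
    u2 limit   = exc_table.end_pc(i);           // [start_pc, end_pc,
    u2 handler = exc_table.handler_pc(i);       //  handler_pc,
    u2 ctype   = exc_table.catch_type_index(i); //  catch_type_index]
  }
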
@@ -1330,7 +1376,7 @@
 };
 
 
-class LVT_Hash: public CHeapObj {
+class LVT_Hash: public CHeapObj<mtClass> {
  public:
   LocalVariableTableElement  *_elem;  // element
   LVT_Hash*                   _next;  // Next entry in hash table
@@ -1600,12 +1646,173 @@
       name->as_C_string(), _class_name->as_C_string(), sig->as_C_string());
 }
 
+// Skip an annotation.  Return >=limit if there is any problem.
+int ClassFileParser::skip_annotation(u1* buffer, int limit, int index) {
+  // annotation := atype:u2 do(nmem:u2) {member:u2 value}
+  // value := switch (tag:u1) { ... }
+  index += 2;  // skip atype
+  if ((index += 2) >= limit)  return limit;  // read nmem
+  int nmem = Bytes::get_Java_u2(buffer+index-2);
+  while (--nmem >= 0 && index < limit) {
+    index += 2; // skip member
+    index = skip_annotation_value(buffer, limit, index);
+  }
+  return index;
+}
+
+// Skip an annotation value.  Return >=limit if there is any problem.
+int ClassFileParser::skip_annotation_value(u1* buffer, int limit, int index) {
+  // value := switch (tag:u1) {
+  //   case B, C, I, S, Z, D, F, J, c: con:u2;
+  //   case e: e_class:u2 e_name:u2;
+  //   case s: s_con:u2;
+  //   case [: do(nval:u2) {value};
+  //   case @: annotation;
+  // }
+  if ((index += 1) >= limit)  return limit;  // read tag
+  u1 tag = buffer[index-1];
+  switch (tag) {
+  case 'B': case 'C': case 'I': case 'S': case 'Z':
+  case 'D': case 'F': case 'J': case 'c': case 's':
+    index += 2;  // skip con or s_con
+    break;
+  case 'e':
+    index += 4;  // skip e_class, e_name
+    break;
+  case '[':
+    {
+      if ((index += 2) >= limit)  return limit;  // read nval
+      int nval = Bytes::get_Java_u2(buffer+index-2);
+      while (--nval >= 0 && index < limit) {
+        index = skip_annotation_value(buffer, limit, index);
+      }
+    }
+    break;
+  case '@':
+    index = skip_annotation(buffer, limit, index);
+    break;
+  default:
+    assert(false, "annotation tag");
+    return limit;  //  bad tag byte
+  }
+  return index;
+}
+
+// Sift through annotations, looking for those significant to the VM:
+void ClassFileParser::parse_annotations(u1* buffer, int limit,
+                                        constantPoolHandle cp,
+                                        ClassFileParser::AnnotationCollector* coll,
+                                        TRAPS) {
+  // annotations := do(nann:u2) {annotation}
+  int index = 0;
+  if ((index += 2) >= limit)  return;  // read nann
+  int nann = Bytes::get_Java_u2(buffer+index-2);
+  enum {  // initial annotation layout
+    atype_off = 0,      // utf8 such as 'Ljava/lang/annotation/Retention;'
+    count_off = 2,      // u2   such as 1 (one value)
+    member_off = 4,     // utf8 such as 'value'
+    tag_off = 6,        // u1   such as 'c' (type) or 'e' (enum)
+    e_tag_val = 'e',
+      e_type_off = 7,   // utf8 such as 'Ljava/lang/annotation/RetentionPolicy;'
+      e_con_off = 9,    // utf8 payload, such as 'SOURCE', 'CLASS', 'RUNTIME'
+      e_size = 11,      // end of 'e' annotation
+    c_tag_val = 'c',
+      c_con_off = 7,    // utf8 payload, such as 'I' or 'Ljava/lang/String;'
+      c_size = 9,       // end of 'c' annotation
+    min_size = 6        // smallest possible size (zero members)
+  };
+  while ((--nann) >= 0 && (index-2 + min_size <= limit)) {
+    int index0 = index;
+    index = skip_annotation(buffer, limit, index);
+    u1* abase = buffer + index0;
+    int atype = Bytes::get_Java_u2(abase + atype_off);
+    int count = Bytes::get_Java_u2(abase + count_off);
+    Symbol* aname = check_symbol_at(cp, atype);
+    if (aname == NULL)  break;  // invalid annotation name
+    Symbol* member = NULL;
+    if (count >= 1) {
+      int member_index = Bytes::get_Java_u2(abase + member_off);
+      member = check_symbol_at(cp, member_index);
+      if (member == NULL)  break;  // invalid member name
+    }
+
+    // Here is where parsing particular annotations will take place.
+    AnnotationCollector::ID id = coll->annotation_index(aname);
+    if (id == AnnotationCollector::_unknown)  continue;
+    coll->set_annotation(id);
+    // If there are no values, just set the bit and move on:
+    if (count == 0)   continue;
+
+    // For the record, here is how annotation payloads can be collected.
+    // Suppose we want to capture @Retention.value.  Here is how:
+    //if (id == AnnotationCollector::_class_Retention) {
+    //  Symbol* payload = NULL;
+    //  if (count == 1
+    //      && e_size == (index - index0)  // match size
+    //      && e_tag_val == *(abase + tag_off)
+    //      && (check_symbol_at(cp, Bytes::get_Java_u2(abase + e_type_off))
+    //          == vmSymbols::RetentionPolicy_signature())
+    //      && member == vmSymbols::value_name()) {
+    //    payload = check_symbol_at(cp, Bytes::get_Java_u2(abase + e_con_off));
+    //  }
+    //  check_property(payload != NULL,
+    //                 "Invalid @Retention annotation at offset %u in class file %s",
+    //                 index0, CHECK);
+    //  if (payload != NULL) {
+    //      payload->increment_refcount();
+    //      coll->_class_RetentionPolicy = payload;
+    //  }
+    //}
+  }
+}
+
+ClassFileParser::AnnotationCollector::ID ClassFileParser::AnnotationCollector::annotation_index(Symbol* name) {
+  vmSymbols::SID sid = vmSymbols::find_sid(name);
+  switch (sid) {
+  case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_ForceInline_signature):
+    if (_location != _in_method)  break;  // only allow for methods
+    return _method_ForceInline;
+  case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_DontInline_signature):
+    if (_location != _in_method)  break;  // only allow for methods
+    return _method_DontInline;
+  case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_LambdaForm_Compiled_signature):
+    if (_location != _in_method)  break;  // only allow for methods
+    return _method_LambdaForm_Compiled;
+  case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_LambdaForm_Hidden_signature):
+    if (_location != _in_method)  break;  // only allow for methods
+    return _method_LambdaForm_Hidden;
+  default: break;
+  }
+  return AnnotationCollector::_unknown;
+}
+
+void ClassFileParser::FieldAnnotationCollector::apply_to(FieldInfo* f) {
+  fatal("no field annotations yet");
+}
+
+void ClassFileParser::MethodAnnotationCollector::apply_to(methodHandle m) {
+  if (has_annotation(_method_ForceInline))
+    m->set_force_inline(true);
+  if (has_annotation(_method_DontInline))
+    m->set_dont_inline(true);
+  if (has_annotation(_method_LambdaForm_Compiled) && m->intrinsic_id() == vmIntrinsics::_none)
+    m->set_intrinsic_id(vmIntrinsics::_compiledLambdaForm);
+  if (has_annotation(_method_LambdaForm_Hidden))
+    m->set_hidden(true);
+}
+
+void ClassFileParser::ClassAnnotationCollector::apply_to(instanceKlassHandle k) {
+  fatal("no class annotations yet");
+}
+
+
 #define MAX_ARGS_SIZE 255
 #define MAX_CODE_SIZE 65535
 #define INITIAL_MAX_LVT_NUMBER 256
 
 // Note: the parse_method below is big and clunky because all parsing of the code and exceptions
-// attribute is inlined. This is curbersome to avoid since we inline most of the parts in the
+// attribute is inlined. This is cumbersome to avoid since we inline most of the parts in the
 // methodOop to save footprint, so we only know the size of the resulting methodOop when the
 // entire method attribute is parsed.
 //
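
A worked byte layout for the 'e' case enumerated above, matching an
annotation such as @Retention(RetentionPolicy.RUNTIME) (offsets come
straight from the enum; the concrete strings are illustrative):

  u2 atype  @ 0  -> CP utf8 'Ljava/lang/annotation/Retention;'
  u2 count  @ 2  -> 1 (one member/value pair)
  u2 member @ 4  -> CP utf8 'value'
  u1 tag    @ 6  -> 'e' (enum constant)
  u2 e_type @ 7  -> CP utf8 'Ljava/lang/annotation/RetentionPolicy;'
  u2 e_con  @ 9  -> CP utf8 'RUNTIME'
  e_size == 11 bytes for the complete annotation
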
@@ -1674,6 +1881,7 @@
   u4 code_length = 0;
   u1* code_start = 0;
   u2 exception_table_length = 0;
+  u2* exception_table_start = NULL;
   typeArrayHandle exception_handlers(THREAD, Universe::the_empty_int_array());
   u2 checked_exceptions_length = 0;
   u2* checked_exceptions_start = NULL;
@@ -1695,6 +1903,7 @@
   // stackmap attribute - JDK1.5
   typeArrayHandle stackmap_data;
   u2 generic_signature_index = 0;
+  MethodAnnotationCollector parsed_annotations;
   u1* runtime_visible_annotations = NULL;
   int runtime_visible_annotations_length = 0;
   u1* runtime_invisible_annotations = NULL;
@@ -1760,7 +1969,7 @@
       cfs->guarantee_more(2, CHECK_(nullHandle));  // exception_table_length
       exception_table_length = cfs->get_u2_fast();
       if (exception_table_length > 0) {
-        exception_handlers =
+        exception_table_start =
               parse_exception_table(code_length, exception_table_length, cp, CHECK_(nullHandle));
       }
 
@@ -1921,6 +2130,7 @@
         runtime_visible_annotations_length = method_attribute_length;
         runtime_visible_annotations = cfs->get_u1_buffer();
         assert(runtime_visible_annotations != NULL, "null visible annotations");
+        parse_annotations(runtime_visible_annotations, runtime_visible_annotations_length, cp, &parsed_annotations, CHECK_(nullHandle));
         cfs->skip_u1(runtime_visible_annotations_length, CHECK_(nullHandle));
       } else if (PreserveAllAnnotations && method_attribute_name == vmSymbols::tag_runtime_invisible_annotations()) {
         runtime_invisible_annotations_length = method_attribute_length;
@@ -1964,9 +2174,13 @@
   }
 
   // All sizing information for a methodOop is finally available, now create it
-  methodOop m_oop  = oopFactory::new_method(code_length, access_flags, linenumber_table_length,
-                                            total_lvt_length, checked_exceptions_length,
-                                            oopDesc::IsSafeConc, CHECK_(nullHandle));
+  methodOop m_oop  = oopFactory::new_method(code_length, access_flags,
+                                            linenumber_table_length,
+                                            total_lvt_length,
+                                            exception_table_length,
+                                            checked_exceptions_length,
+                                            oopDesc::IsSafeConc,
+                                            CHECK_(nullHandle));
   methodHandle m (THREAD, m_oop);
 
   ClassLoadingService::add_class_method_size(m_oop->size()*HeapWordSize);
@@ -1997,16 +2211,15 @@
   // Fill in code attribute information
   m->set_max_stack(max_stack);
   m->set_max_locals(max_locals);
-  m->constMethod()->set_stackmap_data(stackmap_data());
 
   /**
-   * The exception_table field is the flag used to indicate
+   * The stackmap_data field is the flag used to indicate
    * that the methodOop and its associated constMethodOop are partially
    * initialized and thus are exempt from pre/post GC verification.  Once
    * the field is set, the oops are considered fully initialized so make
    * sure that the oops can pass verification when this field is set.
    */
-  m->set_exception_table(exception_handlers());
+  m->constMethod()->set_stackmap_data(stackmap_data());
 
   // Copy byte codes
   m->set_code(code_start);
@@ -2017,6 +2230,14 @@
            linenumber_table->buffer(), linenumber_table_length);
   }
 
+  // Copy exception table
+  if (exception_table_length > 0) {
+    int size =
+      exception_table_length * sizeof(ExceptionTableElement) / sizeof(u2);
+    copy_u2_with_conversion((u2*) m->exception_table_start(),
+                             exception_table_start, size);
+  }
+
   // Copy checked exceptions
   if (checked_exceptions_length > 0) {
     int size = checked_exceptions_length * sizeof(CheckedExceptionElement) / sizeof(u2);
@@ -2098,6 +2319,8 @@
     clear_hashtable(lvt_Hash);
   }
 
+  if (parsed_annotations.has_any_annotations())
+    parsed_annotations.apply_to(m);
   *method_annotations = assemble_annotations(runtime_visible_annotations,
                                              runtime_visible_annotations_length,
                                              runtime_invisible_annotations,
@@ -2128,12 +2351,6 @@
     _has_vanilla_constructor = true;
   }
 
-  if (EnableInvokeDynamic && (m->is_method_handle_invoke() ||
-                              m->is_method_handle_adapter())) {
-    THROW_MSG_(vmSymbols::java_lang_VirtualMachineError(),
-               "Method handle invokers must be defined internally to the VM", nullHandle);
-  }
-
   return m;
 }
 
@@ -2276,7 +2493,7 @@
 }
 
 
-void ClassFileParser::parse_classfile_sourcefile_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS) {
+void ClassFileParser::parse_classfile_sourcefile_attribute(constantPoolHandle cp, TRAPS) {
   ClassFileStream* cfs = stream();
   cfs->guarantee_more(2, CHECK);  // sourcefile_index
   u2 sourcefile_index = cfs->get_u2_fast();
@@ -2285,13 +2502,12 @@
       cp->tag_at(sourcefile_index).is_utf8(),
     "Invalid SourceFile attribute at constant pool index %u in class file %s",
     sourcefile_index, CHECK);
-  k->set_source_file_name(cp->symbol_at(sourcefile_index));
+  set_class_sourcefile(cp->symbol_at(sourcefile_index));
 }
 
 
 
 void ClassFileParser::parse_classfile_source_debug_extension_attribute(constantPoolHandle cp,
-                                                                       instanceKlassHandle k,
                                                                        int length, TRAPS) {
   ClassFileStream* cfs = stream();
   u1* sde_buffer = cfs->get_u1_buffer();
@@ -2299,7 +2515,13 @@
 
   // Don't bother storing it if there is no way to retrieve it
   if (JvmtiExport::can_get_source_debug_extension()) {
-    k->set_source_debug_extension((char*)sde_buffer, length);
+    assert((length+1) > length, "Overflow checking");
+    u1* sde = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, u1, length+1);
+    for (int i = 0; i < length; i++) {
+      sde[i] = sde_buffer[i];
+    }
+    sde[length] = '\0';
+    set_class_sde_buffer((char*)sde, length);
   }
   // Got utf8 string, set stream position forward
   cfs->skip_u1(length, CHECK);
@@ -2310,13 +2532,32 @@
 #define RECOGNIZED_INNER_CLASS_MODIFIERS (JVM_RECOGNIZED_CLASS_MODIFIERS | JVM_ACC_PRIVATE | JVM_ACC_PROTECTED | JVM_ACC_STATIC)
 
 // Return number of classes in the inner classes attribute table
-u2 ClassFileParser::parse_classfile_inner_classes_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS) {
+u2 ClassFileParser::parse_classfile_inner_classes_attribute(u1* inner_classes_attribute_start,
+                                                            bool parsed_enclosingmethod_attribute,
+                                                            u2 enclosing_method_class_index,
+                                                            u2 enclosing_method_method_index,
+                                                            constantPoolHandle cp,
+                                                            TRAPS) {
   ClassFileStream* cfs = stream();
-  cfs->guarantee_more(2, CHECK_0);  // length
-  u2 length = cfs->get_u2_fast();
-
-  // 4-tuples of shorts [inner_class_info_index, outer_class_info_index, inner_name_index, inner_class_access_flags]
-  typeArrayOop ic = oopFactory::new_permanent_shortArray(length*4, CHECK_0);
+  u1* current_mark = cfs->current();
+  u2 length = 0;
+  if (inner_classes_attribute_start != NULL) {
+    cfs->set_current(inner_classes_attribute_start);
+    cfs->guarantee_more(2, CHECK_0);  // length
+    length = cfs->get_u2_fast();
+  }
+
+  // 4-tuples of shorts of inner classes data and 2 shorts of enclosing
+  // method data:
+  //   [inner_class_info_index,
+  //    outer_class_info_index,
+  //    inner_name_index,
+  //    inner_class_access_flags,
+  //    ...
+  //    enclosing_method_class_index,
+  //    enclosing_method_method_index]
+  int size = length * 4 + (parsed_enclosingmethod_attribute ? 2 : 0);
+  typeArrayOop ic = oopFactory::new_permanent_shortArray(size, CHECK_0);
   typeArrayHandle inner_classes(THREAD, ic);
   int index = 0;
   int cp_size = cp->length();
@@ -2367,8 +2608,8 @@
 
   // 4347400: make sure there's no duplicate entry in the classes array
   if (_need_verify && _major_version >= JAVA_1_5_VERSION) {
-    for(int i = 0; i < inner_classes->length(); i += 4) {
-      for(int j = i + 4; j < inner_classes->length(); j += 4) {
+    for(int i = 0; i < length * 4; i += 4) {
+      for(int j = i + 4; j < length * 4; j += 4) {
         guarantee_property((inner_classes->ushort_at(i)   != inner_classes->ushort_at(j) ||
                             inner_classes->ushort_at(i+1) != inner_classes->ushort_at(j+1) ||
                             inner_classes->ushort_at(i+2) != inner_classes->ushort_at(j+2) ||
@@ -2379,16 +2620,27 @@
     }
   }
 
+  // Set EnclosingMethod class and method indexes.
+  if (parsed_enclosingmethod_attribute) {
+    inner_classes->short_at_put(index++, enclosing_method_class_index);
+    inner_classes->short_at_put(index++, enclosing_method_method_index);
+  }
+  assert(index == size, "wrong size");
+
   // Update instanceKlass with inner class info.
-  k->set_inner_classes(inner_classes());
+  set_class_inner_classes(inner_classes);
+
+  // Restore buffer's current position.
+  cfs->set_current(current_mark);
+
   return length;
 }
 
-void ClassFileParser::parse_classfile_synthetic_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS) {
-  k->set_is_synthetic();
+void ClassFileParser::parse_classfile_synthetic_attribute(constantPoolHandle cp, TRAPS) {
+  set_class_synthetic_flag(true);
 }
 
-void ClassFileParser::parse_classfile_signature_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS) {
+void ClassFileParser::parse_classfile_signature_attribute(constantPoolHandle cp, TRAPS) {
   ClassFileStream* cfs = stream();
   u2 signature_index = cfs->get_u2(CHECK);
   check_property(
@@ -2396,10 +2648,10 @@
       cp->tag_at(signature_index).is_utf8(),
     "Invalid constant pool index %u in Signature attribute in class file %s",
     signature_index, CHECK);
-  k->set_generic_signature(cp->symbol_at(signature_index));
+  set_class_generic_signature(cp->symbol_at(signature_index));
 }
 
-void ClassFileParser::parse_classfile_bootstrap_methods_attribute(constantPoolHandle cp, instanceKlassHandle k,
+void ClassFileParser::parse_classfile_bootstrap_methods_attribute(constantPoolHandle cp,
                                                                   u4 attribute_byte_length, TRAPS) {
   ClassFileStream* cfs = stream();
   u1* current_start = cfs->current();
@@ -2471,10 +2723,12 @@
 }
 
 
-void ClassFileParser::parse_classfile_attributes(constantPoolHandle cp, instanceKlassHandle k, TRAPS) {
+void ClassFileParser::parse_classfile_attributes(constantPoolHandle cp,
+                                                 ClassFileParser::ClassAnnotationCollector* parsed_annotations,
+                                                 TRAPS) {
   ClassFileStream* cfs = stream();
   // Set inner classes attribute to default sentinel
-  k->set_inner_classes(Universe::the_empty_short_array());
+  set_class_inner_classes(typeArrayHandle(THREAD, Universe::the_empty_short_array()));
   cfs->guarantee_more(2, CHECK);  // attributes_count
   u2 attributes_count = cfs->get_u2_fast();
   bool parsed_sourcefile_attribute = false;
@@ -2485,6 +2739,10 @@
   int runtime_visible_annotations_length = 0;
   u1* runtime_invisible_annotations = NULL;
   int runtime_invisible_annotations_length = 0;
+  u1* inner_classes_attribute_start = NULL;
+  u4  inner_classes_attribute_length = 0;
+  u2  enclosing_method_class_index = 0;
+  u2  enclosing_method_method_index = 0;
   // Iterate over attributes
   while (attributes_count--) {
     cfs->guarantee_more(6, CHECK);  // attribute_name_index, attribute_length
@@ -2506,10 +2764,10 @@
       } else {
         parsed_sourcefile_attribute = true;
       }
-      parse_classfile_sourcefile_attribute(cp, k, CHECK);
+      parse_classfile_sourcefile_attribute(cp, CHECK);
     } else if (tag == vmSymbols::tag_source_debug_extension()) {
       // Check for SourceDebugExtension tag
-      parse_classfile_source_debug_extension_attribute(cp, k, (int)attribute_length, CHECK);
+      parse_classfile_source_debug_extension_attribute(cp, (int)attribute_length, CHECK);
     } else if (tag == vmSymbols::tag_inner_classes()) {
       // Check for InnerClasses tag
       if (parsed_innerclasses_attribute) {
@@ -2517,11 +2775,9 @@
       } else {
         parsed_innerclasses_attribute = true;
       }
-      u2 num_of_classes = parse_classfile_inner_classes_attribute(cp, k, CHECK);
-      if (_need_verify && _major_version >= JAVA_1_5_VERSION) {
-        guarantee_property(attribute_length == sizeof(num_of_classes) + 4 * sizeof(u2) * num_of_classes,
-                          "Wrong InnerClasses attribute length in class file %s", CHECK);
-      }
+      inner_classes_attribute_start = cfs->get_u1_buffer();
+      inner_classes_attribute_length = attribute_length;
+      cfs->skip_u1(inner_classes_attribute_length, CHECK);
     } else if (tag == vmSymbols::tag_synthetic()) {
       // Check for Synthetic tag
      // Shouldn't we check that the synthetic flag wasn't already set? - not required in spec
@@ -2530,7 +2786,7 @@
           "Invalid Synthetic classfile attribute length %u in class file %s",
           attribute_length, CHECK);
       }
-      parse_classfile_synthetic_attribute(cp, k, CHECK);
+      parse_classfile_synthetic_attribute(cp, CHECK);
     } else if (tag == vmSymbols::tag_deprecated()) {
      // Check for Deprecated tag - 4276120
       if (attribute_length != 0) {
@@ -2545,11 +2801,16 @@
             "Wrong Signature attribute length %u in class file %s",
             attribute_length, CHECK);
         }
-        parse_classfile_signature_attribute(cp, k, CHECK);
+        parse_classfile_signature_attribute(cp, CHECK);
       } else if (tag == vmSymbols::tag_runtime_visible_annotations()) {
         runtime_visible_annotations_length = attribute_length;
         runtime_visible_annotations = cfs->get_u1_buffer();
         assert(runtime_visible_annotations != NULL, "null visible annotations");
+        parse_annotations(runtime_visible_annotations,
+                          runtime_visible_annotations_length,
+                          cp,
+                          parsed_annotations,
+                          CHECK);
         cfs->skip_u1(runtime_visible_annotations_length, CHECK);
       } else if (PreserveAllAnnotations && tag == vmSymbols::tag_runtime_invisible_annotations()) {
         runtime_invisible_annotations_length = attribute_length;
@@ -2563,28 +2824,27 @@
           parsed_enclosingmethod_attribute = true;
         }
         cfs->guarantee_more(4, CHECK);  // class_index, method_index
-        u2 class_index  = cfs->get_u2_fast();
-        u2 method_index = cfs->get_u2_fast();
-        if (class_index == 0) {
+        enclosing_method_class_index  = cfs->get_u2_fast();
+        enclosing_method_method_index = cfs->get_u2_fast();
+        if (enclosing_method_class_index == 0) {
           classfile_parse_error("Invalid class index in EnclosingMethod attribute in class file %s", CHECK);
         }
         // Validate the constant pool indices and types
-        if (!cp->is_within_bounds(class_index) ||
-            !is_klass_reference(cp, class_index)) {
+        if (!cp->is_within_bounds(enclosing_method_class_index) ||
+            !is_klass_reference(cp, enclosing_method_class_index)) {
           classfile_parse_error("Invalid or out-of-bounds class index in EnclosingMethod attribute in class file %s", CHECK);
         }
-        if (method_index != 0 &&
-            (!cp->is_within_bounds(method_index) ||
-             !cp->tag_at(method_index).is_name_and_type())) {
+        if (enclosing_method_method_index != 0 &&
+            (!cp->is_within_bounds(enclosing_method_method_index) ||
+             !cp->tag_at(enclosing_method_method_index).is_name_and_type())) {
           classfile_parse_error("Invalid or out-of-bounds method index in EnclosingMethod attribute in class file %s", CHECK);
         }
-        k->set_enclosing_method_indices(class_index, method_index);
       } else if (tag == vmSymbols::tag_bootstrap_methods() &&
                  _major_version >= Verifier::INVOKEDYNAMIC_MAJOR_VERSION) {
         if (parsed_bootstrap_methods_attribute)
           classfile_parse_error("Multiple BootstrapMethods attributes in class file %s", CHECK);
         parsed_bootstrap_methods_attribute = true;
-        parse_classfile_bootstrap_methods_attribute(cp, k, attribute_length, CHECK);
+        parse_classfile_bootstrap_methods_attribute(cp, attribute_length, CHECK);
       } else {
         // Unknown attribute
         cfs->skip_u1(attribute_length, CHECK);
@@ -2599,7 +2859,21 @@
                                                      runtime_invisible_annotations,
                                                      runtime_invisible_annotations_length,
                                                      CHECK);
-  k->set_class_annotations(annotations());
+  set_class_annotations(annotations);
+
+  if (parsed_innerclasses_attribute || parsed_enclosingmethod_attribute) {
+    u2 num_of_classes = parse_classfile_inner_classes_attribute(
+                            inner_classes_attribute_start,
+                            parsed_innerclasses_attribute,
+                            enclosing_method_class_index,
+                            enclosing_method_method_index,
+                            cp, CHECK);
+    if (parsed_innerclasses_attribute && _need_verify && _major_version >= JAVA_1_5_VERSION) {
+      guarantee_property(
+        inner_classes_attribute_length == sizeof(num_of_classes) + 4 * sizeof(u2) * num_of_classes,
+        "Wrong InnerClasses attribute length in class file %s", CHECK);
+    }
+  }
 
   if (_max_bootstrap_specifier_index >= 0) {
     guarantee_property(parsed_bootstrap_methods_attribute,
@@ -2607,6 +2881,23 @@
   }
 }
 
+void ClassFileParser::apply_parsed_class_attributes(instanceKlassHandle k) {
+  if (_synthetic_flag)
+    k->set_is_synthetic();
+  if (_sourcefile != NULL) {
+    _sourcefile->increment_refcount();
+    k->set_source_file_name(_sourcefile);
+  }
+  if (_generic_signature != NULL) {
+    _generic_signature->increment_refcount();
+    k->set_generic_signature(_generic_signature);
+  }
+  if (_sde_buffer != NULL) {
+    k->set_source_debug_extension(_sde_buffer, _sde_length);
+  }
+  k->set_inner_classes(_inner_classes());
+  k->set_class_annotations(_annotations());
+}
 
 typeArrayHandle ClassFileParser::assemble_annotations(u1* runtime_visible_annotations,
                                                       int runtime_visible_annotations_length,
@@ -2657,8 +2948,7 @@
                             jt->get_thread_stat()->perf_timers_addr(),
                             PerfClassTraceTime::PARSE_CLASS);
 
-  _has_finalizer = _has_empty_finalizer = _has_vanilla_constructor = false;
-  _max_bootstrap_specifier_index = -1;
+  init_parsed_class_attributes();
 
   if (JvmtiExport::should_post_class_file_load_hook()) {
     // Get the cached class file bytes (if any) from the class that
@@ -2753,7 +3043,7 @@
   _relax_verify = Verifier::relax_verify_for(class_loader());
 
   // Constant pool
-  constantPoolHandle cp = parse_constant_pool(CHECK_(nullHandle));
+  constantPoolHandle cp = parse_constant_pool(class_loader, CHECK_(nullHandle));
   ConstantPoolCleaner error_handler(cp); // set constant pool to be cleaned up.
 
   int cp_size = cp->length();
@@ -2891,6 +3181,13 @@
     objArrayHandle methods_parameter_annotations(THREAD, methods_parameter_annotations_oop);
     objArrayHandle methods_default_annotations(THREAD, methods_default_annotations_oop);
 
+    // Additional attributes
+    ClassAnnotationCollector parsed_annotations;
+    parse_classfile_attributes(cp, &parsed_annotations, CHECK_(nullHandle));
+
+    // Make sure this is the end of class file stream
+    guarantee_property(cfs->at_eos(), "Extra bytes at the end of class file %s", CHECK_(nullHandle));
+
     // We check super class after class file is parsed and format is checked
     if (super_class_index > 0 && super_klass.is_null()) {
       Symbol*  sk  = cp->klass_name_at(super_class_index);
@@ -3304,7 +3601,9 @@
     klassOop ik = oopFactory::new_instanceKlass(name, vtable_size, itable_size,
                                                 static_field_size,
                                                 total_oop_map_count,
-                                                rt, CHECK_(nullHandle));
+                                                access_flags,
+                                                rt, host_klass,
+                                                CHECK_(nullHandle));
     instanceKlassHandle this_klass (THREAD, ik);
 
     assert(this_klass->static_field_size() == static_field_size, "sanity");
@@ -3312,7 +3611,6 @@
            "sanity");
 
     // Fill in information already parsed
-    this_klass->set_access_flags(access_flags);
     this_klass->set_should_verify_class(verify);
     jint lh = Klass::instance_layout_helper(instance_size, false);
     this_klass->set_layout_helper(lh);
@@ -3378,11 +3676,10 @@
       this_klass->set_has_miranda_methods(); // then set a flag
     }
 
-    // Additional attributes
-    parse_classfile_attributes(cp, this_klass, CHECK_(nullHandle));
-
-    // Make sure this is the end of class file stream
-    guarantee_property(cfs->at_eos(), "Extra bytes at the end of class file %s", CHECK_(nullHandle));
+    // Fill in field values obtained by parse_classfile_attributes
+    if (parsed_annotations.has_any_annotations())
+      parsed_annotations.apply_to(this_klass);
+    apply_parsed_class_attributes(this_klass);
 
     // VerifyOops believes that once this has been set, the object is completely loaded.
     // Compute transitive closure of interfaces this class implements
@@ -3397,6 +3694,7 @@
     // Do final class setup
     fill_oop_maps(this_klass, nonstatic_oop_map_count, nonstatic_oop_offsets, nonstatic_oop_counts);
 
+    // Fill in has_finalizer, has_vanilla_constructor, and layout_helper
     set_precomputed_flags(this_klass);
 
     // reinitialize modifiers, using the InnerClasses attribute
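
The attribute-parsing changes above fold the EnclosingMethod indices into the same short array as the InnerClasses 4-tuples (size = length * 4 + 2 when both attributes are present). A minimal standalone sketch of that layout, in plain C++ with illustrative names rather than HotSpot's typeArrayOop machinery:

#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical record type for the sketch; HotSpot reads these four
// shorts straight from the class file stream instead.
struct InnerClassInfo {
  uint16_t inner_class_info_index;
  uint16_t outer_class_info_index;
  uint16_t inner_name_index;
  uint16_t inner_class_access_flags;
};

std::vector<uint16_t> pack_inner_classes(const std::vector<InnerClassInfo>& ics,
                                         bool has_enclosing_method,
                                         uint16_t em_class_index,
                                         uint16_t em_method_index) {
  // size = length * 4 + (parsed_enclosingmethod_attribute ? 2 : 0)
  std::vector<uint16_t> out;
  out.reserve(ics.size() * 4 + (has_enclosing_method ? 2 : 0));
  for (const InnerClassInfo& ic : ics) {
    out.push_back(ic.inner_class_info_index);
    out.push_back(ic.outer_class_info_index);
    out.push_back(ic.inner_name_index);
    out.push_back(ic.inner_class_access_flags);
  }
  if (has_enclosing_method) {
    // the EnclosingMethod pair rides at the tail of the same array
    out.push_back(em_class_index);
    out.push_back(em_method_index);
  }
  assert(out.size() == ics.size() * 4 + (has_enclosing_method ? 2 : 0));
  return out;
}

The trailing pair keeps the duplicate-entry check above simple: it scans only the first length * 4 shorts, so the enclosing-method shorts never collide with a 4-tuple.
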
--- a/src/share/vm/classfile/classFileParser.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/classFileParser.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,8 +31,8 @@
 #include "oops/typeArrayOop.hpp"
 #include "runtime/handles.inline.hpp"
 #include "utilities/accessFlags.hpp"
+#include "classfile/symbolTable.hpp"
 
-class TempNewSymbol;
 class FieldAllocationCount;
 
 
@@ -50,11 +50,83 @@
   KlassHandle _host_klass;
   GrowableArray<Handle>* _cp_patches; // overrides for CP entries
 
+  // precomputed flags
   bool _has_finalizer;
   bool _has_empty_finalizer;
   bool _has_vanilla_constructor;
+  int _max_bootstrap_specifier_index;  // detects BootstrapMethods specifier (BSS) values
 
-  int _max_bootstrap_specifier_index;
+  // class attributes parsed before the instance klass is created:
+  bool       _synthetic_flag;
+  Symbol*    _sourcefile;
+  Symbol*    _generic_signature;
+  char*      _sde_buffer;
+  int        _sde_length;
+  typeArrayHandle _inner_classes;
+  typeArrayHandle _annotations;
+
+  void set_class_synthetic_flag(bool x)           { _synthetic_flag = x; }
+  void set_class_sourcefile(Symbol* x)            { _sourcefile = x; }
+  void set_class_generic_signature(Symbol* x)     { _generic_signature = x; }
+  void set_class_sde_buffer(char* x, int len)     { _sde_buffer = x; _sde_length = len; }
+  void set_class_inner_classes(typeArrayHandle x) { _inner_classes = x; }
+  void set_class_annotations(typeArrayHandle x)   { _annotations = x; }
+  void init_parsed_class_attributes() {
+    _synthetic_flag = false;
+    _sourcefile = NULL;
+    _generic_signature = NULL;
+    _sde_buffer = NULL;
+    _sde_length = 0;
+    // initialize the other flags too:
+    _has_finalizer = _has_empty_finalizer = _has_vanilla_constructor = false;
+    _max_bootstrap_specifier_index = -1;
+  }
+  void apply_parsed_class_attributes(instanceKlassHandle k);  // update k
+
+  class AnnotationCollector {
+  public:
+    enum Location { _in_field, _in_method, _in_class };
+    enum ID {
+      _unknown = 0,
+      _method_ForceInline,
+      _method_DontInline,
+      _method_LambdaForm_Compiled,
+      _method_LambdaForm_Hidden,
+      _annotation_LIMIT
+    };
+    const Location _location;
+    int _annotations_present;
+    AnnotationCollector(Location location)
+    : _location(location), _annotations_present(0)
+    {
+      assert((int)_annotation_LIMIT <= (int)sizeof(_annotations_present) * BitsPerByte, "");
+    }
+    // If this annotation name has an ID, report it (or _unknown).
+    ID annotation_index(Symbol* name);
+    // Record that the annotation with this ID is present:
+    void set_annotation(ID id) {
+      assert((int)id >= 0 && (int)id < (int)_annotation_LIMIT, "oob");
+      _annotations_present |= nth_bit((int)id);
+    }
+    // Report if the annotation is present.
+    bool has_any_annotations() { return _annotations_present != 0; }
+    bool has_annotation(ID id) { return (nth_bit((int)id) & _annotations_present) != 0; }
+  };
+  class FieldAnnotationCollector: public AnnotationCollector {
+  public:
+    FieldAnnotationCollector() : AnnotationCollector(_in_field) { }
+    void apply_to(FieldInfo* f);
+  };
+  class MethodAnnotationCollector: public AnnotationCollector {
+  public:
+    MethodAnnotationCollector() : AnnotationCollector(_in_method) { }
+    void apply_to(methodHandle m);
+  };
+  class ClassAnnotationCollector: public AnnotationCollector {
+  public:
+    ClassAnnotationCollector() : AnnotationCollector(_in_class) { }
+    void apply_to(instanceKlassHandle k);
+  };
 
   enum { fixed_buffer_size = 128 };
   u_char linenumbertable_buffer[fixed_buffer_size];
@@ -68,9 +140,10 @@
   void set_stream(ClassFileStream* st)             { _stream = st; }
 
   // Constant pool parsing
-  void parse_constant_pool_entries(constantPoolHandle cp, int length, TRAPS);
+  void parse_constant_pool_entries(Handle class_loader,
+                                   constantPoolHandle cp, int length, TRAPS);
 
-  constantPoolHandle parse_constant_pool(TRAPS);
+  constantPoolHandle parse_constant_pool(Handle class_loader, TRAPS);
 
   // Interface parsing
   objArrayHandle parse_interfaces(constantPoolHandle cp,
@@ -86,7 +159,9 @@
                               u2* constantvalue_index_addr,
                               bool* is_synthetic_addr,
                               u2* generic_signature_index_addr,
-                              typeArrayHandle* field_annotations, TRAPS);
+                              typeArrayHandle* field_annotations,
+                              FieldAnnotationCollector* parsed_annotations,
+                              TRAPS);
   typeArrayHandle parse_fields(Symbol* class_name,
                                constantPoolHandle cp, bool is_interface,
                                FieldAllocationCount *fac,
@@ -112,8 +187,8 @@
                                 objArrayHandle methods_parameter_annotations,
                                 objArrayHandle methods_default_annotations,
                                 TRAPS);
-  typeArrayHandle parse_exception_table(u4 code_length, u4 exception_table_length,
-                                        constantPoolHandle cp, TRAPS);
+  u2* parse_exception_table(u4 code_length, u4 exception_table_length,
+                            constantPoolHandle cp, TRAPS);
   void parse_linenumber_table(
       u4 code_attribute_length, u4 code_length,
       CompressedLineNumberWriteStream** write_stream, TRAPS);
@@ -127,21 +202,32 @@
   typeArrayOop parse_stackmap_table(u4 code_attribute_length, TRAPS);
 
   // Classfile attribute parsing
-  void parse_classfile_sourcefile_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS);
-  void parse_classfile_source_debug_extension_attribute(constantPoolHandle cp,
-                                                instanceKlassHandle k, int length, TRAPS);
-  u2   parse_classfile_inner_classes_attribute(constantPoolHandle cp,
-                                               instanceKlassHandle k, TRAPS);
-  void parse_classfile_attributes(constantPoolHandle cp, instanceKlassHandle k, TRAPS);
-  void parse_classfile_synthetic_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS);
-  void parse_classfile_signature_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS);
-  void parse_classfile_bootstrap_methods_attribute(constantPoolHandle cp, instanceKlassHandle k, u4 attribute_length, TRAPS);
+  void parse_classfile_sourcefile_attribute(constantPoolHandle cp, TRAPS);
+  void parse_classfile_source_debug_extension_attribute(constantPoolHandle cp, int length, TRAPS);
+  u2   parse_classfile_inner_classes_attribute(u1* inner_classes_attribute_start,
+                                               bool parsed_enclosingmethod_attribute,
+                                               u2 enclosing_method_class_index,
+                                               u2 enclosing_method_method_index,
+                                               constantPoolHandle cp,
+                                               TRAPS);
+  void parse_classfile_attributes(constantPoolHandle cp,
+                                  ClassAnnotationCollector* parsed_annotations,
+                                  TRAPS);
+  void parse_classfile_synthetic_attribute(constantPoolHandle cp, TRAPS);
+  void parse_classfile_signature_attribute(constantPoolHandle cp, TRAPS);
+  void parse_classfile_bootstrap_methods_attribute(constantPoolHandle cp, u4 attribute_length, TRAPS);
 
   // Annotations handling
   typeArrayHandle assemble_annotations(u1* runtime_visible_annotations,
                                        int runtime_visible_annotations_length,
                                        u1* runtime_invisible_annotations,
                                        int runtime_invisible_annotations_length, TRAPS);
+  int skip_annotation(u1* buffer, int limit, int index);
+  int skip_annotation_value(u1* buffer, int limit, int index);
+  void parse_annotations(u1* buffer, int limit, constantPoolHandle cp,
+                         /* Results (currently, only one result is supported): */
+                         AnnotationCollector* result,
+                         TRAPS);
 
   // Final setup
   unsigned int compute_oop_map_count(instanceKlassHandle super,
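
The AnnotationCollector hierarchy declared above records which recognized annotations were seen as single bits in an int, so a presence check costs one mask test. A compact, self-contained sketch of the idiom, with a made-up ID enum standing in for the real method-annotation IDs:

#include <cassert>

class TinyAnnotationSet {
 public:
  // Made-up IDs standing in for entries like _method_ForceInline above.
  enum ID { _unknown = 0, _force_inline, _dont_inline, _limit };

  TinyAnnotationSet() : _present(0) {
    // every ID must fit in one bit of _present
    assert((int)_limit <= (int)(sizeof(_present) * 8));
  }
  void set(ID id)       { _present |= 1 << (int)id; }
  bool has(ID id) const { return (_present & (1 << (int)id)) != 0; }
  bool has_any() const  { return _present != 0; }

 private:
  int _present;  // one bit per recognized annotation
};
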
--- a/src/share/vm/classfile/classLoader.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/classLoader.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -153,7 +153,7 @@
     _meta_package_names = NULL;
     _num_meta_package_names = 0;
   } else {
-    _meta_package_names = NEW_C_HEAP_ARRAY(char*, num_meta_package_names);
+    _meta_package_names = NEW_C_HEAP_ARRAY(char*, num_meta_package_names, mtClass);
     _num_meta_package_names = num_meta_package_names;
     memcpy(_meta_package_names, meta_package_names, num_meta_package_names * sizeof(char*));
   }
@@ -161,7 +161,7 @@
 
 
 MetaIndex::~MetaIndex() {
-  FREE_C_HEAP_ARRAY(char*, _meta_package_names);
+  FREE_C_HEAP_ARRAY(char*, _meta_package_names, mtClass);
 }
 
 
@@ -192,7 +192,7 @@
 }
 
 ClassPathDirEntry::ClassPathDirEntry(char* dir) : ClassPathEntry() {
-  _dir = NEW_C_HEAP_ARRAY(char, strlen(dir)+1);
+  _dir = NEW_C_HEAP_ARRAY(char, strlen(dir)+1, mtClass);
   strcpy(_dir, dir);
 }
 
@@ -229,7 +229,7 @@
 
 ClassPathZipEntry::ClassPathZipEntry(jzfile* zip, const char* zip_name) : ClassPathEntry() {
   _zip = zip;
-  _zip_name = NEW_C_HEAP_ARRAY(char, strlen(zip_name)+1);
+  _zip_name = NEW_C_HEAP_ARRAY(char, strlen(zip_name)+1, mtClass);
   strcpy(_zip_name, zip_name);
 }
 
@@ -237,7 +237,7 @@
   if (ZipClose != NULL) {
     (*ZipClose)(_zip);
   }
-  FREE_C_HEAP_ARRAY(char, _zip_name);
+  FREE_C_HEAP_ARRAY(char, _zip_name, mtClass);
 }
 
 ClassFileStream* ClassPathZipEntry::open_stream(const char* name) {
@@ -454,11 +454,11 @@
     while (sys_class_path[end] && sys_class_path[end] != os::path_separator()[0]) {
       end++;
     }
-    char* path = NEW_C_HEAP_ARRAY(char, end-start+1);
+    char* path = NEW_C_HEAP_ARRAY(char, end-start+1, mtClass);
     strncpy(path, &sys_class_path[start], end-start);
     path[end-start] = '\0';
     update_class_path_entry_list(path, false);
-    FREE_C_HEAP_ARRAY(char, path);
+    FREE_C_HEAP_ARRAY(char, path, mtClass);
     while (sys_class_path[end] == os::path_separator()[0]) {
       end++;
     }
@@ -652,13 +652,13 @@
 // in the classpath must be the same files, in the same order, even
 // though the exact name is not the same.
 
-class PackageInfo: public BasicHashtableEntry {
+class PackageInfo: public BasicHashtableEntry<mtClass> {
 public:
   const char* _pkgname;       // Package name
   int _classpath_index;       // Index of directory or JAR file loaded from
 
   PackageInfo* next() {
-    return (PackageInfo*)BasicHashtableEntry::next();
+    return (PackageInfo*)BasicHashtableEntry<mtClass>::next();
   }
 
   const char* pkgname()           { return _pkgname; }
@@ -674,7 +674,7 @@
 };
 
 
-class PackageHashtable : public BasicHashtable {
+class PackageHashtable : public BasicHashtable<mtClass> {
 private:
   inline unsigned int compute_hash(const char *s, int n) {
     unsigned int val = 0;
@@ -685,7 +685,7 @@
   }
 
   PackageInfo* bucket(int index) {
-    return (PackageInfo*)BasicHashtable::bucket(index);
+    return (PackageInfo*)BasicHashtable<mtClass>::bucket(index);
   }
 
   PackageInfo* get_entry(int index, unsigned int hash,
@@ -702,10 +702,10 @@
 
 public:
   PackageHashtable(int table_size)
-    : BasicHashtable(table_size, sizeof(PackageInfo)) {}
+    : BasicHashtable<mtClass>(table_size, sizeof(PackageInfo)) {}
 
-  PackageHashtable(int table_size, HashtableBucket* t, int number_of_entries)
-    : BasicHashtable(table_size, sizeof(PackageInfo), t, number_of_entries) {}
+  PackageHashtable(int table_size, HashtableBucket<mtClass>* t, int number_of_entries)
+    : BasicHashtable<mtClass>(table_size, sizeof(PackageInfo), t, number_of_entries) {}
 
   PackageInfo* get_entry(const char* pkgname, int n) {
     unsigned int hash = compute_hash(pkgname, n);
@@ -715,14 +715,14 @@
   PackageInfo* new_entry(char* pkgname, int n) {
     unsigned int hash = compute_hash(pkgname, n);
     PackageInfo* pp;
-    pp = (PackageInfo*)BasicHashtable::new_entry(hash);
+    pp = (PackageInfo*)BasicHashtable<mtClass>::new_entry(hash);
     pp->set_pkgname(pkgname);
     return pp;
   }
 
   void add_entry(PackageInfo* pp) {
     int index = hash_to_index(pp->hash());
-    BasicHashtable::add_entry(index, pp);
+    BasicHashtable<mtClass>::add_entry(index, pp);
   }
 
   void copy_pkgnames(const char** packages) {
@@ -742,7 +742,7 @@
 void PackageHashtable::copy_table(char** top, char* end,
                                   PackageHashtable* table) {
   // Copy (relocate) the table to the shared space.
-  BasicHashtable::copy_table(top, end);
+  BasicHashtable<mtClass>::copy_table(top, end);
 
   // Calculate the space needed for the package name strings.
   int i;
@@ -815,7 +815,7 @@
       // Package prefix found
       int n = cp - pkgname + 1;
 
-      char* new_pkgname = NEW_C_HEAP_ARRAY(char, n + 1);
+      char* new_pkgname = NEW_C_HEAP_ARRAY(char, n + 1, mtClass);
       if (new_pkgname == NULL) {
         return false;
       }
@@ -929,10 +929,10 @@
 }
 
 
-void ClassLoader::create_package_info_table(HashtableBucket *t, int length,
+void ClassLoader::create_package_info_table(HashtableBucket<mtClass> *t, int length,
                                             int number_of_entries) {
   assert(_package_hash_table == NULL, "One package info table allowed.");
-  assert(length == package_hash_table_size * sizeof(HashtableBucket),
+  assert(length == package_hash_table_size * sizeof(HashtableBucket<mtClass>),
          "bad shared package info size.");
   _package_hash_table = new PackageHashtable(package_hash_table_size, t,
                                              number_of_entries);
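
The recurring mtClass argument threaded through NEW_C_HEAP_ARRAY / FREE_C_HEAP_ARRAY above tags each C-heap allocation with a memory category for native memory tracking. A standalone sketch of that pattern, using hypothetical helper names rather than the real HotSpot macros:

#include <cstdlib>

enum MemoryTag { mtNone, mtClass, mtSymbol };  // simplified category list

template <typename T>
T* new_c_heap_array(std::size_t len, MemoryTag tag) {
  // a real tracker would credit len * sizeof(T) bytes to `tag` here
  (void)tag;
  return static_cast<T*>(std::malloc(len * sizeof(T)));
}

template <typename T>
void free_c_heap_array(T* p, MemoryTag tag) {
  (void)tag;  // and debit the same category on release
  std::free(p);
}

A call site then reads new_c_heap_array<char>(strlen(dir) + 1, mtClass), mirroring the hunks above.
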
--- a/src/share/vm/classfile/classLoader.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/classLoader.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -33,7 +33,7 @@
 
 
 // Meta-index (optional, to be able to skip opening boot classpath jar files)
-class MetaIndex: public CHeapObj {
+class MetaIndex: public CHeapObj<mtClass> {
  private:
   char** _meta_package_names;
   int    _num_meta_package_names;
@@ -46,7 +46,7 @@
 
 // Class path entry (directory or zip file)
 
-class ClassPathEntry: public CHeapObj {
+class ClassPathEntry: public CHeapObj<mtClass> {
  private:
   ClassPathEntry* _next;
  public:
@@ -141,7 +141,7 @@
 
 class PackageHashtable;
 class PackageInfo;
-class HashtableBucket;
+template <MEMFLAGS F> class HashtableBucket;
 
 class ClassLoader: AllStatic {
  public:
@@ -299,7 +299,7 @@
   // Initialization
   static void initialize();
   static void create_package_info_table();
-  static void create_package_info_table(HashtableBucket *t, int length,
+  static void create_package_info_table(HashtableBucket<mtClass> *t, int length,
                                         int number_of_entries);
   static int compute_Object_vtable();
 
--- a/src/share/vm/classfile/dictionary.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/dictionary.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -36,16 +36,16 @@
 
 
 Dictionary::Dictionary(int table_size)
-  : TwoOopHashtable<klassOop>(table_size, sizeof(DictionaryEntry)) {
+  : TwoOopHashtable<klassOop, mtClass>(table_size, sizeof(DictionaryEntry)) {
   _current_class_index = 0;
   _current_class_entry = NULL;
 };
 
 
 
-Dictionary::Dictionary(int table_size, HashtableBucket* t,
+Dictionary::Dictionary(int table_size, HashtableBucket<mtClass>* t,
                        int number_of_entries)
-  : TwoOopHashtable<klassOop>(table_size, sizeof(DictionaryEntry), t, number_of_entries) {
+  : TwoOopHashtable<klassOop, mtClass>(table_size, sizeof(DictionaryEntry), t, number_of_entries) {
   _current_class_index = 0;
   _current_class_entry = NULL;
 };
@@ -54,7 +54,7 @@
 DictionaryEntry* Dictionary::new_entry(unsigned int hash, klassOop klass,
                                        oop loader) {
   DictionaryEntry* entry;
-  entry = (DictionaryEntry*)Hashtable<klassOop>::new_entry(hash, klass);
+  entry = (DictionaryEntry*)Hashtable<klassOop, mtClass>::new_entry(hash, klass);
   entry->set_loader(loader);
   entry->set_pd_set(NULL);
   return entry;
@@ -62,7 +62,7 @@
 
 
 DictionaryEntry* Dictionary::new_entry() {
-  DictionaryEntry* entry = (DictionaryEntry*)Hashtable<klassOop>::new_entry(0L, NULL);
+  DictionaryEntry* entry = (DictionaryEntry*)Hashtable<klassOop, mtClass>::new_entry(0L, NULL);
   entry->set_loader(NULL);
   entry->set_pd_set(NULL);
   return entry;
@@ -76,7 +76,7 @@
     entry->set_pd_set(to_delete->next());
     delete to_delete;
   }
-  Hashtable<klassOop>::free_entry(entry);
+  Hashtable<klassOop, mtClass>::free_entry(entry);
 }
 
 
@@ -554,12 +554,12 @@
 }
 
 SymbolPropertyTable::SymbolPropertyTable(int table_size)
-  : Hashtable<Symbol*>(table_size, sizeof(SymbolPropertyEntry))
+  : Hashtable<Symbol*, mtSymbol>(table_size, sizeof(SymbolPropertyEntry))
 {
 }
-SymbolPropertyTable::SymbolPropertyTable(int table_size, HashtableBucket* t,
+SymbolPropertyTable::SymbolPropertyTable(int table_size, HashtableBucket<mtSymbol>* t,
                                          int number_of_entries)
-  : Hashtable<Symbol*>(table_size, sizeof(SymbolPropertyEntry), t, number_of_entries)
+  : Hashtable<Symbol*, mtSymbol>(table_size, sizeof(SymbolPropertyEntry), t, number_of_entries)
 {
 }
 
@@ -584,7 +584,7 @@
   assert(find_entry(index, hash, sym, sym_mode) == NULL, "no double entry");
 
   SymbolPropertyEntry* p = new_entry(hash, sym, sym_mode);
-  Hashtable<Symbol*>::add_entry(index, p);
+  Hashtable<Symbol*, mtSymbol>::add_entry(index, p);
   return p;
 }
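
Dictionary and SymbolPropertyTable above now derive from hashtables templated on a MEMFLAGS category (mtClass vs. mtSymbol), so the tag becomes part of the type and every entry allocated through the base class is attributed automatically. A simplified, self-contained sketch (assumed enum and allocator, not HotSpot's):

#include <cstdlib>

enum MemoryTag { mtClass, mtSymbol };

template <MemoryTag F>
class BasicTable {
 protected:
  void* new_entry_bytes(std::size_t size) {
    // a real tracker would charge `size` bytes to category F here
    return std::malloc(size);
  }
};

// Subclasses pick the category once; callers never repeat it.
class ClassDictionary : public BasicTable<mtClass>  {};
class SymbolProps     : public BasicTable<mtSymbol> {};
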
 
--- a/src/share/vm/classfile/dictionary.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/dictionary.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -36,7 +36,7 @@
 // The data structure for the system dictionary (and the shared system
 // dictionary).
 
-class Dictionary : public TwoOopHashtable<klassOop> {
+class Dictionary : public TwoOopHashtable<klassOop, mtClass> {
   friend class VMStructs;
 private:
   // current iteration index.
@@ -48,22 +48,22 @@
                              Symbol* name, Handle loader);
 
   DictionaryEntry* bucket(int i) {
-    return (DictionaryEntry*)Hashtable<klassOop>::bucket(i);
+    return (DictionaryEntry*)Hashtable<klassOop, mtClass>::bucket(i);
   }
 
   // The following method is not MT-safe and must be done under lock.
   DictionaryEntry** bucket_addr(int i) {
-    return (DictionaryEntry**)Hashtable<klassOop>::bucket_addr(i);
+    return (DictionaryEntry**)Hashtable<klassOop, mtClass>::bucket_addr(i);
   }
 
   void add_entry(int index, DictionaryEntry* new_entry) {
-    Hashtable<klassOop>::add_entry(index, (HashtableEntry<oop>*)new_entry);
+    Hashtable<klassOop, mtClass>::add_entry(index, (HashtableEntry<oop, mtClass>*)new_entry);
   }
 
 
 public:
   Dictionary(int table_size);
-  Dictionary(int table_size, HashtableBucket* t, int number_of_entries);
+  Dictionary(int table_size, HashtableBucket<mtClass>* t, int number_of_entries);
 
   DictionaryEntry* new_entry(unsigned int hash, klassOop klass, oop loader);
 
@@ -129,7 +129,7 @@
 // The following classes can be in dictionary.cpp, but we need these
 // to be in header file so that SA's vmStructs can access.
 
-class ProtectionDomainEntry :public CHeapObj {
+class ProtectionDomainEntry :public CHeapObj<mtClass> {
   friend class VMStructs;
  public:
   ProtectionDomainEntry* _next;
@@ -147,7 +147,7 @@
 // An entry in the system dictionary, this describes a class as
 // { klassOop, loader, protection_domain }.
 
-class DictionaryEntry : public HashtableEntry<klassOop> {
+class DictionaryEntry : public HashtableEntry<klassOop, mtClass> {
   friend class VMStructs;
  private:
   // Contains the set of approved protection domains that can access
@@ -166,11 +166,11 @@
   klassOop* klass_addr() { return (klassOop*)literal_addr(); }
 
   DictionaryEntry* next() const {
-    return (DictionaryEntry*)HashtableEntry<klassOop>::next();
+    return (DictionaryEntry*)HashtableEntry<klassOop, mtClass>::next();
   }
 
   DictionaryEntry** next_addr() {
-    return (DictionaryEntry**)HashtableEntry<klassOop>::next_addr();
+    return (DictionaryEntry**)HashtableEntry<klassOop, mtClass>::next_addr();
   }
 
   oop loader() const { return _loader; }
@@ -228,7 +228,7 @@
 
 // Entry in a SymbolPropertyTable, mapping a single Symbol*
 // to a managed and an unmanaged pointer.
-class SymbolPropertyEntry : public HashtableEntry<Symbol*> {
+class SymbolPropertyEntry : public HashtableEntry<Symbol*, mtSymbol> {
   friend class VMStructs;
  private:
   intptr_t _symbol_mode;  // secondary key
@@ -248,11 +248,11 @@
   void set_property_data(address p) { _property_data = p; }
 
   SymbolPropertyEntry* next() const {
-    return (SymbolPropertyEntry*)HashtableEntry<Symbol*>::next();
+    return (SymbolPropertyEntry*)HashtableEntry<Symbol*, mtSymbol>::next();
   }
 
   SymbolPropertyEntry** next_addr() {
-    return (SymbolPropertyEntry**)HashtableEntry<Symbol*>::next_addr();
+    return (SymbolPropertyEntry**)HashtableEntry<Symbol*, mtSymbol>::next_addr();
   }
 
   oop* property_oop_addr()          { return &_property_oop; }
@@ -278,16 +278,16 @@
 // A system-internal mapping of symbols to pointers, both managed
 // and unmanaged.  Used to record the auto-generation of each method
 // MethodHandle.invoke(S)T, for all signatures (S)T.
-class SymbolPropertyTable : public Hashtable<Symbol*> {
+class SymbolPropertyTable : public Hashtable<Symbol*, mtSymbol> {
   friend class VMStructs;
 private:
   SymbolPropertyEntry* bucket(int i) {
-    return (SymbolPropertyEntry*) Hashtable<Symbol*>::bucket(i);
+    return (SymbolPropertyEntry*) Hashtable<Symbol*, mtSymbol>::bucket(i);
   }
 
   // The following method is not MT-safe and must be done under lock.
   SymbolPropertyEntry** bucket_addr(int i) {
-    return (SymbolPropertyEntry**) Hashtable<Symbol*>::bucket_addr(i);
+    return (SymbolPropertyEntry**) Hashtable<Symbol*, mtSymbol>::bucket_addr(i);
   }
 
   void add_entry(int index, SymbolPropertyEntry* new_entry) {
@@ -298,7 +298,7 @@
   }
 
   SymbolPropertyEntry* new_entry(unsigned int hash, Symbol* symbol, intptr_t symbol_mode) {
-    SymbolPropertyEntry* entry = (SymbolPropertyEntry*) Hashtable<Symbol*>::new_entry(hash, symbol);
+    SymbolPropertyEntry* entry = (SymbolPropertyEntry*) Hashtable<Symbol*, mtSymbol>::new_entry(hash, symbol);
     // Hashtable with Symbol* literal must increment and decrement refcount.
     symbol->increment_refcount();
     entry->set_symbol_mode(symbol_mode);
@@ -309,17 +309,17 @@
 
 public:
   SymbolPropertyTable(int table_size);
-  SymbolPropertyTable(int table_size, HashtableBucket* t, int number_of_entries);
+  SymbolPropertyTable(int table_size, HashtableBucket<mtSymbol>* t, int number_of_entries);
 
   void free_entry(SymbolPropertyEntry* entry) {
     // decrement Symbol refcount here because hashtable doesn't.
     entry->literal()->decrement_refcount();
-    Hashtable<Symbol*>::free_entry(entry);
+    Hashtable<Symbol*, mtSymbol>::free_entry(entry);
   }
 
   unsigned int compute_hash(Symbol* sym, intptr_t symbol_mode) {
     // Use the regular identity_hash.
-    return Hashtable<Symbol*>::compute_hash(sym) ^ symbol_mode;
+    return Hashtable<Symbol*, mtSymbol>::compute_hash(sym) ^ symbol_mode;
   }
 
   int index_for(Symbol* name, intptr_t symbol_mode) {
--- a/src/share/vm/classfile/javaAssertions.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/javaAssertions.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -58,7 +58,7 @@
   // it is never freed, so will be leaked (along with other option strings -
   // e.g., bootclasspath) if a process creates/destroys multiple VMs.
   int len = (int)strlen(name);
-  char *name_copy = NEW_C_HEAP_ARRAY(char, len + 1);
+  char *name_copy = NEW_C_HEAP_ARRAY(char, len + 1, mtClass);
   strcpy(name_copy, name);
 
   // Figure out which list the new item should go on.  Names that end in "..."
--- a/src/share/vm/classfile/javaAssertions.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/javaAssertions.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -68,7 +68,7 @@
   static OptionList*    _packages;      // Options for package trees.
 };
 
-class JavaAssertions::OptionList: public CHeapObj {
+class JavaAssertions::OptionList: public CHeapObj<mtClass> {
 public:
   inline OptionList(const char* name, bool enable, OptionList* next);
 
--- a/src/share/vm/classfile/javaClasses.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/javaClasses.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -126,6 +126,13 @@
   if (!find_field(ik, name_symbol, signature_symbol, &fd, allow_super)) {
     ResourceMark rm;
     tty->print_cr("Invalid layout of %s at %s", ik->external_name(), name_symbol->as_C_string());
+#ifndef PRODUCT
+    klass_oop->print();
+    tty->print_cr("all fields:");
+    for (AllFieldStream fs(instanceKlass::cast(klass_oop)); !fs.done(); fs.next()) {
+      tty->print_cr("  name: %s, sig: %s, flags: %08x", fs.name()->as_C_string(), fs.signature()->as_C_string(), fs.access_flags().as_int());
+    }
+#endif //PRODUCT
     fatal("Invalid layout of preloaded class");
   }
   dest_offset = fd.offset();
@@ -1455,6 +1462,7 @@
   nmethod* nm = NULL;
   bool skip_fillInStackTrace_check = false;
   bool skip_throwableInit_check = false;
+  bool skip_hidden = !ShowHiddenFrames;
 
   for (frame fr = thread->last_frame(); max_depth != total_count;) {
     methodOop method = NULL;
@@ -1534,6 +1542,9 @@
         skip_throwableInit_check = true;
       }
     }
+    if (method->is_hidden()) {
+      if (skip_hidden)  continue;
+    }
     bt.push(method, bci, CHECK);
     total_count++;
   }
@@ -1724,6 +1735,8 @@
   java_lang_StackTraceElement::set_methodName(element(), methodname);
   // Fill in source file name
   Symbol* source = instanceKlass::cast(method->method_holder())->source_file_name();
+  if (ShowHiddenFrames && source == NULL)
+    source = vmSymbols::unknown_class_name();
   oop filename = StringTable::intern(source, CHECK_0);
   java_lang_StackTraceElement::set_fileName(element(), filename);
   // Fill in source line number
@@ -1736,6 +1749,9 @@
   } else {
     // Returns -1 if no LineNumberTable, and otherwise actual line number
     line_number = method->line_number_from_bci(bci);
+    if (line_number == -1 && ShowHiddenFrames) {
+      line_number = bci + 1000000;
+    }
   }
   java_lang_StackTraceElement::set_lineNumber(element(), line_number);
 
@@ -2377,8 +2393,7 @@
 // Support for java_lang_invoke_MethodHandle
 
 int java_lang_invoke_MethodHandle::_type_offset;
-int java_lang_invoke_MethodHandle::_vmtarget_offset;
-int java_lang_invoke_MethodHandle::_vmentry_offset;
+int java_lang_invoke_MethodHandle::_form_offset;
 
 int java_lang_invoke_MemberName::_clazz_offset;
 int java_lang_invoke_MemberName::_name_offset;
@@ -2387,21 +2402,16 @@
 int java_lang_invoke_MemberName::_vmtarget_offset;
 int java_lang_invoke_MemberName::_vmindex_offset;
 
-int java_lang_invoke_DirectMethodHandle::_vmindex_offset;
-
-int java_lang_invoke_BoundMethodHandle::_argument_offset;
-int java_lang_invoke_BoundMethodHandle::_vmargslot_offset;
-
-int java_lang_invoke_AdapterMethodHandle::_conversion_offset;
-
-int java_lang_invoke_CountingMethodHandle::_vmcount_offset;
+int java_lang_invoke_LambdaForm::_vmentry_offset;
 
 void java_lang_invoke_MethodHandle::compute_offsets() {
   klassOop klass_oop = SystemDictionary::MethodHandle_klass();
   if (klass_oop != NULL && EnableInvokeDynamic) {
-    bool allow_super = false;
-    compute_offset(_type_offset,      klass_oop, vmSymbols::type_name(),      vmSymbols::java_lang_invoke_MethodType_signature(), allow_super);
-    METHODHANDLE_INJECTED_FIELDS(INJECTED_FIELD_COMPUTE_OFFSET);
+    compute_offset(_type_offset, klass_oop, vmSymbols::type_name(), vmSymbols::java_lang_invoke_MethodType_signature());
+    compute_optional_offset(_form_offset, klass_oop, vmSymbols::form_name(), vmSymbols::java_lang_invoke_LambdaForm_signature());
+    if (_form_offset == 0) {
+      EnableInvokeDynamic = false;
+    }
   }
 }
 
@@ -2412,50 +2422,17 @@
     compute_offset(_name_offset,      klass_oop, vmSymbols::name_name(),      vmSymbols::string_signature());
     compute_offset(_type_offset,      klass_oop, vmSymbols::type_name(),      vmSymbols::object_signature());
     compute_offset(_flags_offset,     klass_oop, vmSymbols::flags_name(),     vmSymbols::int_signature());
-    compute_offset(_vmindex_offset,   klass_oop, vmSymbols::vmindex_name(),   vmSymbols::int_signature());
     MEMBERNAME_INJECTED_FIELDS(INJECTED_FIELD_COMPUTE_OFFSET);
   }
 }
 
-void java_lang_invoke_DirectMethodHandle::compute_offsets() {
-  klassOop k = SystemDictionary::DirectMethodHandle_klass();
-  if (k != NULL && EnableInvokeDynamic) {
-    DIRECTMETHODHANDLE_INJECTED_FIELDS(INJECTED_FIELD_COMPUTE_OFFSET);
-  }
-}
-
-void java_lang_invoke_BoundMethodHandle::compute_offsets() {
-  klassOop k = SystemDictionary::BoundMethodHandle_klass();
-  if (k != NULL && EnableInvokeDynamic) {
-    compute_offset(_vmargslot_offset, k, vmSymbols::vmargslot_name(), vmSymbols::int_signature(),    true);
-    compute_offset(_argument_offset,  k, vmSymbols::argument_name(),  vmSymbols::object_signature(), true);
+void java_lang_invoke_LambdaForm::compute_offsets() {
+  klassOop klass_oop = SystemDictionary::LambdaForm_klass();
+  if (klass_oop != NULL && EnableInvokeDynamic) {
+    compute_offset(_vmentry_offset, klass_oop, vmSymbols::vmentry_name(), vmSymbols::java_lang_invoke_MemberName_signature());
   }
 }
 
-void java_lang_invoke_AdapterMethodHandle::compute_offsets() {
-  klassOop k = SystemDictionary::AdapterMethodHandle_klass();
-  if (k != NULL && EnableInvokeDynamic) {
-    compute_offset(_conversion_offset, k, vmSymbols::conversion_name(), vmSymbols::int_signature(), true);
-  }
-}
-
-void java_lang_invoke_CountingMethodHandle::compute_offsets() {
-  klassOop k = SystemDictionary::CountingMethodHandle_klass();
-  if (k != NULL && EnableInvokeDynamic) {
-    compute_offset(_vmcount_offset, k, vmSymbols::vmcount_name(), vmSymbols::int_signature(), true);
-  }
-}
-
-int java_lang_invoke_CountingMethodHandle::vmcount(oop mh) {
-  assert(is_instance(mh), "CMH only");
-  return mh->int_field(_vmcount_offset);
-}
-
-void java_lang_invoke_CountingMethodHandle::set_vmcount(oop mh, int count) {
-  assert(is_instance(mh), "CMH only");
-  mh->int_field_put(_vmcount_offset, count);
-}
-
 oop java_lang_invoke_MethodHandle::type(oop mh) {
   return mh->obj_field(_type_offset);
 }
@@ -2464,31 +2441,14 @@
   mh->obj_field_put(_type_offset, mtype);
 }
 
-// fetch type.form.vmslots, which is the number of JVM stack slots
-// required to carry the arguments of this MH
-int java_lang_invoke_MethodHandle::vmslots(oop mh) {
-  oop mtype = type(mh);
-  if (mtype == NULL)  return 0;  // Java code would get NPE
-  oop form = java_lang_invoke_MethodType::form(mtype);
-  if (form == NULL)   return 0;  // Java code would get NPE
-  return java_lang_invoke_MethodTypeForm::vmslots(form);
+oop java_lang_invoke_MethodHandle::form(oop mh) {
+  assert(_form_offset != 0, "");
+  return mh->obj_field(_form_offset);
 }
 
-// fetch the low-level entry point for this mh
-MethodHandleEntry* java_lang_invoke_MethodHandle::vmentry(oop mh) {
-  return (MethodHandleEntry*) mh->address_field(_vmentry_offset);
-}
-
-void java_lang_invoke_MethodHandle::set_vmentry(oop mh, MethodHandleEntry* me) {
-  assert(_vmentry_offset != 0, "must be present");
-
-  // This is always the final step that initializes a valid method handle:
-  mh->release_address_field_put(_vmentry_offset, (address) me);
-
-  // There should be enough memory barriers on exit from native methods
-  // to ensure that the MH is fully initialized to all threads before
-  // Java code can publish it in global data structures.
-  // But just in case, we use release_address_field_put.
+void java_lang_invoke_MethodHandle::set_form(oop mh, oop lform) {
+  assert(_form_offset != 0, "");
+  mh->obj_field_put(_form_offset, lform);
 }
 
 /// MemberName accessors
@@ -2540,57 +2500,40 @@
 
 void java_lang_invoke_MemberName::set_vmtarget(oop mname, oop ref) {
   assert(is_instance(mname), "wrong type");
+#ifdef ASSERT
+  // check the type of the vmtarget
+  if (ref != NULL) {
+    switch (flags(mname) & (MN_IS_METHOD |
+                            MN_IS_CONSTRUCTOR |
+                            MN_IS_FIELD)) {
+    case MN_IS_METHOD:
+    case MN_IS_CONSTRUCTOR:
+      assert(ref->is_method(), "should be a method");
+      break;
+    case MN_IS_FIELD:
+      assert(ref->is_klass(), "should be a class");
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+  }
+#endif //ASSERT
   mname->obj_field_put(_vmtarget_offset, ref);
 }
 
-int java_lang_invoke_MemberName::vmindex(oop mname) {
-  assert(is_instance(mname), "wrong type");
-  return mname->int_field(_vmindex_offset);
-}
-
-void java_lang_invoke_MemberName::set_vmindex(oop mname, int index) {
+intptr_t java_lang_invoke_MemberName::vmindex(oop mname) {
   assert(is_instance(mname), "wrong type");
-  mname->int_field_put(_vmindex_offset, index);
-}
-
-oop java_lang_invoke_MethodHandle::vmtarget(oop mh) {
-  assert(is_instance(mh), "MH only");
-  return mh->obj_field(_vmtarget_offset);
-}
-
-void java_lang_invoke_MethodHandle::set_vmtarget(oop mh, oop ref) {
-  assert(is_instance(mh), "MH only");
-  mh->obj_field_put(_vmtarget_offset, ref);
-}
-
-int java_lang_invoke_DirectMethodHandle::vmindex(oop mh) {
-  assert(is_instance(mh), "DMH only");
-  return mh->int_field(_vmindex_offset);
+  return (intptr_t) mname->address_field(_vmindex_offset);
 }
 
-void java_lang_invoke_DirectMethodHandle::set_vmindex(oop mh, int index) {
-  assert(is_instance(mh), "DMH only");
-  mh->int_field_put(_vmindex_offset, index);
-}
-
-int java_lang_invoke_BoundMethodHandle::vmargslot(oop mh) {
-  assert(is_instance(mh), "BMH only");
-  return mh->int_field(_vmargslot_offset);
+void java_lang_invoke_MemberName::set_vmindex(oop mname, intptr_t index) {
+  assert(is_instance(mname), "wrong type");
+  mname->address_field_put(_vmindex_offset, (address) index);
 }
 
-oop java_lang_invoke_BoundMethodHandle::argument(oop mh) {
-  assert(is_instance(mh), "BMH only");
-  return mh->obj_field(_argument_offset);
-}
-
-int java_lang_invoke_AdapterMethodHandle::conversion(oop mh) {
-  assert(is_instance(mh), "AMH only");
-  return mh->int_field(_conversion_offset);
-}
-
-void java_lang_invoke_AdapterMethodHandle::set_conversion(oop mh, int conv) {
-  assert(is_instance(mh), "AMH only");
-  mh->int_field_put(_conversion_offset, conv);
+oop java_lang_invoke_LambdaForm::vmentry(oop lform) {
+  assert(is_instance(lform), "wrong type");
+  return lform->obj_field(_vmentry_offset);
 }
 
 
@@ -2598,14 +2541,12 @@
 
 int java_lang_invoke_MethodType::_rtype_offset;
 int java_lang_invoke_MethodType::_ptypes_offset;
-int java_lang_invoke_MethodType::_form_offset;
 
 void java_lang_invoke_MethodType::compute_offsets() {
   klassOop k = SystemDictionary::MethodType_klass();
   if (k != NULL) {
     compute_offset(_rtype_offset,  k, vmSymbols::rtype_name(),  vmSymbols::class_signature());
     compute_offset(_ptypes_offset, k, vmSymbols::ptypes_name(), vmSymbols::class_array_signature());
-    compute_offset(_form_offset,   k, vmSymbols::form_name(),   vmSymbols::java_lang_invoke_MethodTypeForm_signature());
   }
 }
 
@@ -2635,6 +2576,8 @@
 }
 
 bool java_lang_invoke_MethodType::equals(oop mt1, oop mt2) {
+  if (mt1 == mt2)
+    return true;
   if (rtype(mt1) != rtype(mt2))
     return false;
   if (ptype_count(mt1) != ptype_count(mt2))
@@ -2656,11 +2599,6 @@
   return (objArrayOop) mt->obj_field(_ptypes_offset);
 }
 
-oop java_lang_invoke_MethodType::form(oop mt) {
-  assert(is_instance(mt), "must be a MethodType");
-  return mt->obj_field(_form_offset);
-}
-
 oop java_lang_invoke_MethodType::ptype(oop mt, int idx) {
   return ptypes(mt)->obj_at(idx);
 }
@@ -2669,62 +2607,20 @@
   return ptypes(mt)->length();
 }
 
-
-
-// Support for java_lang_invoke_MethodTypeForm
-
-int java_lang_invoke_MethodTypeForm::_vmslots_offset;
-int java_lang_invoke_MethodTypeForm::_vmlayout_offset;
-int java_lang_invoke_MethodTypeForm::_erasedType_offset;
-int java_lang_invoke_MethodTypeForm::_genericInvoker_offset;
-
-void java_lang_invoke_MethodTypeForm::compute_offsets() {
-  klassOop k = SystemDictionary::MethodTypeForm_klass();
-  if (k != NULL) {
-    compute_optional_offset(_vmslots_offset,    k, vmSymbols::vmslots_name(),    vmSymbols::int_signature(), true);
-    compute_optional_offset(_vmlayout_offset,   k, vmSymbols::vmlayout_name(),   vmSymbols::object_signature());
-    compute_optional_offset(_erasedType_offset, k, vmSymbols::erasedType_name(), vmSymbols::java_lang_invoke_MethodType_signature(), true);
-    compute_optional_offset(_genericInvoker_offset, k, vmSymbols::genericInvoker_name(), vmSymbols::java_lang_invoke_MethodHandle_signature(), true);
-    if (_genericInvoker_offset == 0)  _genericInvoker_offset = -1;  // set to explicit "empty" value
-    METHODTYPEFORM_INJECTED_FIELDS(INJECTED_FIELD_COMPUTE_OFFSET);
+int java_lang_invoke_MethodType::ptype_slot_count(oop mt) {
+  objArrayOop pts = ptypes(mt);
+  int count = pts->length();
+  int slots = 0;
+  for (int i = 0; i < count; i++) {
+    BasicType bt = java_lang_Class::as_BasicType(pts->obj_at(i));
+    slots += type2size[bt];
   }
-}
-
-int java_lang_invoke_MethodTypeForm::vmslots(oop mtform) {
-  assert(mtform->klass() == SystemDictionary::MethodTypeForm_klass(), "MTForm only");
-  assert(_vmslots_offset > 0, "");
-  return mtform->int_field(_vmslots_offset);
+  return slots;
 }
 
-oop java_lang_invoke_MethodTypeForm::vmlayout(oop mtform) {
-  assert(mtform->klass() == SystemDictionary::MethodTypeForm_klass(), "MTForm only");
-  assert(_vmlayout_offset > 0, "");
-  return mtform->obj_field(_vmlayout_offset);
-}
-
-oop java_lang_invoke_MethodTypeForm::init_vmlayout(oop mtform, oop cookie) {
-  assert(mtform->klass() == SystemDictionary::MethodTypeForm_klass(), "MTForm only");
-  oop previous = vmlayout(mtform);
-  if (previous != NULL) {
-    return previous;  // someone else beat us to it
-  }
-  HeapWord* cookie_addr = (HeapWord*) mtform->obj_field_addr<oop>(_vmlayout_offset);
-  OrderAccess::storestore();  // make sure our copy is fully committed
-  previous = oopDesc::atomic_compare_exchange_oop(cookie, cookie_addr, previous);
-  if (previous != NULL) {
-    return previous;  // someone else beat us to it
-  }
-  return cookie;
-}
-
-oop java_lang_invoke_MethodTypeForm::erasedType(oop mtform) {
-  assert(mtform->klass() == SystemDictionary::MethodTypeForm_klass(), "MTForm only");
-  return mtform->obj_field(_erasedType_offset);
-}
-
-oop java_lang_invoke_MethodTypeForm::genericInvoker(oop mtform) {
-  assert(mtform->klass() == SystemDictionary::MethodTypeForm_klass(), "MTForm only");
-  return mtform->obj_field(_genericInvoker_offset);
+int java_lang_invoke_MethodType::rtype_slot_count(oop mt) {
+  BasicType bt = java_lang_Class::as_BasicType(rtype(mt));
+  return type2size[bt];
 }
 
 
@@ -2738,17 +2634,6 @@
   if (k != NULL) {
     compute_offset(_target_offset, k, vmSymbols::target_name(), vmSymbols::java_lang_invoke_MethodHandle_signature());
   }
-
-  // Disallow compilation of CallSite.setTargetNormal and CallSite.setTargetVolatile
-  // (For C2:  keep this until we have throttling logic for uncommon traps.)
-  if (k != NULL) {
-    instanceKlass* ik = instanceKlass::cast(k);
-    methodOop m_normal   = ik->lookup_method(vmSymbols::setTargetNormal_name(),   vmSymbols::setTarget_signature());
-    methodOop m_volatile = ik->lookup_method(vmSymbols::setTargetVolatile_name(), vmSymbols::setTarget_signature());
-    guarantee(m_normal != NULL && m_volatile != NULL, "must exist");
-    m_normal->set_not_compilable_quietly();
-    m_volatile->set_not_compilable_quietly();
-  }
 }
 
 
@@ -2809,10 +2694,26 @@
 }
 
 oop java_lang_ClassLoader::parent(oop loader) {
-  assert(loader->is_oop(), "loader must be oop");
+  assert(is_instance(loader), "loader must be oop");
   return loader->obj_field(parent_offset);
 }
 
+bool java_lang_ClassLoader::isAncestor(oop loader, oop cl) {
+  assert(is_instance(loader), "loader must be oop");
+  assert(cl == NULL || is_instance(cl), "cl argument must be oop");
+  oop acl = loader;
+  debug_only(jint loop_count = 0);
+  // This loop is taken verbatim from ClassLoader.java:
+  do {
+    acl = parent(acl);
+    if (cl == acl) {
+      return true;
+    }
+    assert(++loop_count > 0, "loop_count overflow");
+  } while (acl != NULL);
+  return false;
+}
+
 
 // For class loader classes, parallelCapable defined
 // based on non-null field
@@ -2933,7 +2834,6 @@
 int java_lang_AssertionStatusDirectives::packageEnabled_offset;
 int java_lang_AssertionStatusDirectives::deflt_offset;
 int java_nio_Buffer::_limit_offset;
-int sun_misc_AtomicLongCSImpl::_value_offset;
 int java_util_concurrent_locks_AbstractOwnableSynchronizer::_owner_offset = 0;
 int sun_reflect_ConstantPool::_cp_oop_offset;
 int sun_reflect_UnsafeStaticFieldAccessorImpl::_base_offset;
@@ -2993,21 +2893,6 @@
   compute_offset(_limit_offset, k, vmSymbols::limit_name(), vmSymbols::int_signature());
 }
 
-// Support for intrinsification of sun.misc.AtomicLongCSImpl.attemptUpdate
-int sun_misc_AtomicLongCSImpl::value_offset() {
-  assert(SystemDictionary::AtomicLongCSImpl_klass() != NULL, "can't call this");
-  return _value_offset;
-}
-
-
-void sun_misc_AtomicLongCSImpl::compute_offsets() {
-  klassOop k = SystemDictionary::AtomicLongCSImpl_klass();
-  // If this class is not present, its value field offset won't be referenced.
-  if (k != NULL) {
-    compute_offset(_value_offset, k, vmSymbols::value_name(), vmSymbols::long_signature());
-  }
-}
-
 void java_util_concurrent_locks_AbstractOwnableSynchronizer::initialize(TRAPS) {
   if (_owner_offset != 0) return;
 
@@ -3088,13 +2973,9 @@
   if (EnableInvokeDynamic) {
     java_lang_invoke_MethodHandle::compute_offsets();
     java_lang_invoke_MemberName::compute_offsets();
-    java_lang_invoke_DirectMethodHandle::compute_offsets();
-    java_lang_invoke_BoundMethodHandle::compute_offsets();
-    java_lang_invoke_AdapterMethodHandle::compute_offsets();
+    java_lang_invoke_LambdaForm::compute_offsets();
     java_lang_invoke_MethodType::compute_offsets();
-    java_lang_invoke_MethodTypeForm::compute_offsets();
     java_lang_invoke_CallSite::compute_offsets();
-    java_lang_invoke_CountingMethodHandle::compute_offsets();
   }
   java_security_AccessControlContext::compute_offsets();
   // Initialize reflection classes. The layouts of these classes
@@ -3112,7 +2993,6 @@
     sun_reflect_ConstantPool::compute_offsets();
     sun_reflect_UnsafeStaticFieldAccessorImpl::compute_offsets();
   }
-  sun_misc_AtomicLongCSImpl::compute_offsets();
 
   // generated interpreter code wants to know about the offsets we just computed:
   AbstractAssembler::update_delayed_values();
@@ -3323,7 +3203,14 @@
     }
   }
   ResourceMark rm;
-  tty->print_cr("Invalid layout of %s at %s", instanceKlass::cast(klass_oop)->external_name(), name()->as_C_string());
+  tty->print_cr("Invalid layout of %s at %s/%s%s", instanceKlass::cast(klass_oop)->external_name(), name()->as_C_string(), signature()->as_C_string(), may_be_java ? " (may_be_java)" : "");
+#ifndef PRODUCT
+  klass_oop->print();
+  tty->print_cr("all fields:");
+  for (AllFieldStream fs(instanceKlass::cast(klass_oop)); !fs.done(); fs.next()) {
+    tty->print_cr("  name: %s, sig: %s, flags: %08x", fs.name()->as_C_string(), fs.signature()->as_C_string(), fs.access_flags().as_int());
+  }
+#endif //PRODUCT
   fatal("Invalid layout of preloaded class");
   return -1;
 }
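
java_lang_invoke_MethodType::ptype_slot_count above replaces the cached MethodTypeForm.vmslots with a direct computation over the parameter types: long and double occupy two JVM stack slots, every other type one. A plain-C++ sketch of the same arithmetic, with a reduced BasicType enum in place of HotSpot's type2size table:

#include <vector>

enum BasicType { T_BOOLEAN, T_INT, T_FLOAT, T_OBJECT, T_LONG, T_DOUBLE };

static int slots_for(BasicType bt) {
  return (bt == T_LONG || bt == T_DOUBLE) ? 2 : 1;  // JVM slot widths
}

int ptype_slot_count(const std::vector<BasicType>& ptypes) {
  int slots = 0;
  for (BasicType bt : ptypes)
    slots += slots_for(bt);
  return slots;
}
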
--- a/src/share/vm/classfile/javaClasses.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/javaClasses.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -883,19 +883,14 @@
 
 // Interface to java.lang.invoke.MethodHandle objects
 
-#define METHODHANDLE_INJECTED_FIELDS(macro)                               \
-  macro(java_lang_invoke_MethodHandle, vmentry,  intptr_signature, false) \
-  macro(java_lang_invoke_MethodHandle, vmtarget, object_signature, true)
-
 class MethodHandleEntry;
 
 class java_lang_invoke_MethodHandle: AllStatic {
   friend class JavaClasses;
 
  private:
-  static int _vmentry_offset;            // assembly code trampoline for MH
-  static int _vmtarget_offset;           // class-specific target reference
-  static int _type_offset;              // the MethodType of this MH
+  static int _type_offset;               // the MethodType of this MH
+  static int _form_offset;               // the LambdaForm of this MH
 
   static void compute_offsets();
 
@@ -904,13 +899,8 @@
   static oop            type(oop mh);
   static void       set_type(oop mh, oop mtype);
 
-  static oop            vmtarget(oop mh);
-  static void       set_vmtarget(oop mh, oop target);
-
-  static MethodHandleEntry* vmentry(oop mh);
-  static void       set_vmentry(oop mh, MethodHandleEntry* data);
-
-  static int            vmslots(oop mh);
+  static oop            form(oop mh);
+  static void       set_form(oop mh, oop lform);
 
   // Testers
   static bool is_subclass(klassOop klass) {
@@ -922,149 +912,45 @@
 
   // Accessors for code generation:
   static int type_offset_in_bytes()             { return _type_offset; }
-  static int vmtarget_offset_in_bytes()         { return _vmtarget_offset; }
-  static int vmentry_offset_in_bytes()          { return _vmentry_offset; }
+  static int form_offset_in_bytes()             { return _form_offset; }
 };
 
-#define DIRECTMETHODHANDLE_INJECTED_FIELDS(macro)                          \
-  macro(java_lang_invoke_DirectMethodHandle, vmindex, int_signature, true)
+// Interface to java.lang.invoke.LambdaForm objects
+// (These are a private interface for managing adapter code generation.)
 
-class java_lang_invoke_DirectMethodHandle: public java_lang_invoke_MethodHandle {
+class java_lang_invoke_LambdaForm: AllStatic {
   friend class JavaClasses;
 
  private:
-  static int _vmindex_offset;           // negative or vtable idx or itable idx
+  static int _vmentry_offset;  // type is MemberName
+
   static void compute_offsets();
 
  public:
   // Accessors
-  static int            vmindex(oop mh);
-  static void       set_vmindex(oop mh, int index);
+  static oop            vmentry(oop lform);
+  static void       set_vmentry(oop lform, oop invoker);
 
   // Testers
   static bool is_subclass(klassOop klass) {
-    return Klass::cast(klass)->is_subclass_of(SystemDictionary::DirectMethodHandle_klass());
+    return SystemDictionary::LambdaForm_klass() != NULL &&
+      Klass::cast(klass)->is_subclass_of(SystemDictionary::LambdaForm_klass());
   }
   static bool is_instance(oop obj) {
     return obj != NULL && is_subclass(obj->klass());
   }
 
   // Accessors for code generation:
-  static int vmindex_offset_in_bytes()          { return _vmindex_offset; }
-};
-
-class java_lang_invoke_BoundMethodHandle: public java_lang_invoke_MethodHandle {
-  friend class JavaClasses;
-
- private:
-  static int _argument_offset;          // argument value bound into this MH
-  static int _vmargslot_offset;         // relevant argument slot (<= vmslots)
-  static void compute_offsets();
-
-public:
-  static oop            argument(oop mh);
-  static void       set_argument(oop mh, oop ref);
-
-  static jint           vmargslot(oop mh);
-  static void       set_vmargslot(oop mh, jint slot);
-
-  // Testers
-  static bool is_subclass(klassOop klass) {
-    return Klass::cast(klass)->is_subclass_of(SystemDictionary::BoundMethodHandle_klass());
-  }
-  static bool is_instance(oop obj) {
-    return obj != NULL && is_subclass(obj->klass());
-  }
-
-  static int argument_offset_in_bytes()         { return _argument_offset; }
-  static int vmargslot_offset_in_bytes()        { return _vmargslot_offset; }
+  static int vmentry_offset_in_bytes()          { return _vmentry_offset; }
 };
 
-class java_lang_invoke_AdapterMethodHandle: public java_lang_invoke_BoundMethodHandle {
-  friend class JavaClasses;
-
- private:
-  static int _conversion_offset;        // type of conversion to apply
-  static void compute_offsets();
-
- public:
-  static int            conversion(oop mh);
-  static void       set_conversion(oop mh, int conv);
-
-  // Testers
-  static bool is_subclass(klassOop klass) {
-    return Klass::cast(klass)->is_subclass_of(SystemDictionary::AdapterMethodHandle_klass());
-  }
-  static bool is_instance(oop obj) {
-    return obj != NULL && is_subclass(obj->klass());
-  }
-
-  // Relevant integer codes (keep these in synch. with MethodHandleNatives.Constants):
-  enum {
-    OP_RETYPE_ONLY   = 0x0, // no argument changes; straight retype
-    OP_RETYPE_RAW    = 0x1, // straight retype, trusted (void->int, Object->T)
-    OP_CHECK_CAST    = 0x2, // ref-to-ref conversion; requires a Class argument
-    OP_PRIM_TO_PRIM  = 0x3, // converts from one primitive to another
-    OP_REF_TO_PRIM   = 0x4, // unboxes a wrapper to produce a primitive
-    OP_PRIM_TO_REF   = 0x5, // boxes a primitive into a wrapper
-    OP_SWAP_ARGS     = 0x6, // swap arguments (vminfo is 2nd arg)
-    OP_ROT_ARGS      = 0x7, // rotate arguments (vminfo is displaced arg)
-    OP_DUP_ARGS      = 0x8, // duplicates one or more arguments (at TOS)
-    OP_DROP_ARGS     = 0x9, // remove one or more argument slots
-    OP_COLLECT_ARGS  = 0xA, // combine arguments using an auxiliary function
-    OP_SPREAD_ARGS   = 0xB, // expand in place a varargs array (of known size)
-    OP_FOLD_ARGS     = 0xC, // combine but do not remove arguments; prepend result
-    //OP_UNUSED_13   = 0xD, // unused code, perhaps for reified argument lists
-    CONV_OP_LIMIT    = 0xE, // limit of CONV_OP enumeration
-
-    CONV_OP_MASK     = 0xF00, // this nybble contains the conversion op field
-    CONV_TYPE_MASK   = 0x0F,  // fits T_ADDRESS and below
-    CONV_VMINFO_MASK = 0x0FF, // LSB is reserved for JVM use
-    CONV_VMINFO_SHIFT     =  0, // position of bits in CONV_VMINFO_MASK
-    CONV_OP_SHIFT         =  8, // position of bits in CONV_OP_MASK
-    CONV_DEST_TYPE_SHIFT  = 12, // byte 2 has the adapter BasicType (if needed)
-    CONV_SRC_TYPE_SHIFT   = 16, // byte 2 has the source BasicType (if needed)
-    CONV_STACK_MOVE_SHIFT = 20, // high 12 bits give signed SP change
-    CONV_STACK_MOVE_MASK  = (1 << (32 - CONV_STACK_MOVE_SHIFT)) - 1
-  };
-
-  static int conversion_offset_in_bytes()       { return _conversion_offset; }
-};
-
-
-// A simple class that maintains an invocation count
-class java_lang_invoke_CountingMethodHandle: public java_lang_invoke_MethodHandle {
-  friend class JavaClasses;
-
- private:
-  static int _vmcount_offset;
-  static void compute_offsets();
-
- public:
-  // Accessors
-  static int            vmcount(oop mh);
-  static void       set_vmcount(oop mh, int count);
-
-  // Testers
-  static bool is_subclass(klassOop klass) {
-    return SystemDictionary::CountingMethodHandle_klass() != NULL &&
-      Klass::cast(klass)->is_subclass_of(SystemDictionary::CountingMethodHandle_klass());
-  }
-  static bool is_instance(oop obj) {
-    return obj != NULL && is_subclass(obj->klass());
-  }
-
-  // Accessors for code generation:
-  static int vmcount_offset_in_bytes()          { return _vmcount_offset; }
-};
-
-
 
 // Interface to java.lang.invoke.MemberName objects
 // (These are a private interface for Java code to query the class hierarchy.)
 
-#define MEMBERNAME_INJECTED_FIELDS(macro)                              \
-  macro(java_lang_invoke_MemberName, vmtarget, object_signature, true)
+#define MEMBERNAME_INJECTED_FIELDS(macro)                               \
+  macro(java_lang_invoke_MemberName, vmindex,  intptr_signature, false) \
+  macro(java_lang_invoke_MemberName, vmtarget, object_signature, false)
 
 class java_lang_invoke_MemberName: AllStatic {
   friend class JavaClasses;
@@ -1076,7 +962,7 @@
   //    private Object     type;        // may be null if not yet materialized
   //    private int        flags;       // modifier bits; see reflect.Modifier
   //    private Object     vmtarget;    // VM-specific target value
-  //    private int        vmindex;     // method index within class or interface
+  //    private intptr_t   vmindex;     // member index within class or interface
   static int _clazz_offset;
   static int _name_offset;
   static int _type_offset;
@@ -1100,15 +986,11 @@
   static int            flags(oop mname);
   static void       set_flags(oop mname, int flags);
 
-  static int            modifiers(oop mname) { return (u2) flags(mname); }
-  static void       set_modifiers(oop mname, int mods)
-                                { set_flags(mname, (flags(mname) &~ (u2)-1) | (u2)mods); }
-
   static oop            vmtarget(oop mname);
   static void       set_vmtarget(oop mname, oop target);
 
-  static int            vmindex(oop mname);
-  static void       set_vmindex(oop mname, int index);
+  static intptr_t       vmindex(oop mname);
+  static void       set_vmindex(oop mname, intptr_t index);
 
   // Testers
   static bool is_subclass(klassOop klass) {
@@ -1124,9 +1006,11 @@
     MN_IS_CONSTRUCTOR      = 0x00020000, // constructor
     MN_IS_FIELD            = 0x00040000, // field
     MN_IS_TYPE             = 0x00080000, // nested type
-    MN_SEARCH_SUPERCLASSES = 0x00100000, // for MHN.getMembers
-    MN_SEARCH_INTERFACES   = 0x00200000, // for MHN.getMembers
-    VM_INDEX_UNINITIALIZED = -99
+    MN_REFERENCE_KIND_SHIFT = 24, // refKind
+    MN_REFERENCE_KIND_MASK = 0x0F000000 >> MN_REFERENCE_KIND_SHIFT,
+    // The SEARCH_* bits are not for MN.flags but for the matchFlags argument of MHN.getMembers:
+    MN_SEARCH_SUPERCLASSES = 0x00100000, // walk super classes
+    MN_SEARCH_INTERFACES   = 0x00200000  // walk implemented interfaces
   };
 
   // Accessors for code generation:
@@ -1147,7 +1031,6 @@
  private:
   static int _rtype_offset;
   static int _ptypes_offset;
-  static int _form_offset;
 
   static void compute_offsets();
 
@@ -1155,11 +1038,13 @@
   // Accessors
   static oop            rtype(oop mt);
   static objArrayOop    ptypes(oop mt);
-  static oop            form(oop mt);
 
   static oop            ptype(oop mt, int index);
   static int            ptype_count(oop mt);
 
+  static int            ptype_slot_count(oop mt);  // extra counts for long/double
+  static int            rtype_slot_count(oop mt);  // extra counts for long/double
+
   static Symbol*        as_signature(oop mt, bool intern_if_not_found, TRAPS);
   static void           print_signature(oop mt, outputStream* st);
 
@@ -1172,40 +1057,6 @@
   // Accessors for code generation:
   static int rtype_offset_in_bytes()            { return _rtype_offset; }
   static int ptypes_offset_in_bytes()           { return _ptypes_offset; }
-  static int form_offset_in_bytes()             { return _form_offset; }
-};
-
-#define METHODTYPEFORM_INJECTED_FIELDS(macro)                              \
-  macro(java_lang_invoke_MethodTypeForm, vmslots,  int_signature,    true) \
-  macro(java_lang_invoke_MethodTypeForm, vmlayout, object_signature, true)
-
-class java_lang_invoke_MethodTypeForm: AllStatic {
-  friend class JavaClasses;
-
- private:
-  static int _vmslots_offset;           // number of argument slots needed
-  static int _vmlayout_offset;          // object describing internal calling sequence
-  static int _erasedType_offset;        // erasedType = canonical MethodType
-  static int _genericInvoker_offset;    // genericInvoker = adapter for invokeGeneric
-
-  static void compute_offsets();
-
- public:
-  // Accessors
-  static int            vmslots(oop mtform);
-  static void       set_vmslots(oop mtform, int vmslots);
-
-  static oop            erasedType(oop mtform);
-  static oop            genericInvoker(oop mtform);
-
-  static oop            vmlayout(oop mtform);
-  static oop       init_vmlayout(oop mtform, oop cookie);
-
-  // Accessors for code generation:
-  static int vmslots_offset_in_bytes()          { return _vmslots_offset; }
-  static int vmlayout_offset_in_bytes()         { return _vmlayout_offset; }
-  static int erasedType_offset_in_bytes()       { return _erasedType_offset; }
-  static int genericInvoker_offset_in_bytes()   { return _genericInvoker_offset; }
 };
 
 
@@ -1275,6 +1126,7 @@
 
  public:
   static oop parent(oop loader);
+  static bool isAncestor(oop loader, oop cl);
 
   // Support for parallelCapable field
   static bool parallelCapable(oop the_class_mirror);
@@ -1284,6 +1136,14 @@
   // Fix for 4474172
   static oop  non_reflection_class_loader(oop loader);
 
+  // Testers
+  static bool is_subclass(klassOop klass) {
+    return Klass::cast(klass)->is_subclass_of(SystemDictionary::ClassLoader_klass());
+  }
+  static bool is_instance(oop obj) {
+    return obj != NULL && is_subclass(obj->klass());
+  }
+
   // Debugging
   friend class JavaClasses;
 };
@@ -1383,15 +1243,6 @@
   static void compute_offsets();
 };
 
-class sun_misc_AtomicLongCSImpl: AllStatic {
- private:
-  static int _value_offset;
-
- public:
-  static int  value_offset();
-  static void compute_offsets();
-};
-
 class java_util_concurrent_locks_AbstractOwnableSynchronizer : AllStatic {
  private:
   static int  _owner_offset;
@@ -1434,10 +1285,7 @@
 
 #define ALL_INJECTED_FIELDS(macro)          \
   CLASS_INJECTED_FIELDS(macro)              \
-  METHODHANDLE_INJECTED_FIELDS(macro)       \
-  DIRECTMETHODHANDLE_INJECTED_FIELDS(macro) \
-  MEMBERNAME_INJECTED_FIELDS(macro)         \
-  METHODTYPEFORM_INJECTED_FIELDS(macro)
+  MEMBERNAME_INJECTED_FIELDS(macro)
 
 // Interface to hard-coded offset checking
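
The MemberName flag layout above packs a 4-bit reference kind into bits 24..27; note that MN_REFERENCE_KIND_MASK is defined already shifted down (0x0F), so it is applied after shifting. A self-contained sketch of the encode/decode arithmetic, with local copies of the constants:

    #include <cassert>

    enum {
      MN_REFERENCE_KIND_SHIFT = 24,
      MN_REFERENCE_KIND_MASK  = 0x0F000000 >> MN_REFERENCE_KIND_SHIFT  // == 0x0F
    };

    // Extract the reference kind from a MemberName-style flags word.
    static int ref_kind_of(int flags) {
      return (flags >> MN_REFERENCE_KIND_SHIFT) & MN_REFERENCE_KIND_MASK;
    }

    // Insert a reference kind, clearing any previous value first.
    static int with_ref_kind(int flags, int kind) {
      flags &= ~(MN_REFERENCE_KIND_MASK << MN_REFERENCE_KIND_SHIFT);
      return flags | ((kind & MN_REFERENCE_KIND_MASK) << MN_REFERENCE_KIND_SHIFT);
    }

    int main() {
      int f = with_ref_kind(0x00040000 /* MN_IS_FIELD */, 5);
      assert(ref_kind_of(f) == 5);
      return 0;
    }
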
 
--- a/src/share/vm/classfile/loaderConstraints.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/loaderConstraints.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -31,7 +31,7 @@
 #include "utilities/hashtable.inline.hpp"
 
 LoaderConstraintTable::LoaderConstraintTable(int nof_buckets)
-  : Hashtable<klassOop>(nof_buckets, sizeof(LoaderConstraintEntry)) {};
+  : Hashtable<klassOop, mtClass>(nof_buckets, sizeof(LoaderConstraintEntry)) {};
 
 
 LoaderConstraintEntry* LoaderConstraintTable::new_entry(
@@ -39,7 +39,7 @@
                                  klassOop klass, int num_loaders,
                                  int max_loaders) {
   LoaderConstraintEntry* entry;
-  entry = (LoaderConstraintEntry*)Hashtable<klassOop>::new_entry(hash, klass);
+  entry = (LoaderConstraintEntry*)Hashtable<klassOop, mtClass>::new_entry(hash, klass);
   entry->set_name(name);
   entry->set_num_loaders(num_loaders);
   entry->set_max_loaders(max_loaders);
@@ -49,7 +49,7 @@
 void LoaderConstraintTable::free_entry(LoaderConstraintEntry *entry) {
   // decrement name refcount before freeing
   entry->name()->decrement_refcount();
-  Hashtable<klassOop>::free_entry(entry);
+  Hashtable<klassOop, mtClass>::free_entry(entry);
 }
 
 
@@ -164,7 +164,7 @@
 
         // Purge entry
         *p = probe->next();
-        FREE_C_HEAP_ARRAY(oop, probe->loaders());
+        FREE_C_HEAP_ARRAY(oop, probe->loaders(), mtClass);
         free_entry(probe);
       } else {
 #ifdef ASSERT
@@ -224,7 +224,7 @@
         int index = hash_to_index(hash);
         LoaderConstraintEntry* p;
         p = new_entry(hash, class_name, klass, 2, 2);
-        p->set_loaders(NEW_C_HEAP_ARRAY(oop, 2));
+        p->set_loaders(NEW_C_HEAP_ARRAY(oop, 2, mtClass));
         p->set_loader(0, class_loader1());
         p->set_loader(1, class_loader2());
         p->set_klass(klass);
@@ -340,10 +340,10 @@
                                                     int nfree) {
     if (p->max_loaders() - p->num_loaders() < nfree) {
         int n = nfree + p->num_loaders();
-        oop* new_loaders = NEW_C_HEAP_ARRAY(oop, n);
+        oop* new_loaders = NEW_C_HEAP_ARRAY(oop, n, mtClass);
         memcpy(new_loaders, p->loaders(), sizeof(oop) * p->num_loaders());
         p->set_max_loaders(n);
-        FREE_C_HEAP_ARRAY(oop, p->loaders());
+        FREE_C_HEAP_ARRAY(oop, p->loaders(), mtClass);
         p->set_loaders(new_loaders);
     }
 }
@@ -425,7 +425,7 @@
   }
 
   *pp2 = p2->next();
-  FREE_C_HEAP_ARRAY(oop, p2->loaders());
+  FREE_C_HEAP_ARRAY(oop, p2->loaders(), mtClass);
   free_entry(p2);
   return;
 }
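
All the allocation changes in this file are mechanical: NEW_C_HEAP_ARRAY and FREE_C_HEAP_ARRAY now take a memory-category tag (mtClass) so native-memory tracking can attribute the loader arrays. The ensure-capacity hunk also shows the usual alloc/copy/free growth dance; a generic sketch of that pattern, with plain new/delete standing in for the tagged macros:

    #include <cstring>

    // Grow an array to hold at least 'needed' free slots, keeping the first
    // 'used' elements. Assumes a trivially copyable T (the call site above
    // copies plain oop pointers), hence the memcpy.
    template <typename T>
    static T* grow_array(T* old_arr, int used, int& capacity, int needed) {
      if (capacity - used >= needed) return old_arr;
      int n = used + needed;
      T* new_arr = new T[n];
      memcpy(new_arr, old_arr, sizeof(T) * used);
      delete[] old_arr;          // the real code frees with the same mtClass tag
      capacity = n;
      return new_arr;
    }
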
--- a/src/share/vm/classfile/loaderConstraints.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/loaderConstraints.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -31,7 +31,7 @@
 
 class LoaderConstraintEntry;
 
-class LoaderConstraintTable : public Hashtable<klassOop> {
+class LoaderConstraintTable : public Hashtable<klassOop, mtClass> {
   friend class VMStructs;
 private:
 
@@ -53,11 +53,11 @@
   void free_entry(LoaderConstraintEntry *entry);
 
   LoaderConstraintEntry* bucket(int i) {
-    return (LoaderConstraintEntry*)Hashtable<klassOop>::bucket(i);
+    return (LoaderConstraintEntry*)Hashtable<klassOop, mtClass>::bucket(i);
   }
 
   LoaderConstraintEntry** bucket_addr(int i) {
-    return (LoaderConstraintEntry**)Hashtable<klassOop>::bucket_addr(i);
+    return (LoaderConstraintEntry**)Hashtable<klassOop, mtClass>::bucket_addr(i);
   }
 
   // GC support
@@ -94,7 +94,7 @@
 #endif
 };
 
-class LoaderConstraintEntry : public HashtableEntry<klassOop> {
+class LoaderConstraintEntry : public HashtableEntry<klassOop, mtClass> {
   friend class VMStructs;
 private:
   Symbol*                _name;                   // class name
@@ -109,14 +109,14 @@
   void set_klass(klassOop k) { set_literal(k); }
 
   LoaderConstraintEntry* next() {
-    return (LoaderConstraintEntry*)HashtableEntry<klassOop>::next();
+    return (LoaderConstraintEntry*)HashtableEntry<klassOop, mtClass>::next();
   }
 
   LoaderConstraintEntry** next_addr() {
-    return (LoaderConstraintEntry**)HashtableEntry<klassOop>::next_addr();
+    return (LoaderConstraintEntry**)HashtableEntry<klassOop, mtClass>::next_addr();
   }
   void set_next(LoaderConstraintEntry* next) {
-    HashtableEntry<klassOop>::set_next(next);
+    HashtableEntry<klassOop, mtClass>::set_next(next);
   }
 
   Symbol* name() { return _name; }
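
Every edit in this header just threads the new second template argument through to the Hashtable base and its entry type; the downcasting accessors are otherwise unchanged. A compile-only sketch of the resulting shape, with a hypothetical MEMFLAGS enum standing in for HotSpot's real one:

    // Hypothetical illustration of the two-parameter Hashtable shape.
    enum MEMFLAGS { mtClass, mtSymbol, mtInternal };

    template <class T, MEMFLAGS F> class HashtableEntry {
      T _literal;
      HashtableEntry* _next;
     public:
      T literal() const                { return _literal; }
      HashtableEntry* next() const     { return _next; }
      void set_next(HashtableEntry* n) { _next = n; }
    };

    // A subclass pins the tag once and downcasts its links, exactly as
    // LoaderConstraintEntry does above.
    class MyEntry : public HashtableEntry<int, mtClass> {
     public:
      MyEntry* next() const {
        return (MyEntry*)HashtableEntry<int, mtClass>::next();
      }
    };
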
--- a/src/share/vm/classfile/placeholders.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/placeholders.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -34,7 +34,7 @@
 PlaceholderEntry* PlaceholderTable::new_entry(int hash, Symbol* name,
                                               oop loader, bool havesupername,
                                               Symbol* supername) {
-  PlaceholderEntry* entry = (PlaceholderEntry*)Hashtable<Symbol*>::new_entry(hash, name);
+  PlaceholderEntry* entry = (PlaceholderEntry*)Hashtable<Symbol*, mtClass>::new_entry(hash, name);
   // Hashtable with Symbol* literal must increment and decrement refcount.
   name->increment_refcount();
   entry->set_loader(loader);
@@ -52,7 +52,7 @@
   // decrement Symbol refcount here because Hashtable doesn't.
   entry->literal()->decrement_refcount();
   if (entry->supername() != NULL) entry->supername()->decrement_refcount();
-  Hashtable<Symbol*>::free_entry(entry);
+  Hashtable<Symbol*, mtClass>::free_entry(entry);
 }
 
 
@@ -166,7 +166,7 @@
   }
 
 PlaceholderTable::PlaceholderTable(int table_size)
-    : TwoOopHashtable<Symbol*>(table_size, sizeof(PlaceholderEntry)) {
+    : TwoOopHashtable<Symbol*, mtClass>(table_size, sizeof(PlaceholderEntry)) {
 }
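
Because this table stores Symbol* literals, every insert must bump the symbol's refcount and every free must drop it (plus the optional supername), as the two hunks above show. A sketch of that ownership discipline with a hypothetical refcounted type:

    #include <cstddef>

    // Hypothetical refcounted handle; Symbol's real refcounting lives elsewhere.
    struct RefCounted {
      int refs;
      void increment_refcount() { ++refs; }
      void decrement_refcount() { --refs; }   // storage reclaimed at zero
    };

    struct Entry { RefCounted* name; RefCounted* supername; };

    static void init_entry(Entry* e, RefCounted* name) {
      e->name = name;
      e->supername = NULL;
      name->increment_refcount();   // the table now owns a reference
    }

    static void free_entry(Entry* e) {
      e->name->decrement_refcount();          // release before the entry dies
      if (e->supername != NULL) e->supername->decrement_refcount();
    }
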
 
 
--- a/src/share/vm/classfile/placeholders.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/placeholders.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -34,7 +34,7 @@
 // being loaded, as well as arrays of primitives.
 //
 
-class PlaceholderTable : public TwoOopHashtable<Symbol*> {
+class PlaceholderTable : public TwoOopHashtable<Symbol*, mtClass> {
   friend class VMStructs;
 
 public:
@@ -44,15 +44,15 @@
   void free_entry(PlaceholderEntry* entry);
 
   PlaceholderEntry* bucket(int i) {
-    return (PlaceholderEntry*)Hashtable<Symbol*>::bucket(i);
+    return (PlaceholderEntry*)Hashtable<Symbol*, mtClass>::bucket(i);
   }
 
   PlaceholderEntry** bucket_addr(int i) {
-    return (PlaceholderEntry**)Hashtable<Symbol*>::bucket_addr(i);
+    return (PlaceholderEntry**)Hashtable<Symbol*, mtClass>::bucket_addr(i);
   }
 
   void add_entry(int index, PlaceholderEntry* new_entry) {
-    Hashtable<Symbol*>::add_entry(index, (HashtableEntry<Symbol*>*)new_entry);
+    Hashtable<Symbol*, mtClass>::add_entry(index, (HashtableEntry<Symbol*, mtClass>*)new_entry);
   }
 
   void add_entry(int index, unsigned int hash, Symbol* name,
@@ -116,7 +116,7 @@
 // For DEFINE_CLASS, the head of the queue owns the
 // define token and the rest of the threads wait to return the
 // result the first thread gets.
-class SeenThread: public CHeapObj {
+class SeenThread: public CHeapObj<mtInternal> {
 private:
    Thread *_thread;
    SeenThread* _stnext;
@@ -152,7 +152,7 @@
 // on store ordering here.
 // The system dictionary is the only user of this class.
 
-class PlaceholderEntry : public HashtableEntry<Symbol*> {
+class PlaceholderEntry : public HashtableEntry<Symbol*, mtClass> {
   friend class VMStructs;
 
 
@@ -206,11 +206,11 @@
   void               set_defineThreadQ(SeenThread* SeenThread) { _defineThreadQ = SeenThread; }
 
   PlaceholderEntry* next() const {
-    return (PlaceholderEntry*)HashtableEntry<Symbol*>::next();
+    return (PlaceholderEntry*)HashtableEntry<Symbol*, mtClass>::next();
   }
 
   PlaceholderEntry** next_addr() {
-    return (PlaceholderEntry**)HashtableEntry<Symbol*>::next_addr();
+    return (PlaceholderEntry**)HashtableEntry<Symbol*, mtClass>::next_addr();
   }
 
   // Test for equality
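
SeenThread's new base, CHeapObj<mtInternal>, exists purely to route operator new/delete through tagged C-heap allocation so each category's footprint can be accounted for. A rough sketch of that base-class idea, with toy malloc bookkeeping standing in for HotSpot's tracking hooks (allocation side only):

    #include <cstdlib>

    enum MEMFLAGS { mtInternal, mtClass, mt_number_of_types };

    static size_t g_tagged_bytes[mt_number_of_types];  // toy per-tag counter

    template <MEMFLAGS F> class CHeapObj {
     public:
      void* operator new(size_t size) {
        g_tagged_bytes[F] += size;   // attribute the allocation to its category
        return malloc(size);
      }
      void operator delete(void* p) { free(p); }
    };

    class SeenThreadish : public CHeapObj<mtInternal> { int _dummy; };
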
--- a/src/share/vm/classfile/resolutionErrors.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/resolutionErrors.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -67,7 +67,7 @@
 ResolutionErrorEntry* ResolutionErrorTable::new_entry(int hash, constantPoolOop pool,
                                                       int cp_index, Symbol* error)
 {
-  ResolutionErrorEntry* entry = (ResolutionErrorEntry*)Hashtable<constantPoolOop>::new_entry(hash, pool);
+  ResolutionErrorEntry* entry = (ResolutionErrorEntry*)Hashtable<constantPoolOop, mtClass>::new_entry(hash, pool);
   entry->set_cp_index(cp_index);
   NOT_PRODUCT(entry->set_error(NULL);)
   entry->set_error(error);
@@ -79,13 +79,13 @@
   // decrement error refcount
   assert(entry->error() != NULL, "error should be set");
   entry->error()->decrement_refcount();
-  Hashtable<constantPoolOop>::free_entry(entry);
+  Hashtable<constantPoolOop, mtClass>::free_entry(entry);
 }
 
 
 // create resolution error table
 ResolutionErrorTable::ResolutionErrorTable(int table_size)
-    : Hashtable<constantPoolOop>(table_size, sizeof(ResolutionErrorEntry)) {
+    : Hashtable<constantPoolOop, mtClass>(table_size, sizeof(ResolutionErrorEntry)) {
 }
 
 // GC support
--- a/src/share/vm/classfile/resolutionErrors.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/resolutionErrors.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -33,7 +33,7 @@
 // ResolutionError objects are used to record errors encountered during
 // constant pool resolution (JVMS 5.4.3).
 
-class ResolutionErrorTable : public Hashtable<constantPoolOop> {
+class ResolutionErrorTable : public Hashtable<constantPoolOop, mtClass> {
 
 public:
   ResolutionErrorTable(int table_size);
@@ -42,15 +42,16 @@
   void free_entry(ResolutionErrorEntry *entry);
 
   ResolutionErrorEntry* bucket(int i) {
-    return (ResolutionErrorEntry*)Hashtable<constantPoolOop>::bucket(i);
+    return (ResolutionErrorEntry*)Hashtable<constantPoolOop, mtClass>::bucket(i);
   }
 
   ResolutionErrorEntry** bucket_addr(int i) {
-    return (ResolutionErrorEntry**)Hashtable<constantPoolOop>::bucket_addr(i);
+    return (ResolutionErrorEntry**)Hashtable<constantPoolOop, mtClass>::bucket_addr(i);
   }
 
   void add_entry(int index, ResolutionErrorEntry* new_entry) {
-    Hashtable<constantPoolOop>::add_entry(index, (HashtableEntry<constantPoolOop>*)new_entry);
+    Hashtable<constantPoolOop, mtClass>::add_entry(index,
+      (HashtableEntry<constantPoolOop, mtClass>*)new_entry);
   }
 
   void add_entry(int index, unsigned int hash,
@@ -74,7 +75,7 @@
 };
 
 
-class ResolutionErrorEntry : public HashtableEntry<constantPoolOop> {
+class ResolutionErrorEntry : public HashtableEntry<constantPoolOop, mtClass> {
  private:
   int               _cp_index;
   Symbol*           _error;
@@ -90,11 +91,11 @@
   void               set_error(Symbol* e);
 
   ResolutionErrorEntry* next() const {
-    return (ResolutionErrorEntry*)HashtableEntry<constantPoolOop>::next();
+    return (ResolutionErrorEntry*)HashtableEntry<constantPoolOop, mtClass>::next();
   }
 
   ResolutionErrorEntry** next_addr() {
-    return (ResolutionErrorEntry**)HashtableEntry<constantPoolOop>::next_addr();
+    return (ResolutionErrorEntry**)HashtableEntry<constantPoolOop, mtClass>::next_addr();
   }
 
   // GC support
--- a/src/share/vm/classfile/stackMapFrame.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/stackMapFrame.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,9 +32,9 @@
 #include "utilities/globalDefinitions.hpp"
 
 StackMapFrame::StackMapFrame(u2 max_locals, u2 max_stack, ClassVerifier* v) :
-                      _offset(0), _locals_size(0), _stack_size(0), _flags(0),
-                      _max_locals(max_locals), _max_stack(max_stack),
-                      _verifier(v) {
+                      _offset(0), _locals_size(0), _stack_size(0),
+                      _stack_mark(0), _flags(0), _max_locals(max_locals),
+                      _max_stack(max_stack), _verifier(v) {
   Thread* thr = v->thread();
   _locals = NEW_RESOURCE_ARRAY_IN_THREAD(thr, VerificationType, max_locals);
   _stack = NEW_RESOURCE_ARRAY_IN_THREAD(thr, VerificationType, max_stack);
@@ -157,17 +157,17 @@
   }
 }
 
-
-bool StackMapFrame::is_assignable_to(
+// Returns the location of the first mismatch, or 'len' if there are no
+// mismatches
+int StackMapFrame::is_assignable_to(
     VerificationType* from, VerificationType* to, int32_t len, TRAPS) const {
-  for (int32_t i = 0; i < len; i++) {
-    bool subtype = to[i].is_assignable_from(
-      from[i], verifier(), THREAD);
-    if (!subtype) {
-      return false;
+  int32_t i = 0;
+  for (i = 0; i < len; i++) {
+    if (!to[i].is_assignable_from(from[i], verifier(), THREAD)) {
+      break;
     }
   }
-  return true;
+  return i;
 }
 
 bool StackMapFrame::has_flag_match_exception(
@@ -209,50 +209,84 @@
 }
 
 bool StackMapFrame::is_assignable_to(
-    const StackMapFrame* target, bool is_exception_handler, TRAPS) const {
-  if (_max_locals != target->max_locals() ||
-      _stack_size != target->stack_size()) {
+    const StackMapFrame* target, bool is_exception_handler,
+    ErrorContext* ctx, TRAPS) const {
+  if (_max_locals != target->max_locals()) {
+    *ctx = ErrorContext::locals_size_mismatch(
+        _offset, (StackMapFrame*)this, (StackMapFrame*)target);
+    return false;
+  }
+  if (_stack_size != target->stack_size()) {
+    *ctx = ErrorContext::stack_size_mismatch(
+        _offset, (StackMapFrame*)this, (StackMapFrame*)target);
     return false;
   }
   // Only need to compare type elements up to target->locals() or target->stack().
   // The remaining type elements in this state can be ignored because they are
   // assignable to bogus type.
-  bool match_locals = is_assignable_to(
-    _locals, target->locals(), target->locals_size(), CHECK_false);
-  bool match_stack = is_assignable_to(
-    _stack, target->stack(), _stack_size, CHECK_false);
+  int mismatch_loc;
+  mismatch_loc = is_assignable_to(
+    _locals, target->locals(), target->locals_size(), THREAD);
+  if (mismatch_loc != target->locals_size()) {
+    *ctx = ErrorContext::bad_type(target->offset(),
+        TypeOrigin::local(mismatch_loc, (StackMapFrame*)this),
+        TypeOrigin::sm_local(mismatch_loc, (StackMapFrame*)target));
+    return false;
+  }
+  mismatch_loc = is_assignable_to(_stack, target->stack(), _stack_size, THREAD);
+  if (mismatch_loc != _stack_size) {
+    *ctx = ErrorContext::bad_type(target->offset(),
+        TypeOrigin::stack(mismatch_loc, (StackMapFrame*)this),
+        TypeOrigin::sm_stack(mismatch_loc, (StackMapFrame*)target));
+    return false;
+  }
+
   bool match_flags = (_flags | target->flags()) == target->flags();
-
-  return match_locals && match_stack &&
-    (match_flags || (is_exception_handler && has_flag_match_exception(target)));
+  if (match_flags || (is_exception_handler && has_flag_match_exception(target))) {
+    return true;
+  } else {
+    *ctx = ErrorContext::bad_flags(target->offset(),
+        (StackMapFrame*)this, (StackMapFrame*)target);
+    return false;
+  }
 }
 
 VerificationType StackMapFrame::pop_stack_ex(VerificationType type, TRAPS) {
   if (_stack_size <= 0) {
-    verifier()->verify_error(_offset, "Operand stack underflow");
+    verifier()->verify_error(
+        ErrorContext::stack_underflow(_offset, this),
+        "Operand stack underflow");
     return VerificationType::bogus_type();
   }
   VerificationType top = _stack[--_stack_size];
   bool subtype = type.is_assignable_from(
     top, verifier(), CHECK_(VerificationType::bogus_type()));
   if (!subtype) {
-    verifier()->verify_error(_offset, "Bad type on operand stack");
+    verifier()->verify_error(
+        ErrorContext::bad_type(_offset, stack_top_ctx(),
+            TypeOrigin::implicit(type)),
+        "Bad type on operand stack");
     return VerificationType::bogus_type();
   }
-  NOT_PRODUCT( _stack[_stack_size] = VerificationType::bogus_type(); )
   return top;
 }
 
 VerificationType StackMapFrame::get_local(
     int32_t index, VerificationType type, TRAPS) {
   if (index >= _max_locals) {
-    verifier()->verify_error(_offset, "Local variable table overflow");
+    verifier()->verify_error(
+        ErrorContext::bad_local_index(_offset, index),
+        "Local variable table overflow");
     return VerificationType::bogus_type();
   }
   bool subtype = type.is_assignable_from(_locals[index],
     verifier(), CHECK_(VerificationType::bogus_type()));
   if (!subtype) {
-    verifier()->verify_error(_offset, "Bad local variable type");
+    verifier()->verify_error(
+        ErrorContext::bad_type(_offset,
+          TypeOrigin::local(index, this),
+          TypeOrigin::implicit(type)),
+        "Bad local variable type");
     return VerificationType::bogus_type();
   }
   if(index >= _locals_size) { _locals_size = index + 1; }
@@ -264,23 +298,37 @@
   assert(type1.is_long() || type1.is_double(), "must be long/double");
   assert(type2.is_long2() || type2.is_double2(), "must be long/double_2");
   if (index >= _locals_size - 1) {
-    verifier()->verify_error(_offset, "get long/double overflows locals");
+    verifier()->verify_error(
+        ErrorContext::bad_local_index(_offset, index),
+        "get long/double overflows locals");
     return;
   }
-  bool subtype1 = type1.is_assignable_from(
-    _locals[index], verifier(), CHECK);
-  bool subtype2 = type2.is_assignable_from(
-    _locals[index+1], verifier(), CHECK);
-  if (!subtype1 || !subtype2) {
-    verifier()->verify_error(_offset, "Bad local variable type");
-    return;
+  bool subtype = type1.is_assignable_from(_locals[index], verifier(), CHECK);
+  if (!subtype) {
+    verifier()->verify_error(
+        ErrorContext::bad_type(_offset,
+            TypeOrigin::local(index, this), TypeOrigin::implicit(type1)),
+        "Bad local variable type");
+  } else {
+    subtype = type2.is_assignable_from(_locals[index + 1], verifier(), CHECK);
+    if (!subtype) {
+      /* Unreachable? All local store routines convert a split long or double
+       * into a TOP during the store.  So we should never end up seeing an
+       * orphaned half.  */
+      verifier()->verify_error(
+          ErrorContext::bad_type(_offset,
+              TypeOrigin::local(index + 1, this), TypeOrigin::implicit(type2)),
+          "Bad local variable type");
+    }
   }
 }
 
 void StackMapFrame::set_local(int32_t index, VerificationType type, TRAPS) {
   assert(!type.is_check(), "Must be a real type");
   if (index >= _max_locals) {
-    verifier()->verify_error("Local variable table overflow", _offset);
+    verifier()->verify_error(
+        ErrorContext::bad_local_index(_offset, index),
+        "Local variable table overflow");
     return;
   }
   // If type at index is double or long, set the next location to be unusable
@@ -310,7 +358,9 @@
   assert(type1.is_long() || type1.is_double(), "must be long/double");
   assert(type2.is_long2() || type2.is_double2(), "must be long/double_2");
   if (index >= _max_locals - 1) {
-    verifier()->verify_error("Local variable table overflow", _offset);
+    verifier()->verify_error(
+        ErrorContext::bad_local_index(_offset, index),
+        "Local variable table overflow");
     return;
   }
   // If type at index+1 is double or long, set the next location to be unusable
@@ -336,21 +386,30 @@
   }
 }
 
-#ifndef PRODUCT
-
-void StackMapFrame::print() const {
-  tty->print_cr("stackmap_frame[%d]:", _offset);
-  tty->print_cr("flags = 0x%x", _flags);
-  tty->print("locals[%d] = { ", _locals_size);
-  for (int32_t i = 0; i < _locals_size; i++) {
-    _locals[i].print_on(tty);
-  }
-  tty->print_cr(" }");
-  tty->print("stack[%d] = { ", _stack_size);
-  for (int32_t j = 0; j < _stack_size; j++) {
-    _stack[j].print_on(tty);
-  }
-  tty->print_cr(" }");
+TypeOrigin StackMapFrame::stack_top_ctx() {
+  return TypeOrigin::stack(_stack_size, this);
 }
 
-#endif
+void StackMapFrame::print_on(outputStream* str) const {
+  str->indent().print_cr("bci: @%d", _offset);
+  str->indent().print_cr("flags: {%s }",
+      flag_this_uninit() ? " flagThisUninit" : "");
+  str->indent().print("locals: {");
+  for (int32_t i = 0; i < _locals_size; ++i) {
+    str->print(" ");
+    _locals[i].print_on(str);
+    if (i != _locals_size - 1) {
+      str->print(",");
+    }
+  }
+  str->print_cr(" }");
+  str->indent().print("stack: {");
+  for (int32_t j = 0; j < _stack_size; ++j) {
+    str->print(" ");
+    _stack[j].print_on(str);
+    if (j != _stack_size - 1) {
+      str->print(",");
+    }
+  }
+  str->print_cr(" }");
+}
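
The element-wise check now returns where the first mismatch occurred instead of a bare bool, which is what lets the callers above build an ErrorContext pointing at the exact local or stack slot. The core loop, reduced to a standalone sketch over a hypothetical assignability predicate:

    // Returns the index of the first slot where 'from' is not assignable to
    // 'to', or 'len' if every slot matches -- mirroring is_assignable_to above.
    static int first_mismatch(const int* from, const int* to, int len,
                              bool (*assignable)(int from_t, int to_t)) {
      int i = 0;
      while (i < len && assignable(from[i], to[i])) {
        i++;
      }
      return i;
    }

    // Caller pattern: if (first_mismatch(...) != len) report that slot;
    // otherwise the two frames are compatible.
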
--- a/src/share/vm/classfile/stackMapFrame.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/stackMapFrame.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,8 @@
 
 // A StackMapFrame represents one frame in the stack map attribute.
 
+class TypeContext;
+
 enum {
   FLAG_THIS_UNINIT = 0x01
 };
@@ -47,6 +49,10 @@
   int32_t _locals_size;  // number of valid type elements in _locals
   int32_t _stack_size;   // number of valid type elements in _stack
 
+  int32_t _stack_mark;   // Records the size of the stack prior to an
+                         // instruction modification, to allow rewinding
+                         // when/if an error occurs.
+
   int32_t _max_locals;
   int32_t _max_stack;
 
@@ -56,6 +62,31 @@
 
   ClassVerifier* _verifier;  // the verifier verifying this method
 
+  StackMapFrame(const StackMapFrame& cp) :
+      _offset(cp._offset), _locals_size(cp._locals_size),
+      _stack_size(cp._stack_size), _stack_mark(cp._stack_mark),
+      _max_locals(cp._max_locals), _max_stack(cp._max_stack),
+      _flags(cp._flags) {
+    _locals = NEW_RESOURCE_ARRAY(VerificationType, _max_locals);
+    for (int i = 0; i < _max_locals; ++i) {
+      if (i < _locals_size) {
+        _locals[i] = cp._locals[i];
+      } else {
+        _locals[i] = VerificationType::bogus_type();
+      }
+    }
+    int ss = MAX2(_stack_size, _stack_mark);
+    _stack = NEW_RESOURCE_ARRAY(VerificationType, _max_stack);
+    for (int i = 0; i < _max_stack; ++i) {
+      if (i < ss) {
+        _stack[i] = cp._stack[i];
+      } else {
+        _stack[i] = VerificationType::bogus_type();
+      }
+    }
+    _verifier = NULL;
+  }
+
  public:
   // constructors
 
@@ -77,16 +108,21 @@
                 ClassVerifier* v) : _offset(offset), _flags(flags),
                                     _locals_size(locals_size),
                                     _stack_size(stack_size),
+                                    _stack_mark(-1),
                                     _max_locals(max_locals),
                                     _max_stack(max_stack),
                                     _locals(locals), _stack(stack),
                                     _verifier(v) { }
 
+  static StackMapFrame* copy(StackMapFrame* smf) {
+    return new StackMapFrame(*smf);
+  }
+
   inline void set_offset(int32_t offset)      { _offset = offset; }
   inline void set_verifier(ClassVerifier* v)  { _verifier = v; }
   inline void set_flags(u1 flags)             { _flags = flags; }
   inline void set_locals_size(u2 locals_size) { _locals_size = locals_size; }
-  inline void set_stack_size(u2 stack_size)   { _stack_size = stack_size; }
+  inline void set_stack_size(u2 stack_size)   { _stack_size = _stack_mark = stack_size; }
   inline void clear_stack()                   { _stack_size = 0; }
   inline int32_t offset()   const             { return _offset; }
   inline ClassVerifier* verifier() const      { return _verifier; }
@@ -134,14 +170,37 @@
   void copy_stack(const StackMapFrame* src);
 
   // Return true if this stack map frame is assignable to target.
-  bool is_assignable_to(const StackMapFrame* target,
-                        bool is_exception_handler, TRAPS) const;
+  bool is_assignable_to(
+      const StackMapFrame* target, bool is_exception_handler,
+      ErrorContext* ctx, TRAPS) const;
+
+  inline void set_mark() {
+#ifdef ASSERT
+    // Put bogus type to indicate it's no longer valid.
+    if (_stack_mark != -1) {
+      for (int i = _stack_mark; i >= _stack_size; --i) {
+        _stack[i] = VerificationType::bogus_type();
+      }
+    }
+#endif // def ASSERT
+    _stack_mark = _stack_size;
+  }
+
+  // Used when an error occurs and we want to reset the stack to the state
+  // it was in before operands were popped off.
+  void restore() {
+    if (_stack_mark != -1) {
+      _stack_size = _stack_mark;
+    }
+  }
 
   // Push type into stack type array.
   inline void push_stack(VerificationType type, TRAPS) {
     assert(!type.is_check(), "Must be a real type");
     if (_stack_size >= _max_stack) {
-      verifier()->verify_error(_offset, "Operand stack overflow");
+      verifier()->verify_error(
+          ErrorContext::stack_overflow(_offset, this),
+          "Operand stack overflow");
       return;
     }
     _stack[_stack_size++] = type;
@@ -152,7 +211,9 @@
     assert(type1.is_long() || type1.is_double(), "must be long/double");
     assert(type2.is_long2() || type2.is_double2(), "must be long/double_2");
     if (_stack_size >= _max_stack - 1) {
-      verifier()->verify_error(_offset, "Operand stack overflow");
+      verifier()->verify_error(
+          ErrorContext::stack_overflow(_offset, this),
+          "Operand stack overflow");
       return;
     }
     _stack[_stack_size++] = type1;
@@ -162,13 +223,12 @@
   // Pop and return the top type on stack without verifying.
   inline VerificationType pop_stack(TRAPS) {
     if (_stack_size <= 0) {
-      verifier()->verify_error(_offset, "Operand stack underflow");
+      verifier()->verify_error(
+          ErrorContext::stack_underflow(_offset, this),
+          "Operand stack underflow");
       return VerificationType::bogus_type();
     }
-    // Put bogus type to indicate it's no longer valid.
-    // Added to make it consistent with the other pop_stack method.
     VerificationType top = _stack[--_stack_size];
-    NOT_PRODUCT( _stack[_stack_size] = VerificationType::bogus_type(); )
     return top;
   }
 
@@ -180,8 +240,7 @@
       bool subtype = type.is_assignable_from(
         top, verifier(), CHECK_(VerificationType::bogus_type()));
       if (subtype) {
-        _stack_size --;
-        NOT_PRODUCT( _stack[_stack_size] = VerificationType::bogus_type(); )
+        --_stack_size;
         return top;
       }
     }
@@ -199,8 +258,6 @@
       bool subtype2 = type2.is_assignable_from(top2, verifier(), CHECK);
       if (subtype1 && subtype2) {
         _stack_size -= 2;
-        NOT_PRODUCT( _stack[_stack_size] = VerificationType::bogus_type(); )
-        NOT_PRODUCT( _stack[_stack_size+1] = VerificationType::bogus_type(); )
         return;
       }
     }
@@ -208,6 +265,14 @@
     pop_stack_ex(type2, THREAD);
   }
 
+  VerificationType local_at(int index) {
+    return _locals[index];
+  }
+
+  VerificationType stack_at(int index) {
+    return _stack[index];
+  }
+
   // Uncommon case that throws exceptions.
   VerificationType pop_stack_ex(VerificationType type, TRAPS);
 
@@ -226,13 +291,14 @@
 
   // Private auxiliary method used only in is_assignable_to(StackMapFrame).
   // Returns the index of the first mismatch, or 'len' if src is assignable to target.
-  bool is_assignable_to(
+  int is_assignable_to(
     VerificationType* src, VerificationType* target, int32_t len, TRAPS) const;
 
   bool has_flag_match_exception(const StackMapFrame* target) const;
 
-  // Debugging
-  void print() const PRODUCT_RETURN;
+  TypeOrigin stack_top_ctx();
+
+  void print_on(outputStream* str) const;
 };
 
 #endif // SHARE_VM_CLASSFILE_STACKMAPFRAME_HPP
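
The new _stack_mark plus set_mark()/restore() pair lets the verifier pop operands speculatively and then rewind, so an error dump shows the operand stack as it stood before the failing instruction. The mechanism in miniature (a sketch, not the real class):

    struct MiniStack {
      int size;
      int mark;

      MiniStack() : size(0), mark(-1) {}

      void set_mark() { mark = size; }   // checkpoint before simulating an op
      void pop(int n) { size -= n; }     // speculative pops while verifying
      void restore()  {                  // on error: undo the pops
        if (mark != -1) size = mark;
      }
    };
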
--- a/src/share/vm/classfile/stackMapTable.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/stackMapTable.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -46,7 +46,9 @@
       _frame_array[i] = frame;
       int offset = frame->offset();
       if (offset >= code_len || code_data[offset] == 0) {
-        frame->verifier()->verify_error("StackMapTable error: bad offset");
+        frame->verifier()->verify_error(
+            ErrorContext::bad_stackmap(i, frame),
+            "StackMapTable error: bad offset");
         return;
       }
       pre_frame = frame;
@@ -68,12 +70,9 @@
 
 bool StackMapTable::match_stackmap(
     StackMapFrame* frame, int32_t target,
-    bool match, bool update, TRAPS) const {
+    bool match, bool update, ErrorContext* ctx, TRAPS) const {
   int index = get_index_from_offset(target);
-
-  return match_stackmap(
-    frame, target, index, match,
-    update, CHECK_VERIFY_(frame->verifier(), false));
+  return match_stackmap(frame, target, index, match, update, ctx, THREAD);
 }
 
 // Match and/or update current_frame to the frame in stackmap table with
@@ -88,23 +87,23 @@
 // unconditional branch:                                 true   true
 bool StackMapTable::match_stackmap(
     StackMapFrame* frame, int32_t target, int32_t frame_index,
-    bool match, bool update, TRAPS) const {
+    bool match, bool update, ErrorContext* ctx, TRAPS) const {
   if (frame_index < 0 || frame_index >= _frame_count) {
-    frame->verifier()->verify_error(frame->offset(),
-      "Expecting a stackmap frame at branch target %d", target);
+    *ctx = ErrorContext::missing_stackmap(frame->offset());
+    frame->verifier()->verify_error(
+        *ctx, "Expecting a stackmap frame at branch target %d", target);
     return false;
   }
 
+  StackMapFrame *stackmap_frame = _frame_array[frame_index];
   bool result = true;
-  StackMapFrame *stackmap_frame = _frame_array[frame_index];
   if (match) {
     // when checking handler target, match == true && update == false
     bool is_exception_handler = !update;
     // Has direct control flow from last instruction, need to match the two
     // frames.
-    result = frame->is_assignable_to(
-      stackmap_frame, is_exception_handler,
-      CHECK_VERIFY_(frame->verifier(), false));
+    result = frame->is_assignable_to(stackmap_frame, is_exception_handler,
+        ctx, CHECK_VERIFY_(frame->verifier(), result));
   }
   if (update) {
     // Use the frame in stackmap table as current frame
@@ -125,11 +124,12 @@
 
 void StackMapTable::check_jump_target(
     StackMapFrame* frame, int32_t target, TRAPS) const {
+  ErrorContext ctx;
   bool match = match_stackmap(
-    frame, target, true, false, CHECK_VERIFY(frame->verifier()));
+    frame, target, true, false, &ctx, CHECK_VERIFY(frame->verifier()));
   if (!match || (target < 0 || target >= _code_length)) {
-    frame->verifier()->verify_error(frame->offset(),
-      "Inconsistent stackmap frames at branch target %d", target);
+    frame->verifier()->verify_error(ctx,
+        "Inconsistent stackmap frames at branch target %d", target);
     return;
   }
   // check if uninitialized objects exist on backward branches
@@ -139,25 +139,25 @@
 void StackMapTable::check_new_object(
     const StackMapFrame* frame, int32_t target, TRAPS) const {
   if (frame->offset() > target && frame->has_new_object()) {
-    frame->verifier()->verify_error(frame->offset(),
-      "Uninitialized object exists on backward branch %d", target);
+    frame->verifier()->verify_error(
+        ErrorContext::bad_code(frame->offset()),
+        "Uninitialized object exists on backward branch %d", target);
     return;
   }
 }
 
-#ifndef PRODUCT
-
-void StackMapTable::print() const {
-  tty->print_cr("StackMapTable: frame_count = %d", _frame_count);
-  tty->print_cr("table = { ");
-  for (int32_t i = 0; i < _frame_count; i++) {
-    _frame_array[i]->print();
+void StackMapTable::print_on(outputStream* str) const {
+  str->indent().print_cr("StackMapTable: frame_count = %d", _frame_count);
+  str->indent().print_cr("table = { ");
+  {
+    streamIndentor si(str);
+    for (int32_t i = 0; i < _frame_count; ++i) {
+      _frame_array[i]->print_on(str);
+    }
   }
-  tty->print_cr(" }");
+  str->print_cr(" }");
 }
 
-#endif
-
 int32_t StackMapReader::chop(
     VerificationType* locals, int32_t length, int32_t chops) {
   if (locals == NULL) return -1;
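
check_jump_target above shows the new out-parameter convention: match_stackmap fills an ErrorContext only on failure, and the caller forwards it to verify_error for a precise report. A generic sketch of that context-by-pointer style:

    #include <cstdio>

    struct Ctx { int where; const char* what; };

    // The callee reports success/failure and, on failure, says where and why.
    static bool check_target(int target, int code_length, Ctx* ctx) {
      if (target < 0 || target >= code_length) {
        ctx->where = target;
        ctx->what  = "branch target out of range";
        return false;
      }
      return true;
    }

    int main() {
      Ctx ctx;
      if (!check_target(42, 10, &ctx)) {
        fprintf(stderr, "verify error at %d: %s\n", ctx.where, ctx.what);
      }
      return 0;
    }
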
--- a/src/share/vm/classfile/stackMapTable.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/stackMapTable.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 #define SHARE_VM_CLASSFILE_STACKMAPTABLE_HPP
 
 #include "classfile/stackMapFrame.hpp"
+#include "classfile/verifier.hpp"
 #include "memory/allocation.hpp"
 #include "oops/constantPoolOop.hpp"
 #include "oops/methodOop.hpp"
@@ -73,12 +74,12 @@
   // specified offset. Return true if the two frames match.
   bool match_stackmap(
     StackMapFrame* current_frame, int32_t offset,
-    bool match, bool update, TRAPS) const;
+    bool match, bool update, ErrorContext* ctx, TRAPS) const;
   // Match and/or update current_frame to the frame in stackmap table with
   // specified offset and frame index. Return true if the two frames match.
   bool match_stackmap(
     StackMapFrame* current_frame, int32_t offset, int32_t frame_index,
-    bool match, bool update, TRAPS) const;
+    bool match, bool update, ErrorContext* ctx, TRAPS) const;
 
   // Check jump instructions. Make sure there are no uninitialized
   // instances on backward branch.
@@ -93,8 +94,7 @@
   void check_new_object(
     const StackMapFrame* frame, int32_t target, TRAPS) const;
 
-  // Debugging
-  void print() const PRODUCT_RETURN;
+  void print_on(outputStream* str) const;
 };
 
 class StackMapStream : StackObj {
--- a/src/share/vm/classfile/stackMapTableFormat.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/stackMapTableFormat.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -135,7 +135,6 @@
                 !is_object() && !is_uninitialized()));
   }
 
-#ifdef ASSERT
   void print_on(outputStream* st) {
     switch (tag()) {
       case ITEM_Top: st->print("Top"); break;
@@ -154,14 +153,13 @@
         assert(false, "Bad verification_type_info");
     }
   }
-#endif
 };
 
 #define FOR_EACH_STACKMAP_FRAME_TYPE(macro, arg1, arg2) \
   macro(same_frame, arg1, arg2) \
   macro(same_frame_extended, arg1, arg2) \
-  macro(same_frame_1_stack_item_frame, arg1, arg2) \
-  macro(same_frame_1_stack_item_extended, arg1, arg2) \
+  macro(same_locals_1_stack_item_frame, arg1, arg2) \
+  macro(same_locals_1_stack_item_extended, arg1, arg2) \
   macro(chop_frame, arg1, arg2) \
   macro(append_frame, arg1, arg2) \
   macro(full_frame, arg1, arg2)
@@ -203,9 +201,8 @@
   // that we don't read past a particular memory limit.  It returns false
   // if any part of the data structure is outside the specified memory bounds.
   inline bool verify(address start, address end) const;
-#ifdef ASSERT
-  inline void print_on(outputStream* st) const;
-#endif
+
+  inline void print_on(outputStream* st, int current_offset) const;
 
   // Create as_xxx and is_xxx methods for the subtypes
 #define FRAME_TYPE_DECL(stackmap_frame_type, arg1, arg2) \
@@ -263,11 +260,9 @@
     return true;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("same_frame(%d)", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("same_frame(@%d)", offset_delta() + current_offset);
   }
-#endif
 };
 
 class same_frame_extended : public stack_map_frame {
@@ -311,14 +306,12 @@
     return frame_type_addr() + size() <= end;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("same_frame_extended(%d)", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("same_frame_extended(@%d)", offset_delta() + current_offset);
   }
-#endif
 };
 
-class same_frame_1_stack_item_frame : public stack_map_frame {
+class same_locals_1_stack_item_frame : public stack_map_frame {
  private:
   address type_addr() const { return frame_type_addr() + sizeof(u1); }
 
@@ -332,14 +325,14 @@
     return tag >= 64 && tag < 128;
   }
 
-  static same_frame_1_stack_item_frame* at(address addr) {
+  static same_locals_1_stack_item_frame* at(address addr) {
     assert(is_frame_type(*addr), "Wrong frame id");
-    return (same_frame_1_stack_item_frame*)addr;
+    return (same_locals_1_stack_item_frame*)addr;
   }
 
-  static same_frame_1_stack_item_frame* create_at(
+  static same_locals_1_stack_item_frame* create_at(
       address addr, int offset_delta, verification_type_info* vti) {
-    same_frame_1_stack_item_frame* sm = (same_frame_1_stack_item_frame*)addr;
+    same_locals_1_stack_item_frame* sm = (same_locals_1_stack_item_frame*)addr;
     sm->set_offset_delta(offset_delta);
     if (vti != NULL) {
       sm->set_type(vti);
@@ -382,16 +375,15 @@
     return types()->verify(start, end);
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("same_frame_1_stack_item_frame(%d,", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("same_locals_1_stack_item_frame(@%d,",
+        offset_delta() + current_offset);
     types()->print_on(st);
     st->print(")");
   }
-#endif
 };
 
-class same_frame_1_stack_item_extended : public stack_map_frame {
+class same_locals_1_stack_item_extended : public stack_map_frame {
  private:
   address offset_delta_addr() const { return frame_type_addr() + sizeof(u1); }
   address type_addr() const { return offset_delta_addr() + sizeof(u2); }
@@ -403,15 +395,15 @@
     return tag == _frame_id;
   }
 
-  static same_frame_1_stack_item_extended* at(address addr) {
+  static same_locals_1_stack_item_extended* at(address addr) {
     assert(is_frame_type(*addr), "Wrong frame id");
-    return (same_frame_1_stack_item_extended*)addr;
+    return (same_locals_1_stack_item_extended*)addr;
   }
 
-  static same_frame_1_stack_item_extended* create_at(
+  static same_locals_1_stack_item_extended* create_at(
       address addr, int offset_delta, verification_type_info* vti) {
-    same_frame_1_stack_item_extended* sm =
-       (same_frame_1_stack_item_extended*)addr;
+    same_locals_1_stack_item_extended* sm =
+       (same_locals_1_stack_item_extended*)addr;
     sm->set_frame_type(_frame_id);
     sm->set_offset_delta(offset_delta);
     if (vti != NULL) {
@@ -448,13 +440,12 @@
     return type_addr() < end && types()->verify(start, end);
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("same_frame_1_stack_item_extended(%d,", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("same_locals_1_stack_item_extended(@%d,",
+        offset_delta() + current_offset);
     types()->print_on(st);
     st->print(")");
   }
-#endif
 };
 
 class chop_frame : public stack_map_frame {
@@ -517,11 +508,9 @@
     return frame_type_addr() + size() <= end;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("chop_frame(%d,%d)", offset_delta(), chops());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("chop_frame(@%d,%d)", offset_delta() + current_offset, chops());
   }
-#endif
 };
 
 class append_frame : public stack_map_frame {
@@ -618,9 +607,8 @@
     return false;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("append_frame(%d,", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("append_frame(@%d,", offset_delta() + current_offset);
     verification_type_info* vti = types();
     for (int i = 0; i < number_of_types(); ++i) {
       vti->print_on(st);
@@ -631,7 +619,6 @@
     }
     st->print(")");
   }
-#endif
 };
 
 class full_frame : public stack_map_frame {
@@ -774,9 +761,8 @@
     return true;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("full_frame(%d,{", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("full_frame(@%d,{", offset_delta() + current_offset);
     verification_type_info* vti = locals();
     for (int i = 0; i < num_locals(); ++i) {
       vti->print_on(st);
@@ -798,7 +784,6 @@
     }
     st->print("})");
   }
-#endif
 };
 
 #define VIRTUAL_DISPATCH(stack_frame_type, func_name, args) \
@@ -852,11 +837,9 @@
   return false;
 }
 
-#ifdef ASSERT
-void stack_map_frame::print_on(outputStream* st) const {
-  FOR_EACH_STACKMAP_FRAME_TYPE(VOID_VIRTUAL_DISPATCH, print_on, (st));
+void stack_map_frame::print_on(outputStream* st, int offs = -1) const {
+  FOR_EACH_STACKMAP_FRAME_TYPE(VOID_VIRTUAL_DISPATCH, print_on, (st, offs));
 }
-#endif
 
 #undef VIRTUAL_DISPATCH
 #undef VOID_VIRTUAL_DISPATCH
@@ -873,16 +856,46 @@
 FOR_EACH_STACKMAP_FRAME_TYPE(AS_SUBTYPE_DEF, x, x)
 #undef AS_SUBTYPE_DEF
 
+class stack_map_table {
+ private:
+  address number_of_entries_addr() const {
+    return (address)this;
+  }
+  address entries_addr() const {
+    return number_of_entries_addr() + sizeof(u2);
+  }
+
+ protected:
+  // No constructors  - should be 'private', but GCC issues a warning if it is
+  stack_map_table() {}
+  stack_map_table(const stack_map_table&) {}
+
+ public:
+
+  static stack_map_table* at(address addr) {
+    return (stack_map_table*)addr;
+  }
+
+  u2 number_of_entries() const {
+    return Bytes::get_Java_u2(number_of_entries_addr());
+  }
+  stack_map_frame* entries() const {
+    return stack_map_frame::at(entries_addr());
+  }
+
+  void set_number_of_entries(u2 num) {
+    Bytes::put_Java_u2(number_of_entries_addr(), num);
+  }
+};
+
 class stack_map_table_attribute {
  private:
   address name_index_addr() const {
       return (address)this; }
   address attribute_length_addr() const {
       return name_index_addr() + sizeof(u2); }
-  address number_of_entries_addr() const {
+  address stack_map_table_addr() const {
       return attribute_length_addr() + sizeof(u4); }
-  address entries_addr() const {
-      return number_of_entries_addr() + sizeof(u2); }
 
  protected:
   // No constructors  - should be 'private', but GCC issues a warning if it is
@@ -896,17 +909,11 @@
   }
 
   u2 name_index() const {
-       return Bytes::get_Java_u2(name_index_addr()); }
+    return Bytes::get_Java_u2(name_index_addr()); }
   u4 attribute_length() const {
-      return Bytes::get_Java_u4(attribute_length_addr()); }
-  u2 number_of_entries() const {
-      return Bytes::get_Java_u2(number_of_entries_addr()); }
-  stack_map_frame* entries() const {
-    return stack_map_frame::at(entries_addr());
-  }
-
-  static size_t header_size() {
-      return sizeof(u2) + sizeof(u4);
+    return Bytes::get_Java_u4(attribute_length_addr()); }
+  stack_map_table* table() const {
+    return stack_map_table::at(stack_map_table_addr());
   }
 
   void set_name_index(u2 idx) {
@@ -915,9 +922,8 @@
   void set_attribute_length(u4 len) {
     Bytes::put_Java_u4(attribute_length_addr(), len);
   }
-  void set_number_of_entries(u2 num) {
-    Bytes::put_Java_u2(number_of_entries_addr(), num);
-  }
 };
 
+#undef FOR_EACH_STACKMAP_FRAME_TYPE
+
 #endif // SHARE_VM_CLASSFILE_STACKMAPTABLEFORMAT_HPP
--- a/src/share/vm/classfile/symbolTable.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/symbolTable.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 #include "classfile/symbolTable.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
+#include "memory/allocation.inline.hpp"
 #include "memory/filemap.hpp"
 #include "memory/gcLocker.inline.hpp"
 #include "oops/oop.inline.hpp"
@@ -39,22 +40,40 @@
 // --------------------------------------------------------------------------
 
 SymbolTable* SymbolTable::_the_table = NULL;
+// Static arena for symbols that are not deallocated
+Arena* SymbolTable::_arena = NULL;
 bool SymbolTable::_needs_rehashing = false;
 
-Symbol* SymbolTable::allocate_symbol(const u1* name, int len, TRAPS) {
+Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
   assert (len <= Symbol::max_length(), "should be checked by caller");
 
-  Symbol* sym = new (len) Symbol(name, len);
+  Symbol* sym;
+  // Allocate symbols in the C heap when dumping shared spaces in case there
+  // are temporary symbols we can remove.
+  if (c_heap || DumpSharedSpaces) {
+    // refcount starts as 1
+    sym = new (len, THREAD) Symbol(name, len, 1);
+  } else {
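+    // A refcount of -1 marks the arena-allocated symbol as permanent: it is
+    // never deallocated.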
+    sym = new (len, arena(), THREAD) Symbol(name, len, -1);
+  }
   assert(sym != NULL, "new should call vm_exit_out_of_memory if C_HEAP is exhausted");
   return sym;
 }
 
+void SymbolTable::initialize_symbols(int arena_alloc_size) {
+  // Initialize the arena for global symbols; the size passed in depends on CDS.
+  if (arena_alloc_size == 0) {
+    _arena = new (mtSymbol) Arena();
+  } else {
+    _arena = new (mtSymbol) Arena(arena_alloc_size);
+  }
+}
 
 // Call function for all symbols in the symbol table.
 void SymbolTable::symbols_do(SymbolClosure *cl) {
   const int n = the_table()->table_size();
   for (int i = 0; i < n; i++) {
-    for (HashtableEntry<Symbol*>* p = the_table()->bucket(i);
+    for (HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
          p != NULL;
          p = p->next()) {
       cl->do_symbol(p->literal_addr());
@@ -66,15 +85,14 @@
 int SymbolTable::symbols_counted = 0;
 
 // Remove unreferenced symbols from the symbol table
-// This is done late during GC.  This doesn't use the hash table unlink because
-// it assumes that the literals are oops.
+// This is done late during GC.
 void SymbolTable::unlink() {
   int removed = 0;
   int total = 0;
   size_t memory_total = 0;
   for (int i = 0; i < the_table()->table_size(); ++i) {
-    HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i);
-    HashtableEntry<Symbol*>* entry = the_table()->bucket(i);
+    HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
+    HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
     while (entry != NULL) {
       // Shared entries are normally at the end of the bucket and if we run into
       // a shared entry, then there is nothing more to remove. However, if we
@@ -98,7 +116,7 @@
         p = entry->next_addr();
       }
       // get next entry
-      entry = (HashtableEntry<Symbol*>*)HashtableEntry<Symbol*>::make_ptr(*p);
+      entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
     }
   }
   symbols_removed += removed;
@@ -135,7 +153,7 @@
 Symbol* SymbolTable::lookup(int index, const char* name,
                               int len, unsigned int hash) {
   int count = 0;
-  for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) {
+  for (HashtableEntry<Symbol*, mtSymbol>* e = bucket(index); e != NULL; e = e->next()) {
     count++;  // count all entries in this bucket, not just ones with same hash
     if (e->hash() == hash) {
       Symbol* sym = e->literal();
@@ -147,7 +165,7 @@
     }
   }
   // If the bucket size is too deep check if this hash code is insufficient.
-  if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
+  if (count >= BasicHashtable<mtSymbol>::rehash_count && !needs_rehashing()) {
     _needs_rehashing = check_rehash_table(count);
   }
   return NULL;
@@ -181,7 +199,7 @@
   MutexLocker ml(SymbolTable_lock, THREAD);
 
   // Otherwise, add the symbol to the table
-  return the_table()->basic_add(index, (u1*)name, len, hashValue, CHECK_NULL);
+  return the_table()->basic_add(index, (u1*)name, len, hashValue, true, CHECK_NULL);
 }
 
 Symbol* SymbolTable::lookup(const Symbol* sym, int begin, int end, TRAPS) {
@@ -217,10 +235,10 @@
   // We can't include the code in No_Safepoint_Verifier because of the
   // ResourceMark.
 
-  // Allocation must be done before grabbing the SymbolTable_lock lock
+  // Grab SymbolTable_lock first.
   MutexLocker ml(SymbolTable_lock, THREAD);
 
-  return the_table()->basic_add(index, (u1*)buffer, len, hashValue, CHECK_NULL);
+  return the_table()->basic_add(index, (u1*)buffer, len, hashValue, true, CHECK_NULL);
 }
 
 Symbol* SymbolTable::lookup_only(const char* name, int len,
@@ -239,7 +257,7 @@
   unsigned int hash = hash_symbol((char*)sym->bytes(), sym->utf8_length());
   int index = the_table()->hash_to_index(hash);
 
-  for (HashtableEntry<Symbol*>* e = the_table()->bucket(index); e != NULL; e = e->next()) {
+  for (HashtableEntry<Symbol*, mtSymbol>* e = the_table()->bucket(index); e != NULL; e = e->next()) {
     if (e->hash() == hash) {
       Symbol* literal_sym = e->literal();
       if (sym == literal_sym) {
@@ -284,28 +302,43 @@
   }
 }
 
-void SymbolTable::add(constantPoolHandle cp, int names_count,
+void SymbolTable::add(Handle class_loader, constantPoolHandle cp,
+                      int names_count,
                       const char** names, int* lengths, int* cp_indices,
                       unsigned int* hashValues, TRAPS) {
   // Grab SymbolTable_lock first.
   MutexLocker ml(SymbolTable_lock, THREAD);
 
   SymbolTable* table = the_table();
-  bool added = table->basic_add(cp, names_count, names, lengths,
+  bool added = table->basic_add(class_loader, cp, names_count, names, lengths,
                                 cp_indices, hashValues, CHECK);
   if (!added) {
     // do it the hard way
     for (int i=0; i<names_count; i++) {
       int index = table->hash_to_index(hashValues[i]);
-      Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i],
-                                       hashValues[i], CHECK);
+      bool c_heap = class_loader() != NULL;
+      Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i], hashValues[i], c_heap, CHECK);
       cp->symbol_at_put(cp_indices[i], sym);
     }
   }
 }
 
+Symbol* SymbolTable::new_permanent_symbol(const char* name, TRAPS) {
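+  // On a lookup miss, allocate in the permanent arena (c_heap == false), so
+  // the returned symbol is never refcounted away.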
+  unsigned int hash;
+  Symbol* result = SymbolTable::lookup_only((char*)name, (int)strlen(name), hash);
+  if (result != NULL) {
+    return result;
+  }
+  // Grab SymbolTable_lock first.
+  MutexLocker ml(SymbolTable_lock, THREAD);
+
+  SymbolTable* table = the_table();
+  int index = table->hash_to_index(hash);
+  return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD);
+}
+
 Symbol* SymbolTable::basic_add(int index_arg, u1 *name, int len,
-                               unsigned int hashValue_arg, TRAPS) {
+                               unsigned int hashValue_arg, bool c_heap, TRAPS) {
   assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
          "proposed name of symbol must be stable");
 
@@ -340,16 +373,18 @@
   }
 
   // Create a new symbol.
-  Symbol* sym = allocate_symbol(name, len, CHECK_NULL);
+  Symbol* sym = allocate_symbol(name, len, c_heap, CHECK_NULL);
   assert(sym->equals((char*)name, len), "symbol must be properly initialized");
 
-  HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym);
-  sym->increment_refcount();  // increment refcount in external hashtable
+  HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
   add_entry(index, entry);
   return sym;
 }
 
-bool SymbolTable::basic_add(constantPoolHandle cp, int names_count,
+// This version of basic_add adds symbols in batch during constant pool
+// parsing.
+bool SymbolTable::basic_add(Handle class_loader, constantPoolHandle cp,
+                            int names_count,
                             const char** names, int* lengths,
                             int* cp_indices, unsigned int* hashValues,
                             TRAPS) {
@@ -384,22 +419,23 @@
       cp->symbol_at_put(cp_indices[i], test);
       assert(test->refcount() != 0, "lookup should have incremented the count");
     } else {
-      Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], CHECK_(false));
+      // Create a new symbol.  The null class loader is never unloaded so these
+      // are allocated specially in a permanent arena.
+      bool c_heap = class_loader() != NULL;
+      Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
       assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized");  // why wouldn't it be???
-      HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym);
-      sym->increment_refcount();  // increment refcount in external hashtable
+      HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
       add_entry(index, entry);
       cp->symbol_at_put(cp_indices[i], sym);
     }
   }
-
   return true;
 }
 
 
 void SymbolTable::verify() {
   for (int i = 0; i < the_table()->table_size(); ++i) {
-    HashtableEntry<Symbol*>* p = the_table()->bucket(i);
+    HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
     for ( ; p != NULL; p = p->next()) {
       Symbol* s = (Symbol*)(p->literal());
       guarantee(s != NULL, "symbol is NULL");
@@ -415,7 +451,7 @@
   NumberSeq summary;
   for (int i = 0; i < the_table()->table_size(); ++i) {
     int count = 0;
-    for (HashtableEntry<Symbol*>* e = the_table()->bucket(i);
+    for (HashtableEntry<Symbol*, mtSymbol>* e = the_table()->bucket(i);
        e != NULL; e = e->next()) {
       count++;
     }
@@ -452,7 +488,7 @@
   int memory_total = 0;
   int count = 0;
   for (i = 0; i < the_table()->table_size(); i++) {
-    HashtableEntry<Symbol*>* p = the_table()->bucket(i);
+    HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
     for ( ; p != NULL; p = p->next()) {
       memory_total += p->literal()->object_size();
       count++;
@@ -477,6 +513,8 @@
           ((float)symbols_removed/(float)symbols_counted)* 100);
   }
   tty->print_cr("Reference counts         %5d", Symbol::_total_count);
+  tty->print_cr("Symbol arena size        %5d used %5d",
+                 arena()->size_in_bytes(), arena()->used());
   tty->print_cr("Histogram of symbol length:");
   tty->print_cr("%8s %5d", "Total  ", total);
   tty->print_cr("%8s %5d", "Maximum", max_symbols);
@@ -511,15 +549,15 @@
 
 void SymbolTable::print() {
   for (int i = 0; i < the_table()->table_size(); ++i) {
-    HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i);
-    HashtableEntry<Symbol*>* entry = the_table()->bucket(i);
+    HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
+    HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
     if (entry != NULL) {
       while (entry != NULL) {
         tty->print(PTR_FORMAT " ", entry->literal());
         entry->literal()->print();
         tty->print(" %d", entry->literal()->refcount());
         p = entry->next_addr();
-        entry = (HashtableEntry<Symbol*>*)HashtableEntry<Symbol*>::make_ptr(*p);
+        entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
       }
       tty->cr();
     }
@@ -581,7 +619,7 @@
 oop StringTable::lookup(int index, jchar* name,
                         int len, unsigned int hash) {
   int count = 0;
-  for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) {
+  for (HashtableEntry<oop, mtSymbol>* l = bucket(index); l != NULL; l = l->next()) {
     count++;
     if (l->hash() == hash) {
       if (java_lang_String::equals(l->literal(), name, len)) {
@@ -590,7 +628,7 @@
     }
   }
   // If the bucket size is too deep check if this hash code is insufficient.
-  if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
+  if (count >= BasicHashtable<mtSymbol>::rehash_count && !needs_rehashing()) {
     _needs_rehashing = check_rehash_table(count);
   }
   return NULL;
@@ -626,7 +664,7 @@
     return test;
   }
 
-  HashtableEntry<oop>* entry = new_entry(hashValue, string());
+  HashtableEntry<oop, mtSymbol>* entry = new_entry(hashValue, string());
   add_entry(index, entry);
   return string();
 }
@@ -711,8 +749,8 @@
   // entries at a safepoint.
   assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
   for (int i = 0; i < the_table()->table_size(); ++i) {
-    HashtableEntry<oop>** p = the_table()->bucket_addr(i);
-    HashtableEntry<oop>* entry = the_table()->bucket(i);
+    HashtableEntry<oop, mtSymbol>** p = the_table()->bucket_addr(i);
+    HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
     while (entry != NULL) {
       // Shared entries are normally at the end of the bucket and if we run into
       // a shared entry, then there is nothing more to remove. However, if we
@@ -728,15 +766,15 @@
         *p = entry->next();
         the_table()->free_entry(entry);
       }
-      entry = (HashtableEntry<oop>*)HashtableEntry<oop>::make_ptr(*p);
+      entry = (HashtableEntry<oop, mtSymbol>*)HashtableEntry<oop, mtSymbol>::make_ptr(*p);
     }
   }
 }
 
 void StringTable::oops_do(OopClosure* f) {
   for (int i = 0; i < the_table()->table_size(); ++i) {
-    HashtableEntry<oop>** p = the_table()->bucket_addr(i);
-    HashtableEntry<oop>* entry = the_table()->bucket(i);
+    HashtableEntry<oop, mtSymbol>** p = the_table()->bucket_addr(i);
+    HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
     while (entry != NULL) {
       f->do_oop((oop*)entry->literal_addr());
 
@@ -748,14 +786,14 @@
       } else {
         p = entry->next_addr();
       }
-      entry = (HashtableEntry<oop>*)HashtableEntry<oop>::make_ptr(*p);
+      entry = (HashtableEntry<oop, mtSymbol>*)HashtableEntry<oop, mtSymbol>::make_ptr(*p);
     }
   }
 }
 
 void StringTable::verify() {
   for (int i = 0; i < the_table()->table_size(); ++i) {
-    HashtableEntry<oop>* p = the_table()->bucket(i);
+    HashtableEntry<oop, mtSymbol>* p = the_table()->bucket(i);
     for ( ; p != NULL; p = p->next()) {
       oop s = p->literal();
       guarantee(s != NULL, "interned string is NULL");
@@ -771,7 +809,7 @@
 void StringTable::dump(outputStream* st) {
   NumberSeq summary;
   for (int i = 0; i < the_table()->table_size(); ++i) {
-    HashtableEntry<oop>* p = the_table()->bucket(i);
+    HashtableEntry<oop, mtSymbol>* p = the_table()->bucket(i);
     int count = 0;
     for ( ; p != NULL; p = p->next()) {
       count++;
--- a/src/share/vm/classfile/symbolTable.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/symbolTable.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -57,12 +57,15 @@
 
   // Operator= increments reference count.
   void operator=(const TempNewSymbol &s) {
+    //clear();  //FIXME
     _temp = s._temp;
     if (_temp !=NULL) _temp->increment_refcount();
   }
 
   // Decrement reference counter so it can go away if it's unique
-  ~TempNewSymbol() { if (_temp != NULL) _temp->decrement_refcount(); }
+  void clear() { if (_temp != NULL) _temp->decrement_refcount(); _temp = NULL; }
+
+  ~TempNewSymbol() { clear(); }
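+
+  // Typical use (sketch, illustration only):
+  //   TempNewSymbol sym = SymbolTable::new_symbol("example", CHECK_NULL);
+  //   ... use sym like a Symbol* ...
+  // The refcount is dropped automatically when sym goes out of scope.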
 
   // Operators so they can be used like Symbols
   Symbol* operator -> () const                   { return _temp; }
@@ -71,7 +74,7 @@
   operator Symbol*()                             { return _temp; }
 };
 
-class SymbolTable : public Hashtable<Symbol*> {
+class SymbolTable : public Hashtable<Symbol*, mtSymbol> {
   friend class VMStructs;
   friend class ClassFileParser;
 
@@ -86,23 +89,24 @@
   static int symbols_removed;
   static int symbols_counted;
 
-  Symbol* allocate_symbol(const u1* name, int len, TRAPS);   // Assumes no characters larger than 0x7F
+  Symbol* allocate_symbol(const u1* name, int len, bool c_heap, TRAPS); // Assumes no characters larger than 0x7F
 
   // Adding elements
-  Symbol* basic_add(int index, u1* name, int len,
-                      unsigned int hashValue, TRAPS);
-  bool basic_add(constantPoolHandle cp, int names_count,
+  Symbol* basic_add(int index, u1* name, int len, unsigned int hashValue,
+                    bool c_heap, TRAPS);
+
+  bool basic_add(Handle class_loader, constantPoolHandle cp, int names_count,
                  const char** names, int* lengths, int* cp_indices,
                  unsigned int* hashValues, TRAPS);
 
-  static void new_symbols(constantPoolHandle cp, int names_count,
+  static void new_symbols(Handle class_loader, constantPoolHandle cp,
+                          int names_count,
                           const char** name, int* lengths,
                           int* cp_indices, unsigned int* hashValues,
                           TRAPS) {
-    add(cp, names_count, name, lengths, cp_indices, hashValues, THREAD);
+    add(class_loader, cp, names_count, name, lengths, cp_indices, hashValues, THREAD);
   }
 
-
   // Table size
   enum {
     symbol_table_size = 20011
@@ -111,15 +115,22 @@
   Symbol* lookup(int index, const char* name, int len, unsigned int hash);
 
   SymbolTable()
-    : Hashtable<Symbol*>(symbol_table_size, sizeof (HashtableEntry<Symbol*>)) {}
+    : Hashtable<Symbol*, mtSymbol>(symbol_table_size, sizeof (HashtableEntry<Symbol*, mtSymbol>)) {}
 
-  SymbolTable(HashtableBucket* t, int number_of_entries)
-    : Hashtable<Symbol*>(symbol_table_size, sizeof (HashtableEntry<Symbol*>), t,
+  SymbolTable(HashtableBucket<mtSymbol>* t, int number_of_entries)
+    : Hashtable<Symbol*, mtSymbol>(symbol_table_size, sizeof (HashtableEntry<Symbol*, mtSymbol>), t,
                 number_of_entries) {}
 
+  // Arena for permanent symbols (null class loader) that are never unloaded
+  static Arena*  _arena;
+  static Arena* arena() { return _arena; }  // called for statistics
+
+  static void initialize_symbols(int arena_alloc_size = 0);
 public:
   enum {
-    symbol_alloc_batch_size = 8
+    symbol_alloc_batch_size = 8,
+    // Pick initial size based on java -version size measurements
+    symbol_alloc_arena_size = 360*K
   };
 
   // The symbol table
@@ -128,14 +139,18 @@
   static void create_table() {
     assert(_the_table == NULL, "One symbol table allowed.");
     _the_table = new SymbolTable();
+    initialize_symbols(symbol_alloc_arena_size);
   }
 
-  static void create_table(HashtableBucket* t, int length,
+  static void create_table(HashtableBucket<mtSymbol>* t, int length,
                            int number_of_entries) {
     assert(_the_table == NULL, "One symbol table allowed.");
-    assert(length == symbol_table_size * sizeof(HashtableBucket),
+    assert(length == symbol_table_size * sizeof(HashtableBucket<mtSymbol>),
            "bad shared symbol size.");
     _the_table = new SymbolTable(t, number_of_entries);
+    // If CDS is in use, give the symbol table a default arena size since most
+    // symbols are already allocated in the shared misc section.
+    initialize_symbols();
   }
 
   static unsigned int hash_symbol(const char* s, int len);
@@ -155,7 +170,7 @@
   static Symbol* lookup_unicode(const jchar* name, int len, TRAPS);
   static Symbol* lookup_only_unicode(const jchar* name, int len, unsigned int& hash);
 
-  static void add(constantPoolHandle cp, int names_count,
+  static void add(Handle class_loader, constantPoolHandle cp, int names_count,
                   const char** names, int* lengths, int* cp_indices,
                   unsigned int* hashValues, TRAPS);
 
@@ -178,6 +193,9 @@
     return lookup(sym, begin, end, THREAD);
   }
 
+  // Create a permanent symbol in the arena; such symbols are never deleted
+  static Symbol* new_permanent_symbol(const char* name, TRAPS);
+
   // Symbol lookup
   static Symbol* lookup(int index, const char* name, int len, TRAPS);
 
@@ -203,13 +221,13 @@
 
   // Sharing
   static void copy_buckets(char** top, char*end) {
-    the_table()->Hashtable<Symbol*>::copy_buckets(top, end);
+    the_table()->Hashtable<Symbol*, mtSymbol>::copy_buckets(top, end);
   }
   static void copy_table(char** top, char*end) {
-    the_table()->Hashtable<Symbol*>::copy_table(top, end);
+    the_table()->Hashtable<Symbol*, mtSymbol>::copy_table(top, end);
   }
   static void reverse(void* boundary = NULL) {
-    the_table()->Hashtable<Symbol*>::reverse(boundary);
+    the_table()->Hashtable<Symbol*, mtSymbol>::reverse(boundary);
   }
 
   // Rehash the symbol table if it gets out of balance
@@ -217,8 +235,7 @@
   static bool needs_rehashing()         { return _needs_rehashing; }
 };
 
-
-class StringTable : public Hashtable<oop> {
+class StringTable : public Hashtable<oop, mtSymbol> {
   friend class VMStructs;
 
 private:
@@ -234,11 +251,11 @@
 
   oop lookup(int index, jchar* chars, int length, unsigned int hashValue);
 
-  StringTable() : Hashtable<oop>((int)StringTableSize,
-                                 sizeof (HashtableEntry<oop>)) {}
+  StringTable() : Hashtable<oop, mtSymbol>((int)StringTableSize,
+                              sizeof (HashtableEntry<oop, mtSymbol>)) {}
 
-  StringTable(HashtableBucket* t, int number_of_entries)
-    : Hashtable<oop>((int)StringTableSize, sizeof (HashtableEntry<oop>), t,
+  StringTable(HashtableBucket<mtSymbol>* t, int number_of_entries)
+    : Hashtable<oop, mtSymbol>((int)StringTableSize, sizeof (HashtableEntry<oop, mtSymbol>), t,
                      number_of_entries) {}
 public:
   // The string table
@@ -249,10 +266,10 @@
     _the_table = new StringTable();
   }
 
-  static void create_table(HashtableBucket* t, int length,
+  static void create_table(HashtableBucket<mtSymbol>* t, int length,
                            int number_of_entries) {
     assert(_the_table == NULL, "One string table allowed.");
-    assert((size_t)length == StringTableSize * sizeof(HashtableBucket),
+    assert((size_t)length == StringTableSize * sizeof(HashtableBucket<mtSymbol>),
            "bad shared string size.");
     _the_table = new StringTable(t, number_of_entries);
   }
@@ -286,13 +303,13 @@
 
   // Sharing
   static void copy_buckets(char** top, char*end) {
-    the_table()->Hashtable<oop>::copy_buckets(top, end);
+    the_table()->Hashtable<oop, mtSymbol>::copy_buckets(top, end);
   }
   static void copy_table(char** top, char*end) {
-    the_table()->Hashtable<oop>::copy_table(top, end);
+    the_table()->Hashtable<oop, mtSymbol>::copy_table(top, end);
   }
   static void reverse() {
-    the_table()->Hashtable<oop>::reverse();
+    the_table()->Hashtable<oop, mtSymbol>::reverse();
   }
 
   // Rehash the symbol table if it gets out of balance
--- a/src/share/vm/classfile/systemDictionary.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/systemDictionary.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -30,6 +30,7 @@
 #include "classfile/resolutionErrors.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
+#include "compiler/compileBroker.hpp"
 #include "interpreter/bytecodeStream.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/gcLocker.hpp"
@@ -193,7 +194,10 @@
 // Forwards to resolve_instance_class_or_null
 
 klassOop SystemDictionary::resolve_or_null(Symbol* class_name, Handle class_loader, Handle protection_domain, TRAPS) {
-  assert(!THREAD->is_Compiler_thread(), "Can not load classes with the Compiler thread");
+  assert(!THREAD->is_Compiler_thread(),
+         err_msg("can not load classes with compiler thread: class=%s, classloader=%s",
+                 class_name->as_C_string(),
+                 class_loader.is_null() ? "null" : class_loader->klass()->klass_part()->name()->as_C_string()));
   if (FieldType::is_array(class_name)) {
     return resolve_array_class_or_null(class_name, class_loader, protection_domain, CHECK_NULL);
   } else if (FieldType::is_obj(class_name)) {
@@ -1168,9 +1172,9 @@
 }
 
 
-void SystemDictionary::set_shared_dictionary(HashtableBucket* t, int length,
+void SystemDictionary::set_shared_dictionary(HashtableBucket<mtClass>* t, int length,
                                              int number_of_entries) {
-  assert(length == _nof_buckets * sizeof(HashtableBucket),
+  assert(length == _nof_buckets * sizeof(HashtableBucket<mtClass>),
          "bad shared dictionary size.");
   _shared_dictionary = new Dictionary(_nof_buckets, t, number_of_entries);
 }
@@ -2358,72 +2362,134 @@
 }
 
 
-methodOop SystemDictionary::find_method_handle_invoke(Symbol* name,
-                                                      Symbol* signature,
-                                                      KlassHandle accessing_klass,
-                                                      TRAPS) {
-  if (!EnableInvokeDynamic)  return NULL;
-  vmSymbols::SID name_id = vmSymbols::find_sid(name);
-  assert(name_id != vmSymbols::NO_SID, "must be a known name");
-  unsigned int hash  = invoke_method_table()->compute_hash(signature, name_id);
+methodHandle SystemDictionary::find_method_handle_intrinsic(vmIntrinsics::ID iid,
+                                                            Symbol* signature,
+                                                            TRAPS) {
+  methodHandle empty;
+  assert(EnableInvokeDynamic, "");
+  assert(MethodHandles::is_signature_polymorphic(iid) &&
+         MethodHandles::is_signature_polymorphic_intrinsic(iid) &&
+         iid != vmIntrinsics::_invokeGeneric,
+         err_msg("must be a known MH intrinsic iid=%d: %s", iid, vmIntrinsics::name_at(iid)));
+
+  unsigned int hash  = invoke_method_table()->compute_hash(signature, iid);
   int          index = invoke_method_table()->hash_to_index(hash);
-  SymbolPropertyEntry* spe = invoke_method_table()->find_entry(index, hash, signature, name_id);
-  methodHandle non_cached_result;
+  SymbolPropertyEntry* spe = invoke_method_table()->find_entry(index, hash, signature, iid);
+  methodHandle m;
   if (spe == NULL || spe->property_oop() == NULL) {
     spe = NULL;
     // Must create lots of stuff here, but outside of the SystemDictionary lock.
-    if (THREAD->is_Compiler_thread())
-      return NULL;              // do not attempt from within compiler
-    bool for_invokeGeneric = (name_id != vmSymbols::VM_SYMBOL_ENUM_NAME(invokeExact_name));
-    bool found_on_bcp = false;
-    Handle mt = find_method_handle_type(signature, accessing_klass,
-                                        for_invokeGeneric,
-                                        found_on_bcp, CHECK_NULL);
-    KlassHandle  mh_klass = SystemDictionaryHandles::MethodHandle_klass();
-    methodHandle m = methodOopDesc::make_invoke_method(mh_klass, name, signature,
-                                                       mt, CHECK_NULL);
+    m = methodOopDesc::make_method_handle_intrinsic(iid, signature, CHECK_(empty));
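+    // Eagerly request compilation of the new intrinsic (presumably so that
+    // method handle call sites do not have to run it interpreted).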
+    CompileBroker::compile_method(m, InvocationEntryBci, CompLevel_highest_tier,
+                                  methodHandle(), CompileThreshold, "MH", CHECK_(empty));
+
     // Now grab the lock.  We might have to throw away the new method,
     // if a racing thread has managed to install one at the same time.
-    if (found_on_bcp) {
-      MutexLocker ml(SystemDictionary_lock, Thread::current());
-      spe = invoke_method_table()->find_entry(index, hash, signature, name_id);
+    {
+      MutexLocker ml(SystemDictionary_lock, THREAD);
+      spe = invoke_method_table()->find_entry(index, hash, signature, iid);
       if (spe == NULL)
-        spe = invoke_method_table()->add_entry(index, hash, signature, name_id);
-      if (spe->property_oop() == NULL) {
+        spe = invoke_method_table()->add_entry(index, hash, signature, iid);
+      if (spe->property_oop() == NULL)
         spe->set_property_oop(m());
-        // Link m to his method type, if it is suitably generic.
-        oop mtform = java_lang_invoke_MethodType::form(mt());
-        if (mtform != NULL && mt() == java_lang_invoke_MethodTypeForm::erasedType(mtform)
-            // vmlayout must be an invokeExact:
-            && name_id == vmSymbols::VM_SYMBOL_ENUM_NAME(invokeExact_name)
-            && java_lang_invoke_MethodTypeForm::vmlayout_offset_in_bytes() > 0) {
-          java_lang_invoke_MethodTypeForm::init_vmlayout(mtform, m());
-        }
+    }
+  }
+
+  assert(spe != NULL && spe->property_oop() != NULL, "");
+  m = methodOop(spe->property_oop());
+  assert(m->is_method(), "");
+
+  return m;
+}
+
+// Helper for unpacking the return value from linkMethod and linkCallSite.
+static methodHandle unpack_method_and_appendix(Handle mname,
+                                               objArrayHandle appendix_box,
+                                               Handle* appendix_result,
+                                               TRAPS) {
+  methodHandle empty;
+  if (mname.not_null()) {
+    oop vmtarget = java_lang_invoke_MemberName::vmtarget(mname());
+    if (vmtarget != NULL && vmtarget->is_method()) {
+      methodOop m = methodOop(vmtarget);
+      oop appendix = appendix_box->obj_at(0);
+      if (TraceMethodHandles) {
+    #ifndef PRODUCT
+        tty->print("Linked method="INTPTR_FORMAT": ", m);
+        m->print();
+        if (appendix != NULL) { tty->print("appendix = "); appendix->print(); }
+        tty->cr();
+    #endif //PRODUCT
       }
-    } else {
-      non_cached_result = m;
+      (*appendix_result) = Handle(THREAD, appendix);
+      return methodHandle(THREAD, m);
     }
   }
-  if (spe != NULL && spe->property_oop() != NULL) {
-    assert(spe->property_oop()->is_method(), "");
-    return (methodOop) spe->property_oop();
-  } else {
-    return non_cached_result();
+  THROW_MSG_(vmSymbols::java_lang_LinkageError(), "bad value from MethodHandleNatives", empty);
+  return empty;
+}
+
+methodHandle SystemDictionary::find_method_handle_invoker(Symbol* name,
+                                                          Symbol* signature,
+                                                          KlassHandle accessing_klass,
+                                                          Handle* appendix_result,
+                                                          TRAPS) {
+  methodHandle empty;
+  assert(EnableInvokeDynamic, "");
+  assert(!THREAD->is_Compiler_thread(), "");
+  Handle method_type =
+    SystemDictionary::find_method_handle_type(signature, accessing_klass, CHECK_(empty));
+  if (false) {  // FIXME: Decide if the Java upcall should resolve signatures.
+    method_type = java_lang_String::create_from_symbol(signature, CHECK_(empty));
   }
+
+  KlassHandle  mh_klass = SystemDictionaryHandles::MethodHandle_klass();
+  int ref_kind = JVM_REF_invokeVirtual;
+  Handle name_str = StringTable::intern(name, CHECK_(empty));
+  objArrayHandle appendix_box = oopFactory::new_objArray(SystemDictionary::Object_klass(), 1, CHECK_(empty));
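+  // The one-element array acts as an out-parameter: the Java side stores the
+  // appendix (if any) into slot 0; see unpack_method_and_appendix above.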
+  assert(appendix_box->obj_at(0) == NULL, "");
+
+  // call java.lang.invoke.MethodHandleNatives::linkMethod(... String, MethodType) -> MemberName
+  JavaCallArguments args;
+  args.push_oop(accessing_klass()->java_mirror());
+  args.push_int(ref_kind);
+  args.push_oop(mh_klass()->java_mirror());
+  args.push_oop(name_str());
+  args.push_oop(method_type());
+  args.push_oop(appendix_box());
+  JavaValue result(T_OBJECT);
+  JavaCalls::call_static(&result,
+                         SystemDictionary::MethodHandleNatives_klass(),
+                         vmSymbols::linkMethod_name(),
+                         vmSymbols::linkMethod_signature(),
+                         &args, CHECK_(empty));
+  Handle mname(THREAD, (oop) result.get_jobject());
+  return unpack_method_and_appendix(mname, appendix_box, appendix_result, THREAD);
 }
 
+
 // Ask Java code to find or construct a java.lang.invoke.MethodType for the given
 // signature, as interpreted relative to the given class loader.
 // Because of class loader constraints, all method handle usage must be
 // consistent with this loader.
 Handle SystemDictionary::find_method_handle_type(Symbol* signature,
                                                  KlassHandle accessing_klass,
-                                                 bool for_invokeGeneric,
-                                                 bool& return_bcp_flag,
                                                  TRAPS) {
+  Handle empty;
+  vmIntrinsics::ID null_iid = vmIntrinsics::_none;  // distinct from all method handle invoker intrinsics
+  unsigned int hash  = invoke_method_table()->compute_hash(signature, null_iid);
+  int          index = invoke_method_table()->hash_to_index(hash);
+  SymbolPropertyEntry* spe = invoke_method_table()->find_entry(index, hash, signature, null_iid);
+  if (spe != NULL && spe->property_oop() != NULL) {
+    assert(java_lang_invoke_MethodType::is_instance(spe->property_oop()), "");
+    return Handle(THREAD, spe->property_oop());
+  } else if (THREAD->is_Compiler_thread()) {
+    warning("SystemDictionary::find_method_handle_type called from compiler thread");  // FIXME
+    return Handle();  // do not attempt from within compiler, unless it was cached
+  }
+
   Handle class_loader, protection_domain;
   bool is_on_bcp = true;  // keep this true as long as we can materialize from the boot classloader
-  Handle empty;
   int npts = ArgumentCount(signature).size();
   objArrayHandle pts = oopFactory::new_objArray(SystemDictionary::Class_klass(), npts, CHECK_(empty));
   int arg = 0;
@@ -2432,6 +2498,7 @@
   for (SignatureStream ss(signature); !ss.is_done(); ss.next()) {
     oop mirror = NULL;
     if (is_on_bcp) {
+      // Note:  class_loader & protection_domain are both null at this point.
       mirror = ss.as_java_mirror(class_loader, protection_domain,
                                  SignatureStream::ReturnNull, CHECK_(empty));
       if (mirror == NULL) {
@@ -2452,9 +2519,11 @@
       rt = Handle(THREAD, mirror);
     else
       pts->obj_at_put(arg++, mirror);
+
     // Check accessibility.
     if (ss.is_object() && accessing_klass.not_null()) {
       klassOop sel_klass = java_lang_Class::as_klassOop(mirror);
+      mirror = NULL;  // safety
       // Emulate constantPoolOopDesc::verify_constant_pool_resolve.
       if (Klass::cast(sel_klass)->oop_is_objArray())
         sel_klass = objArrayKlass::cast(sel_klass)->bottom_klass();
@@ -2477,23 +2546,18 @@
                          &args, CHECK_(empty));
   Handle method_type(THREAD, (oop) result.get_jobject());
 
-  if (for_invokeGeneric) {
-    // call java.lang.invoke.MethodHandleNatives::notifyGenericMethodType(MethodType) -> void
-    JavaCallArguments args(Handle(THREAD, method_type()));
-    JavaValue no_result(T_VOID);
-    JavaCalls::call_static(&no_result,
-                           SystemDictionary::MethodHandleNatives_klass(),
-                           vmSymbols::notifyGenericMethodType_name(),
-                           vmSymbols::notifyGenericMethodType_signature(),
-                           &args, THREAD);
-    if (HAS_PENDING_EXCEPTION) {
-      // If the notification fails, just kill it.
-      CLEAR_PENDING_EXCEPTION;
+  if (is_on_bcp) {
+    // We can cache this MethodType inside the JVM.
+    MutexLocker ml(SystemDictionary_lock, THREAD);
+    spe = invoke_method_table()->find_entry(index, hash, signature, null_iid);
+    if (spe == NULL)
+      spe = invoke_method_table()->add_entry(index, hash, signature, null_iid);
+    if (spe->property_oop() == NULL) {
+      spe->set_property_oop(method_type());
     }
   }
 
-  // report back to the caller with the MethodType and the "on_bcp" flag
-  return_bcp_flag = is_on_bcp;
+  // report back to the caller with the MethodType
   return method_type;
 }
 
@@ -2508,8 +2572,7 @@
   Handle name = java_lang_String::create_from_symbol(name_sym, CHECK_(empty));
   Handle type;
   if (signature->utf8_length() > 0 && signature->byte_at(0) == '(') {
-    bool ignore_is_on_bcp = false;
-    type = find_method_handle_type(signature, caller, false, ignore_is_on_bcp, CHECK_(empty));
+    type = find_method_handle_type(signature, caller, CHECK_(empty));
   } else {
     ResourceMark rm(THREAD);
     SignatureStream ss(signature, false);
@@ -2543,119 +2606,54 @@
 
 // Ask Java code to find or construct a java.lang.invoke.CallSite for the given
 // name and signature, as interpreted relative to the given class loader.
-Handle SystemDictionary::make_dynamic_call_site(Handle bootstrap_method,
-                                                Symbol* name,
-                                                methodHandle signature_invoker,
-                                                Handle info,
-                                                methodHandle caller_method,
-                                                int caller_bci,
-                                                TRAPS) {
-  Handle empty;
-  guarantee(bootstrap_method.not_null() &&
-            java_lang_invoke_MethodHandle::is_instance(bootstrap_method()),
+methodHandle SystemDictionary::find_dynamic_call_site_invoker(KlassHandle caller,
+                                                              Handle bootstrap_specifier,
+                                                              Symbol* name,
+                                                              Symbol* type,
+                                                              Handle* appendix_result,
+                                                              TRAPS) {
+  methodHandle empty;
+  Handle bsm, info;
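+  // A bootstrap specifier is either the BSM itself, or an Object[] of the
+  // form {bsm, static_arg...}; unpack the latter into bsm plus an info array.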
+  if (java_lang_invoke_MethodHandle::is_instance(bootstrap_specifier())) {
+    bsm = bootstrap_specifier;
+  } else {
+    assert(bootstrap_specifier->is_objArray(), "");
+    objArrayHandle args(THREAD, (objArrayOop) bootstrap_specifier());
+    int len = args->length();
+    assert(len >= 1, "");
+    bsm = Handle(THREAD, args->obj_at(0));
+    if (len > 1) {
+      objArrayOop args1 = oopFactory::new_objArray(SystemDictionary::Object_klass(), len-1, CHECK_(empty));
+      for (int i = 1; i < len; i++)
+        args1->obj_at_put(i-1, args->obj_at(i));
+      info = Handle(THREAD, args1);
+    }
+  }
+  guarantee(java_lang_invoke_MethodHandle::is_instance(bsm()),
             "caller must supply a valid BSM");
 
-  Handle caller_mname = MethodHandles::new_MemberName(CHECK_(empty));
-  MethodHandles::init_MemberName(caller_mname(), caller_method());
-
-  // call java.lang.invoke.MethodHandleNatives::makeDynamicCallSite(bootm, name, mtype, info, caller_mname, caller_pos)
-  oop name_str_oop = StringTable::intern(name, CHECK_(empty)); // not a handle!
-  JavaCallArguments args(Handle(THREAD, bootstrap_method()));
-  args.push_oop(name_str_oop);
-  args.push_oop(signature_invoker->method_handle_type());
+  Handle method_name = java_lang_String::create_from_symbol(name, CHECK_(empty));
+  Handle method_type = find_method_handle_type(type, caller, CHECK_(empty));
+
+  objArrayHandle appendix_box = oopFactory::new_objArray(SystemDictionary::Object_klass(), 1, CHECK_(empty));
+  assert(appendix_box->obj_at(0) == NULL, "");
+
+  // call java.lang.invoke.MethodHandleNatives::linkCallSite(caller, bsm, name, mtype, info, &appendix)
+  JavaCallArguments args;
+  args.push_oop(caller->java_mirror());
+  args.push_oop(bsm());
+  args.push_oop(method_name());
+  args.push_oop(method_type());
   args.push_oop(info());
-  args.push_oop(caller_mname());
-  args.push_int(caller_bci);
+  args.push_oop(appendix_box);
   JavaValue result(T_OBJECT);
   JavaCalls::call_static(&result,
                          SystemDictionary::MethodHandleNatives_klass(),
-                         vmSymbols::makeDynamicCallSite_name(),
-                         vmSymbols::makeDynamicCallSite_signature(),
+                         vmSymbols::linkCallSite_name(),
+                         vmSymbols::linkCallSite_signature(),
                          &args, CHECK_(empty));
-  oop call_site_oop = (oop) result.get_jobject();
-  assert(call_site_oop->is_oop()
-         /*&& java_lang_invoke_CallSite::is_instance(call_site_oop)*/, "must be sane");
-  if (TraceMethodHandles) {
-#ifndef PRODUCT
-    tty->print_cr("Linked invokedynamic bci=%d site="INTPTR_FORMAT":", caller_bci, call_site_oop);
-    call_site_oop->print();
-    tty->cr();
-#endif //PRODUCT
-  }
-  return call_site_oop;
-}
-
-Handle SystemDictionary::find_bootstrap_method(methodHandle caller_method, int caller_bci,
-                                               int cache_index,
-                                               Handle& argument_info_result,
-                                               TRAPS) {
-  Handle empty;
-
-  constantPoolHandle pool;
-  {
-    klassOop caller = caller_method->method_holder();
-    if (!Klass::cast(caller)->oop_is_instance())  return empty;
-    pool = constantPoolHandle(THREAD, instanceKlass::cast(caller)->constants());
-  }
-
-  int constant_pool_index = pool->cache()->entry_at(cache_index)->constant_pool_index();
-  constantTag tag = pool->tag_at(constant_pool_index);
-
-  if (tag.is_invoke_dynamic()) {
-    // JVM_CONSTANT_InvokeDynamic is an ordered pair of [bootm, name&type], plus optional arguments
-    // The bootm, being a JVM_CONSTANT_MethodHandle, has its own cache entry.
-    int bsm_index = pool->invoke_dynamic_bootstrap_method_ref_index_at(constant_pool_index);
-    if (bsm_index != 0) {
-      int bsm_index_in_cache = pool->cache()->entry_at(cache_index)->bootstrap_method_index_in_cache();
-      DEBUG_ONLY(int bsm_index_2 = pool->cache()->entry_at(bsm_index_in_cache)->constant_pool_index());
-      assert(bsm_index == bsm_index_2, "BSM constant lifted to cache");
-      if (TraceMethodHandles) {
-        tty->print_cr("resolving bootstrap method for "PTR_FORMAT" at %d at cache[%d]CP[%d]...",
-                      (intptr_t) caller_method(), caller_bci, cache_index, constant_pool_index);
-      }
-      oop bsm_oop = pool->resolve_cached_constant_at(bsm_index_in_cache, CHECK_(empty));
-      if (TraceMethodHandles) {
-        tty->print_cr("bootstrap method for "PTR_FORMAT" at %d retrieved as "PTR_FORMAT":",
-                      (intptr_t) caller_method(), caller_bci, (intptr_t) bsm_oop);
-      }
-      assert(bsm_oop->is_oop(), "must be sane");
-      // caller must verify that it is of type MethodHandle
-      Handle bsm(THREAD, bsm_oop);
-      bsm_oop = NULL;  // safety
-
-      // Extract the optional static arguments.
-      Handle argument_info;  // either null, or one arg, or Object[]{arg...}
-      int argc = pool->invoke_dynamic_argument_count_at(constant_pool_index);
-      if (TraceInvokeDynamic) {
-        tty->print_cr("find_bootstrap_method: [%d/%d] CONSTANT_InvokeDynamic: %d[%d]",
-                      constant_pool_index, cache_index, bsm_index, argc);
-      }
-      if (argc > 0) {
-        objArrayHandle arg_array;
-        if (argc > 1) {
-          objArrayOop arg_array_oop = oopFactory::new_objArray(SystemDictionary::Object_klass(), argc, CHECK_(empty));
-          arg_array = objArrayHandle(THREAD, arg_array_oop);
-          argument_info = arg_array;
-        }
-        for (int arg_i = 0; arg_i < argc; arg_i++) {
-          int arg_index = pool->invoke_dynamic_argument_index_at(constant_pool_index, arg_i);
-          oop arg_oop = pool->resolve_possibly_cached_constant_at(arg_index, CHECK_(empty));
-          if (arg_array.is_null()) {
-            argument_info = Handle(THREAD, arg_oop);
-          } else {
-            arg_array->obj_at_put(arg_i, arg_oop);
-          }
-        }
-      }
-
-      argument_info_result = argument_info;  // return argument_info to caller
-      return bsm;
-    }
-  } else {
-    ShouldNotReachHere();  // verifier does not allow this
-  }
-
-  return empty;
+  Handle mname(THREAD, (oop) result.get_jobject());
+  return unpack_method_and_appendix(mname, appendix_box, appendix_result, THREAD);
 }
 
 // Since the identity hash code for symbols changes when the symbols are
@@ -2763,7 +2761,7 @@
       class_size += ik->local_interfaces()->size();
       class_size += ik->transitive_interfaces()->size();
       // We do not have to count implementors, since we only store one!
-      class_size += ik->all_fields_count() * FieldInfo::field_slots;
+      class_size += ik->fields()->length();
     }
   }
 
@@ -2771,7 +2769,6 @@
     nmethods++;
     method_size += m->size();
     // class loader uses same objArray for empty vectors, so don't count these
-    if (m->exception_table()->length() != 0)   method_size += m->exception_table()->size();
     if (m->has_stackmap_table()) {
       method_size += m->stackmap_data()->size();
     }
--- a/src/share/vm/classfile/systemDictionary.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/systemDictionary.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -32,6 +32,7 @@
 #include "runtime/java.hpp"
 #include "runtime/reflectionUtils.hpp"
 #include "utilities/hashtable.hpp"
+#include "utilities/hashtable.inline.hpp"
 
 // The system dictionary stores all loaded classes and maps:
 //
@@ -72,7 +73,7 @@
 class Dictionary;
 class PlaceholderTable;
 class LoaderConstraintTable;
-class HashtableBucket;
+template <MEMFLAGS F> class HashtableBucket;
 class ResolutionErrorTable;
 class SymbolPropertyTable;
 
@@ -147,15 +148,10 @@
   template(MethodHandle_klass,             java_lang_invoke_MethodHandle,             Pre_JSR292) \
   template(MemberName_klass,               java_lang_invoke_MemberName,               Pre_JSR292) \
   template(MethodHandleNatives_klass,      java_lang_invoke_MethodHandleNatives,      Pre_JSR292) \
-  template(AdapterMethodHandle_klass,      java_lang_invoke_AdapterMethodHandle,      Pre_JSR292) \
-  template(BoundMethodHandle_klass,        java_lang_invoke_BoundMethodHandle,        Pre_JSR292) \
-  template(DirectMethodHandle_klass,       java_lang_invoke_DirectMethodHandle,       Pre_JSR292) \
+  template(LambdaForm_klass,               java_lang_invoke_LambdaForm,               Opt)        \
   template(MethodType_klass,               java_lang_invoke_MethodType,               Pre_JSR292) \
-  template(MethodTypeForm_klass,           java_lang_invoke_MethodTypeForm,           Pre_JSR292) \
   template(BootstrapMethodError_klass,     java_lang_BootstrapMethodError,            Pre_JSR292) \
-  template(WrongMethodTypeException_klass, java_lang_invoke_WrongMethodTypeException, Pre_JSR292) \
   template(CallSite_klass,                 java_lang_invoke_CallSite,                 Pre_JSR292) \
-  template(CountingMethodHandle_klass,     java_lang_invoke_CountingMethodHandle,     Opt)        \
   template(ConstantCallSite_klass,         java_lang_invoke_ConstantCallSite,         Pre_JSR292) \
   template(MutableCallSite_klass,          java_lang_invoke_MutableCallSite,          Pre_JSR292) \
   template(VolatileCallSite_klass,         java_lang_invoke_VolatileCallSite,         Pre_JSR292) \
@@ -170,9 +166,6 @@
   /* It's okay if this turns out to be NULL in non-1.4 JDKs. */               \
   template(nio_Buffer_klass,             java_nio_Buffer,                Opt) \
                                                                               \
-  /* If this class isn't present, it won't be referenced. */                  \
-  template(AtomicLongCSImpl_klass,       sun_misc_AtomicLongCSImpl,   Opt)    \
-                                                                              \
   template(DownloadManager_klass,        sun_jkernel_DownloadManager, Opt_Kernel) \
                                                                               \
   template(PostVMInitHook_klass,         sun_misc_PostVMInitHook, Opt)        \
@@ -366,7 +359,7 @@
   static void copy_buckets(char** top, char* end);
   static void copy_table(char** top, char* end);
   static void reverse();
-  static void set_shared_dictionary(HashtableBucket* t, int length,
+  static void set_shared_dictionary(HashtableBucket<mtClass>* t, int length,
                                     int number_of_entries);
   // Printing
   static void print()                   PRODUCT_RETURN;
@@ -487,17 +480,24 @@
                                        Handle loader2, bool is_method, TRAPS);
 
   // JSR 292
-  // find the java.lang.invoke.MethodHandles::invoke method for a given signature
-  static methodOop find_method_handle_invoke(Symbol* name,
-                                             Symbol* signature,
-                                             KlassHandle accessing_klass,
-                                             TRAPS);
-  // ask Java to compute a java.lang.invoke.MethodType object for a given signature
+  // find a java.lang.invoke.MethodHandle.invoke* method for a given signature
+  // (asks Java to compute it if necessary, except in a compiler thread)
+  static methodHandle find_method_handle_invoker(Symbol* name,
+                                                 Symbol* signature,
+                                                 KlassHandle accessing_klass,
+                                                 Handle *appendix_result,
+                                                 TRAPS);
+  // for a given signature, find the internal MethodHandle method (linkTo* or invokeBasic)
+  // (does not ask Java, since this is a low-level intrinsic defined by the JVM)
+  static methodHandle find_method_handle_intrinsic(vmIntrinsics::ID iid,
+                                                   Symbol* signature,
+                                                   TRAPS);
+  // find a java.lang.invoke.MethodType object for a given signature
+  // (asks Java to compute it if necessary, except in a compiler thread)
   static Handle    find_method_handle_type(Symbol* signature,
                                            KlassHandle accessing_klass,
-                                           bool for_invokeGeneric,
-                                           bool& return_bcp_flag,
                                            TRAPS);
+
   // ask Java to compute a java.lang.invoke.MethodHandle object for a given CP entry
   static Handle    link_method_handle_constant(KlassHandle caller,
                                                int ref_kind, //e.g., JVM_REF_invokeVirtual
@@ -505,23 +505,14 @@
                                                Symbol* name,
                                                Symbol* signature,
                                                TRAPS);
+
   // ask Java to create a dynamic call site, while linking an invokedynamic op
-  static Handle    make_dynamic_call_site(Handle bootstrap_method,
-                                          // Callee information:
-                                          Symbol* name,
-                                          methodHandle signature_invoker,
-                                          Handle info,
-                                          // Caller information:
-                                          methodHandle caller_method,
-                                          int caller_bci,
-                                          TRAPS);
-
-  // coordinate with Java about bootstrap methods
-  static Handle    find_bootstrap_method(methodHandle caller_method,
-                                         int caller_bci,  // N.B. must be an invokedynamic
-                                         int cache_index, // must be corresponding main_entry
-                                         Handle &argument_info_result, // static BSM arguments, if any
-                                         TRAPS);
+  static methodHandle find_dynamic_call_site_invoker(KlassHandle caller,
+                                                     Handle bootstrap_method,
+                                                     Symbol* name,
+                                                     Symbol* type,
+                                                     Handle *appendix_result,
+                                                     TRAPS);
 
   // Utility for printing loader "name" as part of tracing constraints
   static const char* loader_name(oop loader) {
--- a/src/share/vm/classfile/verificationType.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/verificationType.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -110,34 +110,34 @@
   }
 }
 
-#ifndef PRODUCT
-
 void VerificationType::print_on(outputStream* st) const {
   switch (_u._data) {
-    case Bogus:            st->print(" bogus "); break;
-    case Category1:        st->print(" category1 "); break;
-    case Category2:        st->print(" category2 "); break;
-    case Category2_2nd:    st->print(" category2_2nd "); break;
-    case Boolean:          st->print(" boolean "); break;
-    case Byte:             st->print(" byte "); break;
-    case Short:            st->print(" short "); break;
-    case Char:             st->print(" char "); break;
-    case Integer:          st->print(" integer "); break;
-    case Float:            st->print(" float "); break;
-    case Long:             st->print(" long "); break;
-    case Double:           st->print(" double "); break;
-    case Long_2nd:         st->print(" long_2nd "); break;
-    case Double_2nd:       st->print(" double_2nd "); break;
-    case Null:             st->print(" null "); break;
+    case Bogus:            st->print("top"); break;
+    case Category1:        st->print("category1"); break;
+    case Category2:        st->print("category2"); break;
+    case Category2_2nd:    st->print("category2_2nd"); break;
+    case Boolean:          st->print("boolean"); break;
+    case Byte:             st->print("byte"); break;
+    case Short:            st->print("short"); break;
+    case Char:             st->print("char"); break;
+    case Integer:          st->print("integer"); break;
+    case Float:            st->print("float"); break;
+    case Long:             st->print("long"); break;
+    case Double:           st->print("double"); break;
+    case Long_2nd:         st->print("long_2nd"); break;
+    case Double_2nd:       st->print("double_2nd"); break;
+    case Null:             st->print("null"); break;
+    case ReferenceQuery:   st->print("reference type"); break;
+    case Category1Query:   st->print("category1 type"); break;
+    case Category2Query:   st->print("category2 type"); break;
+    case Category2_2ndQuery: st->print("category2_2nd type"); break;
     default:
       if (is_uninitialized_this()) {
-        st->print(" uninitializedThis ");
+        st->print("uninitializedThis");
       } else if (is_uninitialized()) {
-        st->print(" uninitialized %d ", bci());
+        st->print("uninitialized %d", bci());
       } else {
-        st->print(" class %s ", name()->as_klass_external_name());
+        name()->print_value_on(st);
       }
   }
 }
-
-#endif
--- a/src/share/vm/classfile/verificationType.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/verificationType.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -157,7 +157,7 @@
 
   // For reference types, store the actual Symbol
   static VerificationType reference_type(Symbol* sh) {
-      assert(((uintptr_t)sh & 0x3) == 0, "Oops must be aligned");
+      assert(((uintptr_t)sh & 0x3) == 0, "Symbols must be aligned");
       // If the above assert fails in the future because oop* isn't aligned,
       // then this type encoding system will have to change to have a tag value
       // to discriminate between oops and primitives.
@@ -303,7 +303,7 @@
     return index;
   }
 
-  void print_on(outputStream* st) const PRODUCT_RETURN;
+  void print_on(outputStream* st) const;
 
  private:
 
--- a/src/share/vm/classfile/verifier.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/verifier.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,9 +26,12 @@
 #include "classfile/classFileStream.hpp"
 #include "classfile/javaClasses.hpp"
 #include "classfile/stackMapTable.hpp"
+#include "classfile/stackMapFrame.hpp"
+#include "classfile/stackMapTableFormat.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/verifier.hpp"
 #include "classfile/vmSymbols.hpp"
+#include "interpreter/bytecodes.hpp"
 #include "interpreter/bytecodeStream.hpp"
 #include "memory/oopFactory.hpp"
 #include "memory/resourceArea.hpp"
@@ -110,8 +113,11 @@
   Symbol* exception_name = NULL;
   const size_t message_buffer_len = klass->name()->utf8_length() + 1024;
   char* message_buffer = NEW_RESOURCE_ARRAY(char, message_buffer_len);
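+  // Start with the shared buffer; replaced below by the split verifier's
+  // own message when it reports an error.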
+  char* exception_message = message_buffer;
 
   const char* klassName = klass->external_name();
+  bool can_failover = FailOverToOldVerifier &&
+      klass->major_version() < NOFAILOVER_MAJOR_VERSION;
 
   // If the class should be verified, first see if we can use the split
   // verifier.  If not, or if verification fails and FailOverToOldVerifier
@@ -122,27 +128,28 @@
     }
     if (UseSplitVerifier &&
         klass->major_version() >= STACKMAP_ATTRIBUTE_MAJOR_VERSION) {
-        ClassVerifier split_verifier(
-          klass, message_buffer, message_buffer_len, THREAD);
-        split_verifier.verify_class(THREAD);
-        exception_name = split_verifier.result();
-      if (klass->major_version() < NOFAILOVER_MAJOR_VERSION &&
-          FailOverToOldVerifier && !HAS_PENDING_EXCEPTION &&
+      ClassVerifier split_verifier(klass, THREAD);
+      split_verifier.verify_class(THREAD);
+      exception_name = split_verifier.result();
+      if (can_failover && !HAS_PENDING_EXCEPTION &&
           (exception_name == vmSymbols::java_lang_VerifyError() ||
            exception_name == vmSymbols::java_lang_ClassFormatError())) {
-        if (TraceClassInitialization) {
+        if (TraceClassInitialization || VerboseVerification) {
           tty->print_cr(
             "Fail over class verification to old verifier for: %s", klassName);
         }
         exception_name = inference_verify(
           klass, message_buffer, message_buffer_len, THREAD);
       }
+      if (exception_name != NULL) {
+        exception_message = split_verifier.exception_message();
+      }
     } else {
       exception_name = inference_verify(
           klass, message_buffer, message_buffer_len, THREAD);
     }
 
-    if (TraceClassInitialization) {
+    if (TraceClassInitialization || VerboseVerification) {
       if (HAS_PENDING_EXCEPTION) {
         tty->print("Verification for %s has", klassName);
         tty->print_cr(" exception pending %s ",
@@ -173,7 +180,7 @@
       kls = kls->super();
     }
     message_buffer[message_buffer_len - 1] = '\0'; // just to be sure
-    THROW_MSG_(exception_name, message_buffer, false);
+    THROW_MSG_(exception_name, exception_message, false);
   }
 }
 
@@ -221,7 +228,7 @@
   }
 
   ResourceMark rm(THREAD);
-  if (ClassVerifier::_verify_verbose) {
+  if (VerboseVerification) {
     tty->print_cr("Verifying class %s with old format", klass->external_name());
   }
 
@@ -265,14 +272,252 @@
   }
 }
 
+TypeOrigin TypeOrigin::null() {
+  return TypeOrigin();
+}
+TypeOrigin TypeOrigin::local(u2 index, StackMapFrame* frame) {
+  assert(frame != NULL, "Must have a frame");
+  return TypeOrigin(CF_LOCALS, index, StackMapFrame::copy(frame),
+     frame->local_at(index));
+}
+TypeOrigin TypeOrigin::stack(u2 index, StackMapFrame* frame) {
+  assert(frame != NULL, "Must have a frame");
+  return TypeOrigin(CF_STACK, index, StackMapFrame::copy(frame),
+      frame->stack_at(index));
+}
+TypeOrigin TypeOrigin::sm_local(u2 index, StackMapFrame* frame) {
+  assert(frame != NULL, "Must have a frame");
+  return TypeOrigin(SM_LOCALS, index, StackMapFrame::copy(frame),
+      frame->local_at(index));
+}
+TypeOrigin TypeOrigin::sm_stack(u2 index, StackMapFrame* frame) {
+  assert(frame != NULL, "Must have a frame");
+  return TypeOrigin(SM_STACK, index, StackMapFrame::copy(frame),
+      frame->stack_at(index));
+}
+TypeOrigin TypeOrigin::bad_index(u2 index) {
+  return TypeOrigin(BAD_INDEX, index, NULL, VerificationType::bogus_type());
+}
+TypeOrigin TypeOrigin::cp(u2 index, VerificationType vt) {
+  return TypeOrigin(CONST_POOL, index, NULL, vt);
+}
+TypeOrigin TypeOrigin::signature(VerificationType vt) {
+  return TypeOrigin(SIG, 0, NULL, vt);
+}
+TypeOrigin TypeOrigin::implicit(VerificationType t) {
+  return TypeOrigin(IMPLICIT, 0, NULL, t);
+}
+TypeOrigin TypeOrigin::frame(StackMapFrame* frame) {
+  return TypeOrigin(FRAME_ONLY, 0, StackMapFrame::copy(frame),
+                    VerificationType::bogus_type());
+}
+
+void TypeOrigin::reset_frame() {
+  if (_frame != NULL) {
+    _frame->restore();
+  }
+}
+
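+// Print the type along with a note of where it came from, e.g.
+// "integer (current frame, stack[1])".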
+void TypeOrigin::details(outputStream* ss) const {
+  _type.print_on(ss);
+  switch (_origin) {
+    case CF_LOCALS:
+      ss->print(" (current frame, locals[%d])", _index);
+      break;
+    case CF_STACK:
+      ss->print(" (current frame, stack[%d])", _index);
+      break;
+    case SM_LOCALS:
+      ss->print(" (stack map, locals[%d])", _index);
+      break;
+    case SM_STACK:
+      ss->print(" (stack map, stack[%d])", _index);
+      break;
+    case CONST_POOL:
+      ss->print(" (constant pool %d)", _index);
+      break;
+    case SIG:
+      ss->print(" (from method signature)");
+      break;
+    case IMPLICIT:
+    case FRAME_ONLY:
+    case NONE:
+    default:
+      ;
+  }
+}
+
+#ifdef ASSERT
+void TypeOrigin::print_on(outputStream* str) const {
+  str->print("{%d,%d,%p:", _origin, _index, _frame);
+  if (_frame != NULL) {
+    _frame->print_on(str);
+  } else {
+    str->print("null");
+  }
+  str->print(",");
+  _type.print_on(str);
+  str->print("}");
+}
+#endif
+
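+// Print the multi-line "Exception Details:" section that is appended to
+// a VerifyError message: reason, location, current and stack map frames,
+// the method's bytecode, exception handlers, and stack map table.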
+void ErrorContext::details(outputStream* ss, methodOop method) const {
+  if (is_valid()) {
+    ss->print_cr("");
+    ss->print_cr("Exception Details:");
+    location_details(ss, method);
+    reason_details(ss);
+    frame_details(ss);
+    bytecode_details(ss, method);
+    handler_details(ss, method);
+    stackmap_details(ss, method);
+  }
+}
+
+void ErrorContext::reason_details(outputStream* ss) const {
+  streamIndentor si(ss);
+  ss->indent().print_cr("Reason:");
+  streamIndentor si2(ss);
+  ss->indent().print("");
+  switch (_fault) {
+    case INVALID_BYTECODE:
+      ss->print("Error exists in the bytecode");
+      break;
+    case WRONG_TYPE:
+      if (_expected.is_valid()) {
+        ss->print("Type ");
+        _type.details(ss);
+        ss->print(" is not assignable to ");
+        _expected.details(ss);
+      } else {
+        ss->print("Invalid type: ");
+        _type.details(ss);
+      }
+      break;
+    case FLAGS_MISMATCH:
+      if (_expected.is_valid()) {
+        ss->print("Current frame's flags are not assignable "
+                  "to stack map frame's.");
+      } else {
+        ss->print("Current frame's flags are invalid in this context.");
+      }
+      break;
+    case BAD_CP_INDEX:
+      ss->print("Constant pool index %d is invalid", _type.index());
+      break;
+    case BAD_LOCAL_INDEX:
+      ss->print("Local index %d is invalid", _type.index());
+      break;
+    case LOCALS_SIZE_MISMATCH:
+      ss->print("Current frame's local size doesn't match stackmap.");
+      break;
+    case STACK_SIZE_MISMATCH:
+      ss->print("Current frame's stack size doesn't match stackmap.");
+      break;
+    case STACK_OVERFLOW:
+      ss->print("Exceeded max stack size.");
+      break;
+    case STACK_UNDERFLOW:
+      ss->print("Attempt to pop empty stack.");
+      break;
+    case MISSING_STACKMAP:
+      ss->print("Expected stackmap frame at this location.");
+      break;
+    case BAD_STACKMAP:
+      ss->print("Invalid stackmap specification.");
+      break;
+    case UNKNOWN:
+    default:
+      ShouldNotReachHere();
+      ss->print_cr("Unknown");
+  }
+  ss->print_cr("");
+}
+
+void ErrorContext::location_details(outputStream* ss, methodOop method) const {
+  if (_bci != -1 && method != NULL) {
+    streamIndentor si(ss);
+    const char* bytecode_name = "<invalid>";
+    if (method->validate_bci_from_bcx(_bci) != -1) {
+      Bytecodes::Code code = Bytecodes::code_or_bp_at(method->bcp_from(_bci));
+      if (Bytecodes::is_defined(code)) {
+          bytecode_name = Bytecodes::name(code);
+      } else {
+          bytecode_name = "<illegal>";
+      }
+    }
+    instanceKlass* ik = instanceKlass::cast(method->method_holder());
+    ss->indent().print_cr("Location:");
+    streamIndentor si2(ss);
+    ss->indent().print_cr("%s.%s%s @%d: %s",
+        ik->name()->as_C_string(), method->name()->as_C_string(),
+        method->signature()->as_C_string(), _bci, bytecode_name);
+  }
+}
+
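+// Show the current frame and, when the error involves a stack map
+// mismatch, the expected stack map frame as well.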
+void ErrorContext::frame_details(outputStream* ss) const {
+  streamIndentor si(ss);
+  if (_type.is_valid() && _type.frame() != NULL) {
+    ss->indent().print_cr("Current Frame:");
+    streamIndentor si2(ss);
+    _type.frame()->print_on(ss);
+  }
+  if (_expected.is_valid() && _expected.frame() != NULL) {
+    ss->indent().print_cr("Stackmap Frame:");
+    streamIndentor si2(ss);
+    _expected.frame()->print_on(ss);
+  }
+}
+
+void ErrorContext::bytecode_details(outputStream* ss, methodOop method) const {
+  if (method != NULL) {
+    streamIndentor si(ss);
+    ss->indent().print_cr("Bytecode:");
+    streamIndentor si2(ss);
+    ss->print_data(method->code_base(), method->code_size(), false);
+  }
+}
+
+void ErrorContext::handler_details(outputStream* ss, methodOop method) const {
+  if (method != NULL) {
+    streamIndentor si(ss);
+    ExceptionTable table(method);
+    if (table.length() > 0) {
+      ss->indent().print_cr("Exception Handler Table:");
+      streamIndentor si2(ss);
+      for (int i = 0; i < table.length(); ++i) {
+        ss->indent().print_cr("bci [%d, %d] => handler: %d", table.start_pc(i),
+            table.end_pc(i), table.handler_pc(i));
+      }
+    }
+  }
+}
+
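+// Dump the method's StackMapTable attribute one frame per line, tracking
+// the running bytecode offset implied by each frame's offset delta.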
+void ErrorContext::stackmap_details(outputStream* ss, methodOop method) const {
+  if (method != NULL && method->has_stackmap_table()) {
+    streamIndentor si(ss);
+    ss->indent().print_cr("Stackmap Table:");
+    typeArrayOop data = method->stackmap_data();
+    stack_map_table* sm_table =
+        stack_map_table::at((address)data->byte_at_addr(0));
+    stack_map_frame* sm_frame = sm_table->entries();
+    streamIndentor si2(ss);
+    int current_offset = -1;
+    for (u2 i = 0; i < sm_table->number_of_entries(); ++i) {
+      ss->indent();
+      sm_frame->print_on(ss, current_offset);
+      ss->print_cr("");
+      current_offset += sm_frame->offset_delta();
+      sm_frame = sm_frame->next();
+    }
+  }
+}
+
 // Methods in ClassVerifier
 
-bool ClassVerifier::_verify_verbose = false;
-
 ClassVerifier::ClassVerifier(
-    instanceKlassHandle klass, char* msg, size_t msg_len, TRAPS)
-    : _thread(THREAD), _exception_type(NULL), _message(msg),
-      _message_buffer_len(msg_len), _klass(klass) {
+    instanceKlassHandle klass, TRAPS)
+    : _thread(THREAD), _exception_type(NULL), _message(NULL), _klass(klass) {
   _this_type = VerificationType::reference_type(klass->name());
   // Create list to hold symbols in reference area.
   _symbols = new GrowableArray<Symbol*>(100, 0, NULL);
@@ -290,8 +535,14 @@
   return VerificationType::reference_type(vmSymbols::java_lang_Object());
 }
 
+TypeOrigin ClassVerifier::ref_ctx(const char* sig, TRAPS) {
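+  // Build an implicit TypeOrigin from a bare field descriptor such as
+  // "[I" (int[]); used below when reporting array load/store type errors.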
+  VerificationType vt = VerificationType::reference_type(
+      create_temporary_symbol(sig, (int)strlen(sig), THREAD));
+  return TypeOrigin::implicit(vt);
+}
+
 void ClassVerifier::verify_class(TRAPS) {
-  if (_verify_verbose) {
+  if (VerboseVerification) {
     tty->print_cr("Verifying class %s with new format",
       _klass->external_name());
   }
@@ -312,7 +563,7 @@
     verify_method(methodHandle(THREAD, m), CHECK_VERIFY(this));
   }
 
-  if (_verify_verbose || TraceClassInitialization) {
+  if (VerboseVerification || TraceClassInitialization) {
     if (was_recursively_verified())
       tty->print_cr("Recursive verification detected for: %s",
           _klass->external_name());
@@ -321,13 +572,13 @@
 
 void ClassVerifier::verify_method(methodHandle m, TRAPS) {
   _method = m;   // initialize _method
-  if (_verify_verbose) {
+  if (VerboseVerification) {
     tty->print_cr("Verifying method %s", m->name_and_sig_as_C_string());
   }
 
   const char* bad_type_msg = "Bad type on operand stack in %s";
 
-  int32_t max_stack = m->max_stack();
+  int32_t max_stack = m->verifier_max_stack();
   int32_t max_locals = m->max_locals();
   constantPoolHandle cp(THREAD, m->constants());
 
@@ -368,8 +619,8 @@
   StackMapTable stackmap_table(&reader, &current_frame, max_locals, max_stack,
                                code_data, code_length, CHECK_VERIFY(this));
 
-  if (_verify_verbose) {
-    stackmap_table.print();
+  if (VerboseVerification) {
+    stackmap_table.print_on(tty);
   }
 
   RawBytecodeStream bcs(m);
@@ -388,6 +639,7 @@
 
     // Set current frame's offset to bci
     current_frame.set_offset(bci);
+    current_frame.set_mark();
 
     // Make sure every offset in the stackmap table points to the beginning
     // of an instruction. Match current_frame to the stackmap_table entry with
@@ -396,6 +648,7 @@
       stackmap_index, bci, &current_frame, &stackmap_table,
       no_control_flow, CHECK_VERIFY(this));
 
+
     bool this_uninit = false;  // Set to true when invokespecial <init> initialized 'this'
 
     // Merge with the next instruction
@@ -406,8 +659,8 @@
       VerificationType atype;
 
 #ifndef PRODUCT
-      if (_verify_verbose) {
-        current_frame.print();
+      if (VerboseVerification) {
+        current_frame.print_on(tty);
         tty->print_cr("offset = %d,  opcode = %s", bci, Bytecodes::name(opcode));
       }
 #endif
@@ -420,7 +673,10 @@
             opcode != Bytecodes::_lstore && opcode != Bytecodes::_fload  &&
             opcode != Bytecodes::_dload  && opcode != Bytecodes::_fstore &&
             opcode != Bytecodes::_dstore) {
-          verify_error(bci, "Bad wide instruction");
+          /* Unreachable?  RawBytecodeStream's raw_next() returns 'illegal'
+           * if it encounters a wide prefix on any opcode other than the
+           * ones listed above. */
+          verify_error(ErrorContext::bad_code(bci), "Bad wide instruction");
           return;
         }
       }
@@ -532,7 +788,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_int_array()) {
-            verify_error(bci, bad_type_msg, "iaload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[I", THREAD)),
+                bad_type_msg, "iaload");
             return;
           }
           current_frame.push_stack(
@@ -544,7 +802,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_bool_array() && !atype.is_byte_array()) {
-            verify_error(bci, bad_type_msg, "baload");
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "baload");
             return;
           }
           current_frame.push_stack(
@@ -556,7 +816,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_char_array()) {
-            verify_error(bci, bad_type_msg, "caload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[C", THREAD)),
+                bad_type_msg, "caload");
             return;
           }
           current_frame.push_stack(
@@ -568,7 +830,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_short_array()) {
-            verify_error(bci, bad_type_msg, "saload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[S", THREAD)),
+                bad_type_msg, "saload");
             return;
           }
           current_frame.push_stack(
@@ -580,7 +844,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_long_array()) {
-            verify_error(bci, bad_type_msg, "laload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[J", THREAD)),
+                bad_type_msg, "laload");
             return;
           }
           current_frame.push_stack_2(
@@ -593,7 +859,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_float_array()) {
-            verify_error(bci, bad_type_msg, "faload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[F", THREAD)),
+                bad_type_msg, "faload");
             return;
           }
           current_frame.push_stack(
@@ -605,7 +873,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_double_array()) {
-            verify_error(bci, bad_type_msg, "daload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[D", THREAD)),
+                bad_type_msg, "daload");
             return;
           }
           current_frame.push_stack_2(
@@ -618,7 +888,10 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_reference_array()) {
-            verify_error(bci, bad_type_msg, "aaload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(),
+                TypeOrigin::implicit(VerificationType::reference_check())),
+                bad_type_msg, "aaload");
             return;
           }
           if (atype.is_null()) {
@@ -689,7 +962,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_int_array()) {
-            verify_error(bci, bad_type_msg, "iastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[I", THREAD)),
+                bad_type_msg, "iastore");
             return;
           }
           no_control_flow = false; break;
@@ -701,7 +976,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_bool_array() && !atype.is_byte_array()) {
-            verify_error(bci, bad_type_msg, "bastore");
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "bastore");
             return;
           }
           no_control_flow = false; break;
@@ -713,7 +990,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_char_array()) {
-            verify_error(bci, bad_type_msg, "castore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[C", THREAD)),
+                bad_type_msg, "castore");
             return;
           }
           no_control_flow = false; break;
@@ -725,7 +1004,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_short_array()) {
-            verify_error(bci, bad_type_msg, "sastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[S", THREAD)),
+                bad_type_msg, "sastore");
             return;
           }
           no_control_flow = false; break;
@@ -738,7 +1019,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_long_array()) {
-            verify_error(bci, bad_type_msg, "lastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[J", THREAD)),
+                bad_type_msg, "lastore");
             return;
           }
           no_control_flow = false; break;
@@ -750,7 +1033,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_float_array()) {
-            verify_error(bci, bad_type_msg, "fastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[F", THREAD)),
+                bad_type_msg, "fastore");
             return;
           }
           no_control_flow = false; break;
@@ -763,7 +1048,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_double_array()) {
-            verify_error(bci, bad_type_msg, "dastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[D", THREAD)),
+                bad_type_msg, "dastore");
             return;
           }
           no_control_flow = false; break;
@@ -775,7 +1062,10 @@
             VerificationType::reference_check(), CHECK_VERIFY(this));
           // more type-checking is done at runtime
           if (!atype.is_reference_array()) {
-            verify_error(bci, bad_type_msg, "aastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(),
+                TypeOrigin::implicit(VerificationType::reference_check())),
+                bad_type_msg, "aastore");
             return;
           }
           // 4938384: relaxed constraint in JVMS 3rd edition.
@@ -793,7 +1083,11 @@
             current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "pop2");
+            /* Unreachable? Would need a category2_1st on TOS
+             * which does not appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "pop2");
             return;
           }
           no_control_flow = false; break;
@@ -825,7 +1119,10 @@
             type3 = current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup_x2");
+            /* Unreachable? Would need a category2_1st at stack depth 2 with
+             * a category1 on TOS which does not appear possible. */
+            verify_error(ErrorContext::bad_type(
+                bci, current_frame.stack_top_ctx()), bad_type_msg, "dup_x2");
             return;
           }
           current_frame.push_stack(type, CHECK_VERIFY(this));
@@ -843,7 +1140,11 @@
             type2 = current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup2");
+            /* Unreachable?  Would need a category2_1st on TOS which does not
+             * appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "dup2");
             return;
           }
           current_frame.push_stack(type2, CHECK_VERIFY(this));
@@ -858,11 +1159,15 @@
           if (type.is_category1()) {
             type2 = current_frame.pop_stack(
               VerificationType::category1_check(), CHECK_VERIFY(this));
-          } else if(type.is_category2_2nd()) {
-            type2 = current_frame.pop_stack
-              (VerificationType::category2_check(), CHECK_VERIFY(this));
+          } else if (type.is_category2_2nd()) {
+            type2 = current_frame.pop_stack(
+              VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup2_x1");
+            /* Unreachable?  Would need a category2_1st on TOS which does
+             * not appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "dup2_x1");
             return;
           }
           type3 = current_frame.pop_stack(
@@ -885,7 +1190,11 @@
             type2 = current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup2_x2");
+            /* Unreachable?  Would need a category2_1st on TOS which does
+             * not appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "dup2_x2");
             return;
           }
           type3 = current_frame.pop_stack(CHECK_VERIFY(this));
@@ -896,7 +1205,12 @@
             type4 = current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup2_x2");
+            /* Unreachable?  Would need a category2_1st on TOS after popping
+             * a long/double or two category 1's, which does not
+             * appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "dup2_x2");
             return;
           }
           current_frame.push_stack(type2, CHECK_VERIFY(this));
@@ -1176,43 +1490,50 @@
         case Bytecodes::_ireturn :
           type = current_frame.pop_stack(
             VerificationType::integer_type(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_lreturn :
           type2 = current_frame.pop_stack(
             VerificationType::long2_type(), CHECK_VERIFY(this));
           type = current_frame.pop_stack(
             VerificationType::long_type(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_freturn :
           type = current_frame.pop_stack(
             VerificationType::float_type(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_dreturn :
           type2 = current_frame.pop_stack(
             VerificationType::double2_type(),  CHECK_VERIFY(this));
           type = current_frame.pop_stack(
             VerificationType::double_type(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_areturn :
           type = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_return :
           if (return_type != VerificationType::bogus_type()) {
-            verify_error(bci, "Method expects no return value");
+            verify_error(ErrorContext::bad_code(bci),
+                         "Method expects a return value");
             return;
           }
           // Make sure "this" has been initialized if current method is an
           // <init>
           if (_method->name() == vmSymbols::object_initializer_name() &&
               current_frame.flag_this_uninit()) {
-            verify_error(bci,
-              "Constructor must call super() or this() before return");
+            verify_error(ErrorContext::bad_code(bci),
+                         "Constructor must call super() or this() "
+                         "before return");
             return;
           }
           no_control_flow = true; break;
@@ -1239,11 +1560,13 @@
         case Bytecodes::_new :
         {
           index = bcs.get_index_u2();
-          verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+          verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
           VerificationType new_class_type =
             cp_index_to_type(index, cp, CHECK_VERIFY(this));
           if (!new_class_type.is_object()) {
-            verify_error(bci, "Illegal new instruction");
+            verify_error(ErrorContext::bad_type(bci,
+                TypeOrigin::cp(index, new_class_type)),
+                "Illegal new instruction");
             return;
           }
           type = VerificationType::uninitialized_type(bci);
@@ -1258,13 +1581,15 @@
           no_control_flow = false; break;
         case Bytecodes::_anewarray :
           verify_anewarray(
-            bcs.get_index_u2(), cp, &current_frame, CHECK_VERIFY(this));
+            bci, bcs.get_index_u2(), cp, &current_frame, CHECK_VERIFY(this));
           no_control_flow = false; break;
         case Bytecodes::_arraylength :
           type = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!(type.is_null() || type.is_array())) {
-            verify_error(bci, bad_type_msg, "arraylength");
+            verify_error(ErrorContext::bad_type(
+                bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "arraylength");
           }
           current_frame.push_stack(
             VerificationType::integer_type(), CHECK_VERIFY(this));
@@ -1272,7 +1597,7 @@
         case Bytecodes::_checkcast :
         {
           index = bcs.get_index_u2();
-          verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+          verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
           current_frame.pop_stack(object_type(), CHECK_VERIFY(this));
           VerificationType klass_type = cp_index_to_type(
             index, cp, CHECK_VERIFY(this));
@@ -1281,7 +1606,7 @@
         }
         case Bytecodes::_instanceof : {
           index = bcs.get_index_u2();
-          verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+          verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
           current_frame.pop_stack(object_type(), CHECK_VERIFY(this));
           current_frame.push_stack(
             VerificationType::integer_type(), CHECK_VERIFY(this));
@@ -1296,17 +1621,18 @@
         {
           index = bcs.get_index_u2();
           u2 dim = *(bcs.bcp()+3);
-          verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+          verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
           VerificationType new_array_type =
             cp_index_to_type(index, cp, CHECK_VERIFY(this));
           if (!new_array_type.is_array()) {
-            verify_error(bci,
-              "Illegal constant pool index in multianewarray instruction");
+            verify_error(ErrorContext::bad_type(bci,
+                TypeOrigin::cp(index, new_array_type)),
+                "Illegal constant pool index in multianewarray instruction");
             return;
           }
           if (dim < 1 || new_array_type.dimensions() < dim) {
-            verify_error(bci,
-              "Illegal dimension in multianewarray instruction");
+            verify_error(ErrorContext::bad_code(bci),
+                "Illegal dimension in multianewarray instruction: %d", dim);
             return;
           }
           for (int i = 0; i < dim; i++) {
@@ -1324,7 +1650,8 @@
         default:
           // We only need to check the valid bytecodes in class file.
           // And jsr and ret are not in the new class file format in JDK1.5.
-          verify_error(bci, "Bad instruction");
+          verify_error(ErrorContext::bad_code(bci),
+              "Bad instruction: %02x", opcode);
           no_control_flow = false;
           return;
       }  // end switch
@@ -1340,7 +1667,8 @@
 
   // Make sure that control flow does not fall through end of the method
   if (!no_control_flow) {
-    verify_error(code_length, "Control flow falls through code end");
+    verify_error(ErrorContext::bad_code(code_length),
+        "Control flow falls through code end");
     return;
   }
 }
@@ -1359,7 +1687,7 @@
         code_data[bci] = BYTECODE_OFFSET;
       }
     } else {
-      verify_error(bcs.bci(), "Bad instruction");
+      verify_error(ErrorContext::bad_code(bcs.bci()), "Bad instruction");
       return NULL;
     }
   }
@@ -1368,47 +1696,50 @@
 }
 
 void ClassVerifier::verify_exception_handler_table(u4 code_length, char* code_data, int& min, int& max, TRAPS) {
-  typeArrayHandle exhandlers (THREAD, _method->exception_table());
+  ExceptionTable exhandlers(_method());
+  int exlength = exhandlers.length();
   constantPoolHandle cp (THREAD, _method->constants());
 
-  if (exhandlers() != NULL) {
-    for(int i = 0; i < exhandlers->length();) {
-      u2 start_pc = exhandlers->int_at(i++);
-      u2 end_pc = exhandlers->int_at(i++);
-      u2 handler_pc = exhandlers->int_at(i++);
-      if (start_pc >= code_length || code_data[start_pc] == 0) {
-        class_format_error("Illegal exception table start_pc %d", start_pc);
+  for (int i = 0; i < exlength; i++) {
+    // Reacquire the table in case a GC happened.
+    ExceptionTable exhandlers(_method());
+    u2 start_pc = exhandlers.start_pc(i);
+    u2 end_pc = exhandlers.end_pc(i);
+    u2 handler_pc = exhandlers.handler_pc(i);
+    if (start_pc >= code_length || code_data[start_pc] == 0) {
+      class_format_error("Illegal exception table start_pc %d", start_pc);
+      return;
+    }
+    if (end_pc != code_length) {   // special case: end_pc == code_length
+      if (end_pc > code_length || code_data[end_pc] == 0) {
+        class_format_error("Illegal exception table end_pc %d", end_pc);
         return;
       }
-      if (end_pc != code_length) {   // special case: end_pc == code_length
-        if (end_pc > code_length || code_data[end_pc] == 0) {
-          class_format_error("Illegal exception table end_pc %d", end_pc);
-          return;
-        }
-      }
-      if (handler_pc >= code_length || code_data[handler_pc] == 0) {
-        class_format_error("Illegal exception table handler_pc %d", handler_pc);
+    }
+    if (handler_pc >= code_length || code_data[handler_pc] == 0) {
+      class_format_error("Illegal exception table handler_pc %d", handler_pc);
+      return;
+    }
+    int catch_type_index = exhandlers.catch_type_index(i);
+    if (catch_type_index != 0) {
+      VerificationType catch_type = cp_index_to_type(
+        catch_type_index, cp, CHECK_VERIFY(this));
+      VerificationType throwable =
+        VerificationType::reference_type(vmSymbols::java_lang_Throwable());
+      bool is_subclass = throwable.is_assignable_from(
+        catch_type, this, CHECK_VERIFY(this));
+      if (!is_subclass) {
+        // 4286534: should throw VerifyError according to recent spec change
+        verify_error(ErrorContext::bad_type(handler_pc,
+            TypeOrigin::cp(catch_type_index, catch_type),
+            TypeOrigin::implicit(throwable)),
+            "Catch type is not a subclass "
+            "of Throwable in exception handler %d", handler_pc);
         return;
       }
-      int catch_type_index = exhandlers->int_at(i++);
-      if (catch_type_index != 0) {
-        VerificationType catch_type = cp_index_to_type(
-          catch_type_index, cp, CHECK_VERIFY(this));
-        VerificationType throwable =
-          VerificationType::reference_type(vmSymbols::java_lang_Throwable());
-        bool is_subclass = throwable.is_assignable_from(
-          catch_type, this, CHECK_VERIFY(this));
-        if (!is_subclass) {
-          // 4286534: should throw VerifyError according to recent spec change
-          verify_error(
-            "Catch type is not a subclass of Throwable in handler %d",
-            handler_pc);
-          return;
-        }
-      }
-      if (start_pc < min) min = start_pc;
-      if (end_pc > max) max = end_pc;
     }
+    if (start_pc < min) min = start_pc;
+    if (end_pc > max) max = end_pc;
   }
 }
 
@@ -1443,19 +1774,21 @@
   if (stackmap_index < stackmap_table->get_frame_count()) {
     u2 this_offset = stackmap_table->get_offset(stackmap_index);
     if (no_control_flow && this_offset > bci) {
-      verify_error(bci, "Expecting a stack map frame");
+      verify_error(ErrorContext::missing_stackmap(bci),
+                   "Expecting a stack map frame");
       return 0;
     }
     if (this_offset == bci) {
+      ErrorContext ctx;
       // See if current stack map can be assigned to the frame in table.
       // current_frame is the stackmap frame got from the last instruction.
       // If matched, current_frame will be updated by this method.
-      bool match = stackmap_table->match_stackmap(
+      bool matches = stackmap_table->match_stackmap(
         current_frame, this_offset, stackmap_index,
-        !no_control_flow, true, CHECK_VERIFY_(this, 0));
-      if (!match) {
+        !no_control_flow, true, &ctx, CHECK_VERIFY_(this, 0));
+      if (!matches) {
         // report type error
-        verify_error(bci, "Instruction type does not match stack map");
+        verify_error(ctx, "Instruction type does not match stack map");
         return 0;
       }
       stackmap_index++;
@@ -1465,7 +1798,7 @@
       return 0;
     }
   } else if (no_control_flow) {
-    verify_error(bci, "Expecting a stack map frame");
+    verify_error(ErrorContext::bad_code(bci), "Expecting a stack map frame");
     return 0;
   }
   return stackmap_index;
@@ -1474,51 +1807,54 @@
 void ClassVerifier::verify_exception_handler_targets(u2 bci, bool this_uninit, StackMapFrame* current_frame,
                                                      StackMapTable* stackmap_table, TRAPS) {
   constantPoolHandle cp (THREAD, _method->constants());
-  typeArrayHandle exhandlers (THREAD, _method->exception_table());
-  if (exhandlers() != NULL) {
-    for(int i = 0; i < exhandlers->length();) {
-      u2 start_pc = exhandlers->int_at(i++);
-      u2 end_pc = exhandlers->int_at(i++);
-      u2 handler_pc = exhandlers->int_at(i++);
-      int catch_type_index = exhandlers->int_at(i++);
-      if(bci >= start_pc && bci < end_pc) {
-        u1 flags = current_frame->flags();
-        if (this_uninit) {  flags |= FLAG_THIS_UNINIT; }
-        StackMapFrame* new_frame = current_frame->frame_in_exception_handler(flags);
-        if (catch_type_index != 0) {
-          // We know that this index refers to a subclass of Throwable
-          VerificationType catch_type = cp_index_to_type(
-            catch_type_index, cp, CHECK_VERIFY(this));
-          new_frame->push_stack(catch_type, CHECK_VERIFY(this));
-        } else {
-          VerificationType throwable =
-            VerificationType::reference_type(vmSymbols::java_lang_Throwable());
-          new_frame->push_stack(throwable, CHECK_VERIFY(this));
-        }
-        bool match = stackmap_table->match_stackmap(
-          new_frame, handler_pc, true, false, CHECK_VERIFY(this));
-        if (!match) {
-          verify_error(bci,
-            "Stack map does not match the one at exception handler %d",
-            handler_pc);
-          return;
-        }
+  ExceptionTable exhandlers(_method());
+  int exlength = exhandlers.length();
+  for (int i = 0; i < exlength; i++) {
+    // Reacquire the table in case a GC happened.
+    ExceptionTable exhandlers(_method());
+    u2 start_pc = exhandlers.start_pc(i);
+    u2 end_pc = exhandlers.end_pc(i);
+    u2 handler_pc = exhandlers.handler_pc(i);
+    int catch_type_index = exhandlers.catch_type_index(i);
+    if (bci >= start_pc && bci < end_pc) {
+      u1 flags = current_frame->flags();
+      if (this_uninit) {  flags |= FLAG_THIS_UNINIT; }
+      StackMapFrame* new_frame = current_frame->frame_in_exception_handler(flags);
+      if (catch_type_index != 0) {
+        // We know that this index refers to a subclass of Throwable
+        VerificationType catch_type = cp_index_to_type(
+          catch_type_index, cp, CHECK_VERIFY(this));
+        new_frame->push_stack(catch_type, CHECK_VERIFY(this));
+      } else {
+        VerificationType throwable =
+          VerificationType::reference_type(vmSymbols::java_lang_Throwable());
+        new_frame->push_stack(throwable, CHECK_VERIFY(this));
+      }
+      ErrorContext ctx;
+      bool matches = stackmap_table->match_stackmap(
+        new_frame, handler_pc, true, false, &ctx, CHECK_VERIFY(this));
+      if (!matches) {
+        verify_error(ctx, "Stack map does not match the one at "
+            "exception handler %d", handler_pc);
+        return;
       }
     }
   }
 }
 
-void ClassVerifier::verify_cp_index(constantPoolHandle cp, int index, TRAPS) {
+void ClassVerifier::verify_cp_index(
+    u2 bci, constantPoolHandle cp, int index, TRAPS) {
   int nconstants = cp->length();
   if ((index <= 0) || (index >= nconstants)) {
-    verify_error("Illegal constant pool index %d in class %s",
-      index, instanceKlass::cast(cp->pool_holder())->external_name());
+    verify_error(ErrorContext::bad_cp_index(bci, index),
+        "Illegal constant pool index %d in class %s",
+        index, instanceKlass::cast(cp->pool_holder())->external_name());
     return;
   }
 }
 
 void ClassVerifier::verify_cp_type(
-    int index, constantPoolHandle cp, unsigned int types, TRAPS) {
+    u2 bci, int index, constantPoolHandle cp, unsigned int types, TRAPS) {
 
   // In some situations, bytecode rewriting may occur while we're verifying.
   // In this case, a constant pool cache exists and some indices refer to that
@@ -1526,10 +1862,10 @@
   // We must check was_recursively_verified() before we get here.
   guarantee(cp->cache() == NULL, "not rewritten yet");
 
-  verify_cp_index(cp, index, CHECK_VERIFY(this));
+  verify_cp_index(bci, cp, index, CHECK_VERIFY(this));
   unsigned int tag = cp->tag_at(index).value();
   if ((types & (1 << tag)) == 0) {
-    verify_error(
+    verify_error(ErrorContext::bad_cp_index(bci, index),
       "Illegal type at constant pool entry %d in class %s",
       index, instanceKlass::cast(cp->pool_holder())->external_name());
     return;
@@ -1537,51 +1873,46 @@
 }
 
 void ClassVerifier::verify_cp_class_type(
-    int index, constantPoolHandle cp, TRAPS) {
-  verify_cp_index(cp, index, CHECK_VERIFY(this));
+    u2 bci, int index, constantPoolHandle cp, TRAPS) {
+  verify_cp_index(bci, cp, index, CHECK_VERIFY(this));
   constantTag tag = cp->tag_at(index);
   if (!tag.is_klass() && !tag.is_unresolved_klass()) {
-    verify_error("Illegal type at constant pool entry %d in class %s",
-      index, instanceKlass::cast(cp->pool_holder())->external_name());
+    verify_error(ErrorContext::bad_cp_index(bci, index),
+        "Illegal type at constant pool entry %d in class %s",
+        index, instanceKlass::cast(cp->pool_holder())->external_name());
     return;
   }
 }
 
-void ClassVerifier::format_error_message(
-    const char* fmt, int offset, va_list va) {
-  ResourceMark rm(_thread);
-  stringStream message(_message, _message_buffer_len);
-  message.vprint(fmt, va);
-  if (!_method.is_null()) {
-    message.print(" in method %s", _method->name_and_sig_as_C_string());
-  }
-  if (offset != -1) {
-    message.print(" at offset %d", offset);
-  }
-}
+void ClassVerifier::verify_error(ErrorContext ctx, const char* msg, ...) {
+  stringStream ss;
 
-void ClassVerifier::verify_error(u2 offset, const char* fmt, ...) {
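+  // current_frame was marked (set_mark()) before the failing instruction
+  // was verified; restore the captured frames so the printed context shows
+  // the state at the point the instruction was reached.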
+  ctx.reset_frames();
   _exception_type = vmSymbols::java_lang_VerifyError();
+  _error_context = ctx;
   va_list va;
-  va_start(va, fmt);
-  format_error_message(fmt, offset, va);
+  va_start(va, msg);
+  ss.vprint(msg, va);
   va_end(va);
-}
-
-void ClassVerifier::verify_error(const char* fmt, ...) {
-  _exception_type = vmSymbols::java_lang_VerifyError();
-  va_list va;
-  va_start(va, fmt);
-  format_error_message(fmt, -1, va);
-  va_end(va);
+  _message = ss.as_string();
+#ifdef ASSERT
+  ResourceMark rm;
+  const char* exception_name = _exception_type->as_C_string();
+  Exceptions::debug_check_abort(exception_name, NULL);
+#endif // ASSERT
 }
 
 void ClassVerifier::class_format_error(const char* msg, ...) {
+  stringStream ss;
   _exception_type = vmSymbols::java_lang_ClassFormatError();
   va_list va;
   va_start(va, msg);
-  format_error_message(msg, -1, va);
+  ss.vprint(msg, va);
   va_end(va);
+  if (!_method.is_null()) {
+    ss.print(" in method %s", _method->name_and_sig_as_C_string());
+  }
+  _message = ss.as_string();
 }
 
 klassOop ClassVerifier::load_class(Symbol* name, TRAPS) {
@@ -1617,7 +1948,7 @@
     }
   } else {
     klassOop member_klass = target_instance->find_field(field_name, field_sig, &fd);
-    if(member_klass != NULL && fd.is_protected()) {
+    if (member_klass != NULL && fd.is_protected()) {
       if (!this_class->is_same_class_package(member_klass)) {
         return true;
       }
@@ -1627,9 +1958,9 @@
 }
 
 void ClassVerifier::verify_ldc(
-    int opcode, u2 index, StackMapFrame *current_frame,
-     constantPoolHandle cp, u2 bci, TRAPS) {
-  verify_cp_index(cp, index, CHECK_VERIFY(this));
+    int opcode, u2 index, StackMapFrame* current_frame,
+    constantPoolHandle cp, u2 bci, TRAPS) {
+  verify_cp_index(bci, cp, index, CHECK_VERIFY(this));
   constantTag tag = cp->tag_at(index);
   unsigned int types;
   if (opcode == Bytecodes::_ldc || opcode == Bytecodes::_ldc_w) {
@@ -1639,12 +1970,12 @@
             | (1 << JVM_CONSTANT_MethodHandle) | (1 << JVM_CONSTANT_MethodType);
       // Note:  The class file parser already verified the legality of
       // MethodHandle and MethodType constants.
-      verify_cp_type(index, cp, types, CHECK_VERIFY(this));
+      verify_cp_type(bci, index, cp, types, CHECK_VERIFY(this));
     }
   } else {
     assert(opcode == Bytecodes::_ldc2_w, "must be ldc2_w");
     types = (1 << JVM_CONSTANT_Double) | (1 << JVM_CONSTANT_Long);
-    verify_cp_type(index, cp, types, CHECK_VERIFY(this));
+    verify_cp_type(bci, index, cp, types, CHECK_VERIFY(this));
   }
   if (tag.is_string() && cp->is_pseudo_string_at(index)) {
     current_frame->push_stack(object_type(), CHECK_VERIFY(this));
@@ -1679,7 +2010,9 @@
       VerificationType::reference_type(
         vmSymbols::java_lang_invoke_MethodType()), CHECK_VERIFY(this));
   } else {
-    verify_error(bci, "Invalid index in ldc");
+    /* Unreachable? verify_cp_type has already validated the cp type. */
+    verify_error(
+        ErrorContext::bad_cp_index(bci, index), "Invalid index in ldc");
     return;
   }
 }
@@ -1695,7 +2028,8 @@
   u2 padding_offset = 1;
   while ((bcp + padding_offset) < aligned_bcp) {
     if(*(bcp + padding_offset) != 0) {
-      verify_error(bci, "Nonzero padding byte in lookswitch or tableswitch");
+      verify_error(ErrorContext::bad_code(bci),
+                   "Nonzero padding byte in lookswitch or tableswitch");
       return;
     }
     padding_offset++;
@@ -1708,20 +2042,21 @@
     jint low = (jint)Bytes::get_Java_u4(aligned_bcp + jintSize);
     jint high = (jint)Bytes::get_Java_u4(aligned_bcp + 2*jintSize);
     if (low > high) {
-      verify_error(bci,
-        "low must be less than or equal to high in tableswitch");
+      verify_error(ErrorContext::bad_code(bci),
+          "low must be less than or equal to high in tableswitch");
       return;
     }
     keys = high - low + 1;
     if (keys < 0) {
-      verify_error(bci, "too many keys in tableswitch");
+      verify_error(ErrorContext::bad_code(bci), "too many keys in tableswitch");
       return;
     }
     delta = 1;
   } else {
     keys = (int)Bytes::get_Java_u4(aligned_bcp + jintSize);
     if (keys < 0) {
-      verify_error(bci, "number of keys in lookupswitch less than 0");
+      verify_error(ErrorContext::bad_code(bci),
+                   "number of keys in lookupswitch less than 0");
       return;
     }
     delta = 2;
@@ -1730,7 +2065,8 @@
       jint this_key = Bytes::get_Java_u4(aligned_bcp + (2+2*i)*jintSize);
       jint next_key = Bytes::get_Java_u4(aligned_bcp + (2+2*i+2)*jintSize);
       if (this_key >= next_key) {
-        verify_error(bci, "Bad lookupswitch instruction");
+        verify_error(ErrorContext::bad_code(bci),
+                     "Bad lookupswitch instruction");
         return;
       }
     }
@@ -1765,7 +2101,8 @@
                                               constantPoolHandle cp,
                                               TRAPS) {
   u2 index = bcs->get_index_u2();
-  verify_cp_type(index, cp, 1 << JVM_CONSTANT_Fieldref, CHECK_VERIFY(this));
+  verify_cp_type(bcs->bci(), index, cp,
+      1 << JVM_CONSTANT_Fieldref, CHECK_VERIFY(this));
 
   // Get field name and signature
   Symbol* field_name = cp->name_ref_at(index);
@@ -1782,9 +2119,11 @@
   VerificationType ref_class_type = cp_ref_index_to_type(
     index, cp, CHECK_VERIFY(this));
   if (!ref_class_type.is_object()) {
-    verify_error(
-      "Expecting reference to class in class %s at constant pool index %d",
-      _klass->external_name(), index);
+    /* Unreachable?  Class file parser verifies Fieldref contents */
+    verify_error(ErrorContext::bad_type(bcs->bci(),
+        TypeOrigin::cp(index, ref_class_type)),
+        "Expecting reference to class in class %s at constant pool index %d",
+        _klass->external_name(), index);
     return;
   }
   VerificationType target_class_type = ref_class_type;
@@ -1842,7 +2181,10 @@
       is_assignable = target_class_type.is_assignable_from(
         stack_object_type, this, CHECK_VERIFY(this));
       if (!is_assignable) {
-        verify_error(bci, "Bad type on operand stack in putfield");
+        verify_error(ErrorContext::bad_type(bci,
+            current_frame->stack_top_ctx(),
+            TypeOrigin::cp(index, target_class_type)),
+            "Bad type on operand stack in putfield");
         return;
       }
     }
@@ -1866,7 +2208,10 @@
         is_assignable = current_type().is_assignable_from(
           stack_object_type, this, CHECK_VERIFY(this));
         if (!is_assignable) {
-          verify_error(bci, "Bad access to protected data in getfield");
+          verify_error(ErrorContext::bad_type(bci,
+              current_frame->stack_top_ctx(),
+              TypeOrigin::implicit(current_type())),
+              "Bad access to protected data in getfield");
           return;
         }
       }
@@ -1877,7 +2222,7 @@
 }
 
 void ClassVerifier::verify_invoke_init(
-    RawBytecodeStream* bcs, VerificationType ref_class_type,
+    RawBytecodeStream* bcs, u2 ref_class_index, VerificationType ref_class_type,
     StackMapFrame* current_frame, u4 code_length, bool *this_uninit,
     constantPoolHandle cp, TRAPS) {
   u2 bci = bcs->bci();
@@ -1888,7 +2233,10 @@
     klassOop superk = current_class()->super();
     if (ref_class_type.name() != current_class()->name() &&
         ref_class_type.name() != superk->klass_part()->name()) {
-      verify_error(bci, "Bad <init> method call");
+      verify_error(ErrorContext::bad_type(bci,
+          TypeOrigin::implicit(ref_class_type),
+          TypeOrigin::implicit(current_type())),
+          "Bad <init> method call");
       return;
     }
     current_frame->initialize_object(type, current_type());
@@ -1897,17 +2245,23 @@
     u2 new_offset = type.bci();
     address new_bcp = bcs->bcp() - bci + new_offset;
     if (new_offset > (code_length - 3) || (*new_bcp) != Bytecodes::_new) {
-      verify_error(new_offset, "Expecting new instruction");
+      /* Unreachable?  Stack map parsing ensures the type is valid and
+       * that 'new' instructions have a valid BCI. */
+      verify_error(ErrorContext::bad_code(new_offset),
+                   "Expecting new instruction");
       return;
     }
     u2 new_class_index = Bytes::get_Java_u2(new_bcp + 1);
-    verify_cp_class_type(new_class_index, cp, CHECK_VERIFY(this));
+    verify_cp_class_type(bci, new_class_index, cp, CHECK_VERIFY(this));
 
     // The method must be an <init> method of the indicated class
     VerificationType new_class_type = cp_index_to_type(
       new_class_index, cp, CHECK_VERIFY(this));
     if (!new_class_type.equals(ref_class_type)) {
-      verify_error(bci, "Call to wrong <init> method");
+      verify_error(ErrorContext::bad_type(bci,
+          TypeOrigin::cp(new_class_index, new_class_type),
+          TypeOrigin::cp(ref_class_index, ref_class_type)),
+          "Call to wrong <init> method");
       return;
     }
     // According to the VM spec, if the referent class is a superclass of the
@@ -1926,14 +2280,18 @@
         bool assignable = current_type().is_assignable_from(
           objectref_type, this, CHECK_VERIFY(this));
         if (!assignable) {
-          verify_error(bci, "Bad access to protected <init> method");
+          verify_error(ErrorContext::bad_type(bci,
+              TypeOrigin::cp(new_class_index, objectref_type),
+              TypeOrigin::implicit(current_type())),
+              "Bad access to protected <init> method");
           return;
         }
       }
     }
     current_frame->initialize_object(type, new_class_type);
   } else {
-    verify_error(bci, "Bad operand type when invoking <init>");
+    verify_error(ErrorContext::bad_type(bci, current_frame->stack_top_ctx()),
+        "Bad operand type when invoking <init>");
     return;
   }
 }
@@ -1950,7 +2308,7 @@
                       : opcode == Bytecodes::_invokedynamic
                                 ? 1 << JVM_CONSTANT_InvokeDynamic
                                 : 1 << JVM_CONSTANT_Methodref);
-  verify_cp_type(index, cp, types, CHECK_VERIFY(this));
+  verify_cp_type(bcs->bci(), index, cp, types, CHECK_VERIFY(this));
 
   // Get method name and signature
   Symbol* method_name = cp->name_ref_at(index);
@@ -2027,11 +2385,13 @@
     // the difference between the size of the operand stack before and after the instruction
     // executes.
     if (*(bcp+3) != (nargs+1)) {
-      verify_error(bci, "Inconsistent args count operand in invokeinterface");
+      verify_error(ErrorContext::bad_code(bci),
+          "Inconsistent args count operand in invokeinterface");
       return;
     }
     if (*(bcp+4) != 0) {
-      verify_error(bci, "Fourth operand byte of invokeinterface must be zero");
+      verify_error(ErrorContext::bad_code(bci),
+          "Fourth operand byte of invokeinterface must be zero");
       return;
     }
   }
@@ -2039,7 +2399,8 @@
   if (opcode == Bytecodes::_invokedynamic) {
     address bcp = bcs->bcp();
     if (*(bcp+3) != 0 || *(bcp+4) != 0) {
-      verify_error(bci, "Third and fourth operand bytes of invokedynamic must be zero");
+      verify_error(ErrorContext::bad_code(bci),
+          "Third and fourth operand bytes of invokedynamic must be zero");
       return;
     }
   }
@@ -2048,7 +2409,8 @@
     // Make sure <init> can only be invoked by invokespecial
     if (opcode != Bytecodes::_invokespecial ||
         method_name != vmSymbols::object_initializer_name()) {
-      verify_error(bci, "Illegal call to internal method");
+      verify_error(ErrorContext::bad_code(bci),
+          "Illegal call to internal method");
       return;
     }
   } else if (opcode == Bytecodes::_invokespecial
@@ -2058,7 +2420,8 @@
     bool subtype = ref_class_type.is_assignable_from(
       current_type(), this, CHECK_VERIFY(this));
     if (!subtype) {
-      verify_error(bci, "Bad invokespecial instruction: "
+      verify_error(ErrorContext::bad_code(bci),
+          "Bad invokespecial instruction: "
           "current class isn't assignable to reference class.");
        return;
     }
@@ -2071,7 +2434,7 @@
   if (opcode != Bytecodes::_invokestatic &&
       opcode != Bytecodes::_invokedynamic) {
     if (method_name == vmSymbols::object_initializer_name()) {  // <init> method
-      verify_invoke_init(bcs, ref_class_type, current_frame,
+      verify_invoke_init(bcs, index, ref_class_type, current_frame,
         code_length, this_uninit, cp, CHECK_VERIFY(this));
     } else {   // other methods
       // Ensures that target class is assignable to method class.
@@ -2101,8 +2464,10 @@
                   // Special case: arrays pretend to implement public Object
                   // clone().
                 } else {
-                  verify_error(bci,
-                    "Bad access to protected data in invokevirtual");
+                  verify_error(ErrorContext::bad_type(bci,
+                      current_frame->stack_top_ctx(),
+                      TypeOrigin::implicit(current_type())),
+                      "Bad access to protected data in invokevirtual");
                   return;
                 }
               }
@@ -2119,7 +2484,10 @@
   if (sig_stream.type() != T_VOID) {
     if (method_name == vmSymbols::object_initializer_name()) {
       // <init> method must have a void return type
-      verify_error(bci, "Return type must be void in <init> method");
+      /* Unreachable?  The class file parser verifies that methods whose
+       * names begin with '<' have a void return type. */
+      verify_error(ErrorContext::bad_code(bci),
+          "Return type must be void in <init> method");
       return;
     }
     VerificationType return_type[2];
@@ -2137,7 +2505,7 @@
     NULL, NULL, NULL, NULL, "[Z", "[C", "[F", "[D", "[B", "[S", "[I", "[J",
   };
   if (index < T_BOOLEAN || index > T_LONG) {
-    verify_error(bci, "Illegal newarray instruction");
+    verify_error(ErrorContext::bad_code(bci), "Illegal newarray instruction");
     return VerificationType::bogus_type();
   }
 
@@ -2148,8 +2516,9 @@
 }
 
 void ClassVerifier::verify_anewarray(
-    u2 index, constantPoolHandle cp, StackMapFrame* current_frame, TRAPS) {
-  verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+    u2 bci, u2 index, constantPoolHandle cp,
+    StackMapFrame* current_frame, TRAPS) {
+  verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
   current_frame->pop_stack(
     VerificationType::integer_type(), CHECK_VERIFY(this));
 
@@ -2262,14 +2631,19 @@
 }
 
 void ClassVerifier::verify_return_value(
-    VerificationType return_type, VerificationType type, u2 bci, TRAPS) {
+    VerificationType return_type, VerificationType type, u2 bci,
+    StackMapFrame* current_frame, TRAPS) {
   if (return_type == VerificationType::bogus_type()) {
-    verify_error(bci, "Method expects a return value");
+    verify_error(ErrorContext::bad_type(bci,
+        current_frame->stack_top_ctx(), TypeOrigin::signature(return_type)),
+        "Method expects a return value");
     return;
   }
   bool match = return_type.is_assignable_from(type, this, CHECK_VERIFY(this));
   if (!match) {
-    verify_error(bci, "Bad return type");
+    verify_error(ErrorContext::bad_type(bci,
+        current_frame->stack_top_ctx(), TypeOrigin::signature(return_type)),
+        "Bad return type");
     return;
   }
 }
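
The verify_cp_type() call in the invoke-verification hunk above packs the set of acceptable constant-pool tags into a single unsigned int, one bit per tag (1 << JVM_CONSTANT_Methodref, and so on), so the tag check becomes a single AND. A minimal stand-alone sketch of the idiom, using the tag numbers from the class-file specification but otherwise invented names:

#include <cassert>

// Class-file constant-pool tags (values from the JVM specification).
enum CpTag { TAG_METHODREF = 10, TAG_INTERFACE_METHODREF = 11, TAG_INVOKEDYNAMIC = 18 };

// Membership test: is this tag in the packed set of acceptable tags?
static bool tag_acceptable(unsigned int allowed, CpTag tag) {
  return (allowed & (1u << tag)) != 0;
}

int main() {
  // invokeinterface accepts only InterfaceMethodref entries:
  unsigned int types = 1u << TAG_INTERFACE_METHODREF;
  assert(tag_acceptable(types, TAG_INTERFACE_METHODREF));
  assert(!tag_acceptable(types, TAG_METHODREF));
  // invokedynamic gets its own single-bit set:
  assert(tag_acceptable(1u << TAG_INVOKEDYNAMIC, TAG_INVOKEDYNAMIC));
  return 0;
}
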
--- a/src/share/vm/classfile/verifier.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/verifier.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -88,18 +88,178 @@
 #define CHECK_VERIFY_(verifier, result) \
   CHECK_(result)); if ((verifier)->has_error()) return (result); (0
 
+class TypeOrigin VALUE_OBJ_CLASS_SPEC {
+ private:
+  typedef enum {
+    CF_LOCALS,  // Comes from the current frame locals
+    CF_STACK,   // Comes from the current frame expression stack
+    SM_LOCALS,  // Comes from stackmap locals
+    SM_STACK,   // Comes from stackmap expression stack
+    CONST_POOL, // Comes from the constant pool
+    SIG,        // Comes from method signature
+    IMPLICIT,   // Comes implicitly from code or context
+    BAD_INDEX,  // No type, but the index is bad
+    FRAME_ONLY, // No type, context just contains the frame
+    NONE
+  } Origin;
+
+  Origin _origin;
+  u2 _index;              // local, stack, or constant pool index
+  StackMapFrame* _frame;  // source frame if CF or SM
+  VerificationType _type; // The actual type
+
+  TypeOrigin(
+      Origin origin, u2 index, StackMapFrame* frame, VerificationType type)
+      : _origin(origin), _index(index), _frame(frame), _type(type) {}
+
+ public:
+  TypeOrigin() : _origin(NONE), _index(0), _frame(NULL) {}
+
+  static TypeOrigin null();
+  static TypeOrigin local(u2 index, StackMapFrame* frame);
+  static TypeOrigin stack(u2 index, StackMapFrame* frame);
+  static TypeOrigin sm_local(u2 index, StackMapFrame* frame);
+  static TypeOrigin sm_stack(u2 index, StackMapFrame* frame);
+  static TypeOrigin cp(u2 index, VerificationType vt);
+  static TypeOrigin signature(VerificationType vt);
+  static TypeOrigin bad_index(u2 index);
+  static TypeOrigin implicit(VerificationType t);
+  static TypeOrigin frame(StackMapFrame* frame);
+
+  void reset_frame();
+  void details(outputStream* ss) const;
+  void print_frame(outputStream* ss) const;
+  const StackMapFrame* frame() const { return _frame; }
+  bool is_valid() const { return _origin != NONE; }
+  u2 index() const { return _index; }
+
+#ifdef ASSERT
+  void print_on(outputStream* str) const;
+#endif
+};
+
+class ErrorContext VALUE_OBJ_CLASS_SPEC {
+ private:
+  typedef enum {
+    INVALID_BYTECODE,     // There was a problem with the bytecode
+    WRONG_TYPE,           // Type value was not as expected
+    FLAGS_MISMATCH,       // Frame flags are not assignable
+    BAD_CP_INDEX,         // Invalid constant pool index
+    BAD_LOCAL_INDEX,      // Invalid local index
+    LOCALS_SIZE_MISMATCH, // Frames have differing local counts
+    STACK_SIZE_MISMATCH,  // Frames have different stack sizes
+    STACK_OVERFLOW,       // Attempt to push onto a full expression stack
+    STACK_UNDERFLOW,      // Attempt to pop an empty expression stack
+    MISSING_STACKMAP,     // No stackmap for this location, though one is required
+    BAD_STACKMAP,         // Format error in stackmap
+    NO_FAULT,             // No error
+    UNKNOWN
+  } FaultType;
+
+  int _bci;
+  FaultType _fault;
+  TypeOrigin _type;
+  TypeOrigin _expected;
+
+  ErrorContext(int bci, FaultType fault) :
+      _bci(bci), _fault(fault)  {}
+  ErrorContext(int bci, FaultType fault, TypeOrigin type) :
+      _bci(bci), _fault(fault), _type(type)  {}
+  ErrorContext(int bci, FaultType fault, TypeOrigin type, TypeOrigin exp) :
+      _bci(bci), _fault(fault), _type(type), _expected(exp)  {}
+
+ public:
+  ErrorContext() : _bci(-1), _fault(NO_FAULT) {}
+
+  static ErrorContext bad_code(u2 bci) {
+    return ErrorContext(bci, INVALID_BYTECODE);
+  }
+  static ErrorContext bad_type(u2 bci, TypeOrigin type) {
+    return ErrorContext(bci, WRONG_TYPE, type);
+  }
+  static ErrorContext bad_type(u2 bci, TypeOrigin type, TypeOrigin exp) {
+    return ErrorContext(bci, WRONG_TYPE, type, exp);
+  }
+  static ErrorContext bad_flags(u2 bci, StackMapFrame* frame) {
+    return ErrorContext(bci, FLAGS_MISMATCH, TypeOrigin::frame(frame));
+  }
+  static ErrorContext bad_flags(u2 bci, StackMapFrame* cur, StackMapFrame* sm) {
+    return ErrorContext(bci, FLAGS_MISMATCH,
+                        TypeOrigin::frame(cur), TypeOrigin::frame(sm));
+  }
+  static ErrorContext bad_cp_index(u2 bci, u2 index) {
+    return ErrorContext(bci, BAD_CP_INDEX, TypeOrigin::bad_index(index));
+  }
+  static ErrorContext bad_local_index(u2 bci, u2 index) {
+    return ErrorContext(bci, BAD_LOCAL_INDEX, TypeOrigin::bad_index(index));
+  }
+  static ErrorContext locals_size_mismatch(
+      u2 bci, StackMapFrame* frame0, StackMapFrame* frame1) {
+    return ErrorContext(bci, LOCALS_SIZE_MISMATCH,
+        TypeOrigin::frame(frame0), TypeOrigin::frame(frame1));
+  }
+  static ErrorContext stack_size_mismatch(
+      u2 bci, StackMapFrame* frame0, StackMapFrame* frame1) {
+    return ErrorContext(bci, STACK_SIZE_MISMATCH,
+        TypeOrigin::frame(frame0), TypeOrigin::frame(frame1));
+  }
+  static ErrorContext stack_overflow(u2 bci, StackMapFrame* frame) {
+    return ErrorContext(bci, STACK_OVERFLOW, TypeOrigin::frame(frame));
+  }
+  static ErrorContext stack_underflow(u2 bci, StackMapFrame* frame) {
+    return ErrorContext(bci, STACK_UNDERFLOW, TypeOrigin::frame(frame));
+  }
+  static ErrorContext missing_stackmap(u2 bci) {
+    return ErrorContext(bci, MISSING_STACKMAP);
+  }
+  static ErrorContext bad_stackmap(int index, StackMapFrame* frame) {
+    return ErrorContext(0, BAD_STACKMAP, TypeOrigin::frame(frame));
+  }
+
+  bool is_valid() const { return _fault != NO_FAULT; }
+  int bci() const { return _bci; }
+
+  void reset_frames() {
+    _type.reset_frame();
+    _expected.reset_frame();
+  }
+
+  void details(outputStream* ss, methodOop method) const;
+
+#ifdef ASSERT
+  void print_on(outputStream* str) const {
+    str->print("error_context(%d, %d,", _bci, _fault);
+    _type.print_on(str);
+    str->print(",");
+    _expected.print_on(str);
+    str->print(")");
+  }
+#endif
+
+ private:
+  void location_details(outputStream* ss, methodOop method) const;
+  void reason_details(outputStream* ss) const;
+  void frame_details(outputStream* ss) const;
+  void bytecode_details(outputStream* ss, methodOop method) const;
+  void handler_details(outputStream* ss, methodOop method) const;
+  void stackmap_details(outputStream* ss, methodOop method) const;
+};
+
 // A new instance of this class is created for each class being verified
 class ClassVerifier : public StackObj {
  private:
   Thread* _thread;
+  GrowableArray<Symbol*>* _symbols;  // keep a list of symbols created
+
   Symbol* _exception_type;
   char* _message;
-  size_t _message_buffer_len;
-  GrowableArray<Symbol*>* _symbols;  // keep a list of symbols created
+
+  ErrorContext _error_context;  // contains information about an error
 
   void verify_method(methodHandle method, TRAPS);
   char* generate_code_data(methodHandle m, u4 code_length, TRAPS);
-  void verify_exception_handler_table(u4 code_length, char* code_data, int& min, int& max, TRAPS);
+  void verify_exception_handler_table(u4 code_length, char* code_data,
+                                      int& min, int& max, TRAPS);
   void verify_local_variable_table(u4 code_length, char* code_data, TRAPS);
 
   VerificationType cp_ref_index_to_type(
@@ -111,10 +271,10 @@
     instanceKlassHandle this_class, klassOop target_class,
     Symbol* field_name, Symbol* field_sig, bool is_method);
 
-  void verify_cp_index(constantPoolHandle cp, int index, TRAPS);
-  void verify_cp_type(
-    int index, constantPoolHandle cp, unsigned int types, TRAPS);
-  void verify_cp_class_type(int index, constantPoolHandle cp, TRAPS);
+  void verify_cp_index(u2 bci, constantPoolHandle cp, int index, TRAPS);
+  void verify_cp_type(u2 bci, int index, constantPoolHandle cp,
+      unsigned int types, TRAPS);
+  void verify_cp_class_type(u2 bci, int index, constantPoolHandle cp, TRAPS);
 
   u2 verify_stackmap_table(
     u2 stackmap_index, u2 bci, StackMapFrame* current_frame,
@@ -137,7 +297,7 @@
     constantPoolHandle cp, TRAPS);
 
   void verify_invoke_init(
-    RawBytecodeStream* bcs, VerificationType ref_class_type,
+    RawBytecodeStream* bcs, u2 ref_index, VerificationType ref_class_type,
     StackMapFrame* current_frame, u4 code_length, bool* this_uninit,
     constantPoolHandle cp, TRAPS);
 
@@ -147,10 +307,11 @@
     constantPoolHandle cp, TRAPS);
 
   VerificationType get_newarray_type(u2 index, u2 bci, TRAPS);
-  void verify_anewarray(
-    u2 index, constantPoolHandle cp, StackMapFrame* current_frame, TRAPS);
+  void verify_anewarray(u2 bci, u2 index, constantPoolHandle cp,
+      StackMapFrame* current_frame, TRAPS);
   void verify_return_value(
-    VerificationType return_type, VerificationType type, u2 offset, TRAPS);
+      VerificationType return_type, VerificationType type, u2 offset,
+      StackMapFrame* current_frame, TRAPS);
 
   void verify_iload (u2 index, StackMapFrame* current_frame, TRAPS);
   void verify_lload (u2 index, StackMapFrame* current_frame, TRAPS);
@@ -189,7 +350,7 @@
   };
 
   // constructor
-  ClassVerifier(instanceKlassHandle klass, char* msg, size_t msg_len, TRAPS);
+  ClassVerifier(instanceKlassHandle klass, TRAPS);
 
   // destructor
   ~ClassVerifier();
@@ -207,13 +368,17 @@
   // Return status modes
   Symbol* result() const { return _exception_type; }
   bool has_error() const { return result() != NULL; }
+  char* exception_message() {
+    stringStream ss;
+    ss.print(_message);
+    _error_context.details(&ss, _method());
+    return ss.as_string();
+  }
 
   // Called when verify or class format errors are encountered.
   // May throw an exception based upon the mode.
-  void verify_error(u2 offset, const char* fmt, ...);
-  void verify_error(const char* fmt, ...);
+  void verify_error(ErrorContext ctx, const char* fmt, ...);
   void class_format_error(const char* fmt, ...);
-  void format_error_message(const char* fmt, int offset, va_list args);
 
   klassOop load_class(Symbol* name, TRAPS);
 
@@ -228,10 +393,11 @@
  // their reference counts need to be decremented when the verifier object
   // goes out of scope.  Since these symbols escape the scope in which they're
   // created, we can't use a TempNewSymbol.
-  Symbol* create_temporary_symbol(const Symbol* s, int begin, int end, TRAPS);
+  Symbol* create_temporary_symbol(
+      const Symbol* s, int begin, int end, TRAPS);
   Symbol* create_temporary_symbol(const char *s, int length, TRAPS);
 
-  static bool _verify_verbose;  // for debugging
+  TypeOrigin ref_ctx(const char* str, TRAPS);
 };
 
 inline int ClassVerifier::change_sig_to_verificationType(
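
TypeOrigin and ErrorContext above are small value objects built through static factory methods and rendered into text only when an error is actually reported. A self-contained sketch of that pattern, with simplified stand-in types (MockOrigin and MockContext are invented names, not the HotSpot classes):

#include <cstdio>

struct MockOrigin {                    // stand-in for TypeOrigin
  const char* where;                   // e.g. "stack", "constant pool"
  int index;
  static MockOrigin stack(int i) { return MockOrigin{"stack", i}; }
  static MockOrigin cp(int i)    { return MockOrigin{"constant pool", i}; }
};

struct MockContext {                   // stand-in for ErrorContext
  int bci;
  MockOrigin found;
  MockOrigin expected;
  static MockContext bad_type(int bci, MockOrigin f, MockOrigin e) {
    return MockContext{bci, f, e};
  }
  void details(FILE* out) const {      // rendered only when an error is reported
    fprintf(out, "  at bci %d: found %s[%d], expected %s[%d]\n",
            bci, found.where, found.index, expected.where, expected.index);
  }
};

static void verify_error(const MockContext& ctx, const char* msg) {
  fprintf(stderr, "VerifyError: %s\n", msg);
  ctx.details(stderr);                 // context printed alongside the message
}

int main() {
  verify_error(MockContext::bad_type(42, MockOrigin::stack(0), MockOrigin::cp(7)),
               "Call to wrong <init> method");
  return 0;
}
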
--- a/src/share/vm/classfile/vmSymbols.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/vmSymbols.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -79,7 +79,7 @@
   if (!UseSharedSpaces) {
     const char* string = &vm_symbol_bodies[0];
     for (int index = (int)FIRST_SID; index < (int)SID_LIMIT; index++) {
-      Symbol* sym = SymbolTable::new_symbol(string, CHECK);
+      Symbol* sym = SymbolTable::new_permanent_symbol(string, CHECK);
       _symbols[index] = sym;
       string += strlen(string); // skip string body
       string += 1;              // skip trailing null
@@ -128,7 +128,7 @@
     // Spot-check correspondence between strings, symbols, and enums:
     assert(_symbols[NO_SID] == NULL, "must be");
     const char* str = "java/lang/Object";
-    TempNewSymbol jlo = SymbolTable::new_symbol(str, CHECK);
+    TempNewSymbol jlo = SymbolTable::new_permanent_symbol(str, CHECK);
     assert(strncmp(str, (char*)jlo->base(), jlo->utf8_length()) == 0, "");
     assert(jlo == java_lang_Object(), "");
     SID sid = VM_SYMBOL_ENUM_NAME(java_lang_Object);
@@ -147,7 +147,7 @@
     // The string "format" happens (at the moment) not to be a vmSymbol,
     // though it is a method name in java.lang.String.
     str = "format";
-    TempNewSymbol fmt = SymbolTable::new_symbol(str, CHECK);
+    TempNewSymbol fmt = SymbolTable::new_permanent_symbol(str, CHECK);
     sid = find_sid(fmt);
     assert(sid == NO_SID, "symbol index works (negative test)");
   }
@@ -332,7 +332,14 @@
   if (cname == NULL || mname == NULL || msig == NULL)  return NULL;
   klassOop k = SystemDictionary::find_well_known_klass(cname);
   if (k == NULL)  return NULL;
-  return instanceKlass::cast(k)->find_method(mname, msig);
+  methodOop m = instanceKlass::cast(k)->find_method(mname, msig);
+  if (m == NULL &&
+      cname == vmSymbols::java_lang_invoke_MethodHandle() &&
+      msig == vmSymbols::star_name()) {
+    // Any signature polymorphic method is represented by a fixed concrete signature:
+    m = instanceKlass::cast(k)->find_method(mname, vmSymbols::object_array_object_signature());
+  }
+  return m;
 }
 
 
--- a/src/share/vm/classfile/vmSymbols.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -27,6 +27,7 @@
 
 #include "oops/symbol.hpp"
 #include "memory/iterator.hpp"
+#include "trace/traceMacros.hpp"
 
 // The class vmSymbols is a name space for fast lookup of
 // symbols commonly used in the VM.
@@ -110,6 +111,10 @@
   template(getBootClassPathEntryForClass_name,        "getBootClassPathEntryForClass")            \
   template(sun_misc_PostVMInitHook,                   "sun/misc/PostVMInitHook")                  \
                                                                                                   \
+  /* Java runtime version access */                                                               \
+  template(sun_misc_Version,                          "sun/misc/Version")                         \
+  template(java_runtime_name_name,                    "java_runtime_name")                        \
+                                                                                                  \
   /* class file format tags */                                                                    \
   template(tag_source_file,                           "SourceFile")                               \
   template(tag_inner_classes,                         "InnerClasses")                             \
@@ -155,6 +160,7 @@
   template(java_lang_NoSuchMethodException,           "java/lang/NoSuchMethodException")          \
   template(java_lang_NullPointerException,            "java/lang/NullPointerException")           \
   template(java_lang_StringIndexOutOfBoundsException, "java/lang/StringIndexOutOfBoundsException")\
+  template(java_lang_UnsupportedOperationException,   "java/lang/UnsupportedOperationException")  \
   template(java_lang_InvalidClassException,           "java/lang/InvalidClassException")          \
   template(java_lang_reflect_InvocationTargetException, "java/lang/reflect/InvocationTargetException") \
   template(java_lang_Exception,                       "java/lang/Exception")                      \
@@ -207,10 +213,12 @@
   template(newField_signature,                        "(Lsun/reflect/FieldInfo;)Ljava/lang/reflect/Field;") \
   template(newMethod_name,                            "newMethod")                                \
   template(newMethod_signature,                       "(Lsun/reflect/MethodInfo;)Ljava/lang/reflect/Method;") \
-  /* the following two names must be in order: */                                                 \
-  template(invokeExact_name,                          "invokeExact")                              \
-  template(invokeGeneric_name,                        "invokeGeneric")                            \
-  template(invokeVarargs_name,                        "invokeVarargs")                            \
+  template(invokeBasic_name,                          "invokeBasic")                              \
+  template(linkToVirtual_name,                        "linkToVirtual")                            \
+  template(linkToStatic_name,                         "linkToStatic")                             \
+  template(linkToSpecial_name,                        "linkToSpecial")                            \
+  template(linkToInterface_name,                      "linkToInterface")                          \
+  template(compiledLambdaForm_name,                   "<compiledLambdaForm>")  /*fake name*/      \
   template(star_name,                                 "*") /*not really a name*/                  \
   template(invoke_name,                               "invoke")                                   \
   template(override_name,                             "override")                                 \
@@ -231,36 +239,33 @@
   template(base_name,                                 "base")                                     \
                                                                                                   \
   /* Support for JSR 292 & invokedynamic (JDK 1.7 and above) */                                   \
-  template(java_lang_invoke_InvokeDynamic,            "java/lang/invoke/InvokeDynamic")           \
-  template(java_lang_invoke_Linkage,                  "java/lang/invoke/Linkage")                 \
   template(java_lang_invoke_CallSite,                 "java/lang/invoke/CallSite")                \
   template(java_lang_invoke_ConstantCallSite,         "java/lang/invoke/ConstantCallSite")        \
   template(java_lang_invoke_MutableCallSite,          "java/lang/invoke/MutableCallSite")         \
   template(java_lang_invoke_VolatileCallSite,         "java/lang/invoke/VolatileCallSite")        \
   template(java_lang_invoke_MethodHandle,             "java/lang/invoke/MethodHandle")            \
   template(java_lang_invoke_MethodType,               "java/lang/invoke/MethodType")              \
-  template(java_lang_invoke_WrongMethodTypeException, "java/lang/invoke/WrongMethodTypeException") \
   template(java_lang_invoke_MethodType_signature,     "Ljava/lang/invoke/MethodType;")            \
+  template(java_lang_invoke_MemberName_signature,     "Ljava/lang/invoke/MemberName;")            \
+  template(java_lang_invoke_LambdaForm_signature,     "Ljava/lang/invoke/LambdaForm;")            \
   template(java_lang_invoke_MethodHandle_signature,   "Ljava/lang/invoke/MethodHandle;")          \
   /* internal classes known only to the JVM: */                                                   \
-  template(java_lang_invoke_MethodTypeForm,           "java/lang/invoke/MethodTypeForm")          \
-  template(java_lang_invoke_MethodTypeForm_signature, "Ljava/lang/invoke/MethodTypeForm;")        \
   template(java_lang_invoke_MemberName,               "java/lang/invoke/MemberName")              \
   template(java_lang_invoke_MethodHandleNatives,      "java/lang/invoke/MethodHandleNatives")     \
-  template(java_lang_invoke_MethodHandleImpl,         "java/lang/invoke/MethodHandleImpl")        \
-  template(java_lang_invoke_AdapterMethodHandle,      "java/lang/invoke/AdapterMethodHandle")     \
-  template(java_lang_invoke_BoundMethodHandle,        "java/lang/invoke/BoundMethodHandle")       \
-  template(java_lang_invoke_DirectMethodHandle,       "java/lang/invoke/DirectMethodHandle")      \
-  template(java_lang_invoke_CountingMethodHandle,     "java/lang/invoke/CountingMethodHandle")    \
+  template(java_lang_invoke_LambdaForm,               "java/lang/invoke/LambdaForm")              \
+  template(java_lang_invoke_ForceInline_signature,    "Ljava/lang/invoke/ForceInline;")           \
+  template(java_lang_invoke_DontInline_signature,     "Ljava/lang/invoke/DontInline;")            \
+  template(java_lang_invoke_LambdaForm_Compiled_signature, "Ljava/lang/invoke/LambdaForm$Compiled;") \
+  template(java_lang_invoke_LambdaForm_Hidden_signature, "Ljava/lang/invoke/LambdaForm$Hidden;")  \
   /* internal up-calls made only by the JVM, via class sun.invoke.MethodHandleNatives: */         \
   template(findMethodHandleType_name,                 "findMethodHandleType")                     \
   template(findMethodHandleType_signature,       "(Ljava/lang/Class;[Ljava/lang/Class;)Ljava/lang/invoke/MethodType;") \
-  template(notifyGenericMethodType_name,              "notifyGenericMethodType")                  \
-  template(notifyGenericMethodType_signature,         "(Ljava/lang/invoke/MethodType;)V")         \
   template(linkMethodHandleConstant_name,             "linkMethodHandleConstant")                 \
   template(linkMethodHandleConstant_signature, "(Ljava/lang/Class;ILjava/lang/Class;Ljava/lang/String;Ljava/lang/Object;)Ljava/lang/invoke/MethodHandle;") \
-  template(makeDynamicCallSite_name,                  "makeDynamicCallSite")                      \
-  template(makeDynamicCallSite_signature, "(Ljava/lang/invoke/MethodHandle;Ljava/lang/String;Ljava/lang/invoke/MethodType;Ljava/lang/Object;Ljava/lang/invoke/MemberName;I)Ljava/lang/invoke/CallSite;") \
+  template(linkMethod_name,                           "linkMethod")                               \
+  template(linkMethod_signature, "(Ljava/lang/Class;ILjava/lang/Class;Ljava/lang/String;Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/invoke/MemberName;") \
+  template(linkCallSite_name,                         "linkCallSite")                             \
+  template(linkCallSite_signature, "(Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/invoke/MemberName;") \
   template(setTargetNormal_name,                      "setTargetNormal")                          \
   template(setTargetVolatile_name,                    "setTargetVolatile")                        \
   template(setTarget_signature,                       "(Ljava/lang/invoke/MethodHandle;)V")       \
@@ -353,22 +358,15 @@
   template(toString_name,                             "toString")                                 \
   template(values_name,                               "values")                                   \
   template(receiver_name,                             "receiver")                                 \
-  template(vmmethod_name,                             "vmmethod")                                 \
   template(vmtarget_name,                             "vmtarget")                                 \
-  template(vmentry_name,                              "vmentry")                                  \
+  template(vmindex_name,                              "vmindex")                                  \
   template(vmcount_name,                              "vmcount")                                  \
-  template(vmslots_name,                              "vmslots")                                  \
-  template(vmlayout_name,                             "vmlayout")                                 \
-  template(vmindex_name,                              "vmindex")                                  \
-  template(vmargslot_name,                            "vmargslot")                                \
+  template(vmentry_name,                              "vmentry")                                  \
   template(flags_name,                                "flags")                                    \
-  template(argument_name,                             "argument")                                 \
-  template(conversion_name,                           "conversion")                               \
   template(rtype_name,                                "rtype")                                    \
   template(ptypes_name,                               "ptypes")                                   \
   template(form_name,                                 "form")                                     \
-  template(erasedType_name,                           "erasedType")                               \
-  template(genericInvoker_name,                       "genericInvoker")                           \
+  template(basicType_name,                            "basicType")                                \
   template(append_name,                               "append")                                   \
   template(klass_name,                                "klass")                                    \
   template(resolved_constructor_name,                 "resolved_constructor")                     \
@@ -427,6 +425,7 @@
   template(throwable_throwable_signature,             "(Ljava/lang/Throwable;)Ljava/lang/Throwable;")             \
   template(class_void_signature,                      "(Ljava/lang/Class;)V")                     \
   template(class_int_signature,                       "(Ljava/lang/Class;)I")                     \
+  template(class_long_signature,                      "(Ljava/lang/Class;)J")                     \
   template(class_boolean_signature,                   "(Ljava/lang/Class;)Z")                     \
   template(throwable_string_void_signature,           "(Ljava/lang/Throwable;Ljava/lang/String;)V")               \
   template(string_array_void_signature,               "([Ljava/lang/String;)V")                                   \
@@ -542,10 +541,12 @@
   template(serializePropertiesToByteArray_signature,   "()[B")                                                    \
   template(serializeAgentPropertiesToByteArray_name,   "serializeAgentPropertiesToByteArray")                     \
   template(classRedefinedCount_name,                   "classRedefinedCount")                                     \
+                                                                                                                  \
+  /* trace signatures */                                                                                          \
+  TRACE_TEMPLATES(template)                                                                                       \
+                                                                                                                  \
   /*end*/
 
-
-
 // Here are all the intrinsics known to the runtime and the CI.
 // Each intrinsic consists of a public enum name (like _hashCode),
 // followed by a specification of its klass, name, and signature:
@@ -651,6 +652,8 @@
   do_intrinsic(_nanoTime,                 java_lang_System,       nanoTime_name,          void_long_signature,   F_S)   \
    do_name(     nanoTime_name,                                   "nanoTime")                                            \
                                                                                                                         \
+  TRACE_INTRINSICS(do_intrinsic, do_class, do_name, do_signature, do_alias)                                             \
+                                                                                                                        \
   do_intrinsic(_arraycopy,                java_lang_System,       arraycopy_name, arraycopy_signature,           F_S)   \
    do_name(     arraycopy_name,                                  "arraycopy")                                           \
    do_signature(arraycopy_signature,                             "(Ljava/lang/Object;ILjava/lang/Object;II)V")          \
@@ -716,15 +719,6 @@
   /* java/lang/ref/Reference */                                                                                         \
   do_intrinsic(_Reference_get,            java_lang_ref_Reference, get_name,    void_object_signature, F_R)             \
                                                                                                                         \
-                                                                                                                        \
-  do_class(sun_misc_AtomicLongCSImpl,     "sun/misc/AtomicLongCSImpl")                                                  \
-  do_intrinsic(_get_AtomicLong,           sun_misc_AtomicLongCSImpl, get_name, void_long_signature,              F_R)   \
-  /*   (symbols get_name and void_long_signature defined above) */                                                      \
-                                                                                                                        \
-  do_intrinsic(_attemptUpdate,            sun_misc_AtomicLongCSImpl, attemptUpdate_name, attemptUpdate_signature, F_R)  \
-   do_name(     attemptUpdate_name,                                 "attemptUpdate")                                    \
-   do_signature(attemptUpdate_signature,                            "(JJ)Z")                                            \
-                                                                                                                        \
   /* support for sun.misc.Unsafe */                                                                                     \
   do_class(sun_misc_Unsafe,               "sun/misc/Unsafe")                                                            \
                                                                                                                         \
@@ -920,15 +914,15 @@
                                                                                                                           \
   do_intrinsic(_invoke,                   java_lang_reflect_Method, invoke_name, object_object_array_object_signature, F_R) \
   /*   (symbols invoke_name and invoke_signature defined above) */                                                      \
-  do_intrinsic(_checkSpreadArgument,      java_lang_invoke_MethodHandleNatives, checkSpreadArgument_name, checkSpreadArgument_signature, F_S) \
-   do_name(    checkSpreadArgument_name,       "checkSpreadArgument")                                                   \
-   do_name(    checkSpreadArgument_signature,  "(Ljava/lang/Object;I)V")                                                \
-  do_intrinsic(_invokeExact,              java_lang_invoke_MethodHandle, invokeExact_name,   object_array_object_signature, F_RN) \
-  do_intrinsic(_invokeGeneric,            java_lang_invoke_MethodHandle, invokeGeneric_name, object_array_object_signature, F_RN) \
-  do_intrinsic(_invokeVarargs,            java_lang_invoke_MethodHandle, invokeVarargs_name, object_array_object_signature, F_R)  \
-  do_intrinsic(_invokeDynamic,            java_lang_invoke_InvokeDynamic, star_name,         object_array_object_signature, F_SN) \
-                                                                                                                        \
-  do_intrinsic(_selectAlternative,        java_lang_invoke_MethodHandleImpl, selectAlternative_name, selectAlternative_signature, F_S)  \
+  /* the polymorphic MH intrinsics must be in compact order, with _invokeGeneric first and _linkToInterface last */     \
+  do_intrinsic(_invokeGeneric,            java_lang_invoke_MethodHandle, invoke_name,           star_name, F_RN)        \
+  do_intrinsic(_invokeBasic,              java_lang_invoke_MethodHandle, invokeBasic_name,      star_name, F_RN)        \
+  do_intrinsic(_linkToVirtual,            java_lang_invoke_MethodHandle, linkToVirtual_name,    star_name, F_SN)        \
+  do_intrinsic(_linkToStatic,             java_lang_invoke_MethodHandle, linkToStatic_name,     star_name, F_SN)        \
+  do_intrinsic(_linkToSpecial,            java_lang_invoke_MethodHandle, linkToSpecial_name,    star_name, F_SN)        \
+  do_intrinsic(_linkToInterface,          java_lang_invoke_MethodHandle, linkToInterface_name,  star_name, F_SN)        \
+  /* special marker for bytecode generated for the JVM from a LambdaForm: */                                            \
+  do_intrinsic(_compiledLambdaForm,       java_lang_invoke_MethodHandle, compiledLambdaForm_name, star_name, F_RN)      \
                                                                                                                         \
   /* unboxing methods: */                                                                                               \
   do_intrinsic(_booleanValue,             java_lang_Boolean,      booleanValue_name, void_boolean_signature, F_R)       \
@@ -1061,6 +1055,10 @@
 
     ID_LIMIT,
     LAST_COMPILER_INLINE = _prefetchWriteStatic,
+    FIRST_MH_SIG_POLY    = _invokeGeneric,
+    FIRST_MH_STATIC      = _linkToVirtual,
+    LAST_MH_SIG_POLY     = _linkToInterface,
+
     FIRST_ID = _none + 1
   };
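
The template(...) lists in this header are an X-macro: the same list definition is expanded once into the SID enum and once into the packed string bodies, so the two can never drift apart. A reduced demonstration of the technique (the macro parameter is named def here to stay self-contained; the two entries are borrowed from the list above):

#include <cstdio>

#define DEMO_SYMBOLS(def)                        \
  def(java_lang_Object, "java/lang/Object")      \
  def(linkToStatic_name, "linkToStatic")

#define AS_ENUM(name, str) SID_##name,
enum DemoSID { DEMO_SYMBOLS(AS_ENUM) SID_LIMIT };
#undef AS_ENUM

#define AS_STRING(name, str) str,
static const char* demo_bodies[] = { DEMO_SYMBOLS(AS_STRING) };
#undef AS_STRING

int main() {
  for (int i = 0; i < SID_LIMIT; i++)
    printf("%d -> %s\n", i, demo_bodies[i]);
  return 0;
}
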
 
--- a/src/share/vm/code/codeBlob.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/code/codeBlob.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -144,7 +144,7 @@
   // chunk of memory, it's your job to free it.
   if (p != NULL) {
     // We need to allocate a chunk big enough to hold the OopMapSet and all of its OopMaps
-    _oop_maps = (OopMapSet* )NEW_C_HEAP_ARRAY(unsigned char, p->heap_size());
+    _oop_maps = (OopMapSet* )NEW_C_HEAP_ARRAY(unsigned char, p->heap_size(), mtCode);
     p->copy_to((address)_oop_maps);
   } else {
     _oop_maps = NULL;
@@ -180,7 +180,7 @@
 
 void CodeBlob::flush() {
   if (_oop_maps) {
-    FREE_C_HEAP_ARRAY(unsigned char, _oop_maps);
+    FREE_C_HEAP_ARRAY(unsigned char, _oop_maps, mtCode);
     _oop_maps = NULL;
   }
   _comments.free();
@@ -359,43 +359,6 @@
 
 
 //----------------------------------------------------------------------------------------------------
-// Implementation of RicochetBlob
-
-RicochetBlob::RicochetBlob(
-  CodeBuffer* cb,
-  int         size,
-  int         bounce_offset,
-  int         exception_offset,
-  int         frame_size
-)
-: SingletonBlob("RicochetBlob", cb, sizeof(RicochetBlob), size, frame_size, (OopMapSet*) NULL)
-{
-  _bounce_offset = bounce_offset;
-  _exception_offset = exception_offset;
-}
-
-
-RicochetBlob* RicochetBlob::create(
-  CodeBuffer* cb,
-  int         bounce_offset,
-  int         exception_offset,
-  int         frame_size)
-{
-  RicochetBlob* blob = NULL;
-  ThreadInVMfromUnknown __tiv;  // get to VM state in case we block on CodeCache_lock
-  {
-    MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
-    unsigned int size = allocation_size(cb, sizeof(RicochetBlob));
-    blob = new (size) RicochetBlob(cb, size, bounce_offset, exception_offset, frame_size);
-  }
-
-  trace_new_stub(blob, "RicochetBlob");
-
-  return blob;
-}
-
-
-//----------------------------------------------------------------------------------------------------
 // Implementation of DeoptimizationBlob
 
 DeoptimizationBlob::DeoptimizationBlob(
--- a/src/share/vm/code/codeBlob.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/code/codeBlob.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -35,7 +35,6 @@
 // Subtypes are:
 //   nmethod            : Compiled Java methods (includes methods that call native code)
 //   RuntimeStub        : Call to VM runtime methods
-//   RicochetBlob       : Used for blocking MethodHandle adapters
 //   DeoptimizationBlob : Used for deoptimization
 //   ExceptionBlob      : Used for stack unrolling
 //   SafepointBlob      : Used to handle illegal instruction exceptions
@@ -99,7 +98,6 @@
   virtual bool is_buffer_blob() const            { return false; }
   virtual bool is_nmethod() const                { return false; }
   virtual bool is_runtime_stub() const           { return false; }
-  virtual bool is_ricochet_stub() const          { return false; }
   virtual bool is_deoptimization_stub() const    { return false; }
   virtual bool is_uncommon_trap_stub() const     { return false; }
   virtual bool is_exception_stub() const         { return false; }
@@ -350,50 +348,6 @@
 
 
 //----------------------------------------------------------------------------------------------------
-// RicochetBlob
-// Holds an arbitrary argument list indefinitely while Java code executes recursively.
-
-class RicochetBlob: public SingletonBlob {
-  friend class VMStructs;
- private:
-
-  int _bounce_offset;
-  int _exception_offset;
-
-  // Creation support
-  RicochetBlob(
-    CodeBuffer* cb,
-    int         size,
-    int         bounce_offset,
-    int         exception_offset,
-    int         frame_size
-  );
-
- public:
-  // Creation
-  static RicochetBlob* create(
-    CodeBuffer* cb,
-    int         bounce_offset,
-    int         exception_offset,
-    int         frame_size
-  );
-
-  // Typing
-  bool is_ricochet_stub() const { return true; }
-
-  // GC for args
-  void preserve_callee_argument_oops(frame fr, const RegisterMap *reg_map, OopClosure* f) { /* Nothing to do */ }
-
-  address bounce_addr() const           { return code_begin() + _bounce_offset; }
-  address exception_addr() const        { return code_begin() + _exception_offset; }
-  bool returns_to_bounce_addr(address pc) const {
-    address bounce_pc = bounce_addr();
-    return (pc == bounce_pc || (pc + frame::pc_return_offset) == bounce_pc);
-  }
-};
-
-
-//----------------------------------------------------------------------------------------------------
 // DeoptimizationBlob
 
 class DeoptimizationBlob: public SingletonBlob {
--- a/src/share/vm/code/codeCache.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/code/codeCache.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -796,7 +796,6 @@
   int nmethodCount = 0;
   int runtimeStubCount = 0;
   int adapterCount = 0;
-  int ricochetStubCount = 0;
   int deoptimizationStubCount = 0;
   int uncommonTrapStubCount = 0;
   int bufferBlobCount = 0;
@@ -841,8 +840,6 @@
       }
     } else if (cb->is_runtime_stub()) {
       runtimeStubCount++;
-    } else if (cb->is_ricochet_stub()) {
-      ricochetStubCount++;
     } else if (cb->is_deoptimization_stub()) {
       deoptimizationStubCount++;
     } else if (cb->is_uncommon_trap_stub()) {
@@ -856,7 +853,7 @@
 
   int bucketSize = 512;
   int bucketLimit = maxCodeSize / bucketSize + 1;
-  int *buckets = NEW_C_HEAP_ARRAY(int, bucketLimit);
+  int *buckets = NEW_C_HEAP_ARRAY(int, bucketLimit, mtCode);
   memset(buckets,0,sizeof(int) * bucketLimit);
 
   for (cb = first(); cb != NULL; cb = next(cb)) {
@@ -879,7 +876,6 @@
   tty->print_cr("runtime_stubs: %d",runtimeStubCount);
   tty->print_cr("adapters: %d",adapterCount);
   tty->print_cr("buffer blobs: %d",bufferBlobCount);
-  tty->print_cr("ricochet_stubs: %d",ricochetStubCount);
   tty->print_cr("deoptimization_stubs: %d",deoptimizationStubCount);
   tty->print_cr("uncommon_traps: %d",uncommonTrapStubCount);
   tty->print_cr("\nnmethod size distribution (non-zombie java)");
@@ -893,7 +889,7 @@
     }
   }
 
-  FREE_C_HEAP_ARRAY(int, buckets);
+  FREE_C_HEAP_ARRAY(int, buckets, mtCode);
 }
 
 void CodeCache::print() {
--- a/src/share/vm/code/codeCache.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/code/codeCache.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -88,6 +88,9 @@
   // Lookup that does not fail if you lookup a zombie method (if you call this, be sure to know
   // what you are doing)
   static CodeBlob* find_blob_unsafe(void* start) {
+    // NMT can walk the stack before code cache is created
+    if (_heap == NULL) return NULL;
+
     CodeBlob* result = (CodeBlob*)_heap->find_start(start);
     // this assert is too strong because the heap code will return the
     // heapblock containing start. That block can often be larger than
--- a/src/share/vm/code/debugInfoRec.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/code/debugInfoRec.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -311,6 +311,7 @@
   assert(method == NULL ||
          (method->is_native() && bci == 0) ||
          (!method->is_native() && 0 <= bci && bci < method->code_size()) ||
+         (method->is_compiled_lambda_form() && bci == -99) ||  // this might happen in C1
          bci == -1, "illegal bci");
 
   // serialize the locals/expressions/monitors
--- a/src/share/vm/code/dependencies.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/code/dependencies.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1033,21 +1033,25 @@
     // (Old CHA had the same limitation.)
     return context_type;
   }
-  for (int i = 0; i < nof_impls; i++) {
-    klassOop impl = instanceKlass::cast(context_type)->implementor(i);
-    if (impl == NULL) {
-      // implementors array overflowed => no exact info.
+  if (nof_impls > 0) {
+    klassOop impl = instanceKlass::cast(context_type)->implementor();
+    assert(impl != NULL, "just checking");
+    // If impl is the same as the context_type, then more than one
+    // implementor has been seen. No exact info in this case.
+    if (impl == context_type) {
       return context_type;  // report an inexact witness to this sad affair
     }
     if (do_counts)
       { NOT_PRODUCT(deps_find_witness_steps++); }
     if (is_participant(impl)) {
-      if (participants_hide_witnesses)  continue;
-      // else fall through to process this guy's subclasses
+      if (!participants_hide_witnesses) {
+        ADD_SUBCLASS_CHAIN(impl);
+      }
     } else if (is_witness(impl) && !ignore_witness(impl)) {
       return impl;
+    } else {
+      ADD_SUBCLASS_CHAIN(impl);
     }
-    ADD_SUBCLASS_CHAIN(impl);
   }
 
   // Recursively process each non-trivial sibling chain.
@@ -1174,8 +1178,9 @@
   } else if (ctx->nof_implementors() != 0) {
     // if it is an interface, it must be unimplemented
     // (if it is not an interface, nof_implementors is always zero)
-    klassOop impl = ctx->implementor(0);
-    return (impl != NULL)? impl: ctxk;
+    klassOop impl = ctx->implementor();
+    assert(impl != NULL, "must be set");
+    return impl;
   } else {
     return NULL;
   }
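
The hunk above depends on the new implementor() contract: an interface caches exactly one implementor and stores itself as a sentinel once a second one appears, so impl == context_type means the information is inexact. A sketch of that contract with illustrative types (not the real klassOop machinery):

#include <cassert>

struct Klass {
  Klass* _implementor = nullptr;
  void add_implementor(Klass* k) {
    if (_implementor == nullptr)    _implementor = k;    // first one: remember it
    else if (_implementor != this)  _implementor = this; // second one: sentinel
  }
  Klass* implementor() const { return _implementor; }
  bool has_unique_implementor() const {
    return _implementor != nullptr && _implementor != this;
  }
};

int main() {
  Klass iface, a, b;
  iface.add_implementor(&a);
  assert(iface.implementor() == &a && iface.has_unique_implementor());
  iface.add_implementor(&b);
  assert(iface.implementor() == &iface);  // "more than one" => inexact witness
  return 0;
}
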
--- a/src/share/vm/code/nmethod.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/code/nmethod.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -945,8 +945,12 @@
 void nmethod::print_on(outputStream* st, const char* msg) const {
   if (st != NULL) {
     ttyLocker ttyl;
-    CompileTask::print_compilation(st, this, msg);
-    if (WizardMode) st->print(" (" INTPTR_FORMAT ")", this);
+    if (WizardMode) {
+      CompileTask::print_compilation(st, this, msg, /*short_form:*/ true);
+      st->print_cr(" (" INTPTR_FORMAT ")", this);
+    } else {
+      CompileTask::print_compilation(st, this, msg, /*short_form:*/ false);
+    }
   }
 }
 
@@ -964,7 +968,9 @@
   if (printmethod) {
     print_code();
     print_pcs();
-    oop_maps()->print();
+    if (oop_maps()) {
+      oop_maps()->print();
+    }
   }
   if (PrintDebugInfo) {
     print_scopes();
--- a/src/share/vm/code/nmethod.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/code/nmethod.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -31,7 +31,7 @@
 // This class is used internally by nmethods, to cache
 // exception/pc/handler information.
 
-class ExceptionCache : public CHeapObj {
+class ExceptionCache : public CHeapObj<mtCode> {
   friend class VMStructs;
  private:
   enum { cache_size = 16 };
@@ -553,7 +553,7 @@
   static void oops_do_marking_prologue();
   static void oops_do_marking_epilogue();
   static bool oops_do_marking_is_active() { return _oops_do_mark_nmethods != NULL; }
-  DEBUG_ONLY(bool test_oops_do_mark() { return _oops_do_mark_link != NULL; })
+  bool test_oops_do_mark() { return _oops_do_mark_link != NULL; }
 
   // ScopeDesc for an instruction
   ScopeDesc* scope_desc_at(address pc);
--- a/src/share/vm/code/stubs.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/code/stubs.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -101,7 +101,7 @@
 // of the concrete stub (see also macro below). There's exactly
 // one stub interface instance required per stub queue.
 
-class StubInterface: public CHeapObj {
+class StubInterface: public CHeapObj<mtCode> {
  public:
   // Initialization/finalization
   virtual void    initialize(Stub* self, int size)         = 0; // called after creation (called twice if allocated via (request, commit))
@@ -152,7 +152,7 @@
 // A StubQueue maintains a queue of stubs.
 // Note: All sizes (spaces) are given in bytes.
 
-class StubQueue: public CHeapObj {
+class StubQueue: public CHeapObj<mtCode> {
   friend class VMStructs;
  private:
   StubInterface* _stub_interface;                // the interface prototype
--- a/src/share/vm/code/vmreg.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/code/vmreg.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,7 +27,7 @@
 #include "code/vmreg.hpp"
 
 // First VMReg value that could refer to a stack slot
-VMReg VMRegImpl::stack0 = (VMReg)(intptr_t)((ConcreteRegisterImpl::number_of_registers + 1) & ~1);
+VMReg VMRegImpl::stack0 = (VMReg)(intptr_t)((ConcreteRegisterImpl::number_of_registers + 7) & ~7);
 
 // VMRegs are 4 bytes wide on all platforms
 const int VMRegImpl::stack_slot_size = 4;
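
stack0 moves from 2-slot to 8-slot alignment above; the (n + 7) & ~7 expression rounds n up to the next multiple of 8. A quick check of the arithmetic:

#include <cassert>

static int round_up8(int n) { return (n + 7) & ~7; }

int main() {
  assert(round_up8(0)  == 0);
  assert(round_up8(1)  == 8);
  assert(round_up8(8)  == 8);
  assert(round_up8(13) == 16);   // e.g. 13 registers -> stack0 at slot 16
  return 0;
}
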
--- a/src/share/vm/code/vmreg.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/code/vmreg.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -131,6 +131,10 @@
     assert((is_reg() && value() < stack0->value() - 1) || is_stack(), "must be");
     return (VMReg)(intptr_t)(value() + 1);
   }
+  VMReg next(int i) {
+    assert((is_reg() && value() < stack0->value() - i) || is_stack(), "must be");
+    return (VMReg)(intptr_t)(value() + i);
+  }
   VMReg prev() {
     assert((is_stack() && value() > stack0->value()) || (is_reg() && value() != 0), "must be");
     return (VMReg)(intptr_t)(value() - 1);
--- a/src/share/vm/code/vtableStubs.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/code/vtableStubs.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -55,6 +55,8 @@
   int index() const                              { return _index; }
   static VMReg receiver_location()               { return _receiver_location; }
   void set_next(VtableStub* n)                   { _next = n; }
+
+ public:
   address code_begin() const                     { return (address)(this + 1); }
   address code_end() const                       { return code_begin() + pd_code_size_limit(_is_vtable_stub); }
   address entry_point() const                    { return code_begin(); }
@@ -65,6 +67,7 @@
   }
   bool contains(address pc) const                { return code_begin() <= pc && pc < code_end(); }
 
+ private:
   void set_exception_points(address npe_addr, address ame_addr) {
     _npe_offset = npe_addr - code_begin();
     _ame_offset = ame_addr - code_begin();
--- a/src/share/vm/compiler/abstractCompiler.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/compiler/abstractCompiler.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -29,7 +29,7 @@
 
 typedef void (*initializer)(void);
 
-class AbstractCompiler : public CHeapObj {
+class AbstractCompiler : public CHeapObj<mtCompiler> {
  private:
   bool _is_initialized; // Mark whether compiler object is initialized
 
--- a/src/share/vm/compiler/compileBroker.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/compiler/compileBroker.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -407,7 +407,10 @@
     if (is_osr_method) {
       st->print(" @ %d", osr_bci);
     }
-    st->print(" (%d bytes)", method->code_size());
+    if (method->is_native())
+      st->print(" (native)");
+    else
+      st->print(" (%d bytes)", method->code_size());
   }
 
   if (msg != NULL) {
@@ -427,12 +430,17 @@
   st->print("     ");        // print compilation number
 
   // method attributes
-  const char sync_char      = method->is_synchronized()        ? 's' : ' ';
-  const char exception_char = method->has_exception_handlers() ? '!' : ' ';
-  const char monitors_char  = method->has_monitor_bytecodes()  ? 'm' : ' ';
+  if (method->is_loaded()) {
+    const char sync_char      = method->is_synchronized()        ? 's' : ' ';
+    const char exception_char = method->has_exception_handlers() ? '!' : ' ';
+    const char monitors_char  = method->has_monitor_bytecodes()  ? 'm' : ' ';
 
-  // print method attributes
-  st->print(" %c%c%c  ", sync_char, exception_char, monitors_char);
+    // print method attributes
+    st->print(" %c%c%c  ", sync_char, exception_char, monitors_char);
+  } else {
+    //         %s!bn
+    st->print("      ");     // print method attributes
+  }
 
   if (TieredCompilation) {
     st->print("  ");
@@ -444,7 +452,10 @@
 
   st->print("@ %d  ", bci);  // print bci
   method->print_short_name(st);
-  st->print(" (%d bytes)", method->code_size());
+  if (method->is_loaded())
+    st->print(" (%d bytes)", method->code_size());
+  else
+    st->print(" (not loaded)");
 
   if (msg != NULL) {
     st->print("   %s", msg);
@@ -951,7 +962,7 @@
   int compiler_count = c1_compiler_count + c2_compiler_count;
 
   _method_threads =
-    new (ResourceObj::C_HEAP) GrowableArray<CompilerThread*>(compiler_count, true);
+    new (ResourceObj::C_HEAP, mtCompiler) GrowableArray<CompilerThread*>(compiler_count, true);
 
   char name_buffer[256];
   for (int i = 0; i < c2_compiler_count; i++) {
@@ -1018,6 +1029,7 @@
          "sanity check");
   assert(!instanceKlass::cast(method->method_holder())->is_not_initialized(),
          "method holder must be initialized");
+  assert(!method->is_method_handle_intrinsic(), "do not enqueue these guys");
 
   if (CIPrintRequests) {
     tty->print("request: ");
@@ -1231,7 +1243,7 @@
   //
   // Note: A native method implies non-osr compilation which is
   //       checked with an assertion at the entry of this method.
-  if (method->is_native()) {
+  if (method->is_native() && !method->is_method_handle_intrinsic()) {
     bool in_base_library;
     address adr = NativeLookup::lookup(method, in_base_library, THREAD);
     if (HAS_PENDING_EXCEPTION) {
@@ -1264,7 +1276,7 @@
 
   // do the compilation
   if (method->is_native()) {
-    if (!PreferInterpreterNativeStubs) {
+    if (!PreferInterpreterNativeStubs || method->is_method_handle_intrinsic()) {
       // Acquire our lock.
       int compile_id;
       {
@@ -1627,7 +1639,7 @@
       }
       fp = fopen(fileBuf, "at");
       if (fp != NULL) {
-        file = NEW_C_HEAP_ARRAY(char, strlen(fileBuf)+1);
+        file = NEW_C_HEAP_ARRAY(char, strlen(fileBuf)+1, mtCompiler);
         strcpy(file, fileBuf);
         break;
       }
@@ -1637,7 +1649,7 @@
     } else {
       if (LogCompilation && Verbose)
         tty->print_cr("Opening compilation log %s", file);
-      CompileLog* log = new(ResourceObj::C_HEAP) CompileLog(file, fp, thread_id);
+      CompileLog* log = new(ResourceObj::C_HEAP, mtCompiler) CompileLog(file, fp, thread_id);
       thread->init_log(log);
 
       if (xtty != NULL) {
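
A recurring change in this file (and across the changeset) is the extra memory-type argument, mtCompiler here, on C-heap allocations so native memory tracking can attribute usage per subsystem. A stripped-down sketch of the idea; the real NEW_C_HEAP_ARRAY/CHeapObj<mt> machinery is more involved, and tagged_alloc is an invented name:

#include <cstdio>
#include <cstdlib>

enum MemType { mtCode, mtCompiler, mtTypeCount };
static size_t g_usage[mtTypeCount];    // bytes attributed per subsystem

static void* tagged_alloc(size_t bytes, MemType t) {
  g_usage[t] += bytes;                 // record attribution before handing out memory
  return malloc(bytes);
}

int main() {
  char* file = static_cast<char*>(tagged_alloc(256, mtCompiler));
  printf("mtCompiler usage: %zu bytes\n", g_usage[mtCompiler]);
  free(file);  // the real FREE_C_HEAP_ARRAY also un-attributes the bytes
  return 0;
}
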
--- a/src/share/vm/compiler/compileBroker.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/compiler/compileBroker.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -36,7 +36,7 @@
 //
 // An entry in the compile queue.  It represents a pending or current
 // compilation.
-class CompileTask : public CHeapObj {
+class CompileTask : public CHeapObj<mtCompiler> {
   friend class VMStructs;
 
  private:
@@ -104,10 +104,10 @@
 
 public:
   void         print_compilation(outputStream* st = tty, bool short_form = false);
-  static void  print_compilation(outputStream* st, const nmethod* nm, const char* msg = NULL) {
+  static void  print_compilation(outputStream* st, const nmethod* nm, const char* msg = NULL, bool short_form = false) {
     print_compilation_impl(st, nm->method(), nm->compile_id(), nm->comp_level(),
                            nm->is_osr_method(), nm->is_osr_method() ? nm->osr_entry_bci() : -1, /*is_blocking*/ false,
-                           msg);
+                           msg, short_form);
   }
 
   static void  print_inlining(outputStream* st, ciMethod* method, int inline_level, int bci, const char* msg = NULL);
@@ -131,7 +131,7 @@
 //
 // Per Compiler Performance Counters.
 //
-class CompilerCounters : public CHeapObj {
+class CompilerCounters : public CHeapObj<mtCompiler> {
 
   public:
     enum {
@@ -175,7 +175,7 @@
 // CompileQueue
 //
 // A list of CompileTasks.
-class CompileQueue : public CHeapObj {
+class CompileQueue : public CHeapObj<mtCompiler> {
  private:
   const char* _name;
   Monitor*    _lock;
--- a/src/share/vm/compiler/compileLog.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/compiler/compileLog.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -37,14 +37,14 @@
 CompileLog::CompileLog(const char* file, FILE* fp, intx thread_id)
   : _context(_context_buffer, sizeof(_context_buffer))
 {
-  initialize(new(ResourceObj::C_HEAP) fileStream(fp));
+  initialize(new(ResourceObj::C_HEAP, mtCompiler) fileStream(fp));
   _file = file;
   _file_end = 0;
   _thread_id = thread_id;
 
   _identities_limit = 0;
   _identities_capacity = 400;
-  _identities = NEW_C_HEAP_ARRAY(char, _identities_capacity);
+  _identities = NEW_C_HEAP_ARRAY(char, _identities_capacity, mtCompiler);
 
   // link into the global list
   { MutexLocker locker(CompileTaskAlloc_lock);
@@ -56,7 +56,7 @@
 CompileLog::~CompileLog() {
   delete _out;
   _out = NULL;
-  FREE_C_HEAP_ARRAY(char, _identities);
+  FREE_C_HEAP_ARRAY(char, _identities, mtCompiler);
 }
 
 
@@ -109,7 +109,7 @@
   if (id >= _identities_capacity) {
     int new_cap = _identities_capacity * 2;
     if (new_cap <= id)  new_cap = id + 100;
-    _identities = REALLOC_C_HEAP_ARRAY(char, _identities, new_cap);
+    _identities = REALLOC_C_HEAP_ARRAY(char, _identities, new_cap, mtCompiler);
     _identities_capacity = new_cap;
   }
   while (id >= _identities_limit) {
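The growth policy in this hunk doubles the capacity, but if doubling still does not cover `id`, it jumps straight to `id + 100`. Doubling keeps reallocation cost amortized constant; the jump handles a single very large `id` in one resize. A sketch of the policy in isolation, with `std::abort` standing in for error handling:

```cpp
#include <cstdlib>

// Grow a buffer to cover index `id`, mirroring the policy above.
char* grow_identities(char* buf, int& capacity, int id) {
  if (id < capacity) return buf;             // already large enough
  int new_cap = capacity * 2;
  if (new_cap <= id) new_cap = id + 100;     // doubling not enough: jump
  char* bigger = static_cast<char*>(std::realloc(buf, new_cap));
  if (bigger == nullptr) std::abort();       // sketch: no recovery path
  capacity = new_cap;
  return bigger;
}
```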
--- a/src/share/vm/compiler/compilerOracle.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/compiler/compilerOracle.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -34,7 +34,7 @@
 #include "runtime/handles.inline.hpp"
 #include "runtime/jniHandles.hpp"
 
-class MethodMatcher : public CHeapObj {
+class MethodMatcher : public CHeapObj<mtCompiler> {
  public:
   enum Mode {
     Exact,
@@ -554,9 +554,8 @@
 
 static const char* cc_file() {
 #ifdef ASSERT
-  if (CompileCommandFile == NULL) {
+  if (CompileCommandFile == NULL)
     return default_cc_file;
-  }
 #endif
   return CompileCommandFile;
 }
--- a/src/share/vm/compiler/oopMap.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/compiler/oopMap.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -599,7 +599,7 @@
 
 #ifdef COMPILER2
 
-class DerivedPointerEntry : public CHeapObj {
+class DerivedPointerEntry : public CHeapObj<mtCompiler> {
  private:
   oop*     _location; // Location of derived pointer (also pointing to the base)
   intptr_t _offset;   // Offset from base pointer
@@ -621,7 +621,7 @@
   assert (!_active, "should not be active");
   assert(_list == NULL || _list->length() == 0, "table not empty");
   if (_list == NULL) {
-    _list = new (ResourceObj::C_HEAP) GrowableArray<DerivedPointerEntry*>(10, true); // Allocated on C heap
+    _list = new (ResourceObj::C_HEAP, mtCompiler) GrowableArray<DerivedPointerEntry*>(10, true); // Allocated on C heap
   }
   _active = true;
 }
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1257 +0,0 @@
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp"
-#include "gc_implementation/shared/allocationStats.hpp"
-#include "gc_implementation/shared/spaceDecorator.hpp"
-#include "memory/space.inline.hpp"
-#include "runtime/globals.hpp"
-#include "utilities/ostream.hpp"
-
-////////////////////////////////////////////////////////////////////////////////
-// A binary tree based search structure for free blocks.
-// This is currently used in the Concurrent Mark&Sweep implementation.
-////////////////////////////////////////////////////////////////////////////////
-
-TreeChunk* TreeChunk::as_TreeChunk(FreeChunk* fc) {
-  // Do some assertion checking here.
-  return (TreeChunk*) fc;
-}
-
-void TreeChunk::verifyTreeChunkList() const {
-  TreeChunk* nextTC = (TreeChunk*)next();
-  if (prev() != NULL) { // interior list node shouldn't have tree fields
-    guarantee(embedded_list()->parent() == NULL && embedded_list()->left() == NULL &&
-              embedded_list()->right()  == NULL, "should be clear");
-  }
-  if (nextTC != NULL) {
-    guarantee(as_TreeChunk(nextTC->prev()) == this, "broken chain");
-    guarantee(nextTC->size() == size(), "wrong size");
-    nextTC->verifyTreeChunkList();
-  }
-}
-
-
-TreeList* TreeList::as_TreeList(TreeChunk* tc) {
-  // This first free chunk in the list will be the tree list.
-  assert(tc->size() >= sizeof(TreeChunk), "Chunk is too small for a TreeChunk");
-  TreeList* tl = tc->embedded_list();
-  tc->set_list(tl);
-#ifdef ASSERT
-  tl->set_protecting_lock(NULL);
-#endif
-  tl->set_hint(0);
-  tl->set_size(tc->size());
-  tl->link_head(tc);
-  tl->link_tail(tc);
-  tl->set_count(1);
-  tl->init_statistics(true /* split_birth */);
-  tl->setParent(NULL);
-  tl->setLeft(NULL);
-  tl->setRight(NULL);
-  return tl;
-}
-
-TreeList* TreeList::as_TreeList(HeapWord* addr, size_t size) {
-  TreeChunk* tc = (TreeChunk*) addr;
-  assert(size >= sizeof(TreeChunk), "Chunk is too small for a TreeChunk");
-  // The space in the heap will have been mangled initially but
-  // is not remangled when a free chunk is returned to the free list
-  // (since it is used to maintain the chunk on the free list).
-  assert((ZapUnusedHeapArea &&
-          SpaceMangler::is_mangled((HeapWord*) tc->size_addr()) &&
-          SpaceMangler::is_mangled((HeapWord*) tc->prev_addr()) &&
-          SpaceMangler::is_mangled((HeapWord*) tc->next_addr())) ||
-          (tc->size() == 0 && tc->prev() == NULL && tc->next() == NULL),
-    "Space should be clear or mangled");
-  tc->setSize(size);
-  tc->linkPrev(NULL);
-  tc->linkNext(NULL);
-  TreeList* tl = TreeList::as_TreeList(tc);
-  return tl;
-}
-
-TreeList* TreeList::removeChunkReplaceIfNeeded(TreeChunk* tc) {
-
-  TreeList* retTL = this;
-  FreeChunk* list = head();
-  assert(!list || list != list->next(), "Chunk on list twice");
-  assert(tc != NULL, "Chunk being removed is NULL");
-  assert(parent() == NULL || this == parent()->left() ||
-    this == parent()->right(), "list is inconsistent");
-  assert(tc->isFree(), "Header is not marked correctly");
-  assert(head() == NULL || head()->prev() == NULL, "list invariant");
-  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-
-  FreeChunk* prevFC = tc->prev();
-  TreeChunk* nextTC = TreeChunk::as_TreeChunk(tc->next());
-  assert(list != NULL, "should have at least the target chunk");
-
-  // Is this the first item on the list?
-  if (tc == list) {
-    // The "getChunk..." functions for a TreeList will not return the
-    // first chunk in the list unless it is the last chunk in the list
-    // because the first chunk is also acting as the tree node.
-    // When coalescing happens, however, the first chunk in a tree
-    // list can be the start of a free range.  Free ranges are removed
-    // from the free lists so that they are not available to be
-    // allocated when the sweeper yields (giving up the free list lock)
-    // to allow mutator activity.  If this chunk is the first in the
-    // list and is not the last in the list, do the work to copy the
-    // TreeList from the first chunk to the next chunk and update all
-    // the TreeList pointers in the chunks in the list.
-    if (nextTC == NULL) {
-      assert(prevFC == NULL, "Not last chunk in the list");
-      set_tail(NULL);
-      set_head(NULL);
-    } else {
-      // copy embedded list.
-      nextTC->set_embedded_list(tc->embedded_list());
-      retTL = nextTC->embedded_list();
-      // Fix the pointer to the list in each chunk in the list.
-      // This can be slow for a long list.  Consider having
-      // an option that does not allow the first chunk on the
-      // list to be coalesced.
-      for (TreeChunk* curTC = nextTC; curTC != NULL;
-          curTC = TreeChunk::as_TreeChunk(curTC->next())) {
-        curTC->set_list(retTL);
-      }
-      // Fix the parent to point to the new TreeList.
-      if (retTL->parent() != NULL) {
-        if (this == retTL->parent()->left()) {
-          retTL->parent()->setLeft(retTL);
-        } else {
-          assert(this == retTL->parent()->right(), "Parent is incorrect");
-          retTL->parent()->setRight(retTL);
-        }
-      }
-      // Fix the children's parent pointers to point to the
-      // new list.
-      assert(right() == retTL->right(), "Should have been copied");
-      if (retTL->right() != NULL) {
-        retTL->right()->setParent(retTL);
-      }
-      assert(left() == retTL->left(), "Should have been copied");
-      if (retTL->left() != NULL) {
-        retTL->left()->setParent(retTL);
-      }
-      retTL->link_head(nextTC);
-      assert(nextTC->isFree(), "Should be a free chunk");
-    }
-  } else {
-    if (nextTC == NULL) {
-      // Removing chunk at tail of list
-      link_tail(prevFC);
-    }
-    // Chunk is interior to the list
-    prevFC->linkAfter(nextTC);
-  }
-
-  // Below this point the embedded TreeList being used for the
-  // tree node may have changed. Don't use "this"
-  // TreeList*.
-  // chunk should still be a free chunk (bit set in _prev)
-  assert(!retTL->head() || retTL->size() == retTL->head()->size(),
-    "Wrong sized chunk in list");
-  debug_only(
-    tc->linkPrev(NULL);
-    tc->linkNext(NULL);
-    tc->set_list(NULL);
-    bool prev_found = false;
-    bool next_found = false;
-    for (FreeChunk* curFC = retTL->head();
-         curFC != NULL; curFC = curFC->next()) {
-      assert(curFC != tc, "Chunk is still in list");
-      if (curFC == prevFC) {
-        prev_found = true;
-      }
-      if (curFC == nextTC) {
-        next_found = true;
-      }
-    }
-    assert(prevFC == NULL || prev_found, "Chunk was lost from list");
-    assert(nextTC == NULL || next_found, "Chunk was lost from list");
-    assert(retTL->parent() == NULL ||
-           retTL == retTL->parent()->left() ||
-           retTL == retTL->parent()->right(),
-           "list is inconsistent");
-  )
-  retTL->decrement_count();
-
-  assert(tc->isFree(), "Should still be a free chunk");
-  assert(retTL->head() == NULL || retTL->head()->prev() == NULL,
-    "list invariant");
-  assert(retTL->tail() == NULL || retTL->tail()->next() == NULL,
-    "list invariant");
-  return retTL;
-}
-void TreeList::returnChunkAtTail(TreeChunk* chunk) {
-  assert(chunk != NULL, "returning NULL chunk");
-  assert(chunk->list() == this, "list should be set for chunk");
-  assert(tail() != NULL, "The tree list is embedded in the first chunk");
-  // which means that the list can never be empty.
-  assert(!verifyChunkInFreeLists(chunk), "Double entry");
-  assert(head() == NULL || head()->prev() == NULL, "list invariant");
-  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-
-  FreeChunk* fc = tail();
-  fc->linkAfter(chunk);
-  link_tail(chunk);
-
-  assert(!tail() || size() == tail()->size(), "Wrong sized chunk in list");
-  increment_count();
-  debug_only(increment_returnedBytes_by(chunk->size()*sizeof(HeapWord));)
-  assert(head() == NULL || head()->prev() == NULL, "list invariant");
-  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-}
-
-// Add this chunk at the head of the list.  "At the head of the list"
-// is defined to be after the chunk pointed to by head().  This is
-// because the TreeList is embedded in the first TreeChunk in the
-// list.  See the definition of TreeChunk.
-void TreeList::returnChunkAtHead(TreeChunk* chunk) {
-  assert(chunk->list() == this, "list should be set for chunk");
-  assert(head() != NULL, "The tree list is embedded in the first chunk");
-  assert(chunk != NULL, "returning NULL chunk");
-  assert(!verifyChunkInFreeLists(chunk), "Double entry");
-  assert(head() == NULL || head()->prev() == NULL, "list invariant");
-  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-
-  FreeChunk* fc = head()->next();
-  if (fc != NULL) {
-    chunk->linkAfter(fc);
-  } else {
-    assert(tail() == NULL, "List is inconsistent");
-    link_tail(chunk);
-  }
-  head()->linkAfter(chunk);
-  assert(!head() || size() == head()->size(), "Wrong sized chunk in list");
-  increment_count();
-  debug_only(increment_returnedBytes_by(chunk->size()*sizeof(HeapWord));)
-  assert(head() == NULL || head()->prev() == NULL, "list invariant");
-  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-}
-
-TreeChunk* TreeList::head_as_TreeChunk() {
-  assert(head() == NULL || TreeChunk::as_TreeChunk(head())->list() == this,
-    "Wrong type of chunk?");
-  return TreeChunk::as_TreeChunk(head());
-}
-
-TreeChunk* TreeList::first_available() {
-  assert(head() != NULL, "The head of the list cannot be NULL");
-  FreeChunk* fc = head()->next();
-  TreeChunk* retTC;
-  if (fc == NULL) {
-    retTC = head_as_TreeChunk();
-  } else {
-    retTC = TreeChunk::as_TreeChunk(fc);
-  }
-  assert(retTC->list() == this, "Wrong type of chunk.");
-  return retTC;
-}
-
-// Returns the block with the largest heap address amongst
-// those in the list for this size; potentially slow and expensive,
-// use with caution!
-TreeChunk* TreeList::largest_address() {
-  assert(head() != NULL, "The head of the list cannot be NULL");
-  FreeChunk* fc = head()->next();
-  TreeChunk* retTC;
-  if (fc == NULL) {
-    retTC = head_as_TreeChunk();
-  } else {
-    // walk down the list and return the one with the highest
-    // heap address among chunks of this size.
-    FreeChunk* last = fc;
-    while (fc->next() != NULL) {
-      if ((HeapWord*)last < (HeapWord*)fc) {
-        last = fc;
-      }
-      fc = fc->next();
-    }
-    retTC = TreeChunk::as_TreeChunk(last);
-  }
-  assert(retTC->list() == this, "Wrong type of chunk.");
-  return retTC;
-}
-
-BinaryTreeDictionary::BinaryTreeDictionary(MemRegion mr, bool splay):
-  _splay(splay)
-{
-  assert(mr.byte_size() > MIN_TREE_CHUNK_SIZE, "minimum chunk size");
-
-  reset(mr);
-  assert(root()->left() == NULL, "reset check failed");
-  assert(root()->right() == NULL, "reset check failed");
-  assert(root()->head()->next() == NULL, "reset check failed");
-  assert(root()->head()->prev() == NULL, "reset check failed");
-  assert(totalSize() == root()->size(), "reset check failed");
-  assert(totalFreeBlocks() == 1, "reset check failed");
-}
-
-void BinaryTreeDictionary::inc_totalSize(size_t inc) {
-  _totalSize = _totalSize + inc;
-}
-
-void BinaryTreeDictionary::dec_totalSize(size_t dec) {
-  _totalSize = _totalSize - dec;
-}
-
-void BinaryTreeDictionary::reset(MemRegion mr) {
-  assert(mr.byte_size() > MIN_TREE_CHUNK_SIZE, "minimum chunk size");
-  set_root(TreeList::as_TreeList(mr.start(), mr.word_size()));
-  set_totalSize(mr.word_size());
-  set_totalFreeBlocks(1);
-}
-
-void BinaryTreeDictionary::reset(HeapWord* addr, size_t byte_size) {
-  MemRegion mr(addr, heap_word_size(byte_size));
-  reset(mr);
-}
-
-void BinaryTreeDictionary::reset() {
-  set_root(NULL);
-  set_totalSize(0);
-  set_totalFreeBlocks(0);
-}
-
-// Get a free block of size at least size from tree, or NULL.
-// If a splay step is requested, the removal algorithm (only) incorporates
-// a splay step as follows:
-// . the search proceeds down the tree looking for a possible
-//   match. At the (closest) matching location, an appropriate splay step is applied
-//   (zig, zig-zig or zig-zag). A chunk of the appropriate size is then returned
-//   if available, and if it's the last chunk, the node is deleted. A deleted
-//   node is replaced in place by its tree successor.
-TreeChunk*
-BinaryTreeDictionary::getChunkFromTree(size_t size, Dither dither, bool splay)
-{
-  TreeList *curTL, *prevTL;
-  TreeChunk* retTC = NULL;
-  assert(size >= MIN_TREE_CHUNK_SIZE, "minimum chunk size");
-  if (FLSVerifyDictionary) {
-    verifyTree();
-  }
-  // starting at the root, work downwards trying to find match.
-  // Remember the last node of size too great or too small.
-  for (prevTL = curTL = root(); curTL != NULL;) {
-    if (curTL->size() == size) {        // exact match
-      break;
-    }
-    prevTL = curTL;
-    if (curTL->size() < size) {        // proceed to right sub-tree
-      curTL = curTL->right();
-    } else {                           // proceed to left sub-tree
-      assert(curTL->size() > size, "size inconsistency");
-      curTL = curTL->left();
-    }
-  }
-  if (curTL == NULL) { // couldn't find exact match
-    // try and find the next larger size by walking back up the search path
-    for (curTL = prevTL; curTL != NULL;) {
-      if (curTL->size() >= size) break;
-      else curTL = curTL->parent();
-    }
-    assert(curTL == NULL || curTL->count() > 0,
-      "An empty list should not be in the tree");
-  }
-  if (curTL != NULL) {
-    assert(curTL->size() >= size, "size inconsistency");
-    if (UseCMSAdaptiveFreeLists) {
-
-      // A candidate chunk has been found.  If it is already under
-      // populated, get a chunk associated with the hint for this
-      // chunk.
-      if (curTL->surplus() <= 0) {
-        /* Use the hint to find a size with a surplus, and reset the hint. */
-        TreeList* hintTL = curTL;
-        while (hintTL->hint() != 0) {
-          assert(hintTL->hint() == 0 || hintTL->hint() > hintTL->size(),
-            "hint points in the wrong direction");
-          hintTL = findList(hintTL->hint());
-          assert(curTL != hintTL, "Infinite loop");
-          if (hintTL == NULL ||
-              hintTL == curTL /* Should not happen but protect against it */ ) {
-            // No useful hint.  Set the hint to NULL and go on.
-            curTL->set_hint(0);
-            break;
-          }
-          assert(hintTL->size() > size, "hint is inconsistent");
-          if (hintTL->surplus() > 0) {
-            // The hint led to a list that has a surplus.  Use it.
-            // Set the hint for the candidate to an overpopulated
-            // size.
-            curTL->set_hint(hintTL->size());
-            // Change the candidate.
-            curTL = hintTL;
-            break;
-          }
-          // The evm code reset the hint of the candidate
-          // at an interim point.  Why?  Seems like this leaves
-          // the hint pointing to a list that didn't work.
-          // curTL->set_hint(hintTL->size());
-        }
-      }
-    }
-    // don't waste time splaying if the chunk is a singleton
-    if (splay && curTL->head()->next() != NULL) {
-      semiSplayStep(curTL);
-    }
-    retTC = curTL->first_available();
-    assert((retTC != NULL) && (curTL->count() > 0),
-      "A list in the binary tree should not be NULL");
-    assert(retTC->size() >= size,
-      "A chunk of the wrong size was found");
-    removeChunkFromTree(retTC);
-    assert(retTC->isFree(), "Header is not marked correctly");
-  }
-
-  if (FLSVerifyDictionary) {
-    verify();
-  }
-  return retTC;
-}
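The search above, distilled and stripped of the hint and splay machinery: walk down looking for an exact size match while remembering the last node visited; on a miss, walk back up the search path to the closest node whose size covers the request. A self-contained sketch with a hypothetical `Node`:

```cpp
#include <cstddef>

struct Node { std::size_t size; Node *left, *right, *parent; };

// Best-fit lookup: exact match if present, else the next larger size.
Node* best_fit(Node* root, std::size_t want) {
  Node* prev = root;
  for (Node* cur = root; cur != nullptr; ) {
    if (cur->size == want) return cur;            // exact match
    prev = cur;
    cur = (cur->size < want) ? cur->right : cur->left;
  }
  for (Node* cur = prev; cur != nullptr; cur = cur->parent)
    if (cur->size >= want) return cur;            // next larger size
  return nullptr;                                 // nothing big enough
}
```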
-
-TreeList* BinaryTreeDictionary::findList(size_t size) const {
-  TreeList* curTL;
-  for (curTL = root(); curTL != NULL;) {
-    if (curTL->size() == size) {        // exact match
-      break;
-    }
-
-    if (curTL->size() < size) {        // proceed to right sub-tree
-      curTL = curTL->right();
-    } else {                           // proceed to left sub-tree
-      assert(curTL->size() > size, "size inconsistency");
-      curTL = curTL->left();
-    }
-  }
-  return curTL;
-}
-
-
-bool BinaryTreeDictionary::verifyChunkInFreeLists(FreeChunk* tc) const {
-  size_t size = tc->size();
-  TreeList* tl = findList(size);
-  if (tl == NULL) {
-    return false;
-  } else {
-    return tl->verifyChunkInFreeLists(tc);
-  }
-}
-
-FreeChunk* BinaryTreeDictionary::findLargestDict() const {
-  TreeList *curTL = root();
-  if (curTL != NULL) {
-    while(curTL->right() != NULL) curTL = curTL->right();
-    return curTL->largest_address();
-  } else {
-    return NULL;
-  }
-}
-
-// Remove the current chunk from the tree.  If it is not the last
-// chunk in a list on a tree node, just unlink it.
-// If it is the last chunk in the list (the next link is NULL),
-// remove the node and repair the tree.
-TreeChunk*
-BinaryTreeDictionary::removeChunkFromTree(TreeChunk* tc) {
-  assert(tc != NULL, "Should not call with a NULL chunk");
-  assert(tc->isFree(), "Header is not marked correctly");
-
-  TreeList *newTL, *parentTL;
-  TreeChunk* retTC;
-  TreeList* tl = tc->list();
-  debug_only(
-    bool removing_only_chunk = false;
-    if (tl == _root) {
-      if ((_root->left() == NULL) && (_root->right() == NULL)) {
-        if (_root->count() == 1) {
-          assert(_root->head() == tc, "Should only be this one chunk");
-          removing_only_chunk = true;
-        }
-      }
-    }
-  )
-  assert(tl != NULL, "List should be set");
-  assert(tl->parent() == NULL || tl == tl->parent()->left() ||
-         tl == tl->parent()->right(), "list is inconsistent");
-
-  bool complicatedSplice = false;
-
-  retTC = tc;
-  // Removing this chunk can have the side effect of changing the node
-  // (TreeList*) in the tree.  If the node is the root, update it.
-  TreeList* replacementTL = tl->removeChunkReplaceIfNeeded(tc);
-  assert(tc->isFree(), "Chunk should still be free");
-  assert(replacementTL->parent() == NULL ||
-         replacementTL == replacementTL->parent()->left() ||
-         replacementTL == replacementTL->parent()->right(),
-         "list is inconsistent");
-  if (tl == root()) {
-    assert(replacementTL->parent() == NULL, "Incorrectly replacing root");
-    set_root(replacementTL);
-  }
-  debug_only(
-    if (tl != replacementTL) {
-      assert(replacementTL->head() != NULL,
-        "If the tree list was replaced, it should not be a NULL list");
-      TreeList* rhl = replacementTL->head_as_TreeChunk()->list();
-      TreeList* rtl = TreeChunk::as_TreeChunk(replacementTL->tail())->list();
-      assert(rhl == replacementTL, "Broken head");
-      assert(rtl == replacementTL, "Broken tail");
-      assert(replacementTL->size() == tc->size(),  "Broken size");
-    }
-  )
-
-  // Does the tree need to be repaired?
-  if (replacementTL->count() == 0) {
-    assert(replacementTL->head() == NULL &&
-           replacementTL->tail() == NULL, "list count is incorrect");
-    // Find the replacement node for the (soon to be empty) node being removed.
-    // if we have a single (or no) child, splice child in our stead
-    if (replacementTL->left() == NULL) {
-      // left is NULL so pick right.  right may also be NULL.
-      newTL = replacementTL->right();
-      debug_only(replacementTL->clearRight();)
-    } else if (replacementTL->right() == NULL) {
-      // right is NULL
-      newTL = replacementTL->left();
-      debug_only(replacementTL->clearLeft();)
-    } else {  // we have both children, so, by patriarchal convention,
-              // my replacement is least node in right sub-tree
-      complicatedSplice = true;
-      newTL = removeTreeMinimum(replacementTL->right());
-      assert(newTL != NULL && newTL->left() == NULL &&
-             newTL->right() == NULL, "sub-tree minimum exists");
-    }
-    // newTL is the replacement for the (soon to be empty) node.
-    // newTL may be NULL.
-    // should verify; we just cleanly excised our replacement
-    if (FLSVerifyDictionary) {
-      verifyTree();
-    }
-    // first make newTL my parent's child
-    if ((parentTL = replacementTL->parent()) == NULL) {
-      // newTL should be root
-      assert(tl == root(), "Incorrectly replacing root");
-      set_root(newTL);
-      if (newTL != NULL) {
-        newTL->clearParent();
-      }
-    } else if (parentTL->right() == replacementTL) {
-      // replacementTL is a right child
-      parentTL->setRight(newTL);
-    } else {                                // replacementTL is a left child
-      assert(parentTL->left() == replacementTL, "should be left child");
-      parentTL->setLeft(newTL);
-    }
-    debug_only(replacementTL->clearParent();)
-    if (complicatedSplice) {  // we need newTL to get replacementTL's
-                              // two children
-      assert(newTL != NULL &&
-             newTL->left() == NULL && newTL->right() == NULL,
-            "newTL should not have encumbrances from the past");
-      // we'd like to assert as below:
-      // assert(replacementTL->left() != NULL && replacementTL->right() != NULL,
-      //       "else !complicatedSplice");
-      // ... however, the above assertion is too strong because we aren't
-      // guaranteed that replacementTL->right() is still NULL.
-      // Recall that we removed
-      // the right sub-tree minimum from replacementTL.
-      // That may well have been its right
-      // child! So we'll just assert half of the above:
-      assert(replacementTL->left() != NULL, "else !complicatedSplice");
-      newTL->setLeft(replacementTL->left());
-      newTL->setRight(replacementTL->right());
-      debug_only(
-        replacementTL->clearRight();
-        replacementTL->clearLeft();
-      )
-    }
-    assert(replacementTL->right() == NULL &&
-           replacementTL->left() == NULL &&
-           replacementTL->parent() == NULL,
-        "delete without encumbrances");
-  }
-
-  assert(totalSize() >= retTC->size(), "Incorrect total size");
-  dec_totalSize(retTC->size());     // size book-keeping
-  assert(totalFreeBlocks() > 0, "Incorrect total count");
-  set_totalFreeBlocks(totalFreeBlocks() - 1);
-
-  assert(retTC != NULL, "null chunk?");
-  assert(retTC->prev() == NULL && retTC->next() == NULL,
-         "should return without encumbrances");
-  if (FLSVerifyDictionary) {
-    verifyTree();
-  }
-  assert(!removing_only_chunk || _root == NULL, "root should be NULL");
-  return TreeChunk::as_TreeChunk(retTC);
-}
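The repair step above, in isolation: when the removed node has two children, its replacement is the minimum of the right subtree, which by construction has no left child, so splicing it out is a single relink. A sketch with a hypothetical child-pointer-only `Node`:

```cpp
#include <cstddef>

struct Node { std::size_t size; Node *left, *right; };

// Detach and return the minimum node of a non-null subtree. Its left child
// is null by construction, so at most a right child is spliced into place.
Node* remove_min(Node*& subtree) {
  Node** link = &subtree;
  while ((*link)->left != nullptr)
    link = &(*link)->left;       // descend to the leftmost node
  Node* min = *link;
  *link = min->right;            // splice out
  min->right = nullptr;
  return min;
}
```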
-
-// Remove the leftmost node (lm) in the tree and return it.
-// If lm has a right child, link it to the left node of
-// the parent of lm.
-TreeList* BinaryTreeDictionary::removeTreeMinimum(TreeList* tl) {
-  assert(tl != NULL && tl->parent() != NULL, "really need a proper sub-tree");
-  // locate the subtree minimum by walking down left branches
-  TreeList* curTL = tl;
-  for (; curTL->left() != NULL; curTL = curTL->left());
-  // obviously curTL now has at most one child, a right child
-  if (curTL != root()) {  // Should this test just be removed?
-    TreeList* parentTL = curTL->parent();
-    if (parentTL->left() == curTL) { // curTL is a left child
-      parentTL->setLeft(curTL->right());
-    } else {
-      // If the list tl has no left child, then curTL may be
-      // the right child of parentTL.
-      assert(parentTL->right() == curTL, "should be a right child");
-      parentTL->setRight(curTL->right());
-    }
-  } else {
-    // The only use of this method would not pass the root of the
-    // tree (as indicated by the assertion above that the tree list
-    // has a parent) but the specification does not explicitly exclude the
-    // passing of the root so accommodate it.
-    set_root(NULL);
-  }
-  debug_only(
-    curTL->clearParent();  // Test if this needs to be cleared
-    curTL->clearRight();    // recall, above, left child is already null
-  )
-  // we just excised a (non-root) node, we should still verify all tree invariants
-  if (FLSVerifyDictionary) {
-    verifyTree();
-  }
-  return curTL;
-}
-
-// Based on a simplification of the algorithm by Sleator and Tarjan (JACM 1985).
-// The simplifications are the following:
-// . we splay only when we delete (not when we insert)
-// . we apply a single splay step per deletion/access
-// By doing such partial splaying, we reduce the amount of restructuring,
-// while getting a reasonably efficient search tree (we think).
-// [Measurements will be needed to (in)validate this expectation.]
-
-void BinaryTreeDictionary::semiSplayStep(TreeList* tc) {
-  // apply a semi-splay step at the given node:
-  // . if root, nothing needs to be done
-  // . if child of root, splay once
-  // . else zig-zig or zig-zag depending on path from grandparent
-  if (root() == tc) return;
-  warning("*** Splaying not yet implemented; "
-          "tree operations may be inefficient ***");
-}
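`semiSplayStep()` above only warns; as a sketch, this is the single "zig" rotation a splay step would apply when the accessed node is a child of the subtree root. The caller would re-link the returned node into the grandparent (or make it the tree root). Purely illustrative, under the same hypothetical `Node` as above:

```cpp
#include <cstddef>

struct Node { std::size_t size; Node *left, *right, *parent; };

// Rotate `child` above `root`; returns the new subtree root.
Node* zig(Node* root, Node* child) {
  if (child == root->left) {                       // right-rotate
    root->left = child->right;
    if (child->right != nullptr) child->right->parent = root;
    child->right = root;
  } else {                                         // left-rotate
    root->right = child->left;
    if (child->left != nullptr) child->left->parent = root;
    child->left = root;
  }
  child->parent = root->parent;                    // caller fixes grandparent
  root->parent  = child;
  return child;
}
```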
-
-void BinaryTreeDictionary::insertChunkInTree(FreeChunk* fc) {
-  TreeList *curTL, *prevTL;
-  size_t size = fc->size();
-
-  assert(size >= MIN_TREE_CHUNK_SIZE, "too small to be a TreeList");
-  if (FLSVerifyDictionary) {
-    verifyTree();
-  }
-  // XXX: do I need to clear the FreeChunk fields? Let me do it just in case.
-  // Revisit this later
-
-  fc->clearNext();
-  fc->linkPrev(NULL);
-
-  // work down from the _root, looking for insertion point
-  for (prevTL = curTL = root(); curTL != NULL;) {
-    if (curTL->size() == size)  // exact match
-      break;
-    prevTL = curTL;
-    if (curTL->size() > size) { // follow left branch
-      curTL = curTL->left();
-    } else {                    // follow right branch
-      assert(curTL->size() < size, "size inconsistency");
-      curTL = curTL->right();
-    }
-  }
-  TreeChunk* tc = TreeChunk::as_TreeChunk(fc);
-  // This chunk is being returned to the binary tree.  Its embedded
-  // TreeList should be unused at this point.
-  tc->initialize();
-  if (curTL != NULL) {          // exact match
-    tc->set_list(curTL);
-    curTL->returnChunkAtTail(tc);
-  } else {                     // need a new node in tree
-    tc->clearNext();
-    tc->linkPrev(NULL);
-    TreeList* newTL = TreeList::as_TreeList(tc);
-    assert(((TreeChunk*)tc)->list() == newTL,
-      "List was not initialized correctly");
-    if (prevTL == NULL) {      // we are the only tree node
-      assert(root() == NULL, "control point invariant");
-      set_root(newTL);
-    } else {                   // insert under prevTL ...
-      if (prevTL->size() < size) {   // am right child
-        assert(prevTL->right() == NULL, "control point invariant");
-        prevTL->setRight(newTL);
-      } else {                       // am left child
-        assert(prevTL->size() > size && prevTL->left() == NULL, "cpt pt inv");
-        prevTL->setLeft(newTL);
-      }
-    }
-  }
-  assert(tc->list() != NULL, "Tree list should be set");
-
-  inc_totalSize(size);
-  // Method 'totalSizeInTree' walks through every block in the
-  // tree, so it can cause a significant performance loss if there
-  // are many blocks in the tree.
-  assert(!FLSVerifyDictionary || totalSizeInTree(root()) == totalSize(), "_totalSize inconsistency");
-  set_totalFreeBlocks(totalFreeBlocks() + 1);
-  if (FLSVerifyDictionary) {
-    verifyTree();
-  }
-}
-
-size_t BinaryTreeDictionary::maxChunkSize() const {
-  verify_par_locked();
-  TreeList* tc = root();
-  if (tc == NULL) return 0;
-  for (; tc->right() != NULL; tc = tc->right());
-  return tc->size();
-}
-
-size_t BinaryTreeDictionary::totalListLength(TreeList* tl) const {
-  size_t res;
-  res = tl->count();
-#ifdef ASSERT
-  size_t cnt;
-  FreeChunk* tc = tl->head();
-  for (cnt = 0; tc != NULL; tc = tc->next(), cnt++);
-  assert(res == cnt, "The count is not being maintained correctly");
-#endif
-  return res;
-}
-
-size_t BinaryTreeDictionary::totalSizeInTree(TreeList* tl) const {
-  if (tl == NULL)
-    return 0;
-  return (tl->size() * totalListLength(tl)) +
-         totalSizeInTree(tl->left())    +
-         totalSizeInTree(tl->right());
-}
-
-double BinaryTreeDictionary::sum_of_squared_block_sizes(TreeList* const tl) const {
-  if (tl == NULL) {
-    return 0.0;
-  }
-  double size = (double)(tl->size());
-  double curr = size * size * totalListLength(tl);
-  curr += sum_of_squared_block_sizes(tl->left());
-  curr += sum_of_squared_block_sizes(tl->right());
-  return curr;
-}
-
-size_t BinaryTreeDictionary::totalFreeBlocksInTree(TreeList* tl) const {
-  if (tl == NULL)
-    return 0;
-  return totalListLength(tl) +
-         totalFreeBlocksInTree(tl->left()) +
-         totalFreeBlocksInTree(tl->right());
-}
-
-size_t BinaryTreeDictionary::numFreeBlocks() const {
-  assert(totalFreeBlocksInTree(root()) == totalFreeBlocks(),
-         "_totalFreeBlocks inconsistency");
-  return totalFreeBlocks();
-}
-
-size_t BinaryTreeDictionary::treeHeightHelper(TreeList* tl) const {
-  if (tl == NULL)
-    return 0;
-  return 1 + MAX2(treeHeightHelper(tl->left()),
-                  treeHeightHelper(tl->right()));
-}
-
-size_t BinaryTreeDictionary::treeHeight() const {
-  return treeHeightHelper(root());
-}
-
-size_t BinaryTreeDictionary::totalNodesHelper(TreeList* tl) const {
-  if (tl == NULL) {
-    return 0;
-  }
-  return 1 + totalNodesHelper(tl->left()) +
-    totalNodesHelper(tl->right());
-}
-
-size_t BinaryTreeDictionary::totalNodesInTree(TreeList* tl) const {
-  return totalNodesHelper(root());
-}
-
-void BinaryTreeDictionary::dictCensusUpdate(size_t size, bool split, bool birth){
-  TreeList* nd = findList(size);
-  if (nd) {
-    if (split) {
-      if (birth) {
-        nd->increment_splitBirths();
-        nd->increment_surplus();
-      }  else {
-        nd->increment_splitDeaths();
-        nd->decrement_surplus();
-      }
-    } else {
-      if (birth) {
-        nd->increment_coalBirths();
-        nd->increment_surplus();
-      } else {
-        nd->increment_coalDeaths();
-        nd->decrement_surplus();
-      }
-    }
-  }
-  // A list for this size may not be found (nd == 0) if
-  //   This is a death where the appropriate list is now
-  //     empty and has been removed from the tree.
-  //   This is a birth associated with a LinAB.  The chunk
-  //     for the LinAB is not in the dictionary.
-}
-
-bool BinaryTreeDictionary::coalDictOverPopulated(size_t size) {
-  if (FLSAlwaysCoalesceLarge) return true;
-
-  TreeList* list_of_size = findList(size);
-  // None of requested size implies overpopulated.
-  return list_of_size == NULL || list_of_size->coalDesired() <= 0 ||
-         list_of_size->count() > list_of_size->coalDesired();
-}
-
-// Closures for walking the binary tree.
-//   do_list() walks the free list in a node applying the closure
-//     to each free chunk in the list
-//   do_tree() walks the nodes in the binary tree applying do_list()
-//     to each list at each node.
-
-class TreeCensusClosure : public StackObj {
- protected:
-  virtual void do_list(FreeList* fl) = 0;
- public:
-  virtual void do_tree(TreeList* tl) = 0;
-};
-
-class AscendTreeCensusClosure : public TreeCensusClosure {
- public:
-  void do_tree(TreeList* tl) {
-    if (tl != NULL) {
-      do_tree(tl->left());
-      do_list(tl);
-      do_tree(tl->right());
-    }
-  }
-};
-
-class DescendTreeCensusClosure : public TreeCensusClosure {
- public:
-  void do_tree(TreeList* tl) {
-    if (tl != NULL) {
-      do_tree(tl->right());
-      do_list(tl);
-      do_tree(tl->left());
-    }
-  }
-};
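The closure pattern above, reduced to its essentials: the base class fixes the traversal order, subclasses supply the per-node work, so one walker serves census, printing, and verification alike. A minimal sketch with hypothetical names:

```cpp
#include <cstddef>
#include <cstdio>

struct Node { std::size_t size; Node *left, *right; };

// Traversal order lives in do_tree(); the work lives in do_node().
struct AscendClosure {
  virtual void do_node(Node* n) = 0;
  virtual ~AscendClosure() {}
  void do_tree(Node* n) {           // in-order: smallest size first
    if (n == nullptr) return;
    do_tree(n->left);
    do_node(n);
    do_tree(n->right);
  }
};

struct PrintSizes : AscendClosure {
  void do_node(Node* n) { std::printf("%zu\n", n->size); }
};
```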
-
-// For each list in the tree, calculate the desired, desired
-// coalesce, count before sweep, and surplus before sweep.
-class BeginSweepClosure : public AscendTreeCensusClosure {
-  double _percentage;
-  float _inter_sweep_current;
-  float _inter_sweep_estimate;
-  float _intra_sweep_estimate;
-
- public:
-  BeginSweepClosure(double p, float inter_sweep_current,
-                              float inter_sweep_estimate,
-                              float intra_sweep_estimate) :
-   _percentage(p),
-   _inter_sweep_current(inter_sweep_current),
-   _inter_sweep_estimate(inter_sweep_estimate),
-   _intra_sweep_estimate(intra_sweep_estimate) { }
-
-  void do_list(FreeList* fl) {
-    double coalSurplusPercent = _percentage;
-    fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate, _intra_sweep_estimate);
-    fl->set_coalDesired((ssize_t)((double)fl->desired() * coalSurplusPercent));
-    fl->set_beforeSweep(fl->count());
-    fl->set_bfrSurp(fl->surplus());
-  }
-};
-
-// Used to search the tree until a condition is met.
-// Similar to TreeCensusClosure but searches the
-// tree and returns promptly when found.
-
-class TreeSearchClosure : public StackObj {
- protected:
-  virtual bool do_list(FreeList* fl) = 0;
- public:
-  virtual bool do_tree(TreeList* tl) = 0;
-};
-
-#if 0 //  Don't need this yet but here for symmetry.
-class AscendTreeSearchClosure : public TreeSearchClosure {
- public:
-  bool do_tree(TreeList* tl) {
-    if (tl != NULL) {
-      if (do_tree(tl->left())) return true;
-      if (do_list(tl)) return true;
-      if (do_tree(tl->right())) return true;
-    }
-    return false;
-  }
-};
-#endif
-
-class DescendTreeSearchClosure : public TreeSearchClosure {
- public:
-  bool do_tree(TreeList* tl) {
-    if (tl != NULL) {
-      if (do_tree(tl->right())) return true;
-      if (do_list(tl)) return true;
-      if (do_tree(tl->left())) return true;
-    }
-    return false;
-  }
-};
-
-// Searches the tree for a chunk that ends at the
-// specified address.
-class EndTreeSearchClosure : public DescendTreeSearchClosure {
-  HeapWord* _target;
-  FreeChunk* _found;
-
- public:
-  EndTreeSearchClosure(HeapWord* target) : _target(target), _found(NULL) {}
-  bool do_list(FreeList* fl) {
-    FreeChunk* item = fl->head();
-    while (item != NULL) {
-      if (item->end() == _target) {
-        _found = item;
-        return true;
-      }
-      item = item->next();
-    }
-    return false;
-  }
-  FreeChunk* found() { return _found; }
-};
-
-FreeChunk* BinaryTreeDictionary::find_chunk_ends_at(HeapWord* target) const {
-  EndTreeSearchClosure etsc(target);
-  bool found_target = etsc.do_tree(root());
-  assert(found_target || etsc.found() == NULL, "Consistency check");
-  assert(!found_target || etsc.found() != NULL, "Consistency check");
-  return etsc.found();
-}
-
-void BinaryTreeDictionary::beginSweepDictCensus(double coalSurplusPercent,
-  float inter_sweep_current, float inter_sweep_estimate, float intra_sweep_estimate) {
-  BeginSweepClosure bsc(coalSurplusPercent, inter_sweep_current,
-                                            inter_sweep_estimate,
-                                            intra_sweep_estimate);
-  bsc.do_tree(root());
-}
-
-// Closures and methods for calculating total bytes returned to the
-// free lists in the tree.
-NOT_PRODUCT(
-  class InitializeDictReturnedBytesClosure : public AscendTreeCensusClosure {
-   public:
-    void do_list(FreeList* fl) {
-      fl->set_returnedBytes(0);
-    }
-  };
-
-  void BinaryTreeDictionary::initializeDictReturnedBytes() {
-    InitializeDictReturnedBytesClosure idrb;
-    idrb.do_tree(root());
-  }
-
-  class ReturnedBytesClosure : public AscendTreeCensusClosure {
-    size_t _dictReturnedBytes;
-   public:
-    ReturnedBytesClosure() { _dictReturnedBytes = 0; }
-    void do_list(FreeList* fl) {
-      _dictReturnedBytes += fl->returnedBytes();
-    }
-    size_t dictReturnedBytes() { return _dictReturnedBytes; }
-  };
-
-  size_t BinaryTreeDictionary::sumDictReturnedBytes() {
-    ReturnedBytesClosure rbc;
-    rbc.do_tree(root());
-
-    return rbc.dictReturnedBytes();
-  }
-
-  // Count the number of entries in the tree.
-  class treeCountClosure : public DescendTreeCensusClosure {
-   public:
-    uint count;
-    treeCountClosure(uint c) { count = c; }
-    void do_list(FreeList* fl) {
-      count++;
-    }
-  };
-
-  size_t BinaryTreeDictionary::totalCount() {
-    treeCountClosure ctc(0);
-    ctc.do_tree(root());
-    return ctc.count;
-  }
-)
-
-// Calculate surpluses for the lists in the tree.
-class setTreeSurplusClosure : public AscendTreeCensusClosure {
-  double percentage;
- public:
-  setTreeSurplusClosure(double v) { percentage = v; }
-  void do_list(FreeList* fl) {
-    double splitSurplusPercent = percentage;
-    fl->set_surplus(fl->count() -
-                   (ssize_t)((double)fl->desired() * splitSurplusPercent));
-  }
-};
-
-void BinaryTreeDictionary::setTreeSurplus(double splitSurplusPercent) {
-  setTreeSurplusClosure sts(splitSurplusPercent);
-  sts.do_tree(root());
-}
-
-// Set hints for the lists in the tree.
-class setTreeHintsClosure : public DescendTreeCensusClosure {
-  size_t hint;
- public:
-  setTreeHintsClosure(size_t v) { hint = v; }
-  void do_list(FreeList* fl) {
-    fl->set_hint(hint);
-    assert(fl->hint() == 0 || fl->hint() > fl->size(),
-      "Current hint is inconsistent");
-    if (fl->surplus() > 0) {
-      hint = fl->size();
-    }
-  }
-};
-
-void BinaryTreeDictionary::setTreeHints(void) {
-  setTreeHintsClosure sth(0);
-  sth.do_tree(root());
-}
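The hint pass above, flattened: visiting sizes from largest to smallest (the descending traversal), each list records the nearest larger size that had a surplus, so an underpopulated lookup later knows where to steal from. A sketch over a hypothetical pre-sorted vector instead of the tree:

```cpp
#include <cstddef>
#include <vector>

struct SizeList { std::size_t size; long surplus; std::size_t hint; };

// Input must be sorted by size, largest first; 0 means "no hint".
void set_hints(std::vector<SizeList>& descending_by_size) {
  std::size_t hint = 0;
  for (SizeList& l : descending_by_size) {
    l.hint = hint;                         // nearest larger surplus size
    if (l.surplus > 0) hint = l.size;      // smaller sizes will point here
  }
}
```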
-
-// Save count before previous sweep and splits and coalesces.
-class clearTreeCensusClosure : public AscendTreeCensusClosure {
-  void do_list(FreeList* fl) {
-    fl->set_prevSweep(fl->count());
-    fl->set_coalBirths(0);
-    fl->set_coalDeaths(0);
-    fl->set_splitBirths(0);
-    fl->set_splitDeaths(0);
-  }
-};
-
-void BinaryTreeDictionary::clearTreeCensus(void) {
-  clearTreeCensusClosure ctc;
-  ctc.do_tree(root());
-}
-
-// Do reporting and post sweep clean up.
-void BinaryTreeDictionary::endSweepDictCensus(double splitSurplusPercent) {
-  // Does walking the tree 3 times hurt?
-  setTreeSurplus(splitSurplusPercent);
-  setTreeHints();
-  if (PrintGC && Verbose) {
-    reportStatistics();
-  }
-  clearTreeCensus();
-}
-
-// Print summary statistics
-void BinaryTreeDictionary::reportStatistics() const {
-  verify_par_locked();
-  gclog_or_tty->print("Statistics for BinaryTreeDictionary:\n"
-         "------------------------------------\n");
-  size_t totalSize = totalChunkSize(debug_only(NULL));
-  size_t    freeBlocks = numFreeBlocks();
-  gclog_or_tty->print("Total Free Space: %d\n", totalSize);
-  gclog_or_tty->print("Max   Chunk Size: %d\n", maxChunkSize());
-  gclog_or_tty->print("Number of Blocks: %d\n", freeBlocks);
-  if (freeBlocks > 0) {
-    gclog_or_tty->print("Av.  Block  Size: %d\n", totalSize/freeBlocks);
-  }
-  gclog_or_tty->print("Tree      Height: %d\n", treeHeight());
-}
-
-// Print census information - counts, births, deaths, etc.
-// for each list in the tree.  Also print some summary
-// information.
-class PrintTreeCensusClosure : public AscendTreeCensusClosure {
-  int _print_line;
-  size_t _totalFree;
-  FreeList _total;
-
- public:
-  PrintTreeCensusClosure() {
-    _print_line = 0;
-    _totalFree = 0;
-  }
-  FreeList* total() { return &_total; }
-  size_t totalFree() { return _totalFree; }
-  void do_list(FreeList* fl) {
-    if (++_print_line >= 40) {
-      FreeList::print_labels_on(gclog_or_tty, "size");
-      _print_line = 0;
-    }
-    fl->print_on(gclog_or_tty);
-    _totalFree +=            fl->count()            * fl->size()        ;
-    total()->set_count(      total()->count()       + fl->count()      );
-    total()->set_bfrSurp(    total()->bfrSurp()     + fl->bfrSurp()    );
-    total()->set_surplus(    total()->surplus()     + fl->surplus()    );
-    total()->set_desired(    total()->desired()     + fl->desired()    );
-    total()->set_prevSweep(  total()->prevSweep()   + fl->prevSweep()  );
-    total()->set_beforeSweep(total()->beforeSweep() + fl->beforeSweep());
-    total()->set_coalBirths( total()->coalBirths()  + fl->coalBirths() );
-    total()->set_coalDeaths( total()->coalDeaths()  + fl->coalDeaths() );
-    total()->set_splitBirths(total()->splitBirths() + fl->splitBirths());
-    total()->set_splitDeaths(total()->splitDeaths() + fl->splitDeaths());
-  }
-};
-
-void BinaryTreeDictionary::printDictCensus(void) const {
-
-  gclog_or_tty->print("\nBinaryTree\n");
-  FreeList::print_labels_on(gclog_or_tty, "size");
-  PrintTreeCensusClosure ptc;
-  ptc.do_tree(root());
-
-  FreeList* total = ptc.total();
-  FreeList::print_labels_on(gclog_or_tty, " ");
-  total->print_on(gclog_or_tty, "TOTAL\t");
-  gclog_or_tty->print(
-              "totalFree(words): " SIZE_FORMAT_W(16)
-              " growth: %8.5f  deficit: %8.5f\n",
-              ptc.totalFree(),
-              (double)(total->splitBirths() + total->coalBirths()
-                     - total->splitDeaths() - total->coalDeaths())
-              /(total->prevSweep() != 0 ? (double)total->prevSweep() : 1.0),
-             (double)(total->desired() - total->count())
-             /(total->desired() != 0 ? (double)total->desired() : 1.0));
-}
-
-class PrintFreeListsClosure : public AscendTreeCensusClosure {
-  outputStream* _st;
-  int _print_line;
-
- public:
-  PrintFreeListsClosure(outputStream* st) {
-    _st = st;
-    _print_line = 0;
-  }
-  void do_list(FreeList* fl) {
-    if (++_print_line >= 40) {
-      FreeList::print_labels_on(_st, "size");
-      _print_line = 0;
-    }
-    fl->print_on(_st);
-    size_t sz = fl->size();
-    for (FreeChunk* fc = fl->head(); fc != NULL;
-         fc = fc->next()) {
-      _st->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ")  %s",
-                    fc, (HeapWord*)fc + sz,
-                    fc->cantCoalesce() ? "\t CC" : "");
-    }
-  }
-};
-
-void BinaryTreeDictionary::print_free_lists(outputStream* st) const {
-
-  FreeList::print_labels_on(st, "size");
-  PrintFreeListsClosure pflc(st);
-  pflc.do_tree(root());
-}
-
-// Verify the following tree invariants:
-// . _root has no parent
-// . parent and child point to each other
-// . each node's key correctly related to that of its child(ren)
-void BinaryTreeDictionary::verifyTree() const {
-  guarantee(root() == NULL || totalFreeBlocks() == 0 ||
-    totalSize() != 0, "_totalSize shouldn't be 0?");
-  guarantee(root() == NULL || root()->parent() == NULL, "_root shouldn't have parent");
-  verifyTreeHelper(root());
-}
-
-size_t BinaryTreeDictionary::verifyPrevFreePtrs(TreeList* tl) {
-  size_t ct = 0;
-  for (FreeChunk* curFC = tl->head(); curFC != NULL; curFC = curFC->next()) {
-    ct++;
-    assert(curFC->prev() == NULL || curFC->prev()->isFree(),
-      "Chunk should be free");
-  }
-  return ct;
-}
-
-// Note: this helper is recursive rather than iterative, so use with
-// caution on very deep trees and watch out for stack overflow errors.
-// In general, to be used only for debugging.
-void BinaryTreeDictionary::verifyTreeHelper(TreeList* tl) const {
-  if (tl == NULL)
-    return;
-  guarantee(tl->size() != 0, "A list must have a size");
-  guarantee(tl->left()  == NULL || tl->left()->parent()  == tl,
-         "parent<-/->left");
-  guarantee(tl->right() == NULL || tl->right()->parent() == tl,
-         "parent<-/->right");
-  guarantee(tl->left() == NULL  || tl->left()->size()    <  tl->size(),
-         "parent !> left");
-  guarantee(tl->right() == NULL || tl->right()->size()   >  tl->size(),
-         "parent !< right");
-  guarantee(tl->head() == NULL || tl->head()->isFree(), "!Free");
-  guarantee(tl->head() == NULL || tl->head_as_TreeChunk()->list() == tl,
-    "list inconsistency");
-  guarantee(tl->count() > 0 || (tl->head() == NULL && tl->tail() == NULL),
-    "list count is inconsistent");
-  guarantee(tl->count() > 1 || tl->head() == tl->tail(),
-    "list is incorrectly constructed");
-  size_t count = verifyPrevFreePtrs(tl);
-  guarantee(count == (size_t)tl->count(), "Node count is incorrect");
-  if (tl->head() != NULL) {
-    tl->head_as_TreeChunk()->verifyTreeChunkList();
-  }
-  verifyTreeHelper(tl->left());
-  verifyTreeHelper(tl->right());
-}
-
-void BinaryTreeDictionary::verify() const {
-  verifyTree();
-  guarantee(totalSize() == totalSizeInTree(root()), "Total Size inconsistency");
-}
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,296 +0,0 @@
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_BINARYTREEDICTIONARY_HPP
-#define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_BINARYTREEDICTIONARY_HPP
-
-#include "gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp"
-#include "gc_implementation/concurrentMarkSweep/freeList.hpp"
-
-/*
- * A binary tree based search structure for free blocks.
- * This is currently used in the Concurrent Mark&Sweep implementation.
- */
-
-// A TreeList is a FreeList which can be used to maintain a
-// binary tree of free lists.
-
-class TreeChunk;
-class BinaryTreeDictionary;
-class AscendTreeCensusClosure;
-class DescendTreeCensusClosure;
-class DescendTreeSearchClosure;
-
-class TreeList: public FreeList {
-  friend class TreeChunk;
-  friend class BinaryTreeDictionary;
-  friend class AscendTreeCensusClosure;
-  friend class DescendTreeCensusClosure;
-  friend class DescendTreeSearchClosure;
-
- protected:
-  TreeList* parent() const { return _parent; }
-  TreeList* left()   const { return _left;   }
-  TreeList* right()  const { return _right;  }
-
-  // Accessors for links in tree.
-
-  void setLeft(TreeList* tl) {
-    _left   = tl;
-    if (tl != NULL)
-      tl->setParent(this);
-  }
-  void setRight(TreeList* tl) {
-    _right  = tl;
-    if (tl != NULL)
-      tl->setParent(this);
-  }
-  void setParent(TreeList* tl)  { _parent = tl;   }
-
-  void clearLeft()               { _left = NULL;   }
-  void clearRight()              { _right = NULL;  }
-  void clearParent()             { _parent = NULL; }
-  void initialize()              { clearLeft(); clearRight(); clearParent(); }
-
-  // For constructing a TreeList from a Tree chunk or
-  // address and size.
-  static TreeList* as_TreeList(TreeChunk* tc);
-  static TreeList* as_TreeList(HeapWord* addr, size_t size);
-
-  // Returns the head of the free list as a pointer to a TreeChunk.
-  TreeChunk* head_as_TreeChunk();
-
-  // Returns the first available chunk in the free list as a pointer
-  // to a TreeChunk.
-  TreeChunk* first_available();
-
-  // Returns the block with the largest heap address amongst
-  // those in the list for this size; potentially slow and expensive,
-  // use with caution!
-  TreeChunk* largest_address();
-
-  // removeChunkReplaceIfNeeded() removes the given "tc" from the TreeList.
-  // If "tc" is the first chunk in the list, it is also the
-  // TreeList that is the node in the tree.  removeChunkReplaceIfNeeded()
-  // returns the possibly replaced TreeList* for the node in
-  // the tree.  It also updates the parent of the original
-  // node to point to the new node.
-  TreeList* removeChunkReplaceIfNeeded(TreeChunk* tc);
-  // See FreeList.
-  void returnChunkAtHead(TreeChunk* tc);
-  void returnChunkAtTail(TreeChunk* tc);
-};
-
-// A TreeChunk is a subclass of a FreeChunk that additionally
-// maintains a pointer to the free list on which it is currently
-// linked.
-// A TreeChunk is also used as a node in the binary tree.  This
-// allows the binary tree to be maintained without any additional
-// storage (the free chunks are used).  In a binary tree the first
-// chunk in the free list is also the tree node.  Note that the
-// TreeChunk has an embedded TreeList for this purpose.  Because
-// the first chunk in the list is distinguished in this fashion
-// (also is the node in the tree), it is the last chunk to be found
-// on the free list for a node in the tree and is only removed if
-// it is the last chunk on the free list.
-
-class TreeChunk : public FreeChunk {
-  friend class TreeList;
-  TreeList* _list;
-  TreeList _embedded_list;  // if non-null, this chunk is on _list
- protected:
-  TreeList* embedded_list() const { return (TreeList*) &_embedded_list; }
-  void set_embedded_list(TreeList* v) { _embedded_list = *v; }
- public:
-  TreeList* list() { return _list; }
-  void set_list(TreeList* v) { _list = v; }
-  static TreeChunk* as_TreeChunk(FreeChunk* fc);
-  // Initialize fields in a TreeChunk that should be
-  // initialized when the TreeChunk is being added to
-  // a free list in the tree.
-  void initialize() { embedded_list()->initialize(); }
-
-  // debugging
-  void verifyTreeChunkList() const;
-};
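A toy model of the embedded-list trick described above: the first free chunk of a given size doubles as the list header (and tree node), so the dictionary needs no storage beyond the free chunks themselves. Names here are hypothetical simplifications of TreeChunk/TreeList:

```cpp
#include <cassert>
#include <cstddef>

struct Chunk;

struct SizeList {
  std::size_t size;
  Chunk*      head;
  int         count;
};

struct Chunk {
  std::size_t size;
  Chunk*      next;
  SizeList    embedded;  // meaningful only for the first chunk in the list
};

// Turn a raw free chunk into a list whose header lives inside the chunk.
SizeList* as_size_list(Chunk* c) {
  SizeList* l = &c->embedded;
  l->size  = c->size;
  l->head  = c;
  l->count = 1;
  return l;
}

int main() {
  Chunk c = {64, nullptr, {0, nullptr, 0}};
  SizeList* l = as_size_list(&c);
  assert(l->head == &c && l->size == 64);
  return 0;
}
```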
-
-const size_t MIN_TREE_CHUNK_SIZE  = sizeof(TreeChunk)/HeapWordSize;
-
-class BinaryTreeDictionary: public FreeBlockDictionary {
-  friend class VMStructs;
-  bool       _splay;
-  size_t     _totalSize;
-  size_t     _totalFreeBlocks;
-  TreeList* _root;
-
-  // private accessors
-  bool splay() const { return _splay; }
-  void set_splay(bool v) { _splay = v; }
-  size_t totalSize() const { return _totalSize; }
-  void set_totalSize(size_t v) { _totalSize = v; }
-  virtual void inc_totalSize(size_t v);
-  virtual void dec_totalSize(size_t v);
-  size_t totalFreeBlocks() const { return _totalFreeBlocks; }
-  void set_totalFreeBlocks(size_t v) { _totalFreeBlocks = v; }
-  TreeList* root() const { return _root; }
-  void set_root(TreeList* v) { _root = v; }
-
-  // Remove a chunk of size "size" or larger from the tree and
-  // return it.  If the chunk
-  // is the last chunk of that size, remove the node for that size
-  // from the tree.
-  TreeChunk* getChunkFromTree(size_t size, Dither dither, bool splay);
-  // Return a list of the specified size or NULL from the tree.
-  // The list is not removed from the tree.
-  TreeList* findList (size_t size) const;
-  // Remove this chunk from the tree.  If the removal results
-  // in an empty list in the tree, remove the empty list.
-  TreeChunk* removeChunkFromTree(TreeChunk* tc);
-  // Remove the node in the trees starting at tl that has the
-  // minimum value and return it.  Repair the tree as needed.
-  TreeList* removeTreeMinimum(TreeList* tl);
-  void       semiSplayStep(TreeList* tl);
-  // Add this free chunk to the tree.
-  void       insertChunkInTree(FreeChunk* freeChunk);
- public:
-  void       verifyTree() const;
-  // verify that the given chunk is in the tree.
-  bool       verifyChunkInFreeLists(FreeChunk* tc) const;
- private:
-  void          verifyTreeHelper(TreeList* tl) const;
-  static size_t verifyPrevFreePtrs(TreeList* tl);
-
-  // Returns the total number of chunks in the list.
-  size_t     totalListLength(TreeList* tl) const;
-  // Returns the total number of words in the chunks in the tree
-  // starting at "tl".
-  size_t     totalSizeInTree(TreeList* tl) const;
-  // Returns the sum of the square of the size of each block
-  // in the tree starting at "tl".
-  double     sum_of_squared_block_sizes(TreeList* const tl) const;
-  // Returns the total number of free blocks in the tree starting
-  // at "tl".
-  size_t     totalFreeBlocksInTree(TreeList* tl) const;
-  size_t     numFreeBlocks() const;
-  size_t     treeHeight() const;
-  size_t     treeHeightHelper(TreeList* tl) const;
-  size_t     totalNodesInTree(TreeList* tl) const;
-  size_t     totalNodesHelper(TreeList* tl) const;
-
- public:
-  // Constructor
-  BinaryTreeDictionary(MemRegion mr, bool splay = false);
-
-  // Reset the dictionary to the initial conditions with
-  // a single free chunk.
-  void       reset(MemRegion mr);
-  void       reset(HeapWord* addr, size_t size);
-  // Reset the dictionary to be empty.
-  void       reset();
-
-  // Return a chunk of size "size" or greater from
-  // the tree.
-  // We may want a better dynamic splay strategy in the future.
-  FreeChunk* getChunk(size_t size, Dither dither) {
-    verify_par_locked();
-    FreeChunk* res = getChunkFromTree(size, dither, splay());
-    assert(res == NULL || res->isFree(),
-           "Should be returning a free chunk");
-    return res;
-  }
-
-  void returnChunk(FreeChunk* chunk) {
-    verify_par_locked();
-    insertChunkInTree(chunk);
-  }
-
-  void removeChunk(FreeChunk* chunk) {
-    verify_par_locked();
-    removeChunkFromTree((TreeChunk*)chunk);
-    assert(chunk->isFree(), "Should still be a free chunk");
-  }
-
-  size_t     maxChunkSize() const;
-  size_t     totalChunkSize(debug_only(const Mutex* lock)) const {
-    debug_only(
-      if (lock != NULL && lock->owned_by_self()) {
-        assert(totalSizeInTree(root()) == totalSize(),
-               "_totalSize inconsistency");
-      }
-    )
-    return totalSize();
-  }
-
-  size_t     minSize() const {
-    return MIN_TREE_CHUNK_SIZE;
-  }
-
-  double     sum_of_squared_block_sizes() const {
-    return sum_of_squared_block_sizes(root());
-  }
-
-  FreeChunk* find_chunk_ends_at(HeapWord* target) const;
-
-  // Find the list with size "size" in the binary tree and update
-  // the statistics in the list according to "split" (chunk was
-  // split or coalesced) and "birth" (chunk was added or removed).
-  void       dictCensusUpdate(size_t size, bool split, bool birth);
-  // Return true if the dictionary is overpopulated (more chunks of
-  // this size than desired) for size "size".
-  bool       coalDictOverPopulated(size_t size);
-  // Methods called at the beginning of a sweep to prepare the
-  // statistics for the sweep.
-  void       beginSweepDictCensus(double coalSurplusPercent,
-                                  float inter_sweep_current,
-                                  float inter_sweep_estimate,
-                                  float intra_sweep_estimate);
-  // Methods called after the end of a sweep to modify the
-  // statistics for the sweep.
-  void       endSweepDictCensus(double splitSurplusPercent);
-  // Return the largest free chunk in the tree.
-  FreeChunk* findLargestDict() const;
-  // Accessors for statistics
-  void       setTreeSurplus(double splitSurplusPercent);
-  void       setTreeHints(void);
-  // Reset statistics for all the lists in the tree.
-  void       clearTreeCensus(void);
-  // Print the statistics for all the lists in the tree.  Also may
-  // print out summaries.
-  void       printDictCensus(void) const;
-  void       print_free_lists(outputStream* st) const;
-
-  // For debugging.  Returns the sum of the _returnedBytes for
-  // all lists in the tree.
-  size_t     sumDictReturnedBytes()     PRODUCT_RETURN0;
-  // Sets the _returnedBytes for all the lists in the tree to zero.
-  void       initializeDictReturnedBytes()      PRODUCT_RETURN;
-  // For debugging.  Return the total number of chunks in the dictionary.
-  size_t     totalCount()       PRODUCT_RETURN0;
-
-  void       reportStatistics() const;
-
-  void       verify() const;
-};
-
-#endif // SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_BINARYTREEDICTIONARY_HPP
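
The comment block removed above survives in templatized form in src/share/vm/memory/binaryTreeDictionary.hpp (later hunks reference TreeChunk<FreeChunk>::as_TreeChunk and BinaryTreeDictionary<FreeChunk>::min_tree_chunk_size). Its central trick is that the first chunk on each size-list doubles as the tree node, because the TreeChunk embeds the TreeList header. A standalone toy sketch of that pattern follows; it is illustrative only, with made-up names, not HotSpot code:

#include <cassert>
#include <cstddef>

struct Chunk;                    // a free block of some size

struct SizeList {                // per-size list header (the tree-node payload)
  std::size_t size;
  Chunk*      head;
};

struct Chunk {
  std::size_t size;
  Chunk*      next;
  SizeList    embedded;          // valid only while this chunk heads its list
};

// Adding the first chunk of a given size builds the list header in place,
// inside the chunk itself -- chunk and node share the same storage.
SizeList* make_list(Chunk* c, std::size_t sz) {
  c->size = sz;
  c->next = nullptr;
  c->embedded.size = sz;
  c->embedded.head = c;
  return &c->embedded;
}

int main() {
  Chunk a;
  SizeList* l = make_list(&a, 64);
  assert(l->head == &a && l->size == 64);  // head chunk == node storage
  return 0;
}

This is also why, as the removed comment notes, the head chunk is the last one handed out from its list: removing it earlier would destroy the node's storage while other chunks still need it.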
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -38,7 +38,7 @@
 
 CMSPermGen::CMSPermGen(ReservedSpace rs, size_t initial_byte_size,
              CardTableRS* ct,
-             FreeBlockDictionary::DictionaryChoice dictionaryChoice) {
+             FreeBlockDictionary<FreeChunk>::DictionaryChoice dictionaryChoice) {
   CMSPermGenGen* g =
     new CMSPermGenGen(rs, initial_byte_size, -1, ct);
   if (g == NULL) {
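
The edit above is the first of many in this changeset replacing FreeBlockDictionary::X with FreeBlockDictionary<FreeChunk>::X: the dictionary, free-list, and tree-chunk classes have been turned into templates over the chunk type. A minimal standalone sketch of why every nested name now needs an instantiation spelled out; the enumerator names are taken from hunks in this changeset, while the rest of the declaration is an assumption:

#include <cstddef>

template <class Chunk>
class FreeBlockDictionary {
 public:
  // Enumerator names below appear in this changeset; values are assumed.
  enum Dither { atLeast, exactly, roughly };
  enum DictionaryChoice { dictionaryBinaryTree, dictionarySplayTree, dictionarySkipList };
  virtual Chunk* get_chunk(std::size_t size, Dither d) = 0;
  virtual ~FreeBlockDictionary() {}
};

struct FreeChunk { std::size_t size; };

// Call sites must now qualify nested names through a concrete instantiation:
FreeBlockDictionary<FreeChunk>::DictionaryChoice choice =
    FreeBlockDictionary<FreeChunk>::dictionaryBinaryTree;

int main() {
  return choice == FreeBlockDictionary<FreeChunk>::dictionaryBinaryTree ? 0 : 1;
}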
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -45,7 +45,7 @@
 
  public:
   CMSPermGen(ReservedSpace rs, size_t initial_byte_size,
-             CardTableRS* ct, FreeBlockDictionary::DictionaryChoice);
+             CardTableRS* ct, FreeBlockDictionary<FreeChunk>::DictionaryChoice);
 
   HeapWord* mem_allocate(size_t size);
 
@@ -65,7 +65,7 @@
     // regarding not using adaptive free lists for a perm gen.
     ConcurrentMarkSweepGeneration(rs, initial_byte_size, // MinPermHeapExpansion
       level, ct, false /* use adaptive freelists */,
-      (FreeBlockDictionary::DictionaryChoice)CMSDictionaryChoice)
+      (FreeBlockDictionary<FreeChunk>::DictionaryChoice)CMSDictionaryChoice)
   {}
 
   void initialize_performance_counters();
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -58,8 +58,11 @@
 void CompactibleFreeListSpace::set_cms_values() {
   // Set CMS global values
   assert(MinChunkSize == 0, "already set");
-  #define numQuanta(x,y) ((x+y-1)/y)
-  MinChunkSize = numQuanta(sizeof(FreeChunk), MinObjAlignmentInBytes) * MinObjAlignment;
+
+  // MinChunkSize should be a multiple of MinObjAlignment and be large enough
+  // for chunks to contain a FreeChunk.
+  size_t min_chunk_size_in_bytes = align_size_up(sizeof(FreeChunk), MinObjAlignmentInBytes);
+  MinChunkSize = min_chunk_size_in_bytes / BytesPerWord;
 
   assert(IndexSetStart == 0 && IndexSetStride == 0, "already set");
   IndexSetStart  = MinChunkSize;
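
The replacement code above swaps the ad-hoc numQuanta macro for align_size_up and then converts bytes to heap words. A worked example with assumed 64-bit values (sizeof(FreeChunk), word size, and object alignment all vary by platform, so the numbers are illustrative only):

#include <cassert>
#include <cstddef>

static std::size_t align_size_up(std::size_t size, std::size_t alignment) {
  return (size + alignment - 1) & ~(alignment - 1);  // alignment: power of two
}

int main() {
  const std::size_t BytesPerWord           = 8;   // assumption: LP64 build
  const std::size_t sizeof_FreeChunk       = 16;  // assumption: two-word header
  const std::size_t MinObjAlignmentInBytes = 8;

  std::size_t min_chunk_size_in_bytes =
      align_size_up(sizeof_FreeChunk, MinObjAlignmentInBytes);  // -> 16
  std::size_t MinChunkSize = min_chunk_size_in_bytes / BytesPerWord;
  assert(MinChunkSize == 2);  // 16 bytes == 2 heap words
  return 0;
}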
@@ -69,7 +72,7 @@
 // Constructor
 CompactibleFreeListSpace::CompactibleFreeListSpace(BlockOffsetSharedArray* bs,
   MemRegion mr, bool use_adaptive_freelists,
-  FreeBlockDictionary::DictionaryChoice dictionaryChoice) :
+  FreeBlockDictionary<FreeChunk>::DictionaryChoice dictionaryChoice) :
   _dictionaryChoice(dictionaryChoice),
   _adaptive_freelists(use_adaptive_freelists),
   _bt(bs, mr),
@@ -87,6 +90,8 @@
                     CMSConcMarkMultiple),
   _collector(NULL)
 {
+  assert(sizeof(FreeChunk) / BytesPerWord <= MinChunkSize,
+    "FreeChunk is larger than expected");
   _bt.set_space(this);
   initialize(mr, SpaceDecorator::Clear, SpaceDecorator::Mangle);
   // We have all of "mr", all of which we place in the dictionary
@@ -96,13 +101,13 @@
   // implementation, namely, the simple binary tree (splaying
   // temporarily disabled).
   switch (dictionaryChoice) {
-    case FreeBlockDictionary::dictionarySplayTree:
-    case FreeBlockDictionary::dictionarySkipList:
+    case FreeBlockDictionary<FreeChunk>::dictionarySplayTree:
+    case FreeBlockDictionary<FreeChunk>::dictionarySkipList:
     default:
       warning("dictionaryChoice: selected option not understood; using"
               " default BinaryTreeDictionary implementation instead.");
-    case FreeBlockDictionary::dictionaryBinaryTree:
-      _dictionary = new BinaryTreeDictionary(mr);
+    case FreeBlockDictionary<FreeChunk>::dictionaryBinaryTree:
+      _dictionary = new BinaryTreeDictionary<FreeChunk>(mr, use_adaptive_freelists);
       break;
   }
   assert(_dictionary != NULL, "CMS dictionary initialization");
@@ -117,7 +122,7 @@
   // moved to its new location before the klass is moved.
   // Set the _refillSize for the linear allocation blocks
   if (!use_adaptive_freelists) {
-    FreeChunk* fc = _dictionary->getChunk(mr.word_size());
+    FreeChunk* fc = _dictionary->get_chunk(mr.word_size());
     // The small linAB initially has all the space and will allocate
     // a chunk of any size.
     HeapWord* addr = (HeapWord*) fc;
@@ -273,12 +278,12 @@
     assert(mr.word_size() >= MinChunkSize, "Chunk size is too small");
     _bt.single_block(mr.start(), mr.word_size());
     FreeChunk* fc = (FreeChunk*) mr.start();
-    fc->setSize(mr.word_size());
+    fc->set_size(mr.word_size());
     if (mr.word_size() >= IndexSetSize ) {
       returnChunkToDictionary(fc);
     } else {
       _bt.verify_not_unallocated((HeapWord*)fc, fc->size());
-      _indexedFreeList[mr.word_size()].returnChunkAtHead(fc);
+      _indexedFreeList[mr.word_size()].return_chunk_at_head(fc);
     }
   }
   _promoInfo.reset();
@@ -296,7 +301,7 @@
   } else {
     // Place as much of mr in the linAB as we can get,
     // provided it was big enough to go into the dictionary.
-    FreeChunk* fc = dictionary()->findLargestDict();
+    FreeChunk* fc = dictionary()->find_largest_dict();
     if (fc != NULL) {
       assert(fc->size() == mr.word_size(),
              "Why was the chunk broken up?");
@@ -323,14 +328,14 @@
 #ifndef PRODUCT
 void CompactibleFreeListSpace::initializeIndexedFreeListArrayReturnedBytes() {
   for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    _indexedFreeList[i].allocation_stats()->set_returnedBytes(0);
+    _indexedFreeList[i].allocation_stats()->set_returned_bytes(0);
   }
 }
 
 size_t CompactibleFreeListSpace::sumIndexedFreeListArrayReturnedBytes() {
   size_t sum = 0;
   for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    sum += _indexedFreeList[i].allocation_stats()->returnedBytes();
+    sum += _indexedFreeList[i].allocation_stats()->returned_bytes();
   }
   return sum;
 }
@@ -354,7 +359,7 @@
 
 size_t CompactibleFreeListSpace::totalCount() {
   size_t num = totalCountInIndexedFreeLists();
-  num +=  dictionary()->totalCount();
+  num +=  dictionary()->total_count();
   if (_smallLinearAllocBlock._word_size != 0) {
     num++;
   }
@@ -364,7 +369,7 @@
 
 bool CompactibleFreeListSpace::is_free_block(const HeapWord* p) const {
   FreeChunk* fc = (FreeChunk*) p;
-  return fc->isFree();
+  return fc->is_free();
 }
 
 size_t CompactibleFreeListSpace::used() const {
@@ -391,7 +396,7 @@
   // that supports jvmstat, and you are apt to see the values
   // flicker in such cases.
   assert(_dictionary != NULL, "No _dictionary?");
-  return (_dictionary->totalChunkSize(DEBUG_ONLY(freelistLock())) +
+  return (_dictionary->total_chunk_size(DEBUG_ONLY(freelistLock())) +
           totalSizeInIndexedFreeLists() +
           _smallLinearAllocBlock._word_size) * HeapWordSize;
 }
@@ -399,7 +404,7 @@
 size_t CompactibleFreeListSpace::max_alloc_in_words() const {
   assert(_dictionary != NULL, "No _dictionary?");
   assert_locked();
-  size_t res = _dictionary->maxChunkSize();
+  size_t res = _dictionary->max_chunk_size();
   res = MAX2(res, MIN2(_smallLinearAllocBlock._word_size,
                        (size_t) SmallForLinearAlloc - 1));
   // XXX the following could potentially be pretty slow;
@@ -448,7 +453,7 @@
   reportIndexedFreeListStatistics();
   gclog_or_tty->print_cr("Layout of Indexed Freelists");
   gclog_or_tty->print_cr("---------------------------");
-  FreeList::print_labels_on(st, "size");
+  FreeList<FreeChunk>::print_labels_on(st, "size");
   for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
     _indexedFreeList[i].print_on(gclog_or_tty);
     for (FreeChunk* fc = _indexedFreeList[i].head(); fc != NULL;
@@ -467,7 +472,7 @@
 
 void CompactibleFreeListSpace::print_dictionary_free_lists(outputStream* st)
 const {
-  _dictionary->reportStatistics();
+  _dictionary->report_statistics();
   st->print_cr("Layout of Freelists in Tree");
   st->print_cr("---------------------------");
   _dictionary->print_free_lists(st);
@@ -545,12 +550,12 @@
 void CompactibleFreeListSpace::reportFreeListStatistics() const {
   assert_lock_strong(&_freelistLock);
   assert(PrintFLSStatistics != 0, "Reporting error");
-  _dictionary->reportStatistics();
+  _dictionary->report_statistics();
   if (PrintFLSStatistics > 1) {
     reportIndexedFreeListStatistics();
-    size_t totalSize = totalSizeInIndexedFreeLists() +
-                       _dictionary->totalChunkSize(DEBUG_ONLY(freelistLock()));
-    gclog_or_tty->print(" free=%ld frag=%1.4f\n", totalSize, flsFrag());
+    size_t total_size = totalSizeInIndexedFreeLists() +
+                       _dictionary->total_chunk_size(DEBUG_ONLY(freelistLock()));
+    gclog_or_tty->print(" free=%ld frag=%1.4f\n", total_size, flsFrag());
   }
 }
 
@@ -558,13 +563,13 @@
   assert_lock_strong(&_freelistLock);
   gclog_or_tty->print("Statistics for IndexedFreeLists:\n"
                       "--------------------------------\n");
-  size_t totalSize = totalSizeInIndexedFreeLists();
-  size_t   freeBlocks = numFreeBlocksInIndexedFreeLists();
-  gclog_or_tty->print("Total Free Space: %d\n", totalSize);
+  size_t total_size = totalSizeInIndexedFreeLists();
+  size_t   free_blocks = numFreeBlocksInIndexedFreeLists();
+  gclog_or_tty->print("Total Free Space: %d\n", total_size);
   gclog_or_tty->print("Max   Chunk Size: %d\n", maxChunkSizeInIndexedFreeLists());
-  gclog_or_tty->print("Number of Blocks: %d\n", freeBlocks);
-  if (freeBlocks != 0) {
-    gclog_or_tty->print("Av.  Block  Size: %d\n", totalSize/freeBlocks);
+  gclog_or_tty->print("Number of Blocks: %d\n", free_blocks);
+  if (free_blocks != 0) {
+    gclog_or_tty->print("Av.  Block  Size: %d\n", total_size/free_blocks);
   }
 }
 
@@ -911,7 +916,7 @@
   for (addr = bottom(), last  = end();
        addr < last; addr += size) {
     FreeChunk* fc = (FreeChunk*)addr;
-    if (fc->isFree()) {
+    if (fc->is_free()) {
       // Since we hold the free list lock, which protects direct
       // allocation in this generation by mutators, a free object
       // will remain free throughout this iteration code.
@@ -953,7 +958,7 @@
   for (addr = block_start_careful(mr.start()), end  = mr.end();
        addr < end; addr += size) {
     FreeChunk* fc = (FreeChunk*)addr;
-    if (fc->isFree()) {
+    if (fc->is_free()) {
       // Since we hold the free list lock, which protects direct
       // allocation in this generation by mutators, a free object
       // will remain free throughout this iteration code.
@@ -1069,7 +1074,7 @@
   NOT_PRODUCT(verify_objects_initialized());
   assert(MemRegion(bottom(), end()).contains(p), "p not in space");
   FreeChunk* fc = (FreeChunk*)p;
-  if (fc->isFree()) {
+  if (fc->is_free()) {
     return fc->size();
   } else {
     // Ignore mark word because this may be a recently promoted
@@ -1160,7 +1165,7 @@
   FreeChunk* fc = (FreeChunk*)p;
   assert(is_in_reserved(p), "Should be in space");
   assert(_bt.block_start(p) == p, "Should be a block boundary");
-  if (!fc->isFree()) {
+  if (!fc->is_free()) {
     // Ignore mark word because it may have been used to
     // chain together promoted objects (the last one
     // would have a null value).
@@ -1222,7 +1227,7 @@
 
     FreeChunk* fc = (FreeChunk*)res;
     fc->markNotFree();
-    assert(!fc->isFree(), "shouldn't be marked free");
+    assert(!fc->is_free(), "shouldn't be marked free");
     assert(oop(fc)->klass_or_null() == NULL, "should look uninitialized");
     // Verify that the block offset table shows this to
     // be a single block, but not one which is unallocated.
@@ -1331,10 +1336,10 @@
   size_t currSize = numWords + MinChunkSize;
   assert(currSize % MinObjAlignment == 0, "currSize should be aligned");
   for (i = currSize; i < IndexSetSize; i += IndexSetStride) {
-    FreeList* fl = &_indexedFreeList[i];
+    FreeList<FreeChunk>* fl = &_indexedFreeList[i];
     if (fl->head()) {
       ret = getFromListGreater(fl, numWords);
-      assert(ret == NULL || ret->isFree(), "Should be returning a free chunk");
+      assert(ret == NULL || ret->is_free(), "Should be returning a free chunk");
       return ret;
     }
   }
@@ -1345,7 +1350,7 @@
   /* Try to get a chunk that satisfies request, while avoiding
      fragmentation that can't be handled. */
   {
-    ret =  dictionary()->getChunk(currSize);
+    ret =  dictionary()->get_chunk(currSize);
     if (ret != NULL) {
       assert(ret->size() - numWords >= MinChunkSize,
              "Chunk is too small");
@@ -1353,10 +1358,10 @@
       /* Carve returned chunk. */
       (void) splitChunkAndReturnRemainder(ret, numWords);
       /* Label this as no longer a free chunk. */
-      assert(ret->isFree(), "This chunk should be free");
-      ret->linkPrev(NULL);
+      assert(ret->is_free(), "This chunk should be free");
+      ret->link_prev(NULL);
     }
-    assert(ret == NULL || ret->isFree(), "Should be returning a free chunk");
+    assert(ret == NULL || ret->is_free(), "Should be returning a free chunk");
     return ret;
   }
   ShouldNotReachHere();
@@ -1364,7 +1369,7 @@
 
 bool CompactibleFreeListSpace::verifyChunkInIndexedFreeLists(FreeChunk* fc) const {
   assert(fc->size() < IndexSetSize, "Size of chunk is too large");
-  return _indexedFreeList[fc->size()].verifyChunkInFreeLists(fc);
+  return _indexedFreeList[fc->size()].verify_chunk_in_free_list(fc);
 }
 
 bool CompactibleFreeListSpace::verify_chunk_is_linear_alloc_block(FreeChunk* fc) const {
@@ -1378,13 +1383,13 @@
 // Check if the purported free chunk is present either as a linear
 // allocation block, the size-indexed table of (smaller) free blocks,
 // or the larger free blocks kept in the binary tree dictionary.
-bool CompactibleFreeListSpace::verifyChunkInFreeLists(FreeChunk* fc) const {
+bool CompactibleFreeListSpace::verify_chunk_in_free_list(FreeChunk* fc) const {
   if (verify_chunk_is_linear_alloc_block(fc)) {
     return true;
   } else if (fc->size() < IndexSetSize) {
     return verifyChunkInIndexedFreeLists(fc);
   } else {
-    return dictionary()->verifyChunkInFreeLists(fc);
+    return dictionary()->verify_chunk_in_free_list(fc);
   }
 }
 
@@ -1412,7 +1417,7 @@
   }
   if (fc != NULL) {
     fc->dontCoalesce();
-    assert(fc->isFree(), "Should be free, but not coalescable");
+    assert(fc->is_free(), "Should be free, but not coalescable");
     // Verify that the block offset table shows this to
     // be a single block, but not one which is unallocated.
     _bt.verify_single_block((HeapWord*)fc, fc->size());
@@ -1492,7 +1497,7 @@
     }
     // Return the chunk that isn't big enough, and then refill below.
     addChunkToFreeLists(blk->_ptr, sz);
-    splitBirth(sz);
+    split_birth(sz);
     // Don't keep statistics on adding back chunk from a LinAB.
   } else {
     // A refilled block would not satisfy the request.
@@ -1504,14 +1509,14 @@
   assert(blk->_ptr == NULL || blk->_word_size >= size + MinChunkSize,
          "block was replenished");
   if (res != NULL) {
-    splitBirth(size);
+    split_birth(size);
     repairLinearAllocBlock(blk);
   } else if (blk->_ptr != NULL) {
     res = blk->_ptr;
     size_t blk_size = blk->_word_size;
     blk->_word_size -= size;
     blk->_ptr  += size;
-    splitBirth(size);
+    split_birth(size);
     repairLinearAllocBlock(blk);
     // Update BOT last so that other (parallel) GC threads see a consistent
     // view of the BOT and free blocks.
@@ -1540,7 +1545,7 @@
     size_t blk_size = blk->_word_size;
     blk->_word_size -= size;
     blk->_ptr  += size;
-    splitBirth(size);
+    split_birth(size);
     repairLinearAllocBlock(blk);
     // Update BOT last so that other (parallel) GC threads see a consistent
     // view of the BOT and free blocks.
@@ -1557,7 +1562,7 @@
   assert_locked();
   assert(size < SmallForDictionary, "just checking");
   FreeChunk* res;
-  res = _indexedFreeList[size].getChunkAtHead();
+  res = _indexedFreeList[size].get_chunk_at_head();
   if (res == NULL) {
     res = getChunkFromIndexedFreeListHelper(size);
   }
@@ -1591,7 +1596,7 @@
         // Do not replenish from an underpopulated size.
         if (_indexedFreeList[replenish_size].surplus() > 0 &&
             _indexedFreeList[replenish_size].head() != NULL) {
-          newFc = _indexedFreeList[replenish_size].getChunkAtHead();
+          newFc = _indexedFreeList[replenish_size].get_chunk_at_head();
         } else if (bestFitFirst()) {
           newFc = bestFitSmall(replenish_size);
         }
@@ -1624,13 +1629,13 @@
                i < (num_blk - 1);
                curFc = nextFc, nextFc = (FreeChunk*)((HeapWord*)nextFc + size),
                i++) {
-            curFc->setSize(size);
+            curFc->set_size(size);
             // Don't record this as a return in order to try and
             // determine the "returns" from a GC.
             _bt.verify_not_unallocated((HeapWord*) fc, size);
-            _indexedFreeList[size].returnChunkAtTail(curFc, false);
+            _indexedFreeList[size].return_chunk_at_tail(curFc, false);
             _bt.mark_block((HeapWord*)curFc, size);
-            splitBirth(size);
+            split_birth(size);
             // Don't record the initial population of the indexed list
             // as a split birth.
           }
@@ -1638,9 +1643,9 @@
           // check that the arithmetic was OK above
           assert((HeapWord*)nextFc == (HeapWord*)newFc + num_blk*size,
             "inconsistency in carving newFc");
-          curFc->setSize(size);
+          curFc->set_size(size);
           _bt.mark_block((HeapWord*)curFc, size);
-          splitBirth(size);
+          split_birth(size);
           fc = curFc;
         } else {
           // Return entire block to caller
@@ -1653,14 +1658,14 @@
     // replenish the indexed free list.
     fc = getChunkFromDictionaryExact(size);
   }
-  // assert(fc == NULL || fc->isFree(), "Should be returning a free chunk");
+  // assert(fc == NULL || fc->is_free(), "Should be returning a free chunk");
   return fc;
 }
 
 FreeChunk*
 CompactibleFreeListSpace::getChunkFromDictionary(size_t size) {
   assert_locked();
-  FreeChunk* fc = _dictionary->getChunk(size);
+  FreeChunk* fc = _dictionary->get_chunk(size);
   if (fc == NULL) {
     return NULL;
   }
@@ -1677,7 +1682,7 @@
 FreeChunk*
 CompactibleFreeListSpace::getChunkFromDictionaryExact(size_t size) {
   assert_locked();
-  FreeChunk* fc = _dictionary->getChunk(size);
+  FreeChunk* fc = _dictionary->get_chunk(size);
   if (fc == NULL) {
     return fc;
   }
@@ -1686,11 +1691,11 @@
     _bt.verify_single_block((HeapWord*)fc, size);
     return fc;
   }
-  assert(fc->size() > size, "getChunk() guarantee");
+  assert(fc->size() > size, "get_chunk() guarantee");
   if (fc->size() < size + MinChunkSize) {
     // Return the chunk to the dictionary and go get a bigger one.
     returnChunkToDictionary(fc);
-    fc = _dictionary->getChunk(size + MinChunkSize);
+    fc = _dictionary->get_chunk(size + MinChunkSize);
     if (fc == NULL) {
       return NULL;
     }
@@ -1711,10 +1716,10 @@
   _bt.verify_single_block((HeapWord*)chunk, size);
   // adjust _unallocated_block downward, as necessary
   _bt.freed((HeapWord*)chunk, size);
-  _dictionary->returnChunk(chunk);
+  _dictionary->return_chunk(chunk);
 #ifndef PRODUCT
   if (CMSCollector::abstract_state() != CMSCollector::Sweeping) {
-    TreeChunk::as_TreeChunk(chunk)->list()->verify_stats();
+    TreeChunk<FreeChunk>::as_TreeChunk(chunk)->list()->verify_stats();
   }
 #endif // PRODUCT
 }
@@ -1726,9 +1731,9 @@
   _bt.verify_single_block((HeapWord*) fc, size);
   _bt.verify_not_unallocated((HeapWord*) fc, size);
   if (_adaptive_freelists) {
-    _indexedFreeList[size].returnChunkAtTail(fc);
+    _indexedFreeList[size].return_chunk_at_tail(fc);
   } else {
-    _indexedFreeList[size].returnChunkAtHead(fc);
+    _indexedFreeList[size].return_chunk_at_head(fc);
   }
 #ifndef PRODUCT
   if (CMSCollector::abstract_state() != CMSCollector::Sweeping) {
@@ -1756,7 +1761,7 @@
   FreeChunk* ec;
   {
     MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag);
-    ec = dictionary()->findLargestDict();  // get largest block
+    ec = dictionary()->find_largest_dict();  // get largest block
     if (ec != NULL && ec->end() == chunk) {
       // It's a coterminal block - we can coalesce.
       size_t old_size = ec->size();
@@ -1767,7 +1772,7 @@
       ec = (FreeChunk*)chunk;
     }
   }
-  ec->setSize(size);
+  ec->set_size(size);
   debug_only(ec->mangleFreed(size));
   if (size < SmallForDictionary) {
     lock = _indexedFreeListParLocks[size];
@@ -1790,7 +1795,7 @@
   _bt.verify_single_block(chunk, size);
 
   FreeChunk* fc = (FreeChunk*) chunk;
-  fc->setSize(size);
+  fc->set_size(size);
   debug_only(fc->mangleFreed(size));
   if (size < SmallForDictionary) {
     returnChunkToFreeList(fc);
@@ -1833,7 +1838,7 @@
   assert_locked();
   assert(fc != NULL, "null chunk");
   _bt.verify_single_block((HeapWord*)fc, size);
-  _dictionary->removeChunk(fc);
+  _dictionary->remove_chunk(fc);
   // adjust _unallocated_block upward, as necessary
   _bt.allocated((HeapWord*)fc, size);
 }
@@ -1848,7 +1853,7 @@
       verifyIndexedFreeList(size);
     }
   )
-  _indexedFreeList[size].removeChunk(fc);
+  _indexedFreeList[size].remove_chunk(fc);
   NOT_PRODUCT(
     if (FLSVerifyIndexTable) {
       verifyIndexedFreeList(size);
@@ -1862,17 +1867,17 @@
      the excess is >= MIN_CHUNK. */
   size_t start = align_object_size(numWords + MinChunkSize);
   if (start < IndexSetSize) {
-    FreeList* it   = _indexedFreeList;
+    FreeList<FreeChunk>* it   = _indexedFreeList;
     size_t    hint = _indexedFreeList[start].hint();
     while (hint < IndexSetSize) {
       assert(hint % MinObjAlignment == 0, "hint should be aligned");
-      FreeList *fl = &_indexedFreeList[hint];
+      FreeList<FreeChunk> *fl = &_indexedFreeList[hint];
       if (fl->surplus() > 0 && fl->head() != NULL) {
         // Found a list with surplus, reset original hint
         // and split out a free chunk which is returned.
         _indexedFreeList[start].set_hint(hint);
         FreeChunk* res = getFromListGreater(fl, numWords);
-        assert(res == NULL || res->isFree(),
+        assert(res == NULL || res->is_free(),
           "Should be returning a free chunk");
         return res;
       }
@@ -1885,7 +1890,7 @@
 }
 
 /* Requires fl->size >= numWords + MinChunkSize */
-FreeChunk* CompactibleFreeListSpace::getFromListGreater(FreeList* fl,
+FreeChunk* CompactibleFreeListSpace::getFromListGreater(FreeList<FreeChunk>* fl,
   size_t numWords) {
   FreeChunk *curr = fl->head();
   size_t oldNumWords = curr->size();
@@ -1894,13 +1899,13 @@
   assert(oldNumWords >= numWords + MinChunkSize,
         "Size of chunks in the list is too small");
 
-  fl->removeChunk(curr);
+  fl->remove_chunk(curr);
   // recorded indirectly by splitChunkAndReturnRemainder -
   // smallSplit(oldNumWords, numWords);
   FreeChunk* new_chunk = splitChunkAndReturnRemainder(curr, numWords);
   // Does anything have to be done for the remainder in terms of
   // fixing the card table?
-  assert(new_chunk == NULL || new_chunk->isFree(),
+  assert(new_chunk == NULL || new_chunk->is_free(),
     "Should be returning a free chunk");
   return new_chunk;
 }
@@ -1918,13 +1923,13 @@
   assert(rem_size >= MinChunkSize, "Free chunk smaller than minimum");
   FreeChunk* ffc = (FreeChunk*)((HeapWord*)chunk + new_size);
   assert(is_aligned(ffc), "alignment problem");
-  ffc->setSize(rem_size);
-  ffc->linkNext(NULL);
-  ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
+  ffc->set_size(rem_size);
+  ffc->link_next(NULL);
+  ffc->link_prev(NULL); // Mark as a free block for other (parallel) GC threads.
   // Above must occur before BOT is updated below.
   // adjust block offset table
   OrderAccess::storestore();
-  assert(chunk->isFree() && ffc->isFree(), "Error");
+  assert(chunk->is_free() && ffc->is_free(), "Error");
   _bt.split_block((HeapWord*)chunk, chunk->size(), new_size);
   if (rem_size < SmallForDictionary) {
     bool is_par = (SharedHeap::heap()->n_par_threads() > 0);
@@ -1939,7 +1944,7 @@
     returnChunkToDictionary(ffc);
     split(size, rem_size);
   }
-  chunk->setSize(new_size);
+  chunk->set_size(new_size);
   return chunk;
 }
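
The ordering comments in splitChunkAndReturnRemainder above ("Mark as a free block for other (parallel) GC threads", "Above must occur before BOT is updated below") describe a publish-after-initialize protocol. A standalone analogy in C++11 atomics; this models, but is not, HotSpot's OrderAccess::storestore() and block offset table:

#include <atomic>
#include <cstddef>

struct Chunk { std::size_t size; Chunk* prev; Chunk* next; };

std::atomic<Chunk*> bot_slot(nullptr);   // stands in for the block offset table

void publish_remainder(Chunk* ffc, std::size_t rem_size) {
  ffc->size = rem_size;
  ffc->next = nullptr;
  ffc->prev = nullptr;   // a NULL prev is what marks the block free to readers
  // Analogue of OrderAccess::storestore(): all stores above must be visible
  // before the shared table points at the fully-built chunk.
  std::atomic_thread_fence(std::memory_order_release);
  bot_slot.store(ffc, std::memory_order_relaxed);
}

int main() {
  Chunk c;
  publish_remainder(&c, 32);
  return 0;
}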
 
@@ -2046,10 +2051,10 @@
     assert(blk->_word_size != 0 && blk->_word_size >= MinChunkSize,
            "Minimum block size requirement");
     FreeChunk* fc = (FreeChunk*)(blk->_ptr);
-    fc->setSize(blk->_word_size);
-    fc->linkPrev(NULL);   // mark as free
+    fc->set_size(blk->_word_size);
+    fc->link_prev(NULL);   // mark as free
     fc->dontCoalesce();
-    assert(fc->isFree(), "just marked it free");
+    assert(fc->is_free(), "just marked it free");
     assert(fc->cantCoalesce(), "just marked it uncoalescable");
   }
 }
@@ -2149,7 +2154,7 @@
   }
 
   double totFree = itabFree +
-                   _dictionary->totalChunkSize(DEBUG_ONLY(freelistLock()));
+                   _dictionary->total_chunk_size(DEBUG_ONLY(freelistLock()));
   if (totFree > 0) {
     frag = ((frag + _dictionary->sum_of_squared_block_sizes()) /
             (totFree * totFree));
@@ -2167,16 +2172,16 @@
   assert_locked();
   size_t i;
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    FreeList* fl    = &_indexedFreeList[i];
+    FreeList<FreeChunk>* fl = &_indexedFreeList[i];
     if (PrintFLSStatistics > 1) {
       gclog_or_tty->print("size[%d] : ", i);
     }
     fl->compute_desired(inter_sweep_current, inter_sweep_estimate, intra_sweep_estimate);
-    fl->set_coalDesired((ssize_t)((double)fl->desired() * CMSSmallCoalSurplusPercent));
-    fl->set_beforeSweep(fl->count());
-    fl->set_bfrSurp(fl->surplus());
+    fl->set_coal_desired((ssize_t)((double)fl->desired() * CMSSmallCoalSurplusPercent));
+    fl->set_before_sweep(fl->count());
+    fl->set_bfr_surp(fl->surplus());
   }
-  _dictionary->beginSweepDictCensus(CMSLargeCoalSurplusPercent,
+  _dictionary->begin_sweep_dict_census(CMSLargeCoalSurplusPercent,
                                     inter_sweep_current,
                                     inter_sweep_estimate,
                                     intra_sweep_estimate);
@@ -2186,7 +2191,7 @@
   assert_locked();
   size_t i;
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    FreeList *fl = &_indexedFreeList[i];
+    FreeList<FreeChunk> *fl = &_indexedFreeList[i];
     fl->set_surplus(fl->count() -
                     (ssize_t)((double)fl->desired() * CMSSmallSplitSurplusPercent));
   }
@@ -2197,7 +2202,7 @@
   size_t i;
   size_t h = IndexSetSize;
   for (i = IndexSetSize - 1; i != 0; i -= IndexSetStride) {
-    FreeList *fl = &_indexedFreeList[i];
+    FreeList<FreeChunk> *fl = &_indexedFreeList[i];
     fl->set_hint(h);
     if (fl->surplus() > 0) {
       h = i;
@@ -2209,18 +2214,18 @@
   assert_locked();
   size_t i;
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    FreeList *fl = &_indexedFreeList[i];
-    fl->set_prevSweep(fl->count());
-    fl->set_coalBirths(0);
-    fl->set_coalDeaths(0);
-    fl->set_splitBirths(0);
-    fl->set_splitDeaths(0);
+    FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+    fl->set_prev_sweep(fl->count());
+    fl->set_coal_births(0);
+    fl->set_coal_deaths(0);
+    fl->set_split_births(0);
+    fl->set_split_deaths(0);
   }
 }
 
 void CompactibleFreeListSpace::endSweepFLCensus(size_t sweep_count) {
   if (PrintFLSStatistics > 0) {
-    HeapWord* largestAddr = (HeapWord*) dictionary()->findLargestDict();
+    HeapWord* largestAddr = (HeapWord*) dictionary()->find_largest_dict();
     gclog_or_tty->print_cr("CMS: Large block " PTR_FORMAT,
                            largestAddr);
   }
@@ -2231,30 +2236,30 @@
   }
   clearFLCensus();
   assert_locked();
-  _dictionary->endSweepDictCensus(CMSLargeSplitSurplusPercent);
+  _dictionary->end_sweep_dict_census(CMSLargeSplitSurplusPercent);
 }
 
 bool CompactibleFreeListSpace::coalOverPopulated(size_t size) {
   if (size < SmallForDictionary) {
-    FreeList *fl = &_indexedFreeList[size];
-    return (fl->coalDesired() < 0) ||
-           ((int)fl->count() > fl->coalDesired());
+    FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+    return (fl->coal_desired() < 0) ||
+           ((int)fl->count() > fl->coal_desired());
   } else {
-    return dictionary()->coalDictOverPopulated(size);
+    return dictionary()->coal_dict_over_populated(size);
   }
 }
 
 void CompactibleFreeListSpace::smallCoalBirth(size_t size) {
   assert(size < SmallForDictionary, "Size too large for indexed list");
-  FreeList *fl = &_indexedFreeList[size];
-  fl->increment_coalBirths();
+  FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+  fl->increment_coal_births();
   fl->increment_surplus();
 }
 
 void CompactibleFreeListSpace::smallCoalDeath(size_t size) {
   assert(size < SmallForDictionary, "Size too large for indexed list");
-  FreeList *fl = &_indexedFreeList[size];
-  fl->increment_coalDeaths();
+  FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+  fl->increment_coal_deaths();
   fl->decrement_surplus();
 }
 
@@ -2262,7 +2267,7 @@
   if (size  < SmallForDictionary) {
     smallCoalBirth(size);
   } else {
-    dictionary()->dictCensusUpdate(size,
+    dictionary()->dict_census_udpate(size,
                                    false /* split */,
                                    true /* birth */);
   }
@@ -2272,7 +2277,7 @@
   if(size  < SmallForDictionary) {
     smallCoalDeath(size);
   } else {
-    dictionary()->dictCensusUpdate(size,
+    dictionary()->dict_census_udpate(size,
                                    false /* split */,
                                    false /* birth */);
   }
@@ -2280,23 +2285,23 @@
 
 void CompactibleFreeListSpace::smallSplitBirth(size_t size) {
   assert(size < SmallForDictionary, "Size too large for indexed list");
-  FreeList *fl = &_indexedFreeList[size];
-  fl->increment_splitBirths();
+  FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+  fl->increment_split_births();
   fl->increment_surplus();
 }
 
 void CompactibleFreeListSpace::smallSplitDeath(size_t size) {
   assert(size < SmallForDictionary, "Size too large for indexed list");
-  FreeList *fl = &_indexedFreeList[size];
-  fl->increment_splitDeaths();
+  FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+  fl->increment_split_deaths();
   fl->decrement_surplus();
 }
 
-void CompactibleFreeListSpace::splitBirth(size_t size) {
+void CompactibleFreeListSpace::split_birth(size_t size) {
   if (size  < SmallForDictionary) {
     smallSplitBirth(size);
   } else {
-    dictionary()->dictCensusUpdate(size,
+    dictionary()->dict_census_udpate(size,
                                    true /* split */,
                                    true /* birth */);
   }
@@ -2306,7 +2311,7 @@
   if (size  < SmallForDictionary) {
     smallSplitDeath(size);
   } else {
-    dictionary()->dictCensusUpdate(size,
+    dictionary()->dict_census_udpate(size,
                                    true /* split */,
                                    false /* birth */);
   }
@@ -2315,8 +2320,8 @@
 void CompactibleFreeListSpace::split(size_t from, size_t to1) {
   size_t to2 = from - to1;
   splitDeath(from);
-  splitBirth(to1);
-  splitBirth(to2);
+  split_birth(to1);
+  split_birth(to2);
 }
 
 void CompactibleFreeListSpace::print() const {
@@ -2362,7 +2367,7 @@
       FreeChunk* fc = (FreeChunk*)addr;
       res = fc->size();
       if (FLSVerifyLists && !fc->cantCoalesce()) {
-        guarantee(_sp->verifyChunkInFreeLists(fc),
+        guarantee(_sp->verify_chunk_in_free_list(fc),
                   "Chunk should be on a free list");
       }
     }
@@ -2444,7 +2449,7 @@
   virtual void do_oop(narrowOop* p) { VerifyAllOopsClosure::do_oop_work(p); }
 };
 
-void CompactibleFreeListSpace::verify(bool ignored) const {
+void CompactibleFreeListSpace::verify() const {
   assert_lock_strong(&_freelistLock);
   verify_objects_initialized();
   MemRegion span = _collector->_span;
@@ -2518,7 +2523,7 @@
             "Slot should have been empty");
   for (; fc != NULL; fc = fc->next(), n++) {
     guarantee(fc->size() == size, "Size inconsistency");
-    guarantee(fc->isFree(), "!free?");
+    guarantee(fc->is_free(), "!free?");
     guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list");
     guarantee((fc->next() == NULL) == (fc == tail), "Incorrect tail");
   }
@@ -2527,52 +2532,48 @@
 
 #ifndef PRODUCT
 void CompactibleFreeListSpace::check_free_list_consistency() const {
-  assert(_dictionary->minSize() <= IndexSetSize,
+  assert(_dictionary->min_size() <= IndexSetSize,
     "Some sizes can't be allocated without recourse to"
     " linear allocation buffers");
-  assert(MIN_TREE_CHUNK_SIZE*HeapWordSize == sizeof(TreeChunk),
+  assert(BinaryTreeDictionary<FreeChunk>::min_tree_chunk_size*HeapWordSize == sizeof(TreeChunk<FreeChunk>),
     "else MIN_TREE_CHUNK_SIZE is wrong");
-  assert((IndexSetStride == 2 && IndexSetStart == 4) ||                   // 32-bit
-         (IndexSetStride == 1 && IndexSetStart == 3), "just checking");   // 64-bit
-  assert((IndexSetStride != 2) || (IndexSetStart % 2 == 0),
-      "Some for-loops may be incorrectly initialized");
-  assert((IndexSetStride != 2) || (IndexSetSize % 2 == 1),
-      "For-loops that iterate over IndexSet with stride 2 may be wrong");
+  assert(IndexSetStart != 0, "IndexSetStart not initialized");
+  assert(IndexSetStride != 0, "IndexSetStride not initialized");
 }
 #endif
 
 void CompactibleFreeListSpace::printFLCensus(size_t sweep_count) const {
   assert_lock_strong(&_freelistLock);
-  FreeList total;
+  FreeList<FreeChunk> total;
   gclog_or_tty->print("end sweep# " SIZE_FORMAT "\n", sweep_count);
-  FreeList::print_labels_on(gclog_or_tty, "size");
-  size_t totalFree = 0;
+  FreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
+  size_t total_free = 0;
   for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    const FreeList *fl = &_indexedFreeList[i];
-    totalFree += fl->count() * fl->size();
+    const FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+    total_free += fl->count() * fl->size();
     if (i % (40*IndexSetStride) == 0) {
-      FreeList::print_labels_on(gclog_or_tty, "size");
+      FreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
     }
     fl->print_on(gclog_or_tty);
-    total.set_bfrSurp(    total.bfrSurp()     + fl->bfrSurp()    );
+    total.set_bfr_surp(    total.bfr_surp()     + fl->bfr_surp()    );
     total.set_surplus(    total.surplus()     + fl->surplus()    );
     total.set_desired(    total.desired()     + fl->desired()    );
-    total.set_prevSweep(  total.prevSweep()   + fl->prevSweep()  );
-    total.set_beforeSweep(total.beforeSweep() + fl->beforeSweep());
+    total.set_prev_sweep(  total.prev_sweep()   + fl->prev_sweep()  );
+    total.set_before_sweep(total.before_sweep() + fl->before_sweep());
     total.set_count(      total.count()       + fl->count()      );
-    total.set_coalBirths( total.coalBirths()  + fl->coalBirths() );
-    total.set_coalDeaths( total.coalDeaths()  + fl->coalDeaths() );
-    total.set_splitBirths(total.splitBirths() + fl->splitBirths());
-    total.set_splitDeaths(total.splitDeaths() + fl->splitDeaths());
+    total.set_coal_births( total.coal_births()  + fl->coal_births() );
+    total.set_coal_deaths( total.coal_deaths()  + fl->coal_deaths() );
+    total.set_split_births(total.split_births() + fl->split_births());
+    total.set_split_deaths(total.split_deaths() + fl->split_deaths());
   }
   total.print_on(gclog_or_tty, "TOTAL");
   gclog_or_tty->print_cr("Total free in indexed lists "
-                         SIZE_FORMAT " words", totalFree);
+                         SIZE_FORMAT " words", total_free);
   gclog_or_tty->print("growth: %8.5f  deficit: %8.5f\n",
-    (double)(total.splitBirths()+total.coalBirths()-total.splitDeaths()-total.coalDeaths())/
-            (total.prevSweep() != 0 ? (double)total.prevSweep() : 1.0),
+    (double)(total.split_births()+total.coal_births()-total.split_deaths()-total.coal_deaths())/
+            (total.prev_sweep() != 0 ? (double)total.prev_sweep() : 1.0),
     (double)(total.desired() - total.count())/(total.desired() != 0 ? (double)total.desired() : 1.0));
-  _dictionary->printDictCensus();
+  _dictionary->print_dict_census();
 }
 
 ///////////////////////////////////////////////////////////////////////////
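
The growth and deficit figures printed at the end of printFLCensus combine the per-list census counters summed in the loop above. A worked example with made-up counter totals, following the formulas in the hunk exactly:

#include <cstdio>

int main() {
  // Made-up totals for one sweep:
  double split_births = 40, coal_births = 10, split_deaths = 25, coal_deaths = 5;
  double prev_sweep = 100, desired = 120, count = 90;

  double growth  = (split_births + coal_births - split_deaths - coal_deaths) /
                   (prev_sweep != 0 ? prev_sweep : 1.0);    // 20 / 100 = 0.20000
  double deficit = (desired - count) /
                   (desired != 0 ? desired : 1.0);          // 30 / 120 = 0.25000
  std::printf("growth: %8.5f  deficit: %8.5f\n", growth, deficit);
  return 0;
}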
@@ -2634,18 +2635,18 @@
     res = _cfls->getChunkFromDictionaryExact(word_sz);
     if (res == NULL) return NULL;
   } else {
-    FreeList* fl = &_indexedFreeList[word_sz];
+    FreeList<FreeChunk>* fl = &_indexedFreeList[word_sz];
     if (fl->count() == 0) {
       // Attempt to refill this local free list.
       get_from_global_pool(word_sz, fl);
       // If it didn't work, give up.
       if (fl->count() == 0) return NULL;
     }
-    res = fl->getChunkAtHead();
+    res = fl->get_chunk_at_head();
     assert(res != NULL, "Why was count non-zero?");
   }
   res->markNotFree();
-  assert(!res->isFree(), "shouldn't be marked free");
+  assert(!res->is_free(), "shouldn't be marked free");
   assert(oop(res)->klass_or_null() == NULL, "should look uninitialized");
   // mangle a just allocated object with a distinct pattern.
   debug_only(res->mangleAllocated(word_sz));
@@ -2654,7 +2655,7 @@
 
 // Get a chunk of blocks of the right size and update related
 // book-keeping stats
-void CFLS_LAB::get_from_global_pool(size_t word_sz, FreeList* fl) {
+void CFLS_LAB::get_from_global_pool(size_t word_sz, FreeList<FreeChunk>* fl) {
   // Get the #blocks we want to claim
   size_t n_blks = (size_t)_blocks_to_claim[word_sz].average();
   assert(n_blks > 0, "Error");
@@ -2736,7 +2737,7 @@
         if (num_retire > 0) {
           _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
           // Reset this list.
-          _indexedFreeList[i] = FreeList();
+          _indexedFreeList[i] = FreeList<FreeChunk>();
           _indexedFreeList[i].set_size(i);
         }
       }
@@ -2750,7 +2751,7 @@
   }
 }
 
-void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) {
+void CompactibleFreeListSpace::par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList<FreeChunk>* fl) {
   assert(fl->count() == 0, "Precondition.");
   assert(word_sz < CompactibleFreeListSpace::IndexSetSize,
          "Precondition");
@@ -2766,12 +2767,12 @@
          (cur_sz < CompactibleFreeListSpace::IndexSetSize) &&
          (CMSSplitIndexedFreeListBlocks || k <= 1);
          k++, cur_sz = k * word_sz) {
-      FreeList fl_for_cur_sz;  // Empty.
+      FreeList<FreeChunk> fl_for_cur_sz;  // Empty.
       fl_for_cur_sz.set_size(cur_sz);
       {
         MutexLockerEx x(_indexedFreeListParLocks[cur_sz],
                         Mutex::_no_safepoint_check_flag);
-        FreeList* gfl = &_indexedFreeList[cur_sz];
+        FreeList<FreeChunk>* gfl = &_indexedFreeList[cur_sz];
         if (gfl->count() != 0) {
           // nn is the number of chunks of size cur_sz that
           // we'd need to split k-ways each, in order to create
@@ -2784,9 +2785,9 @@
             // we increment the split death count by the number of blocks
             // we just took from the cur_sz-size blocks list and which
             // we will be splitting below.
-            ssize_t deaths = gfl->splitDeaths() +
+            ssize_t deaths = gfl->split_deaths() +
                              fl_for_cur_sz.count();
-            gfl->set_splitDeaths(deaths);
+            gfl->set_split_deaths(deaths);
           }
         }
       }
@@ -2797,21 +2798,21 @@
         } else {
           // Divide each block on fl_for_cur_sz up k ways.
           FreeChunk* fc;
-          while ((fc = fl_for_cur_sz.getChunkAtHead()) != NULL) {
+          while ((fc = fl_for_cur_sz.get_chunk_at_head()) != NULL) {
             // Must do this in reverse order, so that anybody attempting to
             // access the main chunk sees it as a single free block until we
             // change it.
             size_t fc_size = fc->size();
-            assert(fc->isFree(), "Error");
+            assert(fc->is_free(), "Error");
             for (int i = k-1; i >= 0; i--) {
               FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz);
               assert((i != 0) ||
-                        ((fc == ffc) && ffc->isFree() &&
+                        ((fc == ffc) && ffc->is_free() &&
                          (ffc->size() == k*word_sz) && (fc_size == word_sz)),
                         "Counting error");
-              ffc->setSize(word_sz);
-              ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
-              ffc->linkNext(NULL);
+              ffc->set_size(word_sz);
+              ffc->link_prev(NULL); // Mark as a free block for other (parallel) GC threads.
+              ffc->link_next(NULL);
               // Above must occur before BOT is updated below.
               OrderAccess::storestore();
               // splitting from the right, fc_size == i * word_sz
@@ -2822,7 +2823,7 @@
               _bt.verify_single_block((HeapWord*)fc, fc_size);
               _bt.verify_single_block((HeapWord*)ffc, word_sz);
               // Push this on "fl".
-              fl->returnChunkAtHead(ffc);
+              fl->return_chunk_at_head(ffc);
             }
             // TRAP
             assert(fl->tail()->next() == NULL, "List invariant.");
@@ -2832,8 +2833,8 @@
         size_t num = fl->count();
         MutexLockerEx x(_indexedFreeListParLocks[word_sz],
                         Mutex::_no_safepoint_check_flag);
-        ssize_t births = _indexedFreeList[word_sz].splitBirths() + num;
-        _indexedFreeList[word_sz].set_splitBirths(births);
+        ssize_t births = _indexedFreeList[word_sz].split_births() + num;
+        _indexedFreeList[word_sz].set_split_births(births);
         return;
       }
     }
@@ -2846,12 +2847,12 @@
     MutexLockerEx x(parDictionaryAllocLock(),
                     Mutex::_no_safepoint_check_flag);
     while (n > 0) {
-      fc = dictionary()->getChunk(MAX2(n * word_sz,
-                                  _dictionary->minSize()),
-                                  FreeBlockDictionary::atLeast);
+      fc = dictionary()->get_chunk(MAX2(n * word_sz,
+                                  _dictionary->min_size()),
+                                  FreeBlockDictionary<FreeChunk>::atLeast);
       if (fc != NULL) {
         _bt.allocated((HeapWord*)fc, fc->size(), true /* reducing */);  // update _unallocated_blk
-        dictionary()->dictCensusUpdate(fc->size(),
+        dictionary()->dict_census_udpate(fc->size(),
                                        true /*split*/,
                                        false /*birth*/);
         break;
@@ -2862,7 +2863,7 @@
     if (fc == NULL) return;
     // Otherwise, split up that block.
     assert((ssize_t)n >= 1, "Control point invariant");
-    assert(fc->isFree(), "Error: should be a free block");
+    assert(fc->is_free(), "Error: should be a free block");
     _bt.verify_single_block((HeapWord*)fc, fc->size());
     const size_t nn = fc->size() / word_sz;
     n = MIN2(nn, n);
@@ -2893,18 +2894,18 @@
     if (rem > 0) {
       size_t prefix_size = n * word_sz;
       rem_fc = (FreeChunk*)((HeapWord*)fc + prefix_size);
-      rem_fc->setSize(rem);
-      rem_fc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
-      rem_fc->linkNext(NULL);
+      rem_fc->set_size(rem);
+      rem_fc->link_prev(NULL); // Mark as a free block for other (parallel) GC threads.
+      rem_fc->link_next(NULL);
       // Above must occur before BOT is updated below.
       assert((ssize_t)n > 0 && prefix_size > 0 && rem_fc > fc, "Error");
       OrderAccess::storestore();
       _bt.split_block((HeapWord*)fc, fc->size(), prefix_size);
-      assert(fc->isFree(), "Error");
-      fc->setSize(prefix_size);
+      assert(fc->is_free(), "Error");
+      fc->set_size(prefix_size);
       if (rem >= IndexSetSize) {
         returnChunkToDictionary(rem_fc);
-        dictionary()->dictCensusUpdate(rem, true /*split*/, true /*birth*/);
+        dictionary()->dict_census_udpate(rem, true /*split*/, true /*birth*/);
         rem_fc = NULL;
       }
       // Otherwise, return it to the small list below.
@@ -2914,7 +2915,7 @@
     MutexLockerEx x(_indexedFreeListParLocks[rem],
                     Mutex::_no_safepoint_check_flag);
     _bt.verify_not_unallocated((HeapWord*)rem_fc, rem_fc->size());
-    _indexedFreeList[rem].returnChunkAtHead(rem_fc);
+    _indexedFreeList[rem].return_chunk_at_head(rem_fc);
     smallSplitBirth(rem);
   }
   assert((ssize_t)n > 0 && fc != NULL, "Consistency");
@@ -2926,9 +2927,9 @@
   // All but first chunk in this loop
   for (ssize_t i = n-1; i > 0; i--) {
     FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz);
-    ffc->setSize(word_sz);
-    ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
-    ffc->linkNext(NULL);
+    ffc->set_size(word_sz);
+    ffc->link_prev(NULL); // Mark as a free block for other (parallel) GC threads.
+    ffc->link_next(NULL);
     // Above must occur before BOT is updated below.
     OrderAccess::storestore();
     // splitting from the right, fc_size == (n - i + 1) * wordsize
@@ -2938,25 +2939,25 @@
     _bt.verify_single_block((HeapWord*)ffc, ffc->size());
     _bt.verify_single_block((HeapWord*)fc, fc_size);
     // Push this on "fl".
-    fl->returnChunkAtHead(ffc);
+    fl->return_chunk_at_head(ffc);
   }
   // First chunk
-  assert(fc->isFree() && fc->size() == n*word_sz, "Error: should still be a free block");
+  assert(fc->is_free() && fc->size() == n*word_sz, "Error: should still be a free block");
   // The blocks above should show their new sizes before the first block below
-  fc->setSize(word_sz);
-  fc->linkPrev(NULL);    // idempotent wrt free-ness, see assert above
-  fc->linkNext(NULL);
+  fc->set_size(word_sz);
+  fc->link_prev(NULL);    // idempotent wrt free-ness, see assert above
+  fc->link_next(NULL);
   _bt.verify_not_unallocated((HeapWord*)fc, fc->size());
   _bt.verify_single_block((HeapWord*)fc, fc->size());
-  fl->returnChunkAtHead(fc);
+  fl->return_chunk_at_head(fc);
 
   assert((ssize_t)n > 0 && (ssize_t)n == fl->count(), "Incorrect number of blocks");
   {
     // Update the stats for this block size.
     MutexLockerEx x(_indexedFreeListParLocks[word_sz],
                     Mutex::_no_safepoint_check_flag);
-    const ssize_t births = _indexedFreeList[word_sz].splitBirths() + n;
-    _indexedFreeList[word_sz].set_splitBirths(births);
+    const ssize_t births = _indexedFreeList[word_sz].split_births() + n;
+    _indexedFreeList[word_sz].set_split_births(births);
     // ssize_t new_surplus = _indexedFreeList[word_sz].surplus() + n;
     // _indexedFreeList[word_sz].set_surplus(new_surplus);
   }
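
A recurring pattern in par_get_chunk_of_blocks above: replenishment blocks are carved from the right (i = k-1 down to 1) so that any thread scanning concurrently still sees the head as one large, valid free chunk until the final write. A standalone toy of just that carving order; the real code additionally fixes up the block offset table and takes the per-size locks:

#include <cassert>
#include <cstddef>
#include <vector>

struct Chunk { std::size_t size; };

// Carve a block of k * chunk_bytes into k equal pieces, rightmost first,
// so the head keeps describing one big block until the last step.
void carve(char* base, std::size_t chunk_bytes, int k, std::vector<Chunk*>* fl) {
  for (int i = k - 1; i >= 1; i--) {
    Chunk* ffc = reinterpret_cast<Chunk*>(base + i * chunk_bytes);
    ffc->size = chunk_bytes;     // initialize the carved piece in place
    fl->push_back(ffc);          // then publish it on the per-size list
  }
  Chunk* head = reinterpret_cast<Chunk*>(base);
  head->size = chunk_bytes;      // the head shrinks last
  fl->push_back(head);
}

int main() {
  alignas(Chunk) char block[4 * 64];   // pretend: four 64-byte chunks
  std::vector<Chunk*> fl;
  carve(block, 64, 4, &fl);
  assert(fl.size() == 4);
  return 0;
}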
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,10 +25,10 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_COMPACTIBLEFREELISTSPACE_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_COMPACTIBLEFREELISTSPACE_HPP
 
-#include "gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp"
-#include "gc_implementation/concurrentMarkSweep/freeList.hpp"
 #include "gc_implementation/concurrentMarkSweep/promotionInfo.hpp"
+#include "memory/binaryTreeDictionary.hpp"
 #include "memory/blockOffsetTable.inline.hpp"
+#include "memory/freeList.hpp"
 #include "memory/space.hpp"
 
 // Classes in support of keeping track of promotions into a non-Contiguous
@@ -129,10 +129,10 @@
   // Linear allocation blocks
   LinearAllocBlock _smallLinearAllocBlock;
 
-  FreeBlockDictionary::DictionaryChoice _dictionaryChoice;
-  FreeBlockDictionary* _dictionary;    // ptr to dictionary for large size blocks
+  FreeBlockDictionary<FreeChunk>::DictionaryChoice _dictionaryChoice;
+  FreeBlockDictionary<FreeChunk>* _dictionary;    // ptr to dictionary for large size blocks
 
-  FreeList _indexedFreeList[IndexSetSize];
+  FreeList<FreeChunk> _indexedFreeList[IndexSetSize];
                                        // indexed array for small size blocks
   // allocation strategy
   bool       _fitStrategy;      // Use best fit strategy.
@@ -169,7 +169,7 @@
   // If the count of "fl" is negative, it's absolute value indicates a
   // number of free chunks that had been previously "borrowed" from global
   // list of size "word_sz", and must now be decremented.
-  void par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl);
+  void par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList<FreeChunk>* fl);
 
   // Allocation helper functions
   // Allocate using a strategy that takes from the indexed free lists
@@ -215,7 +215,7 @@
   // and return it.  The split off remainder is returned to
   // the free lists.  The old name for getFromListGreater
   // was lookInListGreater.
-  FreeChunk* getFromListGreater(FreeList* fl, size_t numWords);
+  FreeChunk* getFromListGreater(FreeList<FreeChunk>* fl, size_t numWords);
   // Get a chunk in the indexed free list or dictionary,
   // by considering a larger chunk and splitting it.
   FreeChunk* getChunkFromGreater(size_t numWords);
@@ -286,10 +286,10 @@
   // Constructor...
   CompactibleFreeListSpace(BlockOffsetSharedArray* bs, MemRegion mr,
                            bool use_adaptive_freelists,
-                           FreeBlockDictionary::DictionaryChoice);
+                           FreeBlockDictionary<FreeChunk>::DictionaryChoice);
   // accessors
   bool bestFitFirst() { return _fitStrategy == FreeBlockBestFitFirst; }
-  FreeBlockDictionary* dictionary() const { return _dictionary; }
+  FreeBlockDictionary<FreeChunk>* dictionary() const { return _dictionary; }
   HeapWord* nearLargestChunk() const { return _nearLargestChunk; }
   void set_nearLargestChunk(HeapWord* v) { _nearLargestChunk = v; }
 
@@ -492,14 +492,14 @@
   void print()                            const;
   void print_on(outputStream* st)         const;
   void prepare_for_verify();
-  void verify(bool allow_dirty)           const;
+  void verify()                           const;
   void verifyFreeLists()                  const PRODUCT_RETURN;
   void verifyIndexedFreeLists()           const;
   void verifyIndexedFreeList(size_t size) const;
   // Verify that the given chunk is in the free lists:
   // i.e. either the binary tree dictionary, the indexed free lists
   // or the linear allocation block.
-  bool verifyChunkInFreeLists(FreeChunk* fc) const;
+  bool verify_chunk_in_free_list(FreeChunk* fc) const;
   // Verify that the given chunk is the linear allocation block
   bool verify_chunk_is_linear_alloc_block(FreeChunk* fc) const;
   // Do some basic checks on the free lists.
@@ -608,7 +608,7 @@
   void coalDeath(size_t size);
   void smallSplitBirth(size_t size);
   void smallSplitDeath(size_t size);
-  void splitBirth(size_t size);
+  void split_birth(size_t size);
   void splitDeath(size_t size);
   void split(size_t from, size_t to1);
 
@@ -617,12 +617,12 @@
 
 // A parallel-GC-thread-local allocation buffer for allocation into a
 // CompactibleFreeListSpace.
-class CFLS_LAB : public CHeapObj {
+class CFLS_LAB : public CHeapObj<mtGC> {
   // The space that this buffer allocates into.
   CompactibleFreeListSpace* _cfls;
 
   // Our local free lists.
-  FreeList _indexedFreeList[CompactibleFreeListSpace::IndexSetSize];
+  FreeList<FreeChunk> _indexedFreeList[CompactibleFreeListSpace::IndexSetSize];
 
   // Initialized from a command-line arg.
 
@@ -635,7 +635,7 @@
   size_t        _num_blocks        [CompactibleFreeListSpace::IndexSetSize];
 
   // Internal work method
-  void get_from_global_pool(size_t word_sz, FreeList* fl);
+  void get_from_global_pool(size_t word_sz, FreeList<FreeChunk>* fl);
 
 public:
   CFLS_LAB(CompactibleFreeListSpace* cfls);
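
CFLS_LAB above now inherits CHeapObj<mtGC>, and the .cpp hunks that follow add an mtGC argument to NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY: C-heap allocations are being tagged with a memory type so native memory tracking can attribute them to the GC. A sketch of the shape such a tagged base class might take; only the class name and the mtGC tag come from the diff, the rest is assumed:

#include <cstddef>
#include <cstdlib>

enum MEMFLAGS { mtNone, mtGC };  // tag values beyond mtGC are assumptions

template <MEMFLAGS F>
class CHeapObj {
 public:
  void* operator new(std::size_t size) {
    // a real implementation would also record (size, F) with the tracker
    return std::malloc(size);
  }
  void operator delete(void* p) { std::free(p); }
};

// Hypothetical subclass standing in for CFLS_LAB:
class TaggedLab : public CHeapObj<mtGC> { int _dummy; };

int main() {
  TaggedLab* lab = new TaggedLab();  // allocation attributed to mtGC
  delete lab;
  return 0;
}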
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -174,7 +174,7 @@
 
 // This struct contains per-thread things necessary to support parallel
 // young-gen collection.
-class CMSParGCThreadState: public CHeapObj {
+class CMSParGCThreadState: public CHeapObj<mtGC> {
  public:
   CFLS_LAB lab;
   PromotionInfo promo;
@@ -188,7 +188,7 @@
 ConcurrentMarkSweepGeneration::ConcurrentMarkSweepGeneration(
      ReservedSpace rs, size_t initial_byte_size, int level,
      CardTableRS* ct, bool use_adaptive_freelists,
-     FreeBlockDictionary::DictionaryChoice dictionaryChoice) :
+     FreeBlockDictionary<FreeChunk>::DictionaryChoice dictionaryChoice) :
   CardGeneration(rs, initial_byte_size, level, ct),
   _dilatation_factor(((double)MinChunkSize)/((double)(CollectedHeap::min_fill_size()))),
   _debug_collection_type(Concurrent_collection_type)
@@ -229,7 +229,7 @@
   if (CollectedHeap::use_parallel_gc_threads()) {
     typedef CMSParGCThreadState* CMSParGCThreadStatePtr;
     _par_gc_thread_states =
-      NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads);
+      NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads, mtGC);
     if (_par_gc_thread_states == NULL) {
       vm_exit_during_initialization("Could not allocate par gc structs");
     }
@@ -687,7 +687,7 @@
         warning("task_queues allocation failure.");
         return;
       }
-      _hash_seed = NEW_C_HEAP_ARRAY(int, num_queues);
+      _hash_seed = NEW_C_HEAP_ARRAY(int, num_queues, mtGC);
       if (_hash_seed == NULL) {
         warning("_hash_seed array allocation failure");
         return;
@@ -737,7 +737,7 @@
     assert(_young_gen != NULL, "no _young_gen");
     _eden_chunk_index = 0;
     _eden_chunk_capacity = (_young_gen->max_capacity()+CMSSamplingGrain)/CMSSamplingGrain;
-    _eden_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, _eden_chunk_capacity);
+    _eden_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, _eden_chunk_capacity, mtGC);
     if (_eden_chunk_array == NULL) {
       _eden_chunk_capacity = 0;
       warning("GC/CMS: _eden_chunk_array allocation failure");
@@ -750,35 +750,35 @@
     const size_t max_plab_samples =
       ((DefNewGeneration*)_young_gen)->max_survivor_size()/MinTLABSize;
 
-    _survivor_plab_array  = NEW_C_HEAP_ARRAY(ChunkArray, ParallelGCThreads);
-    _survivor_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, 2*max_plab_samples);
-    _cursor               = NEW_C_HEAP_ARRAY(size_t, ParallelGCThreads);
+    _survivor_plab_array  = NEW_C_HEAP_ARRAY(ChunkArray, ParallelGCThreads, mtGC);
+    _survivor_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, 2*max_plab_samples, mtGC);
+    _cursor               = NEW_C_HEAP_ARRAY(size_t, ParallelGCThreads, mtGC);
     if (_survivor_plab_array == NULL || _survivor_chunk_array == NULL
         || _cursor == NULL) {
       warning("Failed to allocate survivor plab/chunk array");
       if (_survivor_plab_array  != NULL) {
-        FREE_C_HEAP_ARRAY(ChunkArray, _survivor_plab_array);
+        FREE_C_HEAP_ARRAY(ChunkArray, _survivor_plab_array, mtGC);
         _survivor_plab_array = NULL;
       }
       if (_survivor_chunk_array != NULL) {
-        FREE_C_HEAP_ARRAY(HeapWord*, _survivor_chunk_array);
+        FREE_C_HEAP_ARRAY(HeapWord*, _survivor_chunk_array, mtGC);
         _survivor_chunk_array = NULL;
       }
       if (_cursor != NULL) {
-        FREE_C_HEAP_ARRAY(size_t, _cursor);
+        FREE_C_HEAP_ARRAY(size_t, _cursor, mtGC);
         _cursor = NULL;
       }
     } else {
       _survivor_chunk_capacity = 2*max_plab_samples;
       for (uint i = 0; i < ParallelGCThreads; i++) {
-        HeapWord** vec = NEW_C_HEAP_ARRAY(HeapWord*, max_plab_samples);
+        HeapWord** vec = NEW_C_HEAP_ARRAY(HeapWord*, max_plab_samples, mtGC);
         if (vec == NULL) {
           warning("Failed to allocate survivor plab array");
           for (int j = i; j > 0; j--) {
-            FREE_C_HEAP_ARRAY(HeapWord*, _survivor_plab_array[j-1].array());
+            FREE_C_HEAP_ARRAY(HeapWord*, _survivor_plab_array[j-1].array(), mtGC);
           }
-          FREE_C_HEAP_ARRAY(ChunkArray, _survivor_plab_array);
-          FREE_C_HEAP_ARRAY(HeapWord*, _survivor_chunk_array);
+          FREE_C_HEAP_ARRAY(ChunkArray, _survivor_plab_array, mtGC);
+          FREE_C_HEAP_ARRAY(HeapWord*, _survivor_chunk_array, mtGC);
           _survivor_plab_array = NULL;
           _survivor_chunk_array = NULL;
           _survivor_chunk_capacity = 0;
@@ -1026,7 +1026,7 @@
     // its mark-bit or P-bits not yet set. Such objects need
     // to be safely navigable by block_start().
     assert(oop(res)->klass_or_null() == NULL, "Object should be uninitialized here.");
-    assert(!((FreeChunk*)res)->isFree(), "Error, block will look free but show wrong size");
+    assert(!((FreeChunk*)res)->is_free(), "Error, block will look free but show wrong size");
     collector()->direct_allocated(res, adjustedSize);
     _direct_allocated_words += adjustedSize;
     // allocation counters
@@ -1391,7 +1391,7 @@
   oop obj = oop(obj_ptr);
   OrderAccess::storestore();
   assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
-  assert(!((FreeChunk*)obj_ptr)->isFree(), "Error, block will look free but show wrong size");
+  assert(!((FreeChunk*)obj_ptr)->is_free(), "Error, block will look free but show wrong size");
   // IMPORTANT: See note on object initialization for CMS above.
   // Otherwise, copy the object.  Here we must be careful to insert the
   // klass pointer last, since this marks the block as an allocated object.
@@ -1400,7 +1400,7 @@
   // Restore the mark word copied above.
   obj->set_mark(m);
   assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
-  assert(!((FreeChunk*)obj_ptr)->isFree(), "Error, block will look free but show wrong size");
+  assert(!((FreeChunk*)obj_ptr)->is_free(), "Error, block will look free but show wrong size");
   OrderAccess::storestore();
 
   if (UseCompressedOops) {
@@ -1421,7 +1421,7 @@
     promoInfo->track((PromotedObject*)obj, old->klass());
   }
   assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
-  assert(!((FreeChunk*)obj_ptr)->isFree(), "Error, block will look free but show wrong size");
+  assert(!((FreeChunk*)obj_ptr)->is_free(), "Error, block will look free but show wrong size");
   assert(old->is_oop(), "Will use and dereference old klass ptr below");
 
   // Finally, install the klass pointer (this should be volatile).
@@ -2034,7 +2034,7 @@
            pointer_delta(cms_space->end(), cms_space->compaction_top())
            * HeapWordSize,
       "All the free space should be compacted into one chunk at top");
-    assert(cms_space->dictionary()->totalChunkSize(
+    assert(cms_space->dictionary()->total_chunk_size(
                                       debug_only(cms_space->freelistLock())) == 0 ||
            cms_space->totalSizeInIndexedFreeLists() == 0,
       "All the free space should be in a single chunk");
@@ -3109,21 +3109,21 @@
 }
 
 void
-ConcurrentMarkSweepGeneration::verify(bool allow_dirty /* ignored */) {
+ConcurrentMarkSweepGeneration::verify() {
   // Locks are normally acquired/released in gc_prologue/gc_epilogue, but those
   // are not called when the heap is verified during universe initialization and
   // at vm shutdown.
   if (freelistLock()->owned_by_self()) {
-    cmsSpace()->verify(false /* ignored */);
+    cmsSpace()->verify();
   } else {
     MutexLockerEx fll(freelistLock(), Mutex::_no_safepoint_check_flag);
-    cmsSpace()->verify(false /* ignored */);
-  }
-}
-
-void CMSCollector::verify(bool allow_dirty /* ignored */) {
-  _cmsGen->verify(allow_dirty);
-  _permGen->verify(allow_dirty);
+    cmsSpace()->verify();
+  }
+}
+
+void CMSCollector::verify() {
+  _cmsGen->verify();
+  _permGen->verify();
 }
 
 #ifndef PRODUCT
@@ -6131,7 +6131,7 @@
   double nearLargestPercent = FLSLargestBlockCoalesceProximity;
   HeapWord*  minAddr        = _cmsSpace->bottom();
   HeapWord*  largestAddr    =
-    (HeapWord*) _cmsSpace->dictionary()->findLargestDict();
+    (HeapWord*) _cmsSpace->dictionary()->find_largest_dict();
   if (largestAddr == NULL) {
     // The dictionary appears to be empty.  In this case
     // try to coalesce at the end of the heap.
@@ -6332,10 +6332,10 @@
   )
 }
 
-void CMSCollector::do_CMS_operation(CMS_op_type op) {
+void CMSCollector::do_CMS_operation(CMS_op_type op, GCCause::Cause gc_cause) {
   gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
   TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-  TraceTime t("GC", PrintGC, !PrintGCDetails, gclog_or_tty);
+  TraceTime t(GCCauseString("GC", gc_cause), PrintGC, !PrintGCDetails, gclog_or_tty);
   TraceCollectorStats tcs(counters());
 
   switch (op) {
@@ -7906,7 +7906,7 @@
     _last_fc = NULL;
 
     _sp->initializeIndexedFreeListArrayReturnedBytes();
-    _sp->dictionary()->initializeDictReturnedBytes();
+    _sp->dictionary()->initialize_dict_returned_bytes();
   )
   assert(_limit >= _sp->bottom() && _limit <= _sp->end(),
          "sweep _limit out of bounds");
@@ -7954,13 +7954,13 @@
 
     if (PrintCMSStatistics && CMSVerifyReturnedBytes) {
       size_t indexListReturnedBytes = _sp->sumIndexedFreeListArrayReturnedBytes();
-      size_t dictReturnedBytes = _sp->dictionary()->sumDictReturnedBytes();
-      size_t returnedBytes = indexListReturnedBytes + dictReturnedBytes;
-      gclog_or_tty->print("Returned "SIZE_FORMAT" bytes", returnedBytes);
+      size_t dict_returned_bytes = _sp->dictionary()->sum_dict_returned_bytes();
+      size_t returned_bytes = indexListReturnedBytes + dict_returned_bytes;
+      gclog_or_tty->print("Returned "SIZE_FORMAT" bytes", returned_bytes);
       gclog_or_tty->print("   Indexed List Returned "SIZE_FORMAT" bytes",
         indexListReturnedBytes);
       gclog_or_tty->print_cr("        Dictionary Returned "SIZE_FORMAT" bytes",
-        dictReturnedBytes);
+        dict_returned_bytes);
     }
   }
   if (CMSTraceSweeper) {
@@ -7985,9 +7985,9 @@
   if (CMSTestInFreeList) {
     if (freeRangeInFreeLists) {
       FreeChunk* fc = (FreeChunk*) freeFinger;
-      assert(fc->isFree(), "A chunk on the free list should be free.");
+      assert(fc->is_free(), "A chunk on the free list should be free.");
       assert(fc->size() > 0, "Free range should have a size");
-      assert(_sp->verifyChunkInFreeLists(fc), "Chunk is not in free lists");
+      assert(_sp->verify_chunk_in_free_list(fc), "Chunk is not in free lists");
     }
   }
 }
@@ -8057,7 +8057,7 @@
   assert(addr < _limit, "sweep invariant");
   // check if we should yield
   do_yield_check(addr);
-  if (fc->isFree()) {
+  if (fc->is_free()) {
     // Chunk that is already free
     res = fc->size();
     do_already_free_chunk(fc);
@@ -8145,7 +8145,7 @@
   // Chunks that cannot be coalesced are not in the
   // free lists.
   if (CMSTestInFreeList && !fc->cantCoalesce()) {
-    assert(_sp->verifyChunkInFreeLists(fc),
+    assert(_sp->verify_chunk_in_free_list(fc),
       "free chunk should be in free lists");
   }
   // a chunk that is already free, should not have been
@@ -8171,7 +8171,7 @@
         FreeChunk* nextChunk = (FreeChunk*)(addr + size);
         assert((HeapWord*)nextChunk <= _sp->end(), "Chunk size out of bounds?");
         if ((HeapWord*)nextChunk < _sp->end() &&     // There is another free chunk to the right ...
-            nextChunk->isFree()               &&     // ... which is free...
+            nextChunk->is_free()               &&     // ... which is free...
             nextChunk->cantCoalesce()) {             // ... but can't be coalesced
           // nothing to do
         } else {
@@ -8203,7 +8203,7 @@
           assert(ffc->size() == pointer_delta(addr, freeFinger()),
             "Size of free range is inconsistent with chunk size.");
           if (CMSTestInFreeList) {
-            assert(_sp->verifyChunkInFreeLists(ffc),
+            assert(_sp->verify_chunk_in_free_list(ffc),
               "free range is not in free lists");
           }
           _sp->removeFreeChunkFromFreeLists(ffc);
@@ -8262,7 +8262,7 @@
         assert(ffc->size() == pointer_delta(addr, freeFinger()),
           "Size of free range is inconsistent with chunk size.");
         if (CMSTestInFreeList) {
-          assert(_sp->verifyChunkInFreeLists(ffc),
+          assert(_sp->verify_chunk_in_free_list(ffc),
             "free range is not in free lists");
         }
         _sp->removeFreeChunkFromFreeLists(ffc);
@@ -8351,11 +8351,11 @@
                                                  size_t chunkSize) {
   // do_post_free_or_garbage_chunk() should only be called in the case
   // of the adaptive free list allocator.
-  const bool fcInFreeLists = fc->isFree();
+  const bool fcInFreeLists = fc->is_free();
   assert(_sp->adaptive_freelists(), "Should only be used in this case.");
   assert((HeapWord*)fc <= _limit, "sweep invariant");
   if (CMSTestInFreeList && fcInFreeLists) {
-    assert(_sp->verifyChunkInFreeLists(fc), "free chunk is not in free lists");
+    assert(_sp->verify_chunk_in_free_list(fc), "free chunk is not in free lists");
   }
 
   if (CMSTraceSweeper) {
@@ -8410,7 +8410,7 @@
       assert(ffc->size() == pointer_delta(fc_addr, freeFinger()),
         "Size of free range is inconsistent with chunk size.");
       if (CMSTestInFreeList) {
-        assert(_sp->verifyChunkInFreeLists(ffc),
+        assert(_sp->verify_chunk_in_free_list(ffc),
           "Chunk is not in free lists");
       }
       _sp->coalDeath(ffc->size());
@@ -8459,7 +8459,7 @@
                  " when examining fc = " PTR_FORMAT "(" SIZE_FORMAT ")",
                  _limit, _sp->bottom(), _sp->end(), fc, chunk_size));
   if (eob >= _limit) {
-    assert(eob == _limit || fc->isFree(), "Only a free chunk should allow us to cross over the limit");
+    assert(eob == _limit || fc->is_free(), "Only a free chunk should allow us to cross over the limit");
     if (CMSTraceSweeper) {
       gclog_or_tty->print_cr("_limit " PTR_FORMAT " reached or crossed by block "
                              "[" PTR_FORMAT "," PTR_FORMAT ") in space "
@@ -8482,8 +8482,8 @@
   if (!freeRangeInFreeLists()) {
     if (CMSTestInFreeList) {
       FreeChunk* fc = (FreeChunk*) chunk;
-      fc->setSize(size);
-      assert(!_sp->verifyChunkInFreeLists(fc),
+      fc->set_size(size);
+      assert(!_sp->verify_chunk_in_free_list(fc),
         "chunk should not be in free lists yet");
     }
     if (CMSTraceSweeper) {
@@ -8557,8 +8557,8 @@
 // This is actually very useful in a product build if it can
 // be called from the debugger.  Compile it into the product
 // as needed.
-bool debug_verifyChunkInFreeLists(FreeChunk* fc) {
-  return debug_cms_space->verifyChunkInFreeLists(fc);
+bool debug_verify_chunk_in_free_list(FreeChunk* fc) {
+  return debug_cms_space->verify_chunk_in_free_list(fc);
 }
 #endif
 
@@ -9255,7 +9255,7 @@
       size_t chunk_at_end_old_size = chunk_at_end->size();
       assert(chunk_at_end_old_size >= word_size_change,
         "Shrink is too large");
-      chunk_at_end->setSize(chunk_at_end_old_size -
+      chunk_at_end->set_size(chunk_at_end_old_size -
                           word_size_change);
       _cmsSpace->freed((HeapWord*) chunk_at_end->end(),
         word_size_change);
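
The do_CMS_operation change above threads the GC cause from the VM operation down into the logging: TraceTime now receives GCCauseString("GC", gc_cause) rather than a bare "GC", so a stop-the-world log line can read "GC (CMS Initial Mark)" instead of just "GC". A rough sketch of what such a label builder does — CauseString and cause_name are invented names, heavily simplified from the real GCCauseString:

#include <cstdio>

enum Cause { _allocation_failure, _system_gc, _cms_initial_mark };

static const char* cause_name(Cause c) {
  switch (c) {
    case _allocation_failure: return "Allocation Failure";
    case _system_gc:          return "System.gc()";
    case _cms_initial_mark:   return "CMS Initial Mark";
  }
  return "Unknown";
}

// Simplified stand-in for GCCauseString: composes "prefix (cause)".
class CauseString {
  char _buf[96];
 public:
  CauseString(const char* prefix, Cause c) {
    std::snprintf(_buf, sizeof(_buf), "%s (%s)", prefix, cause_name(c));
  }
  const char* str() const { return _buf; }
};

int main() {
  CauseString label("GC", _cms_initial_mark);
  std::printf("[%s]\n", label.str());   // prints: [GC (CMS Initial Mark)]
  return 0;
}
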
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,10 +25,10 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_CONCURRENTMARKSWEEPGENERATION_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_CONCURRENTMARKSWEEPGENERATION_HPP
 
-#include "gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp"
 #include "gc_implementation/shared/gSpaceCounters.hpp"
 #include "gc_implementation/shared/gcStats.hpp"
 #include "gc_implementation/shared/generationCounters.hpp"
+#include "memory/freeBlockDictionary.hpp"
 #include "memory/generation.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/virtualspace.hpp"
@@ -161,7 +161,7 @@
 
 // Represents a marking stack used by the CMS collector.
 // Ideally this should be GrowableArray<> just like MSC's marking stack(s).
-class CMSMarkStack: public CHeapObj  {
+class CMSMarkStack: public CHeapObj<mtGC>  {
   //
  friend class CMSCollector;   // to get at expansion stats further below
   //
@@ -265,7 +265,7 @@
 
 // Survivor Chunk Array in support of parallelization of
 // Survivor Space rescan.
-class ChunkArray: public CHeapObj {
+class ChunkArray: public CHeapObj<mtGC> {
   size_t _index;
   size_t _capacity;
   size_t _overflows;
@@ -506,7 +506,7 @@
 };
 
 
-class CMSCollector: public CHeapObj {
+class CMSCollector: public CHeapObj<mtGC> {
   friend class VMStructs;
   friend class ConcurrentMarkSweepThread;
   friend class ConcurrentMarkSweepGeneration;
@@ -553,8 +553,8 @@
   // The following array-pair keeps track of mark words
  // displaced for accommodating the overflow list above.
   // This code will likely be revisited under RFE#4922830.
-  Stack<oop>     _preserved_oop_stack;
-  Stack<markOop> _preserved_mark_stack;
+  Stack<oop, mtGC>     _preserved_oop_stack;
+  Stack<markOop, mtGC> _preserved_mark_stack;
 
   int*             _hash_seed;
 
@@ -717,7 +717,7 @@
     CMS_op_checkpointRootsFinal
   };
 
-  void do_CMS_operation(CMS_op_type op);
+  void do_CMS_operation(CMS_op_type op, GCCause::Cause gc_cause);
   bool stop_world_and_do(CMS_op_type op);
 
   OopTaskQueueSet* task_queues() { return _task_queues; }
@@ -988,7 +988,7 @@
   CMSGCAdaptivePolicyCounters* gc_adaptive_policy_counters();
 
   // debugging
-  void verify(bool);
+  void verify();
   bool verify_after_remark();
   void verify_ok_to_terminate() const PRODUCT_RETURN;
   void verify_work_stacks_empty() const PRODUCT_RETURN;
@@ -1106,7 +1106,7 @@
   ConcurrentMarkSweepGeneration(ReservedSpace rs, size_t initial_byte_size,
                                 int level, CardTableRS* ct,
                                 bool use_adaptive_freelists,
-                                FreeBlockDictionary::DictionaryChoice);
+                                FreeBlockDictionary<FreeChunk>::DictionaryChoice);
 
   // Accessors
   CMSCollector* collector() const { return _collector; }
@@ -1279,7 +1279,7 @@
 
   // Debugging
   void prepare_for_verify();
-  void verify(bool allow_dirty);
+  void verify();
   void print_statistics()               PRODUCT_RETURN;
 
   // Performance Counters support
@@ -1328,7 +1328,7 @@
   ASConcurrentMarkSweepGeneration(ReservedSpace rs, size_t initial_byte_size,
                                   int level, CardTableRS* ct,
                                   bool use_adaptive_freelists,
-                                  FreeBlockDictionary::DictionaryChoice
+                                  FreeBlockDictionary<FreeChunk>::DictionaryChoice
                                     dictionaryChoice) :
     ConcurrentMarkSweepGeneration(rs, initial_byte_size, level, ct,
       use_adaptive_freelists, dictionaryChoice) {}
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
-
-#ifndef PRODUCT
-Mutex* FreeBlockDictionary::par_lock() const {
-  return _lock;
-}
-
-void FreeBlockDictionary::set_par_lock(Mutex* lock) {
-  _lock = lock;
-}
-
-void FreeBlockDictionary::verify_par_locked() const {
-#ifdef ASSERT
-  if (ParallelGCThreads > 0) {
-    Thread* myThread = Thread::current();
-    if (myThread->is_GC_task_thread()) {
-      assert(par_lock() != NULL, "Should be using locking?");
-      assert_lock_strong(par_lock());
-    }
-  }
-#endif // ASSERT
-}
-#endif
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_FREEBLOCKDICTIONARY_HPP
-#define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_FREEBLOCKDICTIONARY_HPP
-
-#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
-#include "memory/allocation.hpp"
-#include "memory/memRegion.hpp"
-#include "runtime/mutex.hpp"
-#include "utilities/debug.hpp"
-#include "utilities/globalDefinitions.hpp"
-#include "utilities/ostream.hpp"
-
-// A FreeBlockDictionary is an abstract superclass that will allow
-// a number of alternative implementations in the future.
-class FreeBlockDictionary: public CHeapObj {
- public:
-  enum Dither {
-    atLeast,
-    exactly,
-    roughly
-  };
-  enum DictionaryChoice {
-    dictionaryBinaryTree = 0,
-    dictionarySplayTree  = 1,
-    dictionarySkipList   = 2
-  };
-
- private:
-  NOT_PRODUCT(Mutex* _lock;)
-
- public:
-  virtual void       removeChunk(FreeChunk* fc) = 0;
-  virtual FreeChunk* getChunk(size_t size, Dither dither = atLeast) = 0;
-  virtual void       returnChunk(FreeChunk* chunk) = 0;
-  virtual size_t     totalChunkSize(debug_only(const Mutex* lock)) const = 0;
-  virtual size_t     maxChunkSize()   const = 0;
-  virtual size_t     minSize()        const = 0;
-  // Reset the dictionary to the initial conditions for a single
-  // block.
-  virtual void       reset(HeapWord* addr, size_t size) = 0;
-  virtual void       reset() = 0;
-
-  virtual void       dictCensusUpdate(size_t size, bool split, bool birth) = 0;
-  virtual bool       coalDictOverPopulated(size_t size) = 0;
-  virtual void       beginSweepDictCensus(double coalSurplusPercent,
-                       float inter_sweep_current, float inter_sweep_estimate,
-                       float inter_sweep_current, float inter_sweep_estimate,
-                       float intra_sweep_current) = 0;
-  virtual void       endSweepDictCensus(double splitSurplusPercent) = 0;
-  virtual FreeChunk* findLargestDict() const = 0;
-  // verify that the given chunk is in the dictionary.
-  virtual bool verifyChunkInFreeLists(FreeChunk* tc) const = 0;
-
-  // Sigma_{all_free_blocks} (block_size^2)
-  virtual double sum_of_squared_block_sizes() const = 0;
-
-  virtual FreeChunk* find_chunk_ends_at(HeapWord* target) const = 0;
-  virtual void inc_totalSize(size_t v) = 0;
-  virtual void dec_totalSize(size_t v) = 0;
-
-  NOT_PRODUCT (
-    virtual size_t   sumDictReturnedBytes() = 0;
-    virtual void     initializeDictReturnedBytes() = 0;
-    virtual size_t   totalCount() = 0;
-  )
-
-  virtual void       reportStatistics() const {
-    gclog_or_tty->print("No statistics available");
-  }
-
-  virtual void       printDictCensus() const = 0;
-  virtual void       print_free_lists(outputStream* st) const = 0;
-
-  virtual void       verify()         const = 0;
-
-  Mutex* par_lock()                const PRODUCT_RETURN0;
-  void   set_par_lock(Mutex* lock)       PRODUCT_RETURN;
-  void   verify_par_locked()       const PRODUCT_RETURN;
-};
-
-#endif // SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_FREEBLOCKDICTIONARY_HPP
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -23,7 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp"
+#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
+#include "memory/freeBlockDictionary.hpp"
 #include "utilities/copy.hpp"
 
 #ifndef PRODUCT
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -75,20 +75,20 @@
     // calls.  We really want the read of _mark and _prev from this pointer
     // to be volatile but making the fields volatile causes all sorts of
     // compilation errors.
-    return ((volatile FreeChunk*)addr)->isFree();
+    return ((volatile FreeChunk*)addr)->is_free();
   }
 
-  bool isFree() const volatile {
+  bool is_free() const volatile {
     LP64_ONLY(if (UseCompressedOops) return mark()->is_cms_free_chunk(); else)
     return (((intptr_t)_prev) & 0x1) == 0x1;
   }
   bool cantCoalesce() const {
-    assert(isFree(), "can't get coalesce bit on not free");
+    assert(is_free(), "can't get coalesce bit on not free");
     return (((intptr_t)_prev) & 0x2) == 0x2;
   }
   void dontCoalesce() {
     // the block should be free
-    assert(isFree(), "Should look like a free block");
+    assert(is_free(), "Should look like a free block");
     _prev = (FreeChunk*)(((intptr_t)_prev) | 0x2);
   }
   FreeChunk* prev() const {
@@ -103,23 +103,23 @@
     LP64_ONLY(if (UseCompressedOops) return mark()->get_size(); else )
     return _size;
   }
-  void setSize(size_t sz) {
+  void set_size(size_t sz) {
     LP64_ONLY(if (UseCompressedOops) set_mark(markOopDesc::set_size_and_free(sz)); else )
     _size = sz;
   }
 
   FreeChunk* next()   const { return _next; }
 
-  void linkAfter(FreeChunk* ptr) {
-    linkNext(ptr);
-    if (ptr != NULL) ptr->linkPrev(this);
+  void link_after(FreeChunk* ptr) {
+    link_next(ptr);
+    if (ptr != NULL) ptr->link_prev(this);
   }
-  void linkNext(FreeChunk* ptr) { _next = ptr; }
-  void linkPrev(FreeChunk* ptr) {
+  void link_next(FreeChunk* ptr) { _next = ptr; }
+  void link_prev(FreeChunk* ptr) {
     LP64_ONLY(if (UseCompressedOops) _prev = ptr; else)
     _prev = (FreeChunk*)((intptr_t)ptr | 0x1);
   }
-  void clearNext()              { _next = NULL; }
+  void clear_next()             { _next = NULL; }
   void markNotFree() {
     // Set _prev (klass) to null before (if) clearing the mark word below
     _prev = NULL;
@@ -129,7 +129,7 @@
       set_mark(markOopDesc::prototype());
     }
 #endif
-    assert(!isFree(), "Error");
+    assert(!is_free(), "Error");
   }
 
   // Return the address past the end of this chunk
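
The renames above (isFree to is_free, linkPrev to link_prev, and so on) sit on top of an encoding worth spelling out: FreeChunk smuggles its flags into the low bits of the _prev pointer — bit 0x1 marks the block free, bit 0x2 marks it non-coalescable — relying on word alignment to keep those bits otherwise zero (the compressed-oops path stores the bits in the mark word instead, as the LP64_ONLY branches show). A self-contained toy of the tagging scheme; this FreeChunk is illustrative, not the HotSpot class:

#include <cassert>
#include <cstdint>
#include <cstdio>

struct FreeChunk {
  FreeChunk* _prev;               // low two bits carry the flags

  FreeChunk() : _prev(0) {}

  bool is_free() const       { return ((intptr_t)_prev & 0x1) == 0x1; }
  bool cant_coalesce() const {
    assert(is_free() && "coalesce bit is only meaningful on a free block");
    return ((intptr_t)_prev & 0x2) == 0x2;
  }
  void link_prev(FreeChunk* p) {              // store pointer, keep free bit set
    _prev = (FreeChunk*)((intptr_t)p | 0x1);
  }
  void dont_coalesce()       { _prev = (FreeChunk*)((intptr_t)_prev | 0x2); }
  FreeChunk* prev() const {                   // strip the tag bits on read
    return (FreeChunk*)((intptr_t)_prev & ~(intptr_t)0x3);
  }
};

int main() {
  FreeChunk a, b;
  b.link_prev(&a);                // b is free and linked after a
  b.dont_coalesce();
  std::printf("free=%d cant_coalesce=%d prev_ok=%d\n",
              (int)b.is_free(), (int)b.cant_coalesce(), (int)(b.prev() == &a));
  return 0;
}
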
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,360 +0,0 @@
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp"
-#include "gc_implementation/concurrentMarkSweep/freeList.hpp"
-#include "memory/sharedHeap.hpp"
-#include "runtime/globals.hpp"
-#include "runtime/mutex.hpp"
-#include "runtime/vmThread.hpp"
-
-// Free list.  A FreeList is used to access a linked list of chunks
-// of space in the heap.  The head and tail are maintained so that
-// items can be (as in the current implementation) added at the
-// tail of the list and removed from the head of the list to
-// maintain a FIFO queue.
-
-FreeList::FreeList() :
-  _head(NULL), _tail(NULL)
-#ifdef ASSERT
-  , _protecting_lock(NULL)
-#endif
-{
-  _size         = 0;
-  _count        = 0;
-  _hint         = 0;
-  init_statistics();
-}
-
-FreeList::FreeList(FreeChunk* fc) :
-  _head(fc), _tail(fc)
-#ifdef ASSERT
-  , _protecting_lock(NULL)
-#endif
-{
-  _size         = fc->size();
-  _count        = 1;
-  _hint         = 0;
-  init_statistics();
-#ifndef PRODUCT
-  _allocation_stats.set_returnedBytes(size() * HeapWordSize);
-#endif
-}
-
-FreeList::FreeList(HeapWord* addr, size_t size) :
-  _head((FreeChunk*) addr), _tail((FreeChunk*) addr)
-#ifdef ASSERT
-  , _protecting_lock(NULL)
-#endif
-{
-  assert(size > sizeof(FreeChunk), "size is too small");
-  head()->setSize(size);
-  _size         = size;
-  _count        = 1;
-  init_statistics();
-#ifndef PRODUCT
-  _allocation_stats.set_returnedBytes(_size * HeapWordSize);
-#endif
-}
-
-void FreeList::reset(size_t hint) {
-  set_count(0);
-  set_head(NULL);
-  set_tail(NULL);
-  set_hint(hint);
-}
-
-void FreeList::init_statistics(bool split_birth) {
-  _allocation_stats.initialize(split_birth);
-}
-
-FreeChunk* FreeList::getChunkAtHead() {
-  assert_proper_lock_protection();
-  assert(head() == NULL || head()->prev() == NULL, "list invariant");
-  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-  FreeChunk* fc = head();
-  if (fc != NULL) {
-    FreeChunk* nextFC = fc->next();
-    if (nextFC != NULL) {
-      // The chunk fc being removed has a "next".  Set the "next" to the
-      // "prev" of fc.
-      nextFC->linkPrev(NULL);
-    } else { // removed tail of list
-      link_tail(NULL);
-    }
-    link_head(nextFC);
-    decrement_count();
-  }
-  assert(head() == NULL || head()->prev() == NULL, "list invariant");
-  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-  return fc;
-}
-
-
-void FreeList::getFirstNChunksFromList(size_t n, FreeList* fl) {
-  assert_proper_lock_protection();
-  assert(fl->count() == 0, "Precondition");
-  if (count() > 0) {
-    int k = 1;
-    fl->set_head(head()); n--;
-    FreeChunk* tl = head();
-    while (tl->next() != NULL && n > 0) {
-      tl = tl->next(); n--; k++;
-    }
-    assert(tl != NULL, "Loop Inv.");
-
-    // First, fix up the list we took from.
-    FreeChunk* new_head = tl->next();
-    set_head(new_head);
-    set_count(count() - k);
-    if (new_head == NULL) {
-      set_tail(NULL);
-    } else {
-      new_head->linkPrev(NULL);
-    }
-    // Now we can fix up the tail.
-    tl->linkNext(NULL);
-    // And return the result.
-    fl->set_tail(tl);
-    fl->set_count(k);
-  }
-}
-
-// Remove this chunk from the list
-void FreeList::removeChunk(FreeChunk*fc) {
-   assert_proper_lock_protection();
-   assert(head() != NULL, "Remove from empty list");
-   assert(fc != NULL, "Remove a NULL chunk");
-   assert(size() == fc->size(), "Wrong list");
-   assert(head() == NULL || head()->prev() == NULL, "list invariant");
-   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-
-   FreeChunk* prevFC = fc->prev();
-   FreeChunk* nextFC = fc->next();
-   if (nextFC != NULL) {
-     // The chunk fc being removed has a "next".  Set the "next" to the
-     // "prev" of fc.
-     nextFC->linkPrev(prevFC);
-   } else { // removed tail of list
-     link_tail(prevFC);
-   }
-   if (prevFC == NULL) { // removed head of list
-     link_head(nextFC);
-     assert(nextFC == NULL || nextFC->prev() == NULL,
-       "Prev of head should be NULL");
-   } else {
-     prevFC->linkNext(nextFC);
-     assert(tail() != prevFC || prevFC->next() == NULL,
-       "Next of tail should be NULL");
-   }
-   decrement_count();
-   assert(((head() == NULL) + (tail() == NULL) + (count() == 0)) % 3 == 0,
-          "H/T/C Inconsistency");
-   // clear next and prev fields of fc, debug only
-   NOT_PRODUCT(
-     fc->linkPrev(NULL);
-     fc->linkNext(NULL);
-   )
-   assert(fc->isFree(), "Should still be a free chunk");
-   assert(head() == NULL || head()->prev() == NULL, "list invariant");
-   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-   assert(head() == NULL || head()->size() == size(), "wrong item on list");
-   assert(tail() == NULL || tail()->size() == size(), "wrong item on list");
-}
-
-// Add this chunk at the head of the list.
-void FreeList::returnChunkAtHead(FreeChunk* chunk, bool record_return) {
-  assert_proper_lock_protection();
-  assert(chunk != NULL, "insert a NULL chunk");
-  assert(size() == chunk->size(), "Wrong size");
-  assert(head() == NULL || head()->prev() == NULL, "list invariant");
-  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-
-  FreeChunk* oldHead = head();
-  assert(chunk != oldHead, "double insertion");
-  chunk->linkAfter(oldHead);
-  link_head(chunk);
-  if (oldHead == NULL) { // only chunk in list
-    assert(tail() == NULL, "inconsistent FreeList");
-    link_tail(chunk);
-  }
-  increment_count(); // of # of chunks in list
-  DEBUG_ONLY(
-    if (record_return) {
-      increment_returnedBytes_by(size()*HeapWordSize);
-    }
-  )
-  assert(head() == NULL || head()->prev() == NULL, "list invariant");
-  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-  assert(head() == NULL || head()->size() == size(), "wrong item on list");
-  assert(tail() == NULL || tail()->size() == size(), "wrong item on list");
-}
-
-void FreeList::returnChunkAtHead(FreeChunk* chunk) {
-  assert_proper_lock_protection();
-  returnChunkAtHead(chunk, true);
-}
-
-// Add this chunk at the tail of the list.
-void FreeList::returnChunkAtTail(FreeChunk* chunk, bool record_return) {
-  assert_proper_lock_protection();
-  assert(head() == NULL || head()->prev() == NULL, "list invariant");
-  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-  assert(chunk != NULL, "insert a NULL chunk");
-  assert(size() == chunk->size(), "wrong size");
-
-  FreeChunk* oldTail = tail();
-  assert(chunk != oldTail, "double insertion");
-  if (oldTail != NULL) {
-    oldTail->linkAfter(chunk);
-  } else { // only chunk in list
-    assert(head() == NULL, "inconsistent FreeList");
-    link_head(chunk);
-  }
-  link_tail(chunk);
-  increment_count();  // of # of chunks in list
-  DEBUG_ONLY(
-    if (record_return) {
-      increment_returnedBytes_by(size()*HeapWordSize);
-    }
-  )
-  assert(head() == NULL || head()->prev() == NULL, "list invariant");
-  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-  assert(head() == NULL || head()->size() == size(), "wrong item on list");
-  assert(tail() == NULL || tail()->size() == size(), "wrong item on list");
-}
-
-void FreeList::returnChunkAtTail(FreeChunk* chunk) {
-  returnChunkAtTail(chunk, true);
-}
-
-void FreeList::prepend(FreeList* fl) {
-  assert_proper_lock_protection();
-  if (fl->count() > 0) {
-    if (count() == 0) {
-      set_head(fl->head());
-      set_tail(fl->tail());
-      set_count(fl->count());
-    } else {
-      // Both are non-empty.
-      FreeChunk* fl_tail = fl->tail();
-      FreeChunk* this_head = head();
-      assert(fl_tail->next() == NULL, "Well-formedness of fl");
-      fl_tail->linkNext(this_head);
-      this_head->linkPrev(fl_tail);
-      set_head(fl->head());
-      set_count(count() + fl->count());
-    }
-    fl->set_head(NULL);
-    fl->set_tail(NULL);
-    fl->set_count(0);
-  }
-}
-
-// verifyChunkInFreeLists() is used to verify that an item is in this free list.
-// It is used as a debugging aid.
-bool FreeList::verifyChunkInFreeLists(FreeChunk* fc) const {
-  // This is an internal consistency check, not part of the check that the
-  // chunk is in the free lists.
-  guarantee(fc->size() == size(), "Wrong list is being searched");
-  FreeChunk* curFC = head();
-  while (curFC) {
-    // This is an internal consistency check.
-    guarantee(size() == curFC->size(), "Chunk is in wrong list.");
-    if (fc == curFC) {
-      return true;
-    }
-    curFC = curFC->next();
-  }
-  return false;
-}
-
-#ifndef PRODUCT
-void FreeList::verify_stats() const {
-  // The +1 of the LH comparand is to allow some "looseness" in
-  // checking: we usually call this interface when adding a block
-  // and we'll subsequently update the stats; we cannot update the
-  // stats beforehand because in the case of the large-block BT
-  // dictionary for example, this might be the first block and
-  // in that case there would be no place that we could record
-  // the stats (which are kept in the block itself).
-  assert((_allocation_stats.prevSweep() + _allocation_stats.splitBirths()
-          + _allocation_stats.coalBirths() + 1)   // Total Production Stock + 1
-         >= (_allocation_stats.splitDeaths() + _allocation_stats.coalDeaths()
-             + (ssize_t)count()),                // Total Current Stock + depletion
-         err_msg("FreeList " PTR_FORMAT " of size " SIZE_FORMAT
-                 " violates Conservation Principle: "
-                 "prevSweep(" SIZE_FORMAT ")"
-                 " + splitBirths(" SIZE_FORMAT ")"
-                 " + coalBirths(" SIZE_FORMAT ") + 1 >= "
-                 " splitDeaths(" SIZE_FORMAT ")"
-                 " coalDeaths(" SIZE_FORMAT ")"
-                 " + count(" SSIZE_FORMAT ")",
-                 this, _size, _allocation_stats.prevSweep(), _allocation_stats.splitBirths(),
-                 _allocation_stats.coalBirths(), _allocation_stats.splitDeaths(),
-                 _allocation_stats.coalDeaths(), count()));
-}
-
-void FreeList::assert_proper_lock_protection_work() const {
-  assert(_protecting_lock != NULL, "Don't call this directly");
-  assert(ParallelGCThreads > 0, "Don't call this directly");
-  Thread* thr = Thread::current();
-  if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
-    // assert that we are holding the freelist lock
-  } else if (thr->is_GC_task_thread()) {
-    assert(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED");
-  } else if (thr->is_Java_thread()) {
-    assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing");
-  } else {
-    ShouldNotReachHere();  // unaccounted thread type?
-  }
-}
-#endif
-
-// Print the "label line" for free list stats.
-void FreeList::print_labels_on(outputStream* st, const char* c) {
-  st->print("%16s\t", c);
-  st->print("%14s\t"    "%14s\t"    "%14s\t"    "%14s\t"    "%14s\t"
-            "%14s\t"    "%14s\t"    "%14s\t"    "%14s\t"    "%14s\t"    "\n",
-            "bfrsurp", "surplus", "desired", "prvSwep", "bfrSwep",
-            "count",   "cBirths", "cDeaths", "sBirths", "sDeaths");
-}
-
-// Print the AllocationStats for the given free list. If the second argument
-// to the call is a non-null string, it is printed in the first column;
-// otherwise, if the argument is null (the default), then the size of the
-// (free list) block is printed in the first column.
-void FreeList::print_on(outputStream* st, const char* c) const {
-  if (c != NULL) {
-    st->print("%16s", c);
-  } else {
-    st->print(SIZE_FORMAT_W(16), size());
-  }
-  st->print("\t"
-           SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t"
-           SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\n",
-           bfrSurp(),             surplus(),             desired(),             prevSweep(),           beforeSweep(),
-           count(),               coalBirths(),          coalDeaths(),          splitBirths(),         splitDeaths());
-}
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,335 +0,0 @@
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_FREELIST_HPP
-#define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_FREELIST_HPP
-
-#include "gc_implementation/shared/allocationStats.hpp"
-
-class CompactibleFreeListSpace;
-
-// A class for maintaining a free list of FreeChunk's.  The FreeList
-// maintains the structure of the list (head, tail, etc.) plus
-// statistics for allocations from the list.  The links between items
-// are not part of FreeList.  The statistics are
-// used to make decisions about coalescing FreeChunk's when they
-// are swept during collection.
-//
-// See the corresponding .cpp file for a description of the specifics
-// for that implementation.
-
-class Mutex;
-class TreeList;
-
-class FreeList VALUE_OBJ_CLASS_SPEC {
-  friend class CompactibleFreeListSpace;
-  friend class VMStructs;
-  friend class PrintTreeCensusClosure;
-
- protected:
-  TreeList* _parent;
-  TreeList* _left;
-  TreeList* _right;
-
- private:
-  FreeChunk*    _head;          // Head of list of free chunks
-  FreeChunk*    _tail;          // Tail of list of free chunks
-  size_t        _size;          // Size in Heap words of each chunk
-  ssize_t       _count;         // Number of entries in list
-  size_t        _hint;          // next larger size list with a positive surplus
-
-  AllocationStats _allocation_stats; // allocation-related statistics
-
-#ifdef ASSERT
-  Mutex*        _protecting_lock;
-#endif
-
-  // Asserts false if the protecting lock (if any) is not held.
-  void assert_proper_lock_protection_work() const PRODUCT_RETURN;
-  void assert_proper_lock_protection() const {
-#ifdef ASSERT
-    if (_protecting_lock != NULL)
-      assert_proper_lock_protection_work();
-#endif
-  }
-
-  // Initialize the allocation statistics.
- protected:
-  void init_statistics(bool split_birth = false);
-  void set_count(ssize_t v) { _count = v;}
-  void increment_count()    {
-    _count++;
-  }
-
-  void decrement_count() {
-    _count--;
-    assert(_count >= 0, "Count should not be negative");
-  }
-
- public:
-  // Constructor
-  // Construct a list without any entries.
-  FreeList();
-  // Construct a list with "fc" as the first (and lone) entry in the list.
-  FreeList(FreeChunk* fc);
-  // Construct a list which will have a FreeChunk at address "addr" and
-  // of size "size" as the first (and lone) entry in the list.
-  FreeList(HeapWord* addr, size_t size);
-
-  // Reset the head, tail, hint, and count of a free list.
-  void reset(size_t hint);
-
-  // Declare the current free list to be protected by the given lock.
-#ifdef ASSERT
-  void set_protecting_lock(Mutex* protecting_lock) {
-    _protecting_lock = protecting_lock;
-  }
-#endif
-
-  // Accessors.
-  FreeChunk* head() const {
-    assert_proper_lock_protection();
-    return _head;
-  }
-  void set_head(FreeChunk* v) {
-    assert_proper_lock_protection();
-    _head = v;
-    assert(!_head || _head->size() == _size, "bad chunk size");
-  }
-  // Set the head of the list and set the prev field of non-null
-  // values to NULL.
-  void link_head(FreeChunk* v) {
-    assert_proper_lock_protection();
-    set_head(v);
-    // If this method is not used (just set the head instead),
-    // this check can be avoided.
-    if (v != NULL) {
-      v->linkPrev(NULL);
-    }
-  }
-
-  FreeChunk* tail() const {
-    assert_proper_lock_protection();
-    return _tail;
-  }
-  void set_tail(FreeChunk* v) {
-    assert_proper_lock_protection();
-    _tail = v;
-    assert(!_tail || _tail->size() == _size, "bad chunk size");
-  }
-  // Set the tail of the list and set the next field of non-null
-  // values to NULL.
-  void link_tail(FreeChunk* v) {
-    assert_proper_lock_protection();
-    set_tail(v);
-    if (v != NULL) {
-      v->clearNext();
-    }
-  }
-
-  // No locking checks in read-accessors: lock-free reads (only) are benign.
-  // Readers are expected to have the lock if they are doing work that
-  // requires atomicity guarantees in sections of code.
-  size_t size() const {
-    return _size;
-  }
-  void set_size(size_t v) {
-    assert_proper_lock_protection();
-    _size = v;
-  }
-  ssize_t count() const {
-    return _count;
-  }
-  size_t hint() const {
-    return _hint;
-  }
-  void set_hint(size_t v) {
-    assert_proper_lock_protection();
-    assert(v == 0 || _size < v, "Bad hint"); _hint = v;
-  }
-
-  // Accessors for statistics
-  AllocationStats* allocation_stats() {
-    assert_proper_lock_protection();
-    return &_allocation_stats;
-  }
-
-  ssize_t desired() const {
-    return _allocation_stats.desired();
-  }
-  void set_desired(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_desired(v);
-  }
-  void compute_desired(float inter_sweep_current,
-                       float inter_sweep_estimate,
-                       float intra_sweep_estimate) {
-    assert_proper_lock_protection();
-    _allocation_stats.compute_desired(_count,
-                                      inter_sweep_current,
-                                      inter_sweep_estimate,
-                                      intra_sweep_estimate);
-  }
-  ssize_t coalDesired() const {
-    return _allocation_stats.coalDesired();
-  }
-  void set_coalDesired(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_coalDesired(v);
-  }
-
-  ssize_t surplus() const {
-    return _allocation_stats.surplus();
-  }
-  void set_surplus(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_surplus(v);
-  }
-  void increment_surplus() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_surplus();
-  }
-  void decrement_surplus() {
-    assert_proper_lock_protection();
-    _allocation_stats.decrement_surplus();
-  }
-
-  ssize_t bfrSurp() const {
-    return _allocation_stats.bfrSurp();
-  }
-  void set_bfrSurp(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_bfrSurp(v);
-  }
-  ssize_t prevSweep() const {
-    return _allocation_stats.prevSweep();
-  }
-  void set_prevSweep(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_prevSweep(v);
-  }
-  ssize_t beforeSweep() const {
-    return _allocation_stats.beforeSweep();
-  }
-  void set_beforeSweep(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_beforeSweep(v);
-  }
-
-  ssize_t coalBirths() const {
-    return _allocation_stats.coalBirths();
-  }
-  void set_coalBirths(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_coalBirths(v);
-  }
-  void increment_coalBirths() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_coalBirths();
-  }
-
-  ssize_t coalDeaths() const {
-    return _allocation_stats.coalDeaths();
-  }
-  void set_coalDeaths(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_coalDeaths(v);
-  }
-  void increment_coalDeaths() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_coalDeaths();
-  }
-
-  ssize_t splitBirths() const {
-    return _allocation_stats.splitBirths();
-  }
-  void set_splitBirths(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_splitBirths(v);
-  }
-  void increment_splitBirths() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_splitBirths();
-  }
-
-  ssize_t splitDeaths() const {
-    return _allocation_stats.splitDeaths();
-  }
-  void set_splitDeaths(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_splitDeaths(v);
-  }
-  void increment_splitDeaths() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_splitDeaths();
-  }
-
-  NOT_PRODUCT(
-    // For debugging.  The "_returnedBytes" in all the lists are summed
-    // and compared with the total number of bytes swept during a
-    // collection.
-    size_t returnedBytes() const { return _allocation_stats.returnedBytes(); }
-    void set_returnedBytes(size_t v) { _allocation_stats.set_returnedBytes(v); }
-    void increment_returnedBytes_by(size_t v) {
-      _allocation_stats.set_returnedBytes(_allocation_stats.returnedBytes() + v);
-    }
-  )
-
-  // Unlink head of list and return it.  Returns NULL if
-  // the list is empty.
-  FreeChunk* getChunkAtHead();
-
-  // Remove the first "n" or "count", whichever is smaller, chunks from the
-  // list, setting "fl", which is required to be empty, to point to them.
-  void getFirstNChunksFromList(size_t n, FreeList* fl);
-
-  // Unlink this chunk from its free list
-  void removeChunk(FreeChunk* fc);
-
-  // Add this chunk to this free list.
-  void returnChunkAtHead(FreeChunk* fc);
-  void returnChunkAtTail(FreeChunk* fc);
-
-  // Similar to returnChunk* but also records some diagnostic
-  // information.
-  void returnChunkAtHead(FreeChunk* fc, bool record_return);
-  void returnChunkAtTail(FreeChunk* fc, bool record_return);
-
-  // Prepend "fl" (whose size is required to be the same as that of "this")
-  // to the front of "this" list.
-  void prepend(FreeList* fl);
-
-  // Verify that the chunk is in the list.
-  // Returns false if "fc" is not found.
-  bool verifyChunkInFreeLists(FreeChunk* fc) const;
-
-  // Stats verification
-  void verify_stats() const PRODUCT_RETURN;
-
-  // Printing support
-  static void print_labels_on(outputStream* st, const char* c);
-  void print_on(outputStream* st, const char* c = NULL) const;
-};
-
-#endif // SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_FREELIST_HPP
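
These two deletions are a move, not a removal: the FreeChunk-specific FreeList is re-homed under src/share/vm/memory and parameterized over its chunk type, which is why every use site in this changeset now reads FreeList<FreeChunk> (and, likewise, FreeBlockDictionary<FreeChunk> and BinaryTreeDictionary<FreeChunk>). A toy of the templated shape the list takes, keeping the deleted code's FIFO discipline of tail insertion and head removal — FreeListT and ToyChunk are illustrative names only:

#include <cassert>
#include <cstdio>

// Toy chunk type supplying the intrusive link the list template expects.
struct ToyChunk {
  ToyChunk* _next;
  ToyChunk() : _next(0) {}
  void link_next(ToyChunk* n) { _next = n; }
  ToyChunk* next() const      { return _next; }
};

template <class Chunk>
class FreeListT {
  Chunk* _head;
  Chunk* _tail;
  long   _count;
 public:
  FreeListT() : _head(0), _tail(0), _count(0) {}

  // FIFO: chunks are appended at the tail ...
  void return_chunk_at_tail(Chunk* c) {
    c->link_next(0);
    if (_tail != 0) _tail->link_next(c); else _head = c;
    _tail = c;
    _count++;
  }
  // ... and removed from the head.
  Chunk* get_chunk_at_head() {
    Chunk* c = _head;
    if (c != 0) {
      _head = c->next();
      if (_head == 0) _tail = 0;
      _count--;
    }
    return c;
  }
  long count() const { return _count; }
};

int main() {
  FreeListT<ToyChunk> fl;
  ToyChunk a, b;
  fl.return_chunk_at_tail(&a);
  fl.return_chunk_at_tail(&b);
  assert(fl.get_chunk_at_head() == &a);   // FIFO order preserved
  std::printf("remaining=%ld\n", fl.count());
  return 0;
}
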
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/promotionInfo.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/promotionInfo.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -121,7 +121,7 @@
 void PromotionInfo::track(PromotedObject* trackOop, klassOop klassOfOop) {
   // make a copy of header as it may need to be spooled
   markOop mark = oop(trackOop)->mark();
-  trackOop->clearNext();
+  trackOop->clear_next();
   if (mark->must_be_preserved_for_cms_scavenge(klassOfOop)) {
     // save non-prototypical header, and mark oop
     saveDisplacedHeader(mark);
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/promotionInfo.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/promotionInfo.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -43,7 +43,7 @@
   // whose position will depend on endian-ness of the platform.
   // This is so that there is no interference with the
   // cms_free_bit occupying bit position 7 (lsb == 0)
-  // when we are using compressed oops; see FreeChunk::isFree().
+  // when we are using compressed oops; see FreeChunk::is_free().
   // We cannot move the cms_free_bit down because currently
   // biased locking code assumes that age bits are contiguous
   // with the lock bits. Even if that assumption were relaxed,
@@ -65,7 +65,7 @@
   };
  public:
   inline PromotedObject* next() const {
-    assert(!((FreeChunk*)this)->isFree(), "Error");
+    assert(!((FreeChunk*)this)->is_free(), "Error");
     PromotedObject* res;
     if (UseCompressedOops) {
       // The next pointer is a compressed oop stored in the top 32 bits
@@ -85,27 +85,27 @@
     } else {
       _next |= (intptr_t)x;
     }
-    assert(!((FreeChunk*)this)->isFree(), "Error");
+    assert(!((FreeChunk*)this)->is_free(), "Error");
   }
   inline void setPromotedMark() {
     _next |= promoted_mask;
-    assert(!((FreeChunk*)this)->isFree(), "Error");
+    assert(!((FreeChunk*)this)->is_free(), "Error");
   }
   inline bool hasPromotedMark() const {
-    assert(!((FreeChunk*)this)->isFree(), "Error");
+    assert(!((FreeChunk*)this)->is_free(), "Error");
     return (_next & promoted_mask) == promoted_mask;
   }
   inline void setDisplacedMark() {
     _next |= displaced_mark;
-    assert(!((FreeChunk*)this)->isFree(), "Error");
+    assert(!((FreeChunk*)this)->is_free(), "Error");
   }
   inline bool hasDisplacedMark() const {
-    assert(!((FreeChunk*)this)->isFree(), "Error");
+    assert(!((FreeChunk*)this)->is_free(), "Error");
     return (_next & displaced_mark) != 0;
   }
-  inline void clearNext()        {
+  inline void clear_next()       {
     _next = 0;
-    assert(!((FreeChunk*)this)->isFree(), "Error");
+    assert(!((FreeChunk*)this)->is_free(), "Error");
   }
   debug_only(void *next_addr() { return (void *) &_next; })
 };
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/vmCMSOperations.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/vmCMSOperations.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -146,7 +146,7 @@
   VM_CMS_Operation::verify_before_gc();
 
   IsGCActiveMark x; // stop-world GC active
-  _collector->do_CMS_operation(CMSCollector::CMS_op_checkpointRootsInitial);
+  _collector->do_CMS_operation(CMSCollector::CMS_op_checkpointRootsInitial, gch->gc_cause());
 
   VM_CMS_Operation::verify_after_gc();
 #ifndef USDT2
@@ -178,7 +178,7 @@
   VM_CMS_Operation::verify_before_gc();
 
   IsGCActiveMark x; // stop-world GC active
-  _collector->do_CMS_operation(CMSCollector::CMS_op_checkpointRootsFinal);
+  _collector->do_CMS_operation(CMSCollector::CMS_op_checkpointRootsFinal, gch->gc_cause());
 
   VM_CMS_Operation::verify_after_gc();
 #ifndef USDT2
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -44,11 +44,11 @@
   nonstatic_field(FreeChunk,                   _next,                                         FreeChunk*)                            \
   nonstatic_field(FreeChunk,                   _prev,                                         FreeChunk*)                            \
   nonstatic_field(LinearAllocBlock,            _word_size,                                    size_t)                                \
-  nonstatic_field(FreeList,                    _size,                                         size_t)                                \
-  nonstatic_field(FreeList,                    _count,                                        ssize_t)                               \
-  nonstatic_field(BinaryTreeDictionary,        _totalSize,                                    size_t)                                \
-  nonstatic_field(CompactibleFreeListSpace,    _dictionary,                                   FreeBlockDictionary*)                  \
-  nonstatic_field(CompactibleFreeListSpace,    _indexedFreeList[0],                           FreeList)                              \
+  nonstatic_field(FreeList<FreeChunk>,         _size,                                         size_t)                                \
+  nonstatic_field(FreeList<FreeChunk>,         _count,                                        ssize_t)                               \
+  nonstatic_field(BinaryTreeDictionary<FreeChunk>, _total_size,                                size_t)                                \
+  nonstatic_field(CompactibleFreeListSpace,    _dictionary,                                   FreeBlockDictionary<FreeChunk>*)       \
+  nonstatic_field(CompactibleFreeListSpace,    _indexedFreeList[0],                           FreeList<FreeChunk>)                   \
   nonstatic_field(CompactibleFreeListSpace,    _smallLinearAllocBlock,                        LinearAllocBlock)
 
 
@@ -70,13 +70,13 @@
   declare_toplevel_type(CompactibleFreeListSpace*)                        \
   declare_toplevel_type(CMSCollector*)                                    \
   declare_toplevel_type(FreeChunk*)                                       \
-  declare_toplevel_type(BinaryTreeDictionary*)                            \
-  declare_toplevel_type(FreeBlockDictionary*)                             \
-  declare_toplevel_type(FreeList*)                                        \
-  declare_toplevel_type(FreeList)                                         \
+  declare_toplevel_type(BinaryTreeDictionary<FreeChunk>*)                 \
+  declare_toplevel_type(FreeBlockDictionary<FreeChunk>*)                  \
+  declare_toplevel_type(FreeList<FreeChunk>*)                             \
+  declare_toplevel_type(FreeList<FreeChunk>)                              \
   declare_toplevel_type(LinearAllocBlock)                                 \
-  declare_toplevel_type(FreeBlockDictionary)                              \
-            declare_type(BinaryTreeDictionary,        FreeBlockDictionary)
+  declare_toplevel_type(FreeBlockDictionary<FreeChunk>)                   \
+            declare_type(BinaryTreeDictionary<FreeChunk>, FreeBlockDictionary<FreeChunk>)
 
 #define VM_INT_CONSTANTS_CMS(declare_constant)                            \
   declare_constant(Generation::ConcurrentMarkSweep)                       \
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -29,102 +29,6 @@
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
 #include "memory/space.inline.hpp"
 
-CSetChooserCache::CSetChooserCache() {
-  for (int i = 0; i < CacheLength; ++i)
-    _cache[i] = NULL;
-  clear();
-}
-
-void CSetChooserCache::clear() {
-  _occupancy = 0;
-  _first = 0;
-  for (int i = 0; i < CacheLength; ++i) {
-    HeapRegion *hr = _cache[i];
-    if (hr != NULL)
-      hr->set_sort_index(-1);
-    _cache[i] = NULL;
-  }
-}
-
-#ifndef PRODUCT
-bool CSetChooserCache::verify() {
-  guarantee(false, "CSetChooserCache::verify(): don't call this any more");
-
-  int index = _first;
-  HeapRegion *prev = NULL;
-  for (int i = 0; i < _occupancy; ++i) {
-    guarantee(_cache[index] != NULL, "cache entry should not be empty");
-    HeapRegion *hr = _cache[index];
-    guarantee(!hr->is_young(), "should not be young!");
-    if (prev != NULL) {
-      guarantee(prev->gc_efficiency() >= hr->gc_efficiency(),
-                "cache should be correctly ordered");
-    }
-    guarantee(hr->sort_index() == get_sort_index(index),
-              "sort index should be correct");
-    index = trim_index(index + 1);
-    prev = hr;
-  }
-
-  for (int i = 0; i < (CacheLength - _occupancy); ++i) {
-    guarantee(_cache[index] == NULL, "cache entry should be empty");
-    index = trim_index(index + 1);
-  }
-
-  guarantee(index == _first, "we should have reached where we started from");
-  return true;
-}
-#endif // PRODUCT
-
-void CSetChooserCache::insert(HeapRegion *hr) {
-  guarantee(false, "CSetChooserCache::insert(): don't call this any more");
-
-  assert(!is_full(), "cache should not be empty");
-  hr->calc_gc_efficiency();
-
-  int empty_index;
-  if (_occupancy == 0) {
-    empty_index = _first;
-  } else {
-    empty_index = trim_index(_first + _occupancy);
-    assert(_cache[empty_index] == NULL, "last slot should be empty");
-    int last_index = trim_index(empty_index - 1);
-    HeapRegion *last = _cache[last_index];
-    assert(last != NULL,"as the cache is not empty, last should not be empty");
-    while (empty_index != _first &&
-           last->gc_efficiency() < hr->gc_efficiency()) {
-      _cache[empty_index] = last;
-      last->set_sort_index(get_sort_index(empty_index));
-      empty_index = last_index;
-      last_index = trim_index(last_index - 1);
-      last = _cache[last_index];
-    }
-  }
-  _cache[empty_index] = hr;
-  hr->set_sort_index(get_sort_index(empty_index));
-
-  ++_occupancy;
-  assert(verify(), "cache should be consistent");
-}
-
-HeapRegion *CSetChooserCache::remove_first() {
-  guarantee(false, "CSetChooserCache::remove_first(): "
-                   "don't call this any more");
-
-  if (_occupancy > 0) {
-    assert(_cache[_first] != NULL, "cache should have at least one region");
-    HeapRegion *ret = _cache[_first];
-    _cache[_first] = NULL;
-    ret->set_sort_index(-1);
-    --_occupancy;
-    _first = trim_index(_first + 1);
-    assert(verify(), "cache should be consistent");
-    return ret;
-  } else {
-    return NULL;
-  }
-}
-
 // Even though we don't use the GC efficiency in our heuristics as
 // much as we used to, we still order according to GC efficiency. This
 // will cause regions with a lot of live objects and large RSets to
@@ -134,7 +38,7 @@
 // the ones we'll skip are ones with both large RSets and a lot of
 // live objects, not the ones with just a lot of live objects if we
 // ordered according to the amount of reclaimable bytes per region.
-static int orderRegions(HeapRegion* hr1, HeapRegion* hr2) {
+static int order_regions(HeapRegion* hr1, HeapRegion* hr2) {
   if (hr1 == NULL) {
     if (hr2 == NULL) {
       return 0;
@@ -156,8 +60,8 @@
   }
 }
 
-static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) {
-  return orderRegions(*hr1p, *hr2p);
+static int order_regions(HeapRegion** hr1p, HeapRegion** hr2p) {
+  return order_regions(*hr1p, *hr2p);
 }
 
 CollectionSetChooser::CollectionSetChooser() :
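The hunk above elides the middle of order_regions(); from the surrounding context (and the GC-efficiency error message in verify() below), the full comparator plausibly has this qsort-style shape: NULL entries sort last, and regions with higher gc_efficiency() sort first. A sketch under that assumption, with negative meaning hr1 sorts earlier:

    static int order_regions_sketch(HeapRegion* hr1, HeapRegion* hr2) {
      if (hr1 == NULL) {
        return (hr2 == NULL) ? 0 : 1;       // NULLs sink to the end
      } else if (hr2 == NULL) {
        return -1;
      }
      double e1 = hr1->gc_efficiency();
      double e2 = hr2->gc_efficiency();
      if (e1 > e2)      return -1;          // more efficient region first
      else if (e1 < e2) return 1;
      else              return 0;
    }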
@@ -175,105 +79,74 @@
   //
   // Note: containing object is allocated on C heap since it is CHeapObj.
   //
-  _markedRegions((ResourceObj::set_allocation_type((address)&_markedRegions,
+  _regions((ResourceObj::set_allocation_type((address) &_regions,
                                              ResourceObj::C_HEAP),
                   100), true /* C_Heap */),
-    _curr_index(0), _length(0),
-    _regionLiveThresholdBytes(0), _remainingReclaimableBytes(0),
-    _first_par_unreserved_idx(0) {
-  _regionLiveThresholdBytes =
+    _curr_index(0), _length(0), _first_par_unreserved_idx(0),
+    _region_live_threshold_bytes(0), _remaining_reclaimable_bytes(0) {
+  _region_live_threshold_bytes =
     HeapRegion::GrainBytes * (size_t) G1OldCSetRegionLiveThresholdPercent / 100;
 }
 
 #ifndef PRODUCT
-bool CollectionSetChooser::verify() {
-  guarantee(_length >= 0, err_msg("_length: %d", _length));
-  guarantee(0 <= _curr_index && _curr_index <= _length,
-            err_msg("_curr_index: %d _length: %d", _curr_index, _length));
-  int index = 0;
+void CollectionSetChooser::verify() {
+  guarantee(_length <= regions_length(),
+         err_msg("_length: %u regions length: %u", _length, regions_length()));
+  guarantee(_curr_index <= _length,
+            err_msg("_curr_index: %u _length: %u", _curr_index, _length));
+  uint index = 0;
   size_t sum_of_reclaimable_bytes = 0;
   while (index < _curr_index) {
-    guarantee(_markedRegions.at(index) == NULL,
+    guarantee(regions_at(index) == NULL,
               "all entries before _curr_index should be NULL");
     index += 1;
   }
   HeapRegion *prev = NULL;
   while (index < _length) {
-    HeapRegion *curr = _markedRegions.at(index++);
-    guarantee(curr != NULL, "Regions in _markedRegions array cannot be NULL");
-    int si = curr->sort_index();
+    HeapRegion *curr = regions_at(index++);
+    guarantee(curr != NULL, "Regions in _regions array cannot be NULL");
     guarantee(!curr->is_young(), "should not be young!");
     guarantee(!curr->isHumongous(), "should not be humongous!");
-    guarantee(si > -1 && si == (index-1), "sort index invariant");
     if (prev != NULL) {
-      guarantee(orderRegions(prev, curr) != 1,
+      guarantee(order_regions(prev, curr) != 1,
                 err_msg("GC eff prev: %1.4f GC eff curr: %1.4f",
                         prev->gc_efficiency(), curr->gc_efficiency()));
     }
     sum_of_reclaimable_bytes += curr->reclaimable_bytes();
     prev = curr;
   }
-  guarantee(sum_of_reclaimable_bytes == _remainingReclaimableBytes,
+  guarantee(sum_of_reclaimable_bytes == _remaining_reclaimable_bytes,
             err_msg("reclaimable bytes inconsistent, "
                     "remaining: "SIZE_FORMAT" sum: "SIZE_FORMAT,
-                    _remainingReclaimableBytes, sum_of_reclaimable_bytes));
-  return true;
+                    _remaining_reclaimable_bytes, sum_of_reclaimable_bytes));
 }
-#endif
-
-void CollectionSetChooser::fillCache() {
-  guarantee(false, "fillCache: don't call this any more");
+#endif // !PRODUCT
 
-  while (!_cache.is_full() && (_curr_index < _length)) {
-    HeapRegion* hr = _markedRegions.at(_curr_index);
-    assert(hr != NULL,
-           err_msg("Unexpected NULL hr in _markedRegions at index %d",
-                   _curr_index));
-    _curr_index += 1;
-    assert(!hr->is_young(), "should not be young!");
-    assert(hr->sort_index() == _curr_index-1, "sort_index invariant");
-    _markedRegions.at_put(hr->sort_index(), NULL);
-    _cache.insert(hr);
-    assert(!_cache.is_empty(), "cache should not be empty");
-  }
-  assert(verify(), "cache should be consistent");
-}
-
-void CollectionSetChooser::sortMarkedHeapRegions() {
+void CollectionSetChooser::sort_regions() {
   // First trim any unused portion of the top in the parallel case.
   if (_first_par_unreserved_idx > 0) {
-    if (G1PrintParCleanupStats) {
-      gclog_or_tty->print("     Truncating _markedRegions from %d to %d.\n",
-                          _markedRegions.length(), _first_par_unreserved_idx);
-    }
-    assert(_first_par_unreserved_idx <= _markedRegions.length(),
+    assert(_first_par_unreserved_idx <= regions_length(),
            "Or we didn't reserved enough length");
-    _markedRegions.trunc_to(_first_par_unreserved_idx);
+    regions_trunc_to(_first_par_unreserved_idx);
   }
-  _markedRegions.sort(orderRegions);
-  assert(_length <= _markedRegions.length(), "Requirement");
-  assert(_length == 0 || _markedRegions.at(_length - 1) != NULL,
-         "Testing _length");
-  assert(_length == _markedRegions.length() ||
-                        _markedRegions.at(_length) == NULL, "Testing _length");
-  if (G1PrintParCleanupStats) {
-    gclog_or_tty->print_cr("     Sorted %d marked regions.", _length);
+  _regions.sort(order_regions);
+  assert(_length <= regions_length(), "Requirement");
+#ifdef ASSERT
+  for (uint i = 0; i < _length; i++) {
+    assert(regions_at(i) != NULL, "Should be true by sorting!");
   }
-  for (int i = 0; i < _length; i++) {
-    assert(_markedRegions.at(i) != NULL, "Should be true by sorting!");
-    _markedRegions.at(i)->set_sort_index(i);
-  }
+#endif // ASSERT
   if (G1PrintRegionLivenessInfo) {
     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Sorting");
-    for (int i = 0; i < _length; ++i) {
-      HeapRegion* r = _markedRegions.at(i);
+    for (uint i = 0; i < _length; ++i) {
+      HeapRegion* r = regions_at(i);
       cl.doHeapRegion(r);
     }
   }
-  assert(verify(), "CSet chooser verification");
+  verify();
 }
 
-size_t CollectionSetChooser::calcMinOldCSetLength() {
+uint CollectionSetChooser::calc_min_old_cset_length() {
   // The min old CSet region bound is based on the maximum desired
   // number of mixed GCs after a cycle. I.e., even if some old regions
   // look expensive, we should add them to the CSet anyway to make
@@ -285,16 +158,16 @@
   // that the result is the same during all mixed GCs that follow a cycle.
 
   const size_t region_num = (size_t) _length;
-  const size_t gc_num = (size_t) G1MaxMixedGCNum;
+  const size_t gc_num = (size_t) G1MixedGCCountTarget;
   size_t result = region_num / gc_num;
   // emulate ceiling
   if (result * gc_num < region_num) {
     result += 1;
   }
-  return result;
+  return (uint) result;
 }
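The "emulate ceiling" step above is plain integer ceiling division. A worked example with illustrative numbers:

    #include <cstddef>

    // ceil(region_num / gc_num) in integer arithmetic. E.g. with 10
    // candidate regions and G1MixedGCCountTarget == 4 mixed GCs:
    //   10 / 4 = 2;  2 * 4 = 8 < 10, so bump to 3
    // i.e. at least 3 old regions per mixed GC consumes all 10 within
    // the target number of collections.
    size_t ceil_div(size_t num, size_t den) {
      size_t result = num / den;
      if (result * den < num) {
        result += 1;
      }
      return result;
    }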
 
-size_t CollectionSetChooser::calcMaxOldCSetLength() {
+uint CollectionSetChooser::calc_max_old_cset_length() {
   // The max old CSet region bound is based on the threshold expressed
   // as a percentage of the heap size. I.e., it should bound the
   // number of old regions added to the CSet irrespective of how many
@@ -308,23 +181,23 @@
   if (100 * result < region_num * perc) {
     result += 1;
   }
-  return result;
+  return (uint) result;
 }
 
-void CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) {
+void CollectionSetChooser::add_region(HeapRegion* hr) {
   assert(!hr->isHumongous(),
          "Humongous regions shouldn't be added to the collection set");
   assert(!hr->is_young(), "should not be young!");
-  _markedRegions.append(hr);
+  _regions.append(hr);
   _length++;
-  _remainingReclaimableBytes += hr->reclaimable_bytes();
+  _remaining_reclaimable_bytes += hr->reclaimable_bytes();
   hr->calc_gc_efficiency();
 }
 
-void CollectionSetChooser::prepareForAddMarkedHeapRegionsPar(size_t n_regions,
-                                                             size_t chunkSize) {
+void CollectionSetChooser::prepare_for_par_region_addition(uint n_regions,
+                                                           uint chunk_size) {
   _first_par_unreserved_idx = 0;
-  int n_threads = ParallelGCThreads;
+  uint n_threads = (uint) ParallelGCThreads;
   if (UseDynamicNumberOfGCThreads) {
     assert(G1CollectedHeap::heap()->workers()->active_workers() > 0,
       "Should have been set earlier");
@@ -335,57 +208,46 @@
     n_threads = MAX2(G1CollectedHeap::heap()->workers()->active_workers(),
                      1U);
   }
-  size_t max_waste = n_threads * chunkSize;
-  // it should be aligned with respect to chunkSize
-  size_t aligned_n_regions =
-                     (n_regions + (chunkSize - 1)) / chunkSize * chunkSize;
-  assert( aligned_n_regions % chunkSize == 0, "should be aligned" );
-  _markedRegions.at_put_grow((int)(aligned_n_regions + max_waste - 1), NULL);
+  uint max_waste = n_threads * chunk_size;
+  // it should be aligned with respect to chunk_size
+  uint aligned_n_regions = (n_regions + chunk_size - 1) / chunk_size * chunk_size;
+  assert(aligned_n_regions % chunk_size == 0, "should be aligned");
+  regions_at_put_grow(aligned_n_regions + max_waste - 1, NULL);
 }
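The alignment expression above is the usual round-up-to-a-multiple idiom; a small standalone sketch with illustrative numbers:

    // (n + chunk - 1) / chunk * chunk rounds n up to a whole number of
    // chunks, e.g. n_regions = 10, chunk_size = 4: (10 + 3) / 4 * 4 = 12.
    // Growing the array to that size (plus max_waste slack) means every
    // worker can claim a full chunk without running off the end.
    unsigned align_up(unsigned n, unsigned chunk) {
      return (n + chunk - 1) / chunk * chunk;
    }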
 
-jint CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) {
-  // Don't do this assert because this can be called at a point
-  // where the loop up stream will not execute again but might
-  // try to claim more chunks (loop test has not been done yet).
-  // assert(_markedRegions.length() > _first_par_unreserved_idx,
-  //  "Striding beyond the marked regions");
-  jint res = Atomic::add(n_regions, &_first_par_unreserved_idx);
-  assert(_markedRegions.length() > res + n_regions - 1,
+uint CollectionSetChooser::claim_array_chunk(uint chunk_size) {
+  uint res = (uint) Atomic::add((jint) chunk_size,
+                                (volatile jint*) &_first_par_unreserved_idx);
+  assert(regions_length() > res + chunk_size - 1,
          "Should already have been expanded");
-  return res - n_regions;
+  return res - chunk_size;
 }
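claim_array_chunk() is a fetch-and-add chunk claim. A standalone sketch of the protocol using std::atomic (HotSpot predates C++11 here, so this is an analogy, not the real primitive):

    #include <atomic>

    std::atomic<unsigned> first_unreserved_idx(0);

    // Each worker atomically advances the shared cursor by chunk_size
    // and owns the half-open index range [start, start + chunk_size).
    unsigned claim_chunk(unsigned chunk_size) {
      // fetch_add returns the value *before* the increment, which is the
      // chunk start. HotSpot's Atomic::add returns the value *after* the
      // increment, hence the "res - chunk_size" correction above.
      return first_unreserved_idx.fetch_add(chunk_size);
    }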
 
-void CollectionSetChooser::setMarkedHeapRegion(jint index, HeapRegion* hr) {
-  assert(_markedRegions.at(index) == NULL, "precondition");
+void CollectionSetChooser::set_region(uint index, HeapRegion* hr) {
+  assert(regions_at(index) == NULL, "precondition");
   assert(!hr->is_young(), "should not be young!");
-  _markedRegions.at_put(index, hr);
+  regions_at_put(index, hr);
   hr->calc_gc_efficiency();
 }
 
-void CollectionSetChooser::updateTotals(jint region_num,
-                                        size_t reclaimable_bytes) {
+void CollectionSetChooser::update_totals(uint region_num,
+                                         size_t reclaimable_bytes) {
   // Only take the lock if we actually need to update the totals.
   if (region_num > 0) {
     assert(reclaimable_bytes > 0, "invariant");
     // We could have just used atomics instead of taking the
     // lock. However, we currently don't have an atomic add for size_t.
     MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
-    _length += (int) region_num;
-    _remainingReclaimableBytes += reclaimable_bytes;
+    _length += region_num;
+    _remaining_reclaimable_bytes += reclaimable_bytes;
   } else {
     assert(reclaimable_bytes == 0, "invariant");
   }
 }
 
-void CollectionSetChooser::clearMarkedHeapRegions() {
-  for (int i = 0; i < _markedRegions.length(); i++) {
-    HeapRegion* r = _markedRegions.at(i);
-    if (r != NULL) {
-      r->set_sort_index(-1);
-    }
-  }
-  _markedRegions.clear();
+void CollectionSetChooser::clear() {
+  _regions.clear();
   _curr_index = 0;
   _length = 0;
-  _remainingReclaimableBytes = 0;
+  _remaining_reclaimable_bytes = 0;
 };
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -28,77 +28,42 @@
 #include "gc_implementation/g1/heapRegion.hpp"
 #include "utilities/growableArray.hpp"
 
-class CSetChooserCache VALUE_OBJ_CLASS_SPEC {
-private:
-  enum {
-    CacheLength = 16
-  } PrivateConstants;
+class CollectionSetChooser: public CHeapObj<mtGC> {
 
-  HeapRegion*  _cache[CacheLength];
-  int          _occupancy; // number of regions in cache
-  int          _first;     // (index of) "first" region in the cache
+  GrowableArray<HeapRegion*> _regions;
 
-  // adding CacheLength to deal with negative values
-  inline int trim_index(int index) {
-    return (index + CacheLength) % CacheLength;
-  }
-
-  inline int get_sort_index(int index) {
-    return -index-2;
-  }
-  inline int get_index(int sort_index) {
-    return -sort_index-2;
-  }
-
-public:
-  CSetChooserCache(void);
+  // Unfortunately, GrowableArray uses ints for length and indexes. To
+  // avoid excessive casting in the rest of the class, the following
+  // wrapper methods are provided that use uints.
 
-  inline int occupancy(void) { return _occupancy; }
-  inline bool is_full()      { return _occupancy == CacheLength; }
-  inline bool is_empty()     { return _occupancy == 0; }
-
-  void clear(void);
-  void insert(HeapRegion *hr);
-  HeapRegion *remove_first(void);
-  inline HeapRegion *get_first(void) {
-    return _cache[_first];
+  uint regions_length()          { return (uint) _regions.length(); }
+  HeapRegion* regions_at(uint i) { return _regions.at((int) i);     }
+  void regions_at_put(uint i, HeapRegion* hr) {
+    _regions.at_put((int) i, hr);
   }
-
-#ifndef PRODUCT
-  bool verify (void);
-  bool region_in_cache(HeapRegion *hr) {
-    int sort_index = hr->sort_index();
-    if (sort_index < -1) {
-      int index = get_index(sort_index);
-      guarantee(index < CacheLength, "should be within bounds");
-      return _cache[index] == hr;
-    } else
-      return 0;
+  void regions_at_put_grow(uint i, HeapRegion* hr) {
+    _regions.at_put_grow((int) i, hr);
   }
-#endif // PRODUCT
-};
-
-class CollectionSetChooser: public CHeapObj {
-
-  GrowableArray<HeapRegion*> _markedRegions;
+  void regions_trunc_to(uint i)  { _regions.trunc_to((int) i); }
 
   // The index of the next candidate old region to be considered for
   // addition to the CSet.
-  int _curr_index;
+  uint _curr_index;
 
   // The number of candidate old regions added to the CSet chooser.
-  int _length;
+  uint _length;
 
-  CSetChooserCache _cache;
-  jint _first_par_unreserved_idx;
+  // Keeps track of the start of the next array chunk to be claimed by
+  // parallel GC workers.
+  uint _first_par_unreserved_idx;
 
   // If a region has more live bytes than this threshold, it will not
   // be added to the CSet chooser and will not be a candidate for
   // collection.
-  size_t _regionLiveThresholdBytes;
+  size_t _region_live_threshold_bytes;
 
   // The sum of reclaimable bytes over all the regions in the CSet chooser.
-  size_t _remainingReclaimableBytes;
+  size_t _remaining_reclaimable_bytes;
 
 public:
 
@@ -107,9 +72,9 @@
   HeapRegion* peek() {
     HeapRegion* res = NULL;
     if (_curr_index < _length) {
-      res = _markedRegions.at(_curr_index);
+      res = regions_at(_curr_index);
       assert(res != NULL,
-             err_msg("Unexpected NULL hr in _markedRegions at index %d",
+             err_msg("Unexpected NULL hr in _regions at index %u",
                      _curr_index));
     }
     return res;
@@ -121,90 +86,114 @@
   void remove_and_move_to_next(HeapRegion* hr) {
     assert(hr != NULL, "pre-condition");
     assert(_curr_index < _length, "pre-condition");
-    assert(_markedRegions.at(_curr_index) == hr, "pre-condition");
-    hr->set_sort_index(-1);
-    _markedRegions.at_put(_curr_index, NULL);
-    assert(hr->reclaimable_bytes() <= _remainingReclaimableBytes,
+    assert(regions_at(_curr_index) == hr, "pre-condition");
+    regions_at_put(_curr_index, NULL);
+    assert(hr->reclaimable_bytes() <= _remaining_reclaimable_bytes,
            err_msg("remaining reclaimable bytes inconsistent "
                    "from region: "SIZE_FORMAT" remaining: "SIZE_FORMAT,
-                   hr->reclaimable_bytes(), _remainingReclaimableBytes));
-    _remainingReclaimableBytes -= hr->reclaimable_bytes();
+                   hr->reclaimable_bytes(), _remaining_reclaimable_bytes));
+    _remaining_reclaimable_bytes -= hr->reclaimable_bytes();
     _curr_index += 1;
   }
 
   CollectionSetChooser();
 
-  void sortMarkedHeapRegions();
-  void fillCache();
+  void sort_regions();
 
   // Determine whether to add the given region to the CSet chooser or
   // not. Currently, we skip humongous regions (we never add them to
   // the CSet, we only reclaim them during cleanup) and regions whose
   // live bytes are over the threshold.
-  bool shouldAdd(HeapRegion* hr) {
+  bool should_add(HeapRegion* hr) {
     assert(hr->is_marked(), "pre-condition");
     assert(!hr->is_young(), "should never consider young regions");
     return !hr->isHumongous() &&
-            hr->live_bytes() < _regionLiveThresholdBytes;
+            hr->live_bytes() < _region_live_threshold_bytes;
   }
 
   // Calculate the minimum number of old regions we'll add to the CSet
   // during a mixed GC.
-  size_t calcMinOldCSetLength();
+  uint calc_min_old_cset_length();
 
   // Calculate the maximum number of old regions we'll add to the CSet
   // during a mixed GC.
-  size_t calcMaxOldCSetLength();
+  uint calc_max_old_cset_length();
 
   // Serial version.
-  void addMarkedHeapRegion(HeapRegion *hr);
+  void add_region(HeapRegion *hr);
 
-  // Must be called before calls to getParMarkedHeapRegionChunk.
-  // "n_regions" is the number of regions, "chunkSize" the chunk size.
-  void prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize);
-  // Returns the first index in a contiguous chunk of "n_regions" indexes
+  // Must be called before calls to claim_array_chunk().
+  // n_regions is the number of regions, chunk_size the chunk size.
+  void prepare_for_par_region_addition(uint n_regions, uint chunk_size);
+  // Returns the first index in a contiguous chunk of chunk_size indexes
   // that the calling thread has reserved.  These must be set by the
-  // calling thread using "setMarkedHeapRegion" (to NULL if necessary).
-  jint getParMarkedHeapRegionChunk(jint n_regions);
+  // calling thread using set_region() (to NULL if necessary).
+  uint claim_array_chunk(uint chunk_size);
   // Set the marked array entry at index to hr.  Careful to claim the index
   // first if in parallel.
-  void setMarkedHeapRegion(jint index, HeapRegion* hr);
+  void set_region(uint index, HeapRegion* hr);
   // Atomically increment the number of added regions by region_num
   // and the amount of reclaimable bytes by reclaimable_bytes.
-  void updateTotals(jint region_num, size_t reclaimable_bytes);
+  void update_totals(uint region_num, size_t reclaimable_bytes);
 
-  void clearMarkedHeapRegions();
+  void clear();
 
   // Return the number of candidate regions that remain to be collected.
-  size_t remainingRegions() { return _length - _curr_index; }
+  uint remaining_regions() { return _length - _curr_index; }
 
   // Determine whether the CSet chooser has more candidate regions or not.
-  bool isEmpty() { return remainingRegions() == 0; }
+  bool is_empty() { return remaining_regions() == 0; }
 
   // Return the reclaimable bytes that remain to be collected on
   // all the candidate regions in the CSet chooser.
-  size_t remainingReclaimableBytes () { return _remainingReclaimableBytes; }
+  size_t remaining_reclaimable_bytes() { return _remaining_reclaimable_bytes; }
 
-  // Returns true if the used portion of "_markedRegions" is properly
+  // Checks that the used portion of "_regions" is properly
   // sorted, otherwise asserts false.
-#ifndef PRODUCT
-  bool verify(void);
-  bool regionProperlyOrdered(HeapRegion* r) {
-    int si = r->sort_index();
-    if (si > -1) {
-      guarantee(_curr_index <= si && si < _length,
-                err_msg("curr: %d sort index: %d: length: %d",
-                        _curr_index, si, _length));
-      guarantee(_markedRegions.at(si) == r,
-                err_msg("sort index: %d at: "PTR_FORMAT" r: "PTR_FORMAT,
-                        si, _markedRegions.at(si), r));
+  void verify() PRODUCT_RETURN;
+};
+
+class CSetChooserParUpdater : public StackObj {
+private:
+  CollectionSetChooser* _chooser;
+  bool _parallel;
+  uint _chunk_size;
+  uint _cur_chunk_idx;
+  uint _cur_chunk_end;
+  uint _regions_added;
+  size_t _reclaimable_bytes_added;
+
+public:
+  CSetChooserParUpdater(CollectionSetChooser* chooser,
+                        bool parallel, uint chunk_size) :
+    _chooser(chooser), _parallel(parallel), _chunk_size(chunk_size),
+    _cur_chunk_idx(0), _cur_chunk_end(0),
+    _regions_added(0), _reclaimable_bytes_added(0) { }
+
+  ~CSetChooserParUpdater() {
+    if (_parallel && _regions_added > 0) {
+      _chooser->update_totals(_regions_added, _reclaimable_bytes_added);
+    }
+  }
+
+  void add_region(HeapRegion* hr) {
+    if (_parallel) {
+      if (_cur_chunk_idx == _cur_chunk_end) {
+        _cur_chunk_idx = _chooser->claim_array_chunk(_chunk_size);
+        _cur_chunk_end = _cur_chunk_idx + _chunk_size;
+      }
+      assert(_cur_chunk_idx < _cur_chunk_end, "invariant");
+      _chooser->set_region(_cur_chunk_idx, hr);
+      _cur_chunk_idx += 1;
     } else {
-      guarantee(si == -1, err_msg("sort index: %d", si));
+      _chooser->add_region(hr);
     }
-    return true;
+    _regions_added += 1;
+    _reclaimable_bytes_added += hr->reclaimable_bytes();
   }
-#endif
 
+  bool should_add(HeapRegion* hr) { return _chooser->should_add(hr); }
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_COLLECTIONSETCHOOSER_HPP
+
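The new CSetChooserParUpdater is an RAII helper: workers add regions through it, and the destructor publishes the accumulated totals once per worker instead of taking a lock per region. A hedged usage sketch (the region-iteration plumbing here is hypothetical):

    // Hypothetical worker body built on the class above.
    void scan_candidates_sketch(CollectionSetChooser* chooser,
                                HeapRegion** my_regions, unsigned n,
                                bool parallel, unsigned chunk_size) {
      CSetChooserParUpdater updater(chooser, parallel, chunk_size);
      for (unsigned i = 0; i < n; i++) {
        HeapRegion* hr = my_regions[i];
        if (updater.should_add(hr)) {
          updater.add_region(hr);   // claims array chunks as needed
        }
      }
    } // destructor calls update_totals() once, only in the parallel case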
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
 #include "gc_implementation/g1/concurrentG1RefineThread.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "memory/space.inline.hpp"
@@ -79,7 +80,7 @@
   _n_threads = _n_worker_threads + 1;
   reset_threshold_step();
 
-  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
+  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads, mtGC);
   int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
   ConcurrentG1RefineThread *next = NULL;
   for (int i = _n_threads - 1; i >= 0; i--) {
@@ -157,7 +158,7 @@
     _def_use_cache = true;
     _use_cache = true;
     _hot_cache_size = (1 << G1ConcRSLogCacheSize);
-    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
+    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC);
     _n_hot = 0;
     _hot_cache_idx = 0;
 
@@ -191,18 +192,18 @@
     // Please see the comment in allocate_card_count_cache
     // for why we call os::malloc() and os::free() directly.
     assert(_card_counts != NULL, "Logic");
-    os::free(_card_counts);
+    os::free(_card_counts, mtGC);
     assert(_card_epochs != NULL, "Logic");
-    os::free(_card_epochs);
+    os::free(_card_epochs, mtGC);
 
     assert(_hot_cache != NULL, "Logic");
-    FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
+    FREE_C_HEAP_ARRAY(jbyte*, _hot_cache, mtGC);
   }
   if (_threads != NULL) {
     for (int i = 0; i < _n_threads; i++) {
       delete _threads[i];
     }
-    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
+    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads, mtGC);
   }
 }
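The mtGC argument now threaded through NEW_C_HEAP_ARRAY, os::malloc and os::free tags each native allocation with a memory type, so the native memory tracker (memTracker.hpp, included elsewhere in this changeset) can attribute usage per subsystem. A toy sketch of the idea, with hypothetical names:

    #include <cstddef>
    #include <cstdlib>

    // Illustration only: attribute native allocations to a subsystem
    // tag so per-category totals can be reported. Not thread-safe.
    enum MemTag { tagGC, tagCompiler, tagOther, tagCount };
    static size_t usage_bytes[tagCount];

    void* tagged_malloc(size_t size, MemTag tag) {
      usage_bytes[tag] += size;
      return std::malloc(size);
    }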
 
@@ -436,17 +437,17 @@
   size_t counts_size = n * sizeof(CardCountCacheEntry);
   size_t epochs_size = n * sizeof(CardEpochCacheEntry);
 
-  *counts = (CardCountCacheEntry*) os::malloc(counts_size);
+  *counts = (CardCountCacheEntry*) os::malloc(counts_size, mtGC);
   if (*counts == NULL) {
     // allocation was unsuccessful
     return false;
   }
 
-  *epochs = (CardEpochCacheEntry*) os::malloc(epochs_size);
+  *epochs = (CardEpochCacheEntry*) os::malloc(epochs_size, mtGC);
   if (*epochs == NULL) {
     // allocation was unsuccessful - free counts array
     assert(*counts != NULL, "must be");
-    os::free(*counts);
+    os::free(*counts, mtGC);
     *counts = NULL;
     return false;
   }
@@ -479,8 +480,8 @@
         // Allocation was successful.
         // We can just free the old arrays; we're
         // not interested in preserving the contents
-        if (_card_counts != NULL) os::free(_card_counts);
-        if (_card_epochs != NULL) os::free(_card_epochs);
+        if (_card_counts != NULL) os::free(_card_counts, mtGC);
+        if (_card_epochs != NULL) os::free(_card_epochs, mtGC);
 
         // Cache the size of the arrays and the index that got us there.
         _n_card_counts = cache_size;
@@ -500,11 +501,11 @@
 }
 
 void ConcurrentG1Refine::clear_and_record_card_counts() {
-  if (G1ConcRSLogCacheSize == 0) return;
+  if (G1ConcRSLogCacheSize == 0) {
+    return;
+  }
 
-#ifndef PRODUCT
   double start = os::elapsedTime();
-#endif
 
   if (_expand_card_counts) {
     int new_idx = _cache_size_index + 1;
@@ -523,11 +524,8 @@
   assert((this_epoch+1) <= max_jint, "too many periods");
   // Update epoch
   _n_periods++;
-
-#ifndef PRODUCT
-  double elapsed = os::elapsedTime() - start;
-  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
-#endif
+  double cc_clear_time_ms = (os::elapsedTime() - start) * 1000;
+  _g1h->g1_policy()->phase_times()->record_cc_clear_time_ms(cc_clear_time_ms);
 }
 
 void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
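The clear_and_record_card_counts() rewrite above drops the #ifndef PRODUCT guards: the card-count clear is now timed unconditionally and reported through the new g1GCPhaseTimes sink rather than the old record_cc_clear_time(). A generic sketch of the same measure-in-milliseconds pattern:

    #include <chrono>

    // Time a phase and return elapsed milliseconds, analogous to
    // (os::elapsedTime() - start) * 1000 above.
    double time_phase_ms(void (*phase)()) {
      auto start = std::chrono::steady_clock::now();
      phase();
      std::chrono::duration<double, std::milli> ms =
          std::chrono::steady_clock::now() - start;
      return ms.count();
    }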
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -34,7 +34,7 @@
 class ConcurrentG1RefineThread;
 class G1RemSet;
 
-class ConcurrentG1Refine: public CHeapObj {
+class ConcurrentG1Refine: public CHeapObj<mtGC> {
   ConcurrentG1RefineThread** _threads;
   int _n_threads;
   int _n_worker_threads;
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -29,6 +29,7 @@
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
+#include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
 #include "gc_implementation/g1/heapRegion.inline.hpp"
@@ -41,6 +42,7 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
+#include "services/memTracker.hpp"
 
 // Concurrent marking bit map wrapper
 
@@ -52,6 +54,8 @@
   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
 
+  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
+
   guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
   // For now we'll just commit all of the bit map up front.
   // Later on we'll try to be more parsimonious with swap.
@@ -103,20 +107,10 @@
   return (int) (diff >> _shifter);
 }
 
-void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
-                                             size_t    from_start_index,
-                                             HeapWord* to_start_word,
-                                             size_t    word_num) {
-  _bm.mostly_disjoint_range_union(from_bitmap,
-                                  from_start_index,
-                                  heapWordToOffset(to_start_word),
-                                  word_num);
-}
-
 #ifndef PRODUCT
 bool CMBitMapRO::covers(ReservedSpace rs) const {
   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
-  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
+  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
          "size inconsistency");
   return _bmStartWord == (HeapWord*)(rs.base()) &&
          _bmWordSize  == rs.size()>>LogHeapWordSize;
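The one-character change in the assert above moves the cast inside the shift. With the old form the shift is evaluated in 32-bit int first and only then widened, which overflows for large shift counts; shifting a size_t operand keeps the whole expression in 64-bit arithmetic. _shifter is small here, so this reads as defensive consistency, but the distinction matters in general:

    #include <cstddef>

    // For shifter >= 31 the first form is undefined behavior on LP64:
    // the shift happens in 32-bit int before the widening cast.
    size_t narrow_shift(unsigned shifter) { return (size_t)(1 << shifter); }
    // Here the shift itself happens in 64-bit size_t; safe up to 63.
    size_t wide_shift(unsigned shifter)   { return (size_t)1 << shifter; }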
@@ -170,7 +164,7 @@
 {}
 
 void CMMarkStack::allocate(size_t size) {
-  _base = NEW_C_HEAP_ARRAY(oop, size);
+  _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
   if (_base == NULL) {
     vm_exit_during_initialization("Failed to allocate CM mark stack");
   }
@@ -182,7 +176,7 @@
 
 CMMarkStack::~CMMarkStack() {
   if (_base != NULL) {
-    FREE_C_HEAP_ARRAY(oop, _base);
+    FREE_C_HEAP_ARRAY(oop, _base, mtGC);
   }
 }
 
@@ -271,140 +265,6 @@
   }
 }
 
-CMRegionStack::CMRegionStack() : _base(NULL) {}
-
-void CMRegionStack::allocate(size_t size) {
-  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
-  if (_base == NULL) {
-    vm_exit_during_initialization("Failed to allocate CM region mark stack");
-  }
-  _index = 0;
-  _capacity = (jint) size;
-}
-
-CMRegionStack::~CMRegionStack() {
-  if (_base != NULL) {
-    FREE_C_HEAP_ARRAY(oop, _base);
-  }
-}
-
-void CMRegionStack::push_lock_free(MemRegion mr) {
-  guarantee(false, "push_lock_free(): don't call this any more");
-
-  assert(mr.word_size() > 0, "Precondition");
-  while (true) {
-    jint index = _index;
-
-    if (index >= _capacity) {
-      _overflow = true;
-      return;
-    }
-    // Otherwise...
-    jint next_index = index+1;
-    jint res = Atomic::cmpxchg(next_index, &_index, index);
-    if (res == index) {
-      _base[index] = mr;
-      return;
-    }
-    // Otherwise, we need to try again.
-  }
-}
-
-// Lock-free pop of the region stack. Called during the concurrent
-// marking / remark phases. Should only be called in tandem with
-// other lock-free pops.
-MemRegion CMRegionStack::pop_lock_free() {
-  guarantee(false, "pop_lock_free(): don't call this any more");
-
-  while (true) {
-    jint index = _index;
-
-    if (index == 0) {
-      return MemRegion();
-    }
-    // Otherwise...
-    jint next_index = index-1;
-    jint res = Atomic::cmpxchg(next_index, &_index, index);
-    if (res == index) {
-      MemRegion mr = _base[next_index];
-      if (mr.start() != NULL) {
-        assert(mr.end() != NULL, "invariant");
-        assert(mr.word_size() > 0, "invariant");
-        return mr;
-      } else {
-        // that entry was invalidated... let's skip it
-        assert(mr.end() == NULL, "invariant");
-      }
-    }
-    // Otherwise, we need to try again.
-  }
-}
-
-#if 0
-// The routines that manipulate the region stack with a lock are
-// not currently used. They should be retained, however, as a
-// diagnostic aid.
-
-void CMRegionStack::push_with_lock(MemRegion mr) {
-  assert(mr.word_size() > 0, "Precondition");
-  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
-
-  if (isFull()) {
-    _overflow = true;
-    return;
-  }
-
-  _base[_index] = mr;
-  _index += 1;
-}
-
-MemRegion CMRegionStack::pop_with_lock() {
-  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
-
-  while (true) {
-    if (_index == 0) {
-      return MemRegion();
-    }
-    _index -= 1;
-
-    MemRegion mr = _base[_index];
-    if (mr.start() != NULL) {
-      assert(mr.end() != NULL, "invariant");
-      assert(mr.word_size() > 0, "invariant");
-      return mr;
-    } else {
-      // that entry was invalidated... let's skip it
-      assert(mr.end() == NULL, "invariant");
-    }
-  }
-}
-#endif
-
-bool CMRegionStack::invalidate_entries_into_cset() {
-  guarantee(false, "invalidate_entries_into_cset(): don't call this any more");
-
-  bool result = false;
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  for (int i = 0; i < _oops_do_bound; ++i) {
-    MemRegion mr = _base[i];
-    if (mr.start() != NULL) {
-      assert(mr.end() != NULL, "invariant");
-      assert(mr.word_size() > 0, "invariant");
-      HeapRegion* hr = g1h->heap_region_containing(mr.start());
-      assert(hr != NULL, "invariant");
-      if (hr->in_collection_set()) {
-        // The region points into the collection set
-        _base[i] = MemRegion();
-        result = true;
-      }
-    } else {
-      // that entry was invalidated... let's skip it
-      assert(mr.end() == NULL, "invariant");
-    }
-  }
-  return result;
-}
-
 template<class OopClosureClass>
 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
@@ -546,8 +406,7 @@
   return MAX2((n_par_threads + 2) / 4, 1U);
 }
 
-ConcurrentMark::ConcurrentMark(ReservedSpace rs,
-                               int max_regions) :
+ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
   _markBitMap1(rs, MinObjAlignment - 1),
   _markBitMap2(rs, MinObjAlignment - 1),
 
@@ -558,17 +417,15 @@
   _cleanup_sleep_factor(0.0),
   _cleanup_task_overhead(1.0),
   _cleanup_list("Cleanup List"),
-  _region_bm(max_regions, false /* in_resource_area*/),
+  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),
 
   _prevMarkBitMap(&_markBitMap1),
   _nextMarkBitMap(&_markBitMap2),
-  _at_least_one_mark_complete(false),
 
   _markStack(this),
-  _regionStack(),
   // _finger set in set_non_marking_state
 
   _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
@@ -582,7 +439,6 @@
   _has_aborted(false),
   _restart_for_overflow(false),
   _concurrent_marking_in_progress(false),
-  _should_gray_objects(false),
 
   // _verbose_level set below
 
@@ -611,7 +467,6 @@
   }
 
   _markStack.allocate(MarkStackSize);
-  _regionStack.allocate(G1MarkRegionStackSize);
 
   // Create & start a ConcurrentMark thread.
   _cmThread = new ConcurrentMarkThread(this);
@@ -628,11 +483,11 @@
 
   _root_regions.init(_g1h, this);
 
-  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
-  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
-
-  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num);
-  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);
+  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
+  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);
+
+  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num, mtGC);
+  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);
 
   BitMap::idx_t card_bm_size = _card_bm.size();
 
@@ -644,7 +499,7 @@
     _task_queues->register_queue(i, task_queue);
 
     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
-    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);
+    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
 
     _tasks[i] = new CMTask(i, this,
                            _count_marked_bytes[i],
@@ -744,15 +599,7 @@
 
 void ConcurrentMark::update_g1_committed(bool force) {
   // If concurrent marking is not in progress, then we do not need to
-  // update _heap_end. This has a subtle and important
-  // side-effect. Imagine that two evacuation pauses happen between
-  // marking completion and remark. The first one can grow the
-  // heap (hence now the finger is below the heap end). Then, the
-  // second one could unnecessarily push regions on the region
-  // stack. This causes the invariant that the region stack is empty
-  // at the beginning of remark to be false. By ensuring that we do
-  // not observe heap expansions after marking is complete, then we do
-  // not have this problem.
+  // update _heap_end.
   if (!concurrent_marking_in_progress() && !force) return;
 
   MemRegion committed = _g1h->g1_committed();
@@ -1002,7 +849,7 @@
     clear_marking_state(concurrent() /* clear_overflow */);
     force_overflow()->update();
 
-    if (PrintGC) {
+    if (G1Log::fine()) {
       gclog_or_tty->date_stamp(PrintGCDateStamps);
       gclog_or_tty->stamp(PrintGCTimeStamps);
       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
@@ -1058,86 +905,6 @@
 }
 #endif // !PRODUCT
 
-void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
-  guarantee(false, "grayRegionIfNecessary(): don't call this any more");
-
-  // The objects on the region have already been marked "in bulk" by
-  // the caller. We only need to decide whether to push the region on
-  // the region stack or not.
-
-  if (!concurrent_marking_in_progress() || !_should_gray_objects) {
-    // We're done with marking and waiting for remark. We do not need to
-    // push anything else on the region stack.
-    return;
-  }
-
-  HeapWord* finger = _finger;
-
-  if (verbose_low()) {
-    gclog_or_tty->print_cr("[global] attempting to push "
-                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
-                           PTR_FORMAT, mr.start(), mr.end(), finger);
-  }
-
-  if (mr.start() < finger) {
-    // The finger is always heap region aligned and it is not possible
-    // for mr to span heap regions.
-    assert(mr.end() <= finger, "invariant");
-
-    // Separated the asserts so that we know which one fires.
-    assert(mr.start() <= mr.end(),
-           "region boundaries should fall within the committed space");
-    assert(_heap_start <= mr.start(),
-           "region boundaries should fall within the committed space");
-    assert(mr.end() <= _heap_end,
-           "region boundaries should fall within the committed space");
-    if (verbose_low()) {
-      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
-                             "below the finger, pushing it",
-                             mr.start(), mr.end());
-    }
-
-    if (!region_stack_push_lock_free(mr)) {
-      if (verbose_low()) {
-        gclog_or_tty->print_cr("[global] region stack has overflown.");
-      }
-    }
-  }
-}
-
-void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
-  guarantee(false, "markAndGrayObjectIfNecessary(): don't call this any more");
-
-  // The object is not marked by the caller. We need to at least mark
-  // it and maybe push in on the stack.
-
-  HeapWord* addr = (HeapWord*)p;
-  if (!_nextMarkBitMap->isMarked(addr)) {
-    // We definitely need to mark it, irrespective whether we bail out
-    // because we're done with marking.
-    if (_nextMarkBitMap->parMark(addr)) {
-      if (!concurrent_marking_in_progress() || !_should_gray_objects) {
-        // If we're done with concurrent marking and we're waiting for
-        // remark, then we're not pushing anything on the stack.
-        return;
-      }
-
-      // No OrderAccess:store_load() is needed. It is implicit in the
-      // CAS done in parMark(addr) above
-      HeapWord* finger = _finger;
-
-      if (addr < finger) {
-        if (!mark_stack_push(oop(addr))) {
-          if (verbose_low()) {
-            gclog_or_tty->print_cr("[global] global stack overflow "
-                                   "during parMark");
-          }
-        }
-      }
-    }
-  }
-}
-
 class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
   ConcurrentMark*       _cm;
@@ -1355,8 +1122,7 @@
     HandleMark hm;  // handle scope
     gclog_or_tty->print(" VerifyDuringGC:(before)");
     Universe::heap()->prepare_for_verify();
-    Universe::verify(/* allow dirty */ true,
-                     /* silent      */ false,
+    Universe::verify(/* silent      */ false,
                      /* option      */ VerifyOption_G1UsePrevMarking);
   }
 
@@ -1395,8 +1161,7 @@
       HandleMark hm;  // handle scope
       gclog_or_tty->print(" VerifyDuringGC:(after)");
       Universe::heap()->prepare_for_verify();
-      Universe::verify(/* allow dirty */ true,
-                       /* silent      */ false,
+      Universe::verify(/* silent      */ false,
                        /* option      */ VerifyOption_G1UseNextMarking);
     }
     assert(!restart_for_overflow(), "sanity");
@@ -1421,42 +1186,31 @@
   g1p->record_concurrent_mark_remark_end();
 }
 
-// Used to calculate the # live objects per region
-// for verification purposes
-class CalcLiveObjectsClosure: public HeapRegionClosure {
-
-  CMBitMapRO* _bm;
+// Base class of the closures that finalize and verify the
+// liveness counting data.
+class CMCountDataClosureBase: public HeapRegionClosure {
+protected:
   ConcurrentMark* _cm;
   BitMap* _region_bm;
   BitMap* _card_bm;
 
-  // Debugging
-  size_t _tot_words_done;
-  size_t _tot_live;
-  size_t _tot_used;
-
-  size_t _region_marked_bytes;
-
-  intptr_t _bottom_card_num;
-
-  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
-    assert(start_card_num <= last_card_num, "sanity");
-    BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
-    BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
-
-    for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
-      _card_bm->par_at_put(i, 1);
+  void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
+    assert(start_idx <= last_idx, "sanity");
+
+    // Set the inclusive bit range [start_idx, last_idx].
+    // For small ranges (up to 8 cards) use a simple loop; otherwise
+    // use par_at_put_range.
+    if ((last_idx - start_idx) < 8) {
+      for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
+        _card_bm->par_set_bit(i);
+      }
+    } else {
+      assert(last_idx < _card_bm->size(), "sanity");
+      // Note BitMap::par_at_put_range() is exclusive.
+      _card_bm->par_at_put_range(start_idx, last_idx+1, true);
     }
   }
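set_card_bitmap_range() above converts an inclusive index range to the exclusive convention of par_at_put_range() via last_idx + 1, falling back to a bit-at-a-time loop for short runs. A standalone sketch of the same shape:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Inclusive [start, last]; the bulk path takes an exclusive end,
    // hence the +1.
    void set_range_inclusive(std::vector<bool>& bm, size_t start, size_t last) {
      if ((last - start) < 8) {               // short run: simple loop
        for (size_t i = start; i <= last; i++) {
          bm[i] = true;
        }
      } else {                                // long run: bulk fill
        std::fill(bm.begin() + start, bm.begin() + last + 1, true);
      }
    }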
 
-public:
-  CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
-                         BitMap* region_bm, BitMap* card_bm) :
-    _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
-    _region_marked_bytes(0), _tot_words_done(0),
-    _tot_live(0), _tot_used(0),
-    _bottom_card_num(cm->heap_bottom_card_num()) { }
-
   // It takes a region that's not empty (i.e., it has at least one
   // live object in it) and sets its corresponding bit on the region
   // bitmap to 1. If the region is "starts humongous" it will also set
@@ -1465,21 +1219,36 @@
   void set_bit_for_region(HeapRegion* hr) {
     assert(!hr->continuesHumongous(), "should have filtered those out");
 
-    size_t index = hr->hrs_index();
+    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
     if (!hr->startsHumongous()) {
       // Normal (non-humongous) case: just set the bit.
-      _region_bm->par_at_put((BitMap::idx_t) index, true);
+      _region_bm->par_at_put(index, true);
     } else {
       // Starts humongous case: calculate how many regions are part of
       // this humongous region and then set the bit range.
-      G1CollectedHeap* g1h = G1CollectedHeap::heap();
-      HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
-      size_t end_index = last_hr->hrs_index() + 1;
-      _region_bm->par_at_put_range((BitMap::idx_t) index,
-                                   (BitMap::idx_t) end_index, true);
+      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
+      _region_bm->par_at_put_range(index, end_index, true);
     }
   }
 
+public:
+  CMCountDataClosureBase(ConcurrentMark *cm,
+                         BitMap* region_bm, BitMap* card_bm):
+    _cm(cm), _region_bm(region_bm), _card_bm(card_bm) { }
+};
+
+// Closure that calculates the # live objects per region. Used
+// for verification purposes during the cleanup pause.
+class CalcLiveObjectsClosure: public CMCountDataClosureBase {
+  CMBitMapRO* _bm;
+  size_t _region_marked_bytes;
+
+public:
+  CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
+                         BitMap* region_bm, BitMap* card_bm) :
+    CMCountDataClosureBase(cm, region_bm, card_bm),
+    _bm(bm), _region_marked_bytes(0) { }
+
   bool doHeapRegion(HeapRegion* hr) {
 
     if (hr->continuesHumongous()) {
@@ -1501,73 +1270,36 @@
                    "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
                    start, nextTop, hr->end()));
 
-    // Record the number of word's we'll examine.
-    size_t words_done = (nextTop - start);
-
     // Find the first marked object at or after "start".
     start = _bm->getNextMarkedWordAddress(start, nextTop);
 
     size_t marked_bytes = 0;
 
-    // Below, the term "card num" means the result of shifting an address
-    // by the card shift -- address 0 corresponds to card number 0.  One
-    // must subtract the card num of the bottom of the heap to obtain a
-    // card table index.
-
-    // The first card num of the sequence of live cards currently being
-    // constructed.  -1 ==> no sequence.
-    intptr_t start_card_num = -1;
-
-    // The last card num of the sequence of live cards currently being
-    // constructed.  -1 ==> no sequence.
-    intptr_t last_card_num = -1;
-
     while (start < nextTop) {
       oop obj = oop(start);
       int obj_sz = obj->size();
-
-      // The card num of the start of the current object.
-      intptr_t obj_card_num =
-        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
       HeapWord* obj_last = start + obj_sz - 1;
-      intptr_t obj_last_card_num =
-        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
-
-      if (obj_card_num != last_card_num) {
-        if (start_card_num == -1) {
-          assert(last_card_num == -1, "Both or neither.");
-          start_card_num = obj_card_num;
-        } else {
-          assert(last_card_num != -1, "Both or neither.");
-          assert(obj_card_num >= last_card_num, "Inv");
-          if ((obj_card_num - last_card_num) > 1) {
-            // Mark the last run, and start a new one.
-            mark_card_num_range(start_card_num, last_card_num);
-            start_card_num = obj_card_num;
-          }
-        }
-      }
-      // In any case, we set the last card num.
-      last_card_num = obj_last_card_num;
-
+
+      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
+      BitMap::idx_t last_idx = _cm->card_bitmap_index_for(obj_last);
+
+      // Set the bits in the card BM for this object (inclusive).
+      set_card_bitmap_range(start_idx, last_idx);
+
+      // Add the size of this object to the number of marked bytes.
       marked_bytes += (size_t)obj_sz * HeapWordSize;
 
       // Find the next marked object after this one.
-      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
-    }
-
-    // Handle the last range, if any.
-    if (start_card_num != -1) {
-      mark_card_num_range(start_card_num, last_card_num);
+      start = _bm->getNextMarkedWordAddress(obj_last + 1, nextTop);
     }
 
     // Mark the allocated-since-marking portion...
     HeapWord* top = hr->top();
     if (nextTop < top) {
-      start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
-      last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
-
-      mark_card_num_range(start_card_num, last_card_num);
+      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(nextTop);
+      BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top - 1);
+
+      set_card_bitmap_range(start_idx, last_idx);
 
       // This definitely means the region has live objects.
       set_bit_for_region(hr);
@@ -1582,19 +1314,10 @@
     // it can be queried by a calling verification routine
     _region_marked_bytes = marked_bytes;
 
-    _tot_live += hr->next_live_bytes();
-    _tot_used += hr->used();
-    _tot_words_done = words_done;
-
     return false;
   }
 
   size_t region_marked_bytes() const { return _region_marked_bytes; }
-
-  // Debugging
-  size_t tot_words_done() const      { return _tot_words_done; }
-  size_t tot_live() const            { return _tot_live; }
-  size_t tot_used() const            { return _tot_used; }
 };
 
 // Heap region closure used for verifying the counting data
@@ -1652,17 +1375,6 @@
     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                     Mutex::_no_safepoint_check_flag);
 
-    // Verify that _top_at_conc_count == ntams
-    if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
-      if (_verbose) {
-        gclog_or_tty->print_cr("Region " SIZE_FORMAT ": top at conc count incorrect: "
-                               "expected " PTR_FORMAT ", actual: " PTR_FORMAT,
-                               hr->hrs_index(), hr->next_top_at_mark_start(),
-                               hr->top_at_conc_mark_count());
-      }
-      failures += 1;
-    }
-
     // Verify the marked bytes for this region.
     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
     size_t act_marked_bytes = hr->next_marked_bytes();
@@ -1671,7 +1383,7 @@
     // we have missed accounting some objects during the actual marking.
     if (exp_marked_bytes > act_marked_bytes) {
       if (_verbose) {
-        gclog_or_tty->print_cr("Region " SIZE_FORMAT ": marked bytes mismatch: "
+        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
       }
@@ -1682,15 +1394,16 @@
     // (which was just calculated) region bit maps.
     // We're not OK if the bit in the calculated expected region
     // bitmap is set and the bit in the actual region bitmap is not.
-    BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();
+    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
 
     bool expected = _exp_region_bm->at(index);
     bool actual = _region_bm->at(index);
     if (expected && !actual) {
       if (_verbose) {
-        gclog_or_tty->print_cr("Region " SIZE_FORMAT ": region bitmap mismatch: "
-                               "expected: %d, actual: %d",
-                               hr->hrs_index(), expected, actual);
+        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
+                               "expected: %s, actual: %s",
+                               hr->hrs_index(),
+                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
       }
       failures += 1;
     }
@@ -1708,9 +1421,10 @@
 
       if (expected && !actual) {
         if (_verbose) {
-          gclog_or_tty->print_cr("Region " SIZE_FORMAT ": card bitmap mismatch at " SIZE_FORMAT ": "
-                                 "expected: %d, actual: %d",
-                                 hr->hrs_index(), i, expected, actual);
+          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
+                                 "expected: %s, actual: %s",
+                                 hr->hrs_index(), i,
+                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
         }
         failures += 1;
       }
@@ -1726,7 +1440,7 @@
     _failures += failures;
 
     // We could stop iteration over the heap when we
-    // find the first voilating region by returning true.
+    // find the first violating region by returning true.
     return false;
   }
 };
@@ -1799,68 +1513,19 @@
   int failures() const { return _failures; }
 };
 
-// Final update of count data (during cleanup).
-// Adds [top_at_count, NTAMS) to the marked bytes for each
-// region. Sets the bits in the card bitmap corresponding
-// to the interval [top_at_count, top], and sets the
-// liveness bit for each region containing live data
-// in the region bitmap.
-
-class FinalCountDataUpdateClosure: public HeapRegionClosure {
-  ConcurrentMark* _cm;
-  BitMap* _region_bm;
-  BitMap* _card_bm;
-
-  size_t _total_live_bytes;
-  size_t _total_used_bytes;
-  size_t _total_words_done;
-
-  void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
-    assert(start_idx <= last_idx, "sanity");
-
-    // Set the inclusive bit range [start_idx, last_idx].
-    // For small ranges (up to 8 cards) use a simple loop; otherwise
-    // use par_at_put_range.
-    if ((last_idx - start_idx) <= 8) {
-      for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
-        _card_bm->par_set_bit(i);
-      }
-    } else {
-      assert(last_idx < _card_bm->size(), "sanity");
-      // Note BitMap::par_at_put_range() is exclusive.
-      _card_bm->par_at_put_range(start_idx, last_idx+1, true);
-    }
-  }
-
-  // It takes a region that's not empty (i.e., it has at least one
-  // live object in it and sets its corresponding bit on the region
-  // bitmap to 1. If the region is "starts humongous" it will also set
-  // to 1 the bits on the region bitmap that correspond to its
-  // associated "continues humongous" regions.
-  void set_bit_for_region(HeapRegion* hr) {
-    assert(!hr->continuesHumongous(), "should have filtered those out");
-
-    size_t index = hr->hrs_index();
-    if (!hr->startsHumongous()) {
-      // Normal (non-humongous) case: just set the bit.
-      _region_bm->par_set_bit((BitMap::idx_t) index);
-    } else {
-      // Starts humongous case: calculate how many regions are part of
-      // this humongous region and then set the bit range.
-      G1CollectedHeap* g1h = G1CollectedHeap::heap();
-      HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
-      size_t end_index = last_hr->hrs_index() + 1;
-      _region_bm->par_at_put_range((BitMap::idx_t) index,
-                                   (BitMap::idx_t) end_index, true);
-    }
-  }
-
+// Closure that finalizes the liveness counting data.
+// Used during the cleanup pause.
+// Sets the bits corresponding to the interval [NTAMS, top]
+// (which contains the implicitly live objects) in the
+// card liveness bitmap. Also sets the bit for each region,
+// containing live data, in the region liveness bitmap.
+
+class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
  public:
   FinalCountDataUpdateClosure(ConcurrentMark* cm,
                               BitMap* region_bm,
                               BitMap* card_bm) :
-    _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
-    _total_words_done(0), _total_live_bytes(0), _total_used_bytes(0) { }
+    CMCountDataClosureBase(cm, region_bm, card_bm) { }
 
   bool doHeapRegion(HeapRegion* hr) {
 
@@ -1875,28 +1540,10 @@
       return false;
     }
 
-    HeapWord* start = hr->top_at_conc_mark_count();
     HeapWord* ntams = hr->next_top_at_mark_start();
     HeapWord* top   = hr->top();
 
-    assert(hr->bottom() <= start && start <= hr->end() &&
-           hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
-
-    size_t words_done = ntams - hr->bottom();
-
-    if (start < ntams) {
-      // Region was changed between remark and cleanup pauses
-      // We need to add (ntams - start) to the marked bytes
-      // for this region, and set bits for the range
-      // [ card_idx(start), card_idx(ntams) ) in the card bitmap.
-      size_t live_bytes = (ntams - start) * HeapWordSize;
-      hr->add_to_marked_bytes(live_bytes);
-
-      // Record the new top at conc count
-      hr->set_top_at_conc_mark_count(ntams);
-
-      // The setting of the bits in the card bitmap takes place below
-    }
+    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
 
     // Mark the allocated-since-marking portion...
     if (ntams < top) {
@@ -1904,8 +1551,8 @@
       set_bit_for_region(hr);
     }
 
-    // Now set the bits for [start, top]
-    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
+    // Now set the bits for [ntams, top]
+    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
     BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top);
     set_card_bitmap_range(start_idx, last_idx);
 
@@ -1914,16 +1561,8 @@
       set_bit_for_region(hr);
     }
 
-    _total_words_done += words_done;
-    _total_used_bytes += hr->used();
-    _total_live_bytes += hr->next_marked_bytes();
-
     return false;
   }
-
-  size_t total_words_done() const { return _total_words_done; }
-  size_t total_live_bytes() const { return _total_live_bytes; }
-  size_t total_used_bytes() const { return _total_used_bytes; }
 };
 
 class G1ParFinalCountTask: public AbstractGangTask {
@@ -1935,9 +1574,6 @@
 
   uint    _n_workers;
 
-  size_t *_live_bytes;
-  size_t *_used_bytes;
-
 public:
   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
     : AbstractGangTask("G1 final counting"),
@@ -1945,8 +1581,7 @@
       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
       _n_workers(0) {
     // Use the value already set as the number of active threads
-    // in the call to run_task().  Needed for the allocation of
-    // _live_bytes and _used_bytes.
+    // in the call to run_task().
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       assert( _g1h->workers()->active_workers() > 0,
         "Should have been previously set");
@@ -1954,14 +1589,6 @@
     } else {
       _n_workers = 1;
     }
-
-    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
-    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
-  }
-
-  ~G1ParFinalCountTask() {
-    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
-    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
   }
 
   void work(uint worker_id) {
@@ -1979,23 +1606,6 @@
     } else {
       _g1h->heap_region_iterate(&final_update_cl);
     }
-
-    _live_bytes[worker_id] = final_update_cl.total_live_bytes();
-    _used_bytes[worker_id] = final_update_cl.total_used_bytes();
-  }
-
-  size_t live_bytes()  {
-    size_t live_bytes = 0;
-    for (uint i = 0; i < _n_workers; ++i)
-      live_bytes += _live_bytes[i];
-    return live_bytes;
-  }
-
-  size_t used_bytes()  {
-    size_t used_bytes = 0;
-    for (uint i = 0; i < _n_workers; ++i)
-      used_bytes += _used_bytes[i];
-    return used_bytes;
   }
 };
 
@@ -2005,7 +1615,7 @@
   G1CollectedHeap* _g1;
   int _worker_num;
   size_t _max_live_bytes;
-  size_t _regions_claimed;
+  uint _regions_claimed;
   size_t _freed_bytes;
   FreeRegionList* _local_cleanup_list;
   OldRegionSet* _old_proxy_set;
@@ -2033,32 +1643,33 @@
   size_t freed_bytes() { return _freed_bytes; }
 
   bool doHeapRegion(HeapRegion *hr) {
+    if (hr->continuesHumongous()) {
+      return false;
+    }
     // We use a claim value of zero here because all regions
     // were claimed with value 1 in the FinalCount task.
-    hr->reset_gc_time_stamp();
-    if (!hr->continuesHumongous()) {
-      double start = os::elapsedTime();
-      _regions_claimed++;
-      hr->note_end_of_marking();
-      _max_live_bytes += hr->max_live_bytes();
-      _g1->free_region_if_empty(hr,
-                                &_freed_bytes,
-                                _local_cleanup_list,
-                                _old_proxy_set,
-                                _humongous_proxy_set,
-                                _hrrs_cleanup_task,
-                                true /* par */);
-      double region_time = (os::elapsedTime() - start);
-      _claimed_region_time += region_time;
-      if (region_time > _max_region_time) {
-        _max_region_time = region_time;
-      }
+    _g1->reset_gc_time_stamps(hr);
+    double start = os::elapsedTime();
+    _regions_claimed++;
+    hr->note_end_of_marking();
+    _max_live_bytes += hr->max_live_bytes();
+    _g1->free_region_if_empty(hr,
+                              &_freed_bytes,
+                              _local_cleanup_list,
+                              _old_proxy_set,
+                              _humongous_proxy_set,
+                              _hrrs_cleanup_task,
+                              true /* par */);
+    double region_time = (os::elapsedTime() - start);
+    _claimed_region_time += region_time;
+    if (region_time > _max_region_time) {
+      _max_region_time = region_time;
     }
     return false;
   }
 
   size_t max_live_bytes() { return _max_live_bytes; }
-  size_t regions_claimed() { return _regions_claimed; }
+  uint regions_claimed() { return _regions_claimed; }
   double claimed_region_time_sec() { return _claimed_region_time; }
   double max_region_time_sec() { return _max_region_time; }
 };
@@ -2130,15 +1741,6 @@
 
       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
     }
-    double end = os::elapsedTime();
-    if (G1PrintParCleanupStats) {
-      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
-                          "claimed %u regions (tot = %8.3f ms, max = %8.3f ms).\n",
-                          worker_id, start, end, (end-start)*1000.0,
-                          g1_note_end.regions_claimed(),
-                          g1_note_end.claimed_region_time_sec()*1000.0,
-                          g1_note_end.max_region_time_sec()*1000.0);
-    }
   }
   size_t max_live_bytes() { return _max_live_bytes; }
   size_t freed_bytes() { return _freed_bytes; }
@@ -2185,8 +1787,7 @@
     HandleMark hm;  // handle scope
     gclog_or_tty->print(" VerifyDuringGC:(before)");
     Universe::heap()->prepare_for_verify();
-    Universe::verify(/* allow dirty */ true,
-                     /* silent      */ false,
+    Universe::verify(/* silent      */ false,
                      /* option      */ VerifyOption_G1UsePrevMarking);
   }
 
@@ -2250,30 +1851,11 @@
     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
   }
 
-  size_t known_garbage_bytes =
-    g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
-  g1p->set_known_garbage_bytes(known_garbage_bytes);
-
   size_t start_used_bytes = g1h->used();
-  _at_least_one_mark_complete = true;
   g1h->set_marking_complete();
 
-  ergo_verbose4(ErgoConcCycles,
-           "finish cleanup",
-           ergo_format_byte("occupancy")
-           ergo_format_byte("capacity")
-           ergo_format_byte_perc("known garbage"),
-           start_used_bytes, g1h->capacity(),
-           known_garbage_bytes,
-           ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
-
   double count_end = os::elapsedTime();
   double this_final_counting_time = (count_end - start);
-  if (G1PrintParCleanupStats) {
-    gclog_or_tty->print_cr("Cleanup:");
-    gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
-                           this_final_counting_time*1000.0);
-  }
   _total_counting_time += this_final_counting_time;
 
   if (G1PrintRegionLivenessInfo) {
@@ -2287,7 +1869,6 @@
   g1h->reset_gc_time_stamp();
 
   // Note end of marking in all heap regions.
-  double note_end_start = os::elapsedTime();
   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     g1h->set_par_threads((int)n_workers);
@@ -2299,6 +1880,7 @@
   } else {
     g1_par_note_end_task.work(0);
   }
+  g1h->check_gc_time_stamps();
 
   if (!cleanup_list_is_empty()) {
     // The cleanup list is not empty, so we'll have to process it
@@ -2306,11 +1888,6 @@
     // regions that there will be more free regions coming soon.
     g1h->set_free_regions_coming();
   }
-  double note_end_end = os::elapsedTime();
-  if (G1PrintParCleanupStats) {
-    gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
-                           (note_end_end - note_end_start)*1000.0);
-  }
 
   // call below, since it affects the metric by which we sort the heap
   // regions.
@@ -2342,16 +1919,13 @@
   double end = os::elapsedTime();
   _cleanup_times.add((end - start) * 1000.0);
 
-  if (PrintGC || PrintGCDetails) {
+  if (G1Log::fine()) {
     g1h->print_size_transition(gclog_or_tty,
                                start_used_bytes,
                                g1h->used(),
                                g1h->capacity());
   }
 
-  size_t cleaned_up_bytes = start_used_bytes - g1h->used();
-  g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
-
   // Clean up will have freed any regions completely full of garbage.
   // Update the soft reference policy with the new heap occupancy.
   Universe::update_heap_info_at_gc();
@@ -2368,8 +1942,7 @@
     HandleMark hm;  // handle scope
     gclog_or_tty->print(" VerifyDuringGC:(after)");
     Universe::heap()->prepare_for_verify();
-    Universe::verify(/* allow dirty */ true,
-                     /* silent      */ false,
+    Universe::verify(/* silent      */ false,
                      /* option      */ VerifyOption_G1UsePrevMarking);
   }
 
@@ -2386,7 +1959,7 @@
 
   if (G1ConcRegionFreeingVerbose) {
     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
-                           "cleanup list has "SIZE_FORMAT" entries",
+                           "cleanup list has %u entries",
                            _cleanup_list.length());
   }
 
@@ -2408,9 +1981,8 @@
         _cleanup_list.is_empty()) {
       if (G1ConcRegionFreeingVerbose) {
         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
-                               "appending "SIZE_FORMAT" entries to the "
-                               "secondary_free_list, clean list still has "
-                               SIZE_FORMAT" entries",
+                               "appending %u entries to the secondary_free_list, "
+                               "cleanup list still has %u entries",
                                tmp_free_list.length(),
                                _cleanup_list.length());
       }
@@ -2683,11 +2255,10 @@
   // Inner scope to exclude the cleaning of the string and symbol
   // tables from the displayed time.
   {
-    bool verbose = PrintGC && PrintGCDetails;
-    if (verbose) {
+    if (G1Log::finer()) {
       gclog_or_tty->put(' ');
     }
-    TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);
+    TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
 
     ReferenceProcessor* rp = g1h->ref_processor_cm();
 
@@ -2878,24 +2449,8 @@
     } else {
       HeapRegion* hr  = _g1h->heap_region_containing(obj);
       guarantee(hr != NULL, "invariant");
-      bool over_tams = false;
-      bool marked = false;
-
-      switch (_vo) {
-        case VerifyOption_G1UsePrevMarking:
-          over_tams = hr->obj_allocated_since_prev_marking(obj);
-          marked = _g1h->isMarkedPrev(obj);
-          break;
-        case VerifyOption_G1UseNextMarking:
-          over_tams = hr->obj_allocated_since_next_marking(obj);
-          marked = _g1h->isMarkedNext(obj);
-          break;
-        case VerifyOption_G1UseMarkWord:
-          marked = obj->is_gc_marked();
-          break;
-        default:
-          ShouldNotReachHere();
-      }
+      bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
+      bool marked = _g1h->is_marked(obj, _vo);
 
       if (over_tams) {
         str = " >";
@@ -2931,24 +2486,8 @@
     _out(out), _vo(vo), _all(all), _hr(hr) { }
 
   void do_object(oop o) {
-    bool over_tams = false;
-    bool marked = false;
-
-    switch (_vo) {
-      case VerifyOption_G1UsePrevMarking:
-        over_tams = _hr->obj_allocated_since_prev_marking(o);
-        marked = _g1h->isMarkedPrev(o);
-        break;
-      case VerifyOption_G1UseNextMarking:
-        over_tams = _hr->obj_allocated_since_next_marking(o);
-        marked = _g1h->isMarkedNext(o);
-        break;
-      case VerifyOption_G1UseMarkWord:
-        marked = o->is_gc_marked();
-        break;
-      default:
-        ShouldNotReachHere();
-    }
+    bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
+    bool marked = _g1h->is_marked(o, _vo);
     bool print_it = _all || over_tams || marked;
 
     if (print_it) {
@@ -2962,32 +2501,17 @@
 
 class PrintReachableRegionClosure : public HeapRegionClosure {
 private:
-  outputStream* _out;
-  VerifyOption  _vo;
-  bool          _all;
+  G1CollectedHeap* _g1h;
+  outputStream*    _out;
+  VerifyOption     _vo;
+  bool             _all;
 
 public:
   bool doHeapRegion(HeapRegion* hr) {
     HeapWord* b = hr->bottom();
     HeapWord* e = hr->end();
     HeapWord* t = hr->top();
-    HeapWord* p = NULL;
-
-    switch (_vo) {
-      case VerifyOption_G1UsePrevMarking:
-        p = hr->prev_top_at_mark_start();
-        break;
-      case VerifyOption_G1UseNextMarking:
-        p = hr->next_top_at_mark_start();
-        break;
-      case VerifyOption_G1UseMarkWord:
-        // When we are verifying marking using the mark word
-        // TAMS has no relevance.
-        assert(p == NULL, "post-condition");
-        break;
-      default:
-        ShouldNotReachHere();
-    }
+    HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
                    "TAMS: "PTR_FORMAT, b, e, t, p);
     _out->cr();
@@ -3009,20 +2533,9 @@
   PrintReachableRegionClosure(outputStream* out,
                               VerifyOption  vo,
                               bool          all) :
-    _out(out), _vo(vo), _all(all) { }
+    _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
 };
 
-static const char* verify_option_to_tams(VerifyOption vo) {
-  switch (vo) {
-    case VerifyOption_G1UsePrevMarking:
-      return "PTAMS";
-    case VerifyOption_G1UseNextMarking:
-      return "NTAMS";
-    default:
-      return "NONE";
-  }
-}
-
 void ConcurrentMark::print_reachable(const char* str,
                                      VerifyOption vo,
                                      bool all) {
@@ -3051,7 +2564,7 @@
   }
 
   outputStream* out = &fout;
-  out->print_cr("-- USING %s", verify_option_to_tams(vo));
+  out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
   out->cr();
 
   out->print_cr("--- ITERATING OVER REGIONS");
@@ -3066,89 +2579,6 @@
 
 #endif // PRODUCT
 
-// This note is for drainAllSATBBuffers and the code in between.
-// In the future we could reuse a task to do this work during an
-// evacuation pause (since now tasks are not active and can be claimed
-// during an evacuation pause). This was a late change to the code and
-// is currently not being taken advantage of.
-
-void ConcurrentMark::deal_with_reference(oop obj) {
-  if (verbose_high()) {
-    gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
-                           (void*) obj);
-  }
-
-  HeapWord* objAddr = (HeapWord*) obj;
-  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
-  if (_g1h->is_in_g1_reserved(objAddr)) {
-    assert(obj != NULL, "null check is implicit");
-    if (!_nextMarkBitMap->isMarked(objAddr)) {
-      // Only get the containing region if the object is not marked on the
-      // bitmap (otherwise, it's a waste of time since we won't do
-      // anything with it).
-      HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
-      if (!hr->obj_allocated_since_next_marking(obj)) {
-        if (verbose_high()) {
-          gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
-                                 "marked", (void*) obj);
-        }
-
-        // we need to mark it first
-        if (_nextMarkBitMap->parMark(objAddr)) {
-          // No OrderAccess:store_load() is needed. It is implicit in the
-          // CAS done in parMark(objAddr) above
-          HeapWord* finger = _finger;
-          if (objAddr < finger) {
-            if (verbose_high()) {
-              gclog_or_tty->print_cr("[global] below the global finger "
-                                     "("PTR_FORMAT"), pushing it", finger);
-            }
-            if (!mark_stack_push(obj)) {
-              if (verbose_low()) {
-                gclog_or_tty->print_cr("[global] global stack overflow during "
-                                       "deal_with_reference");
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-class CMGlobalObjectClosure : public ObjectClosure {
-private:
-  ConcurrentMark* _cm;
-
-public:
-  void do_object(oop obj) {
-    _cm->deal_with_reference(obj);
-  }
-
-  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
-};
-
-void ConcurrentMark::drainAllSATBBuffers() {
-  guarantee(false, "drainAllSATBBuffers(): don't call this any more");
-
-  CMGlobalObjectClosure oc(this);
-  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-  satb_mq_set.set_closure(&oc);
-
-  while (satb_mq_set.apply_closure_to_completed_buffer()) {
-    if (verbose_medium()) {
-      gclog_or_tty->print_cr("[global] processed an SATB buffer");
-    }
-  }
-
-  // no need to check whether we should do this, as this is only
-  // called during an evacuation pause
-  satb_mq_set.iterate_closure_all_threads();
-
-  satb_mq_set.set_closure(NULL);
-  assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
-}
-
 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
   // Note we are overriding the read-only view of the prev map here, via
   // the cast.
@@ -3257,63 +2687,6 @@
   return NULL;
 }
 
-bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
-  guarantee(false, "invalidate_aborted_regions_in_cset(): "
-                   "don't call this any more");
-
-  bool result = false;
-  for (int i = 0; i < (int)_max_task_num; ++i) {
-    CMTask* the_task = _tasks[i];
-    MemRegion mr = the_task->aborted_region();
-    if (mr.start() != NULL) {
-      assert(mr.end() != NULL, "invariant");
-      assert(mr.word_size() > 0, "invariant");
-      HeapRegion* hr = _g1h->heap_region_containing(mr.start());
-      assert(hr != NULL, "invariant");
-      if (hr->in_collection_set()) {
-        // The region points into the collection set
-        the_task->set_aborted_region(MemRegion());
-        result = true;
-      }
-    }
-  }
-  return result;
-}
-
-bool ConcurrentMark::has_aborted_regions() {
-  for (int i = 0; i < (int)_max_task_num; ++i) {
-    CMTask* the_task = _tasks[i];
-    MemRegion mr = the_task->aborted_region();
-    if (mr.start() != NULL) {
-      assert(mr.end() != NULL, "invariant");
-      assert(mr.word_size() > 0, "invariant");
-      return true;
-    }
-  }
-  return false;
-}
-
-void ConcurrentMark::oops_do(OopClosure* cl) {
-  if (_markStack.size() > 0 && verbose_low()) {
-    gclog_or_tty->print_cr("[global] scanning the global marking stack, "
-                           "size = %d", _markStack.size());
-  }
-  // we first iterate over the contents of the mark stack...
-  _markStack.oops_do(cl);
-
-  for (int i = 0; i < (int)_max_task_num; ++i) {
-    OopTaskQueue* queue = _task_queues->queue((int)i);
-
-    if (queue->size() > 0 && verbose_low()) {
-      gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
-                             "size = %d", i, queue->size());
-    }
-
-    // ...then over the contents of the all the task queues.
-    queue->oops_do(cl);
-  }
-}
-
 #ifndef PRODUCT
 enum VerifyNoCSetOopsPhase {
   VerifyNoCSetOopsStack,
@@ -3445,8 +2818,6 @@
 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
   _markStack.setEmpty();
   _markStack.clear_overflow();
-  _regionStack.setEmpty();
-  _regionStack.clear_overflow();
   if (clear_overflow) {
     clear_has_overflown();
   } else {
@@ -3457,8 +2828,6 @@
   for (int i = 0; i < (int)_max_task_num; ++i) {
     OopTaskQueue* queue = _task_queues->queue(i);
     queue->set_empty();
-    // Clear any partial regions from the CMTasks
-    _tasks[i]->clear_aborted_region();
   }
 }
 
@@ -3525,7 +2894,7 @@
     assert(limit_idx <= end_idx, "or else use atomics");
 
     // Aggregate the "stripe" in the count data associated with hr.
-    size_t hrs_index = hr->hrs_index();
+    uint hrs_index = hr->hrs_index();
     size_t marked_bytes = 0;
 
     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
@@ -3558,9 +2927,6 @@
     // Update the marked bytes for this region.
     hr->add_to_marked_bytes(marked_bytes);
 
-    // Now set the top at count to NTAMS.
-    hr->set_top_at_conc_mark_count(limit);
-
     // Next heap region
     return false;
   }
@@ -3633,7 +2999,7 @@
   // of the final counting task.
   _region_bm.clear();
 
-  size_t max_regions = _g1h->max_regions();
+  uint max_regions = _g1h->max_regions();
   assert(_max_task_num != 0, "unitialized");
 
   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
@@ -3643,7 +3009,7 @@
     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
     assert(marked_bytes_array != NULL, "uninitialized");
 
-    memset(marked_bytes_array, 0, (max_regions * sizeof(size_t)));
+    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
     task_card_bm->clear();
   }
 }
@@ -3658,327 +3024,6 @@
   }
 }
 
-// Closures used by ConcurrentMark::complete_marking_in_collection_set().
-
-class CSetMarkOopClosure: public OopClosure {
-  friend class CSetMarkBitMapClosure;
-
-  G1CollectedHeap* _g1h;
-  CMBitMap*        _bm;
-  ConcurrentMark*  _cm;
-  oop*             _ms;
-  jint*            _array_ind_stack;
-  int              _ms_size;
-  int              _ms_ind;
-  int              _array_increment;
-  uint             _worker_id;
-
-  bool push(oop obj, int arr_ind = 0) {
-    if (_ms_ind == _ms_size) {
-      gclog_or_tty->print_cr("Mark stack is full.");
-      return false;
-    }
-    _ms[_ms_ind] = obj;
-    if (obj->is_objArray()) {
-      _array_ind_stack[_ms_ind] = arr_ind;
-    }
-    _ms_ind++;
-    return true;
-  }
-
-  oop pop() {
-    if (_ms_ind == 0) {
-      return NULL;
-    } else {
-      _ms_ind--;
-      return _ms[_ms_ind];
-    }
-  }
-
-  template <class T> bool drain() {
-    while (_ms_ind > 0) {
-      oop obj = pop();
-      assert(obj != NULL, "Since index was non-zero.");
-      if (obj->is_objArray()) {
-        jint arr_ind = _array_ind_stack[_ms_ind];
-        objArrayOop aobj = objArrayOop(obj);
-        jint len = aobj->length();
-        jint next_arr_ind = arr_ind + _array_increment;
-        if (next_arr_ind < len) {
-          push(obj, next_arr_ind);
-        }
-        // Now process this portion of this one.
-        int lim = MIN2(next_arr_ind, len);
-        for (int j = arr_ind; j < lim; j++) {
-          do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
-        }
-      } else {
-        obj->oop_iterate(this);
-      }
-      if (abort()) return false;
-    }
-    return true;
-  }
-
-public:
-  CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, uint worker_id) :
-    _g1h(G1CollectedHeap::heap()),
-    _cm(cm),
-    _bm(cm->nextMarkBitMap()),
-    _ms_size(ms_size), _ms_ind(0),
-    _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
-    _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
-    _array_increment(MAX2(ms_size/8, 16)),
-    _worker_id(worker_id) { }
-
-  ~CSetMarkOopClosure() {
-    FREE_C_HEAP_ARRAY(oop, _ms);
-    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
-  }
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-
-  template <class T> void do_oop_work(T* p) {
-    T heap_oop = oopDesc::load_heap_oop(p);
-    if (oopDesc::is_null(heap_oop)) return;
-    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    if (obj->is_forwarded()) {
-      // If the object has already been forwarded, we have to make sure
-      // that it's marked.  So follow the forwarding pointer.  Note that
-      // this does the right thing for self-forwarding pointers in the
-      // evacuation failure case.
-      obj = obj->forwardee();
-    }
-    HeapRegion* hr = _g1h->heap_region_containing(obj);
-    if (hr != NULL) {
-      if (hr->in_collection_set()) {
-        if (_g1h->is_obj_ill(obj)) {
-          if (_bm->parMark((HeapWord*)obj)) {
-            if (!push(obj)) {
-              gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed.");
-              set_abort();
-            }
-          }
-        }
-      } else {
-        // Outside the collection set; we need to gray it
-        _cm->deal_with_reference(obj);
-      }
-    }
-  }
-};
-
-class CSetMarkBitMapClosure: public BitMapClosure {
-  G1CollectedHeap*   _g1h;
-  CMBitMap*          _bitMap;
-  ConcurrentMark*    _cm;
-  CSetMarkOopClosure _oop_cl;
-  uint               _worker_id;
-
-public:
-  CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_id) :
-    _g1h(G1CollectedHeap::heap()),
-    _bitMap(cm->nextMarkBitMap()),
-    _oop_cl(cm, ms_size, worker_id),
-    _worker_id(worker_id) { }
-
-  bool do_bit(size_t offset) {
-    // convert offset into a HeapWord*
-    HeapWord* addr = _bitMap->offsetToHeapWord(offset);
-    assert(_bitMap->endWord() && addr < _bitMap->endWord(),
-           "address out of range");
-    assert(_bitMap->isMarked(addr), "tautology");
-    oop obj = oop(addr);
-    if (!obj->is_forwarded()) {
-      if (!_oop_cl.push(obj)) return false;
-      if (UseCompressedOops) {
-        if (!_oop_cl.drain<narrowOop>()) return false;
-      } else {
-        if (!_oop_cl.drain<oop>()) return false;
-      }
-    }
-    // Otherwise...
-    return true;
-  }
-};
-
-class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
-  CMBitMap*             _bm;
-  CSetMarkBitMapClosure _bit_cl;
-  uint                  _worker_id;
-
-  enum SomePrivateConstants {
-    MSSize = 1000
-  };
-
-public:
-  CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_id) :
-    _bm(cm->nextMarkBitMap()),
-    _bit_cl(cm, MSSize, worker_id),
-    _worker_id(worker_id) { }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
-      // The current worker has successfully claimed the region.
-      if (!hr->evacuation_failed()) {
-        MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start());
-        if (!mr.is_empty()) {
-          bool done = false;
-          while (!done) {
-            done = _bm->iterate(&_bit_cl, mr);
-          }
-        }
-      }
-    }
-    return false;
-  }
-};
-
-class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
-protected:
-  G1CollectedHeap* _g1h;
-  ConcurrentMark*  _cm;
-
-public:
-  G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h,
-                              ConcurrentMark* cm) :
-    AbstractGangTask("Complete Mark in CSet"),
-    _g1h(g1h), _cm(cm) { }
-
-  void work(uint worker_id) {
-    CompleteMarkingInCSetHRClosure cmplt(_cm, worker_id);
-    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_id);
-    _g1h->collection_set_iterate_from(hr, &cmplt);
-  }
-};
-
-void ConcurrentMark::complete_marking_in_collection_set() {
-  guarantee(false, "complete_marking_in_collection_set(): "
-                   "don't call this any more");
-
-  G1CollectedHeap* g1h =  G1CollectedHeap::heap();
-
-  if (!g1h->mark_in_progress()) {
-    g1h->g1_policy()->record_mark_closure_time(0.0);
-    return;
-  }
-
-  double start = os::elapsedTime();
-  G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this);
-
-  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
-
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    int n_workers = g1h->workers()->active_workers();
-    g1h->set_par_threads(n_workers);
-    g1h->workers()->run_task(&complete_mark_task);
-    g1h->set_par_threads(0);
-  } else {
-    complete_mark_task.work(0);
-  }
-
-  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
-
-  // Reset the claim values in the regions in the collection set.
-  g1h->reset_cset_heap_region_claim_values();
-
-  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
-
-  double end_time = os::elapsedTime();
-  double elapsed_time_ms = (end_time - start) * 1000.0;
-  g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
-}
-
-// The next two methods deal with the following optimisation. Some
-// objects are gray by being marked and located above the finger. If
-// they are copied, during an evacuation pause, below the finger then
-// the need to be pushed on the stack. The observation is that, if
-// there are no regions in the collection set located above the
-// finger, then the above cannot happen, hence we do not need to
-// explicitly gray any objects when copying them to below the
-// finger. The global stack will be scanned to ensure that, if it
-// points to objects being copied, it will update their
-// location. There is a tricky situation with the gray objects in
-// region stack that are being coped, however. See the comment in
-// newCSet().
-
-void ConcurrentMark::newCSet() {
-  guarantee(false, "newCSet(): don't call this any more");
-
-  if (!concurrent_marking_in_progress()) {
-    // nothing to do if marking is not in progress
-    return;
-  }
-
-  // find what the lowest finger is among the global and local fingers
-  _min_finger = _finger;
-  for (int i = 0; i < (int)_max_task_num; ++i) {
-    CMTask* task = _tasks[i];
-    HeapWord* task_finger = task->finger();
-    if (task_finger != NULL && task_finger < _min_finger) {
-      _min_finger = task_finger;
-    }
-  }
-
-  _should_gray_objects = false;
-
-  // This fixes a very subtle and fustrating bug. It might be the case
-  // that, during en evacuation pause, heap regions that contain
-  // objects that are gray (by being in regions contained in the
-  // region stack) are included in the collection set. Since such gray
-  // objects will be moved, and because it's not easy to redirect
-  // region stack entries to point to a new location (because objects
-  // in one region might be scattered to multiple regions after they
-  // are copied), one option is to ensure that all marked objects
-  // copied during a pause are pushed on the stack. Notice, however,
-  // that this problem can only happen when the region stack is not
-  // empty during an evacuation pause. So, we make the fix a bit less
-  // conservative and ensure that regions are pushed on the stack,
-  // irrespective whether all collection set regions are below the
-  // finger, if the region stack is not empty. This is expected to be
-  // a rare case, so I don't think it's necessary to be smarted about it.
-  if (!region_stack_empty() || has_aborted_regions()) {
-    _should_gray_objects = true;
-  }
-}
-
-void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
-  guarantee(false, "registerCSetRegion(): don't call this any more");
-
-  if (!concurrent_marking_in_progress()) return;
-
-  HeapWord* region_end = hr->end();
-  if (region_end > _min_finger) {
-    _should_gray_objects = true;
-  }
-}
-
-// Resets the region fields of active CMTasks whose values point
-// into the collection set.
-void ConcurrentMark::reset_active_task_region_fields_in_cset() {
-  guarantee(false, "reset_active_task_region_fields_in_cset(): "
-                   "don't call this any more");
-
-  assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
-  assert(parallel_marking_threads() <= _max_task_num, "sanity");
-
-  for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
-    CMTask* task = _tasks[i];
-    HeapWord* task_finger = task->finger();
-    if (task_finger != NULL) {
-      assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
-      HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
-      if (finger_region->in_collection_set()) {
-        // The task's current region is in the collection set.
-        // This region will be evacuated in the current GC and
-        // the region fields in the task will be stale.
-        task->giveup_current_region();
-      }
-    }
-  }
-}
-
 // abandon current marking iteration due to a Full GC
 void ConcurrentMark::abort() {
   // Clear all marks to force marking thread to do nothing
@@ -4053,9 +3098,6 @@
       _g1h->g1_policy()->record_concurrent_pause();
     }
     cmThread()->yield();
-    if (worker_id == 0) {
-      _g1h->g1_policy()->record_concurrent_pause_end();
-    }
     return true;
   } else {
     return false;
@@ -4112,36 +3154,21 @@
   CMBitMap*                   _nextMarkBitMap;
   ConcurrentMark*             _cm;
   CMTask*                     _task;
-  // true if we're scanning a heap region claimed by the task (so that
-  // we move the finger along), false if we're not, i.e. currently when
-  // scanning a heap region popped from the region stack (so that we
-  // do not move the task finger along; it'd be a mistake if we did so).
-  bool                        _scanning_heap_region;
 
 public:
-  CMBitMapClosure(CMTask *task,
-                  ConcurrentMark* cm,
-                  CMBitMap* nextMarkBitMap)
-    :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
-
-  void set_scanning_heap_region(bool scanning_heap_region) {
-    _scanning_heap_region = scanning_heap_region;
-  }
+  CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
+    _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
 
   bool do_bit(size_t offset) {
     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
     assert(_nextMarkBitMap->isMarked(addr), "invariant");
     assert( addr < _cm->finger(), "invariant");
 
-    if (_scanning_heap_region) {
-      statsOnly( _task->increase_objs_found_on_bitmap() );
-      assert(addr >= _task->finger(), "invariant");
-      // We move that task's local finger along.
-      _task->move_finger_to(addr);
-    } else {
-      // We move the task's region finger along.
-      _task->move_region_finger_to(addr);
-    }
+    statsOnly( _task->increase_objs_found_on_bitmap() );
+    assert(addr >= _task->finger(), "invariant");
+
+    // We move that task's local finger along.
+    _task->move_finger_to(addr);
 
     _task->scan_object(oop(addr));
     // we only partially drain the local queue and global stack
@@ -4249,8 +3276,6 @@
   _curr_region   = NULL;
   _finger        = NULL;
   _region_limit  = NULL;
-
-  _region_finger = NULL;
 }
 
 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
@@ -4271,7 +3296,6 @@
 
   _nextMarkBitMap                = nextMarkBitMap;
   clear_region_fields();
-  assert(_aborted_region.is_empty(), "should have been cleared");
 
   _calls                         = 0;
   _elapsed_time_ms               = 0.0;
@@ -4288,7 +3312,6 @@
   _global_max_size               = 0;
   _global_transfers_to           = 0;
   _global_transfers_from         = 0;
-  _region_stack_pops             = 0;
   _regions_claimed               = 0;
   _objs_found_on_bitmap          = 0;
   _satb_buffers_processed        = 0;
@@ -4663,110 +3686,6 @@
   decrease_limits();
 }
 
-void CMTask::drain_region_stack(BitMapClosure* bc) {
-  assert(_cm->region_stack_empty(), "region stack should be empty");
-  assert(_aborted_region.is_empty(), "aborted region should be empty");
-  return;
-
-  if (has_aborted()) return;
-
-  assert(_region_finger == NULL,
-         "it should be NULL when we're not scanning a region");
-
-  if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
-    if (_cm->verbose_low()) {
-      gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
-                             _task_id, _cm->region_stack_size());
-    }
-
-    MemRegion mr;
-
-    if (!_aborted_region.is_empty()) {
-      mr = _aborted_region;
-      _aborted_region = MemRegion();
-
-      if (_cm->verbose_low()) {
-        gclog_or_tty->print_cr("[%d] scanning aborted region "
-                               "[ " PTR_FORMAT ", " PTR_FORMAT " )",
-                               _task_id, mr.start(), mr.end());
-      }
-    } else {
-      mr = _cm->region_stack_pop_lock_free();
-      // it returns MemRegion() if the pop fails
-      statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
-    }
-
-    while (mr.start() != NULL) {
-      if (_cm->verbose_medium()) {
-        gclog_or_tty->print_cr("[%d] we are scanning region "
-                               "["PTR_FORMAT", "PTR_FORMAT")",
-                               _task_id, mr.start(), mr.end());
-      }
-
-      assert(mr.end() <= _cm->finger(),
-             "otherwise the region shouldn't be on the stack");
-      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
-      if (_nextMarkBitMap->iterate(bc, mr)) {
-        assert(!has_aborted(),
-               "cannot abort the task without aborting the bitmap iteration");
-
-        // We finished iterating over the region without aborting.
-        regular_clock_call();
-        if (has_aborted()) {
-          mr = MemRegion();
-        } else {
-          mr = _cm->region_stack_pop_lock_free();
-          // it returns MemRegion() if the pop fails
-          statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
-        }
-      } else {
-        assert(has_aborted(), "currently the only way to do so");
-
-        // The only way to abort the bitmap iteration is to return
-        // false from the do_bit() method. However, inside the
-        // do_bit() method we move the _region_finger to point to the
-        // object currently being looked at. So, if we bail out, we
-        // have definitely set _region_finger to something non-null.
-        assert(_region_finger != NULL, "invariant");
-
-        // Make sure that any previously aborted region has been
-        // cleared.
-        assert(_aborted_region.is_empty(), "aborted region not cleared");
-
-        // The iteration was actually aborted. So now _region_finger
-        // points to the address of the object we last scanned. If we
-        // leave it there, when we restart this task, we will rescan
-        // the object. It is easy to avoid this. We move the finger by
-        // enough to point to the next possible object header (the
-        // bitmap knows by how much we need to move it as it knows its
-        // granularity).
-        MemRegion newRegion =
-          MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
-
-        if (!newRegion.is_empty()) {
-          if (_cm->verbose_low()) {
-            gclog_or_tty->print_cr("[%d] recording unscanned region"
-                                   "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
-                                   _task_id,
-                                   newRegion.start(), newRegion.end());
-          }
-          // Now record the part of the region we didn't scan to
-          // make sure this task scans it later.
-          _aborted_region = newRegion;
-        }
-        // break from while
-        mr = MemRegion();
-      }
-      _region_finger = NULL;
-    }
-
-    if (_cm->verbose_low()) {
-      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
-                             _task_id, _cm->region_stack_size());
-    }
-  }
-}
-
 void CMTask::print_stats() {
   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                          _task_id, _calls);
@@ -4795,8 +3714,7 @@
                          _global_pushes, _global_pops, _global_max_size);
   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
                          _global_transfers_to,_global_transfers_from);
-  gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
-                         _regions_claimed, _region_stack_pops);
+  gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
                          _steal_attempts, _steals);
@@ -4855,15 +3773,7 @@
       popping by other tasks. Only when there is no more work, tasks
       will totally drain the global mark stack.
 
-      (4) Global Region Stack. Entries on it correspond to areas of
-      the bitmap that need to be scanned since they contain gray
-      objects. Pushes on the region stack only happen during
-      evacuation pauses and typically correspond to areas covered by
-      GC LABS. If it overflows, then the marking phase should restart
-      and iterate over the bitmap to identify gray objects. Tasks will
-      try to totally drain the region stack as soon as possible.
-
-      (5) SATB Buffer Queue. This is where completed SATB buffers are
+      (4) SATB Buffer Queue. This is where completed SATB buffers are
       made available. Buffers are regularly removed from this queue
       and scanned for roots, so that the queue doesn't get too
       long. During remark, all completed buffers are processed, as
@@ -4875,12 +3785,12 @@
 
       (1) When the marking phase has been aborted (after a Full GC).
 
-      (2) When a global overflow (either on the global stack or the
-      region stack) has been triggered. Before the task aborts, it
-      will actually sync up with the other tasks to ensure that all
-      the marking data structures (local queues, stacks, fingers etc.)
-      are re-initialised so that when do_marking_step() completes,
-      the marking phase can immediately restart.
+      (2) When a global overflow (on the global stack) has been
+      triggered. Before the task aborts, it will actually sync up with
+      the other tasks to ensure that all the marking data structures
+      (local queues, stacks, fingers etc.)  are re-initialised so that
+      when do_marking_step() completes, the marking phase can
+      immediately restart.
 
       (3) When enough completed SATB buffers are available. The
       do_marking_step() method only tries to drain SATB buffers right
@@ -4923,13 +3833,6 @@
   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
   assert(concurrent() == _cm->concurrent(), "they should be the same");
 
-  assert(concurrent() || _cm->region_stack_empty(),
-         "the region stack should have been cleared before remark");
-  assert(concurrent() || !_cm->has_aborted_regions(),
-         "aborted regions should have been cleared before remark");
-  assert(_region_finger == NULL,
-         "this should be non-null only when a region is being scanned");
-
   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
   assert(_task_queues != NULL, "invariant");
   assert(_task_queue != NULL, "invariant");
@@ -4978,10 +3881,10 @@
   set_cm_oop_closure(&cm_oop_closure);
 
   if (_cm->has_overflown()) {
-    // This can happen if the region stack or the mark stack overflows
-    // during a GC pause and this task, after a yield point,
-    // restarts. We have to abort as we need to get into the overflow
-    // protocol which happens right at the end of this task.
+    // This can happen if the mark stack overflows during a GC pause
+    // and this task, after a yield point, restarts. We have to abort
+    // as we need to get into the overflow protocol which happens
+    // right at the end of this task.
     set_has_aborted();
   }
 
@@ -4994,17 +3897,6 @@
   drain_local_queue(true);
   drain_global_stack(true);
 
-  // Then totally drain the region stack.  We will not look at
-  // it again before the next invocation of this method. Entries on
-  // the region stack are only added during evacuation pauses, for
-  // which we have to yield. When we do, we abort the task anyway so
-  // it will look at the region stack again when it restarts.
-  bitmap_closure.set_scanning_heap_region(false);
-  drain_region_stack(&bitmap_closure);
-  // ...then partially drain the local queue and the global stack
-  drain_local_queue(true);
-  drain_global_stack(true);
-
   do {
     if (!has_aborted() && _curr_region != NULL) {
       // This means that we're already holding on to a region.
@@ -5034,9 +3926,7 @@
 
       // Let's iterate over the bitmap of the part of the
       // region that is left.
-      bitmap_closure.set_scanning_heap_region(true);
-      if (mr.is_empty() ||
-          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
+      if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
         // We successfully completed iterating over the region. Now,
         // let's give up the region.
         giveup_current_region();
@@ -5061,9 +3951,9 @@
         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
         // Check if bitmap iteration was aborted while scanning the last object
         if (new_finger >= _region_limit) {
-            giveup_current_region();
+          giveup_current_region();
         } else {
-            move_finger_to(new_finger);
+          move_finger_to(new_finger);
         }
       }
     }
@@ -5119,9 +4009,7 @@
 
   if (!has_aborted()) {
     // We cannot check whether the global stack is empty, since other
-    // tasks might be pushing objects to it concurrently. We also cannot
-    // check if the region stack is empty because if a thread is aborting
-    // it can push a partially done region back.
+    // tasks might be pushing objects to it concurrently.
     assert(_cm->out_of_regions(),
            "at this point we should be out of regions");
 
@@ -5145,9 +4033,7 @@
     // we could. Let's try to do some stealing...
 
     // We cannot check whether the global stack is empty, since other
-    // tasks might be pushing objects to it concurrently. We also cannot
-    // check if the region stack is empty because if a thread is aborting
-    // it can push a partially done region back.
+    // tasks might be pushing objects to it concurrently.
     assert(_cm->out_of_regions() && _task_queue->size() == 0,
            "only way to reach here");
 
@@ -5194,9 +4080,7 @@
   // termination protocol.
   if (do_termination && !has_aborted()) {
     // We cannot check whether the global stack is empty, since other
-    // tasks might be concurrently pushing objects on it. We also cannot
-    // check if the region stack is empty because if a thread is aborting
-    // it can push a partially done region back.
+    // tasks might be concurrently pushing objects on it.
     // Separated the asserts so that we know which one fires.
     assert(_cm->out_of_regions(), "only way to reach here");
     assert(_task_queue->size() == 0, "only way to reach here");
@@ -5233,13 +4117,10 @@
       // that, if a condition is false, we can immediately find out
       // which one.
       guarantee(_cm->out_of_regions(), "only way to reach here");
-      guarantee(_aborted_region.is_empty(), "only way to reach here");
-      guarantee(_cm->region_stack_empty(), "only way to reach here");
       guarantee(_cm->mark_stack_empty(), "only way to reach here");
       guarantee(_task_queue->size() == 0, "only way to reach here");
       guarantee(!_cm->has_overflown(), "only way to reach here");
       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
-      guarantee(!_cm->region_stack_overflow(), "only way to reach here");
 
       if (_cm->verbose_low()) {
         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
@@ -5342,7 +4223,6 @@
     _task_queue(task_queue),
     _task_queues(task_queues),
     _cm_oop_closure(NULL),
-    _aborted_region(MemRegion()),
     _marked_bytes_array(marked_bytes),
     _card_bm(card_bm) {
   guarantee(task_queue != NULL, "invariant");
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -30,8 +30,8 @@
 
 class G1CollectedHeap;
 class CMTask;
-typedef GenericTaskQueue<oop>            CMTaskQueue;
-typedef GenericTaskQueueSet<CMTaskQueue> CMTaskQueueSet;
+typedef GenericTaskQueue<oop, mtGC>            CMTaskQueue;
+typedef GenericTaskQueueSet<CMTaskQueue, mtGC> CMTaskQueueSet;
 
 // Closure used by CM during concurrent reference discovery
 // and reference processing (during remarking) to determine
@@ -42,9 +42,7 @@
 class G1CMIsAliveClosure: public BoolObjectClosure {
   G1CollectedHeap* _g1;
  public:
-  G1CMIsAliveClosure(G1CollectedHeap* g1) :
-    _g1(g1)
-  {}
+  G1CMIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) { }
 
   void do_object(oop obj) {
     ShouldNotCallThis();
@@ -111,11 +109,6 @@
     return offsetToHeapWord(heapWordToOffset(addr) + 1);
   }
 
-  void mostly_disjoint_range_union(BitMap*   from_bitmap,
-                                   size_t    from_start_index,
-                                   HeapWord* to_start_word,
-                                   size_t    word_num);
-
   // debugging
   NOT_PRODUCT(bool covers(ReservedSpace rs) const;)
 };
@@ -258,60 +251,6 @@
   void oops_do(OopClosure* f);
 };
 
-class CMRegionStack VALUE_OBJ_CLASS_SPEC {
-  MemRegion* _base;
-  jint _capacity;
-  jint _index;
-  jint _oops_do_bound;
-  bool _overflow;
-public:
-  CMRegionStack();
-  ~CMRegionStack();
-  void allocate(size_t size);
-
-  // This is lock-free; assumes that it will only be called in parallel
-  // with other "push" operations (no pops).
-  void push_lock_free(MemRegion mr);
-
-  // Lock-free; assumes that it will only be called in parallel
-  // with other "pop" operations (no pushes).
-  MemRegion pop_lock_free();
-
-#if 0
-  // The routines that manipulate the region stack with a lock are
-  // not currently used. They should be retained, however, as a
-  // diagnostic aid.
-
-  // These two are the implementations that use a lock. They can be
-  // called concurrently with each other but they should not be called
-  // concurrently with the lock-free versions (push() / pop()).
-  void push_with_lock(MemRegion mr);
-  MemRegion pop_with_lock();
-#endif
-
-  bool isEmpty()    { return _index == 0; }
-  bool isFull()     { return _index == _capacity; }
-
-  bool overflow() { return _overflow; }
-  void clear_overflow() { _overflow = false; }
-
-  int  size() { return _index; }
-
-  // It iterates over the entries in the region stack and it
-  // invalidates (i.e. assigns MemRegion()) the ones that point to
-  // regions in the collection set.
-  bool invalidate_entries_into_cset();
-
-  // This gives an upper bound up to which the iteration in
-  // invalidate_entries_into_cset() will reach. This prevents
-  // newly-added entries to be unnecessarily scanned.
-  void set_oops_do_bound() {
-    _oops_do_bound = _index;
-  }
-
-  void setEmpty()   { _index = 0; clear_overflow(); }
-};
-
 class ForceOverflowSettings VALUE_OBJ_CLASS_SPEC {
 private:
 #ifndef PRODUCT
@@ -404,11 +343,10 @@
 
 class ConcurrentMarkThread;
 
-class ConcurrentMark : public CHeapObj {
+class ConcurrentMark: public CHeapObj<mtGC> {
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
-  friend class CSetMarkOopClosure;
   friend class CMGlobalObjectClosure;
   friend class CMRemarkTask;
   friend class CMConcurrentMarkingTask;
@@ -443,7 +381,6 @@
   CMBitMap                _markBitMap2;
   CMBitMapRO*             _prevMarkBitMap; // completed mark bitmap
   CMBitMap*               _nextMarkBitMap; // under-construction mark bitmap
-  bool                    _at_least_one_mark_complete;
 
   BitMap                  _region_bm;
   BitMap                  _card_bm;
@@ -457,7 +394,6 @@
 
   // For gray objects
   CMMarkStack             _markStack; // Grey objects behind global finger.
-  CMRegionStack           _regionStack; // Grey regions behind global finger.
   HeapWord* volatile      _finger;  // the global finger, region aligned,
                                     // always points to the end of the
                                     // last claimed region
@@ -502,18 +438,6 @@
   // verbose level
   CMVerboseLevel          _verbose_level;
 
-  // These two fields are used to implement the optimisation that
-  // avoids pushing objects on the global/region stack if there are
-  // no collection set regions above the lowest finger.
-
-  // This is the lowest finger (among the global and local fingers),
-  // which is calculated before a new collection set is chosen.
-  HeapWord* _min_finger;
-  // If this flag is true, objects/regions that are marked below the
-  // finger should be pushed on the stack(s). If this is flag is
-  // false, it is safe not to push them on the stack(s).
-  bool      _should_gray_objects;
-
   // All of these times are in ms.
   NumberSeq _init_times;
   NumberSeq _remark_times;
@@ -604,7 +528,7 @@
   CMTaskQueueSet* task_queues()  { return _task_queues; }
 
   // Access / manipulation of the overflow flag which is set to
-  // indicate that the global stack or region stack has overflown
+  // indicate that the global stack has overflown
   bool has_overflown()           { return _has_overflown; }
   void set_has_overflown()       { _has_overflown = true; }
   void clear_has_overflown()     { _has_overflown = false; }
@@ -684,68 +608,6 @@
   bool mark_stack_overflow()              { return _markStack.overflow(); }
   bool mark_stack_empty()                 { return _markStack.isEmpty(); }
 
-  // (Lock-free) Manipulation of the region stack
-  bool region_stack_push_lock_free(MemRegion mr) {
-    // Currently we only call the lock-free version during evacuation
-    // pauses.
-    assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
-
-    _regionStack.push_lock_free(mr);
-    if (_regionStack.overflow()) {
-      set_has_overflown();
-      return false;
-    }
-    return true;
-  }
-
-  // Lock-free version of region-stack pop. Should only be
-  // called in tandem with other lock-free pops.
-  MemRegion region_stack_pop_lock_free() {
-    return _regionStack.pop_lock_free();
-  }
-
-#if 0
-  // The routines that manipulate the region stack with a lock are
-  // not currently used. They should be retained, however, as a
-  // diagnostic aid.
-
-  bool region_stack_push_with_lock(MemRegion mr) {
-    // Currently we only call the lock-based version during either
-    // concurrent marking or remark.
-    assert(!SafepointSynchronize::is_at_safepoint() || !concurrent(),
-           "if we are at a safepoint it should be the remark safepoint");
-
-    _regionStack.push_with_lock(mr);
-    if (_regionStack.overflow()) {
-      set_has_overflown();
-      return false;
-    }
-    return true;
-  }
-
-  MemRegion region_stack_pop_with_lock() {
-    // Currently we only call the lock-based version during either
-    // concurrent marking or remark.
-    assert(!SafepointSynchronize::is_at_safepoint() || !concurrent(),
-           "if we are at a safepoint it should be the remark safepoint");
-
-    return _regionStack.pop_with_lock();
-  }
-#endif
-
-  int region_stack_size()               { return _regionStack.size(); }
-  bool region_stack_overflow()          { return _regionStack.overflow(); }
-  bool region_stack_empty()             { return _regionStack.isEmpty(); }
-
-  // Iterate over any regions that were aborted while draining the
-  // region stack (any such regions are saved in the corresponding
-  // CMTask) and invalidate (i.e. assign to the empty MemRegion())
-  // any regions that point into the collection set.
-  bool invalidate_aborted_regions_in_cset();
-
-  // Returns true if there are any aborted memory regions.
-  bool has_aborted_regions();
-
   CMRootRegions* root_regions() { return &_root_regions; }
 
   bool concurrent_marking_in_progress() {
@@ -774,11 +636,7 @@
     return _task_queues->steal(task_num, hash_seed, obj);
   }
 
-  // It grays an object by first marking it. Then, if it's behind the
-  // global finger, it also pushes it on the global stack.
-  void deal_with_reference(oop obj);
-
-  ConcurrentMark(ReservedSpace rs, int max_regions);
+  ConcurrentMark(ReservedSpace rs, uint max_regions);
   ~ConcurrentMark();
 
   ConcurrentMarkThread* cmThread() { return _cmThread; }
@@ -810,22 +668,6 @@
   inline void grayRoot(oop obj, size_t word_size,
                        uint worker_id, HeapRegion* hr = NULL);
 
-  // It's used during evacuation pauses to gray a region, if
-  // necessary, and it's MT-safe. It assumes that the caller has
-  // marked any objects on that region. If _should_gray_objects is
-  // true and we're still doing concurrent marking, the region is
-  // pushed on the region stack, if it is located below the global
-  // finger, otherwise we do nothing.
-  void grayRegionIfNecessary(MemRegion mr);
-
-  // It's used during evacuation pauses to mark and, if necessary,
-  // gray a single object and it's MT-safe. It assumes the caller did
-  // not mark the object. If _should_gray_objects is true and we're
-  // still doing concurrent marking, the objects is pushed on the
-  // global stack, if it is located below the global finger, otherwise
-  // we do nothing.
-  void markAndGrayObjectIfNecessary(oop p);
-
   // It iterates over the heap and for each object it comes across it
   // will dump the contents of its reference fields, as well as
   // liveness information for the object and its referents. The dump
@@ -869,10 +711,6 @@
   // Do concurrent phase of marking, to a tentative transitive closure.
   void markFromRoots();
 
-  // Process all unprocessed SATB buffers. It is called at the
-  // beginning of an evacuation pause.
-  void drainAllSATBBuffers();
-
   void checkpointRootsFinal(bool clear_all_soft_refs);
   void checkpointRootsFinalWork();
   void cleanup();
@@ -899,10 +737,6 @@
     _markStack.note_end_of_gc();
   }
 
-  // Iterate over the oops in the mark stack and all local queues. It
-  // also calls invalidate_entries_into_cset() on the region stack.
-  void oops_do(OopClosure* f);
-
   // Verify that there are no CSet oops on the stacks (taskqueues /
   // global mark stack), enqueued SATB buffers, per-thread SATB
   // buffers, and fingers (global / per-task). The boolean parameters
@@ -919,40 +753,6 @@
   // unless the force parameter is true.
   void update_g1_committed(bool force = false);
 
-  void complete_marking_in_collection_set();
-
-  // It indicates that a new collection set is being chosen.
-  void newCSet();
-
-  // It registers a collection set heap region with CM. This is used
-  // to determine whether any heap regions are located above the finger.
-  void registerCSetRegion(HeapRegion* hr);
-
-  // Resets the region fields of any active CMTask whose region fields
-  // are in the collection set (i.e. the region currently claimed by
-  // the CMTask will be evacuated and may be used, subsequently, as
-  // an alloc region). When this happens the region fields in the CMTask
-  // are stale and, hence, should be cleared causing the worker thread
-  // to claim a new region.
-  void reset_active_task_region_fields_in_cset();
-
-  // Registers the maximum region-end associated with a set of
-  // regions with CM. Again this is used to determine whether any
-  // heap regions are located above the finger.
-  void register_collection_set_finger(HeapWord* max_finger) {
-    // max_finger is the highest heap region end of the regions currently
-    // contained in the collection set. If this value is larger than
-    // _min_finger then we need to gray objects.
-    // This routine is like registerCSetRegion but for an entire
-    // collection of regions.
-    if (max_finger > _min_finger) {
-      _should_gray_objects = true;
-    }
-  }
-
-  // Returns "true" if at least one mark has been completed.
-  bool at_least_one_mark_complete() { return _at_least_one_mark_complete; }
-
   bool isMarked(oop p) const {
     assert(p != NULL && p->is_oop(), "expected an oop");
     HeapWord* addr = (HeapWord*)p;
@@ -1164,23 +964,6 @@
   // limit of the region this task is scanning, NULL if we're not scanning one
   HeapWord*                   _region_limit;
 
-  // This is used only when we scan regions popped from the region
-  // stack. It records what the last object on such a region we
-  // scanned was. It is used to ensure that, if we abort region
-  // iteration, we do not rescan the first part of the region. This
-  // should be NULL when we're not scanning a region from the region
-  // stack.
-  HeapWord*                   _region_finger;
-
-  // If we abort while scanning a region we record the remaining
-  // unscanned portion and check this field when marking restarts.
-  // This avoids having to push on the region stack while other
-  // marking threads may still be popping regions.
-  // If we were to push the unscanned portion directly to the
-  // region stack then we would need to using locking versions
-  // of the push and pop operations.
-  MemRegion                   _aborted_region;
-
   // the number of words this task has scanned
   size_t                      _words_scanned;
   // When _words_scanned reaches this limit, the regular clock is
@@ -1268,8 +1051,6 @@
   int                         _global_transfers_to;
   int                         _global_transfers_from;
 
-  int                         _region_stack_pops;
-
   int                         _regions_claimed;
   int                         _objs_found_on_bitmap;
 
@@ -1347,15 +1128,6 @@
   bool has_timed_out()          { return _has_timed_out; }
   bool claimed()                { return _claimed; }
 
-  // Support routines for the partially scanned region that may be
-  // recorded as a result of aborting while draining the CMRegionStack
-  MemRegion aborted_region()    { return _aborted_region; }
-  void set_aborted_region(MemRegion mr)
-                                { _aborted_region = mr; }
-
-  // Clears any recorded partially scanned region
-  void clear_aborted_region()   { set_aborted_region(MemRegion()); }
-
   void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure);
 
   // It grays the object by marking it and, if necessary, pushing it
@@ -1385,22 +1157,12 @@
   // buffers are available.
   void drain_satb_buffers();
 
-  // It keeps popping regions from the region stack and processing
-  // them until the region stack is empty.
-  void drain_region_stack(BitMapClosure* closure);
-
   // moves the local finger to a new location
   inline void move_finger_to(HeapWord* new_finger) {
     assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
     _finger = new_finger;
   }
 
-  // moves the region finger to a new location
-  inline void move_region_finger_to(HeapWord* new_finger) {
-    assert(new_finger < _cm->finger(), "invariant");
-    _region_finger = new_finger;
-  }
-
   CMTask(int task_num, ConcurrentMark *cm,
          size_t* marked_bytes, BitMap* card_bm,
          CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
--- a/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -49,7 +49,7 @@
   HeapWord* start = mr.start();
   HeapWord* last = mr.last();
   size_t region_size_bytes = mr.byte_size();
-  size_t index = hr->hrs_index();
+  uint index = hr->hrs_index();
 
   assert(!hr->continuesHumongous(), "should not be HC region");
   assert(hr == g1h->heap_region_containing(start), "sanity");
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -26,6 +26,7 @@
 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1MMUTracker.hpp"
 #include "gc_implementation/g1/vm_operations_g1.hpp"
 #include "memory/resourceArea.hpp"
@@ -104,7 +105,7 @@
 
       double scan_start = os::elapsedTime();
       if (!cm()->has_aborted()) {
-        if (PrintGC) {
+        if (G1Log::fine()) {
           gclog_or_tty->date_stamp(PrintGCDateStamps);
           gclog_or_tty->stamp(PrintGCTimeStamps);
           gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
@@ -113,7 +114,7 @@
         _cm->scanRootRegions();
 
         double scan_end = os::elapsedTime();
-        if (PrintGC) {
+        if (G1Log::fine()) {
           gclog_or_tty->date_stamp(PrintGCDateStamps);
           gclog_or_tty->stamp(PrintGCTimeStamps);
           gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf]",
@@ -122,7 +123,7 @@
       }
 
       double mark_start_sec = os::elapsedTime();
-      if (PrintGC) {
+      if (G1Log::fine()) {
         gclog_or_tty->date_stamp(PrintGCDateStamps);
         gclog_or_tty->stamp(PrintGCTimeStamps);
         gclog_or_tty->print_cr("[GC concurrent-mark-start]");
@@ -146,7 +147,7 @@
             os::sleep(current_thread, sleep_time_ms, false);
           }
 
-          if (PrintGC) {
+          if (G1Log::fine()) {
             gclog_or_tty->date_stamp(PrintGCDateStamps);
             gclog_or_tty->stamp(PrintGCTimeStamps);
             gclog_or_tty->print_cr("[GC concurrent-mark-end, %1.7lf sec]",
@@ -165,7 +166,7 @@
         }
 
         if (cm()->restart_for_overflow()) {
-          if (PrintGC) {
+          if (G1Log::fine()) {
             gclog_or_tty->date_stamp(PrintGCDateStamps);
             gclog_or_tty->stamp(PrintGCTimeStamps);
             gclog_or_tty->print_cr("[GC concurrent-mark-restart-for-overflow]");
@@ -211,7 +212,7 @@
         // reclaimed by cleanup.
 
         double cleanup_start_sec = os::elapsedTime();
-        if (PrintGC) {
+        if (G1Log::fine()) {
           gclog_or_tty->date_stamp(PrintGCDateStamps);
           gclog_or_tty->stamp(PrintGCTimeStamps);
           gclog_or_tty->print_cr("[GC concurrent-cleanup-start]");
@@ -232,7 +233,7 @@
         g1h->reset_free_regions_coming();
 
         double cleanup_end_sec = os::elapsedTime();
-        if (PrintGC) {
+        if (G1Log::fine()) {
           gclog_or_tty->date_stamp(PrintGCDateStamps);
           gclog_or_tty->stamp(PrintGCTimeStamps);
           gclog_or_tty->print_cr("[GC concurrent-cleanup-end, %1.7lf]",
@@ -273,7 +274,7 @@
       _sts.leave();
 
       if (cm()->has_aborted()) {
-        if (PrintGC) {
+        if (G1Log::fine()) {
           gclog_or_tty->date_stamp(PrintGCDateStamps);
           gclog_or_tty->stamp(PrintGCTimeStamps);
           gclog_or_tty->print_cr("[GC concurrent-mark-abort]");
@@ -292,7 +293,7 @@
     // Java thread is waiting for a full GC to happen (e.g., it
     // called System.gc() with +ExplicitGCInvokesConcurrent).
     _sts.join();
-    g1h->increment_full_collections_completed(true /* concurrent */);
+    g1h->increment_old_marking_cycles_completed(true /* concurrent */);
     _sts.leave();
   }
   assert(_should_terminate, "just checking");
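Every PrintGC check in this file now goes through G1Log::fine(). A hedged sketch of the idea, one process-wide verbosity level behind fine()/finer() predicates; the level names follow the patch, but the init logic here is a stand-in, not HotSpot's actual G1Log:

class G1LogSketch {
public:
  enum Level { LevelNone, LevelFine, LevelFiner };
  static void init(Level l) { _level = l; }   // stand-in for G1Log::init()
  static bool fine()  { return _level >= LevelFine; }
  static bool finer() { return _level >= LevelFiner; }
private:
  static Level _level;
};
G1LogSketch::Level G1LogSketch::_level = G1LogSketch::LevelNone;

// Usage mirroring the hunks above:
//   if (G1LogSketch::fine()) { /* print "[GC concurrent-mark-start]" */ }
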
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -32,7 +32,7 @@
 
 // A closure class for processing card table entries.  Note that we don't
 // require these closure objects to be stack-allocated.
-class CardTableEntryClosure: public CHeapObj {
+class CardTableEntryClosure: public CHeapObj<mtGC> {
 public:
   // Process the card whose card table entry is "card_ptr".  If returns
   // "false", terminate the iteration early.
--- a/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -140,7 +140,7 @@
 }
 
 void G1AllocRegion::fill_in_ext_msg(ar_ext_msg* msg, const char* message) {
-  msg->append("[%s] %s c: "SIZE_FORMAT" b: %s r: "PTR_FORMAT" u: "SIZE_FORMAT,
+  msg->append("[%s] %s c: %u b: %s r: "PTR_FORMAT" u: "SIZE_FORMAT,
               _name, message, _count, BOOL_TO_STR(_bot_updates),
               _alloc_region, _used_bytes_before);
 }
@@ -215,7 +215,7 @@
       jio_snprintf(rest_buffer, buffer_length, "");
     }
 
-    tty->print_cr("[%s] "SIZE_FORMAT" %s : %s %s",
+    tty->print_cr("[%s] %u %s : %s %s",
                   _name, _count, hr_buffer, str, rest_buffer);
   }
 }
--- a/src/share/vm/gc_implementation/g1/g1AllocRegion.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1AllocRegion.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -64,7 +64,7 @@
   // the region that is re-used using the set() method. This count can
   // be used in any heuristics that might want to bound how many
   // distinct regions this object can used during an active interval.
-  size_t _count;
+  uint _count;
 
   // When we set up a new active region we save its used bytes in this
   // field so that, when we retire it, we can calculate how much space
@@ -136,7 +136,7 @@
     return (_alloc_region == _dummy_region) ? NULL : _alloc_region;
   }
 
-  size_t count() { return _count; }
+  uint count() { return _count; }
 
   // The following two are the building blocks for the allocation method.
 
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -27,6 +27,7 @@
 #include "memory/space.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
+#include "services/memTracker.hpp"
 
 //////////////////////////////////////////////////////////////////////
 // G1BlockOffsetSharedArray
@@ -44,6 +45,9 @@
   if (!_vs.initialize(rs, 0)) {
     vm_exit_during_initialization("Could not reserve enough space for heap offset array");
   }
+
+  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+
   _offset_array = (u_char*)_vs.low_boundary();
   resize(init_word_size);
   if (TraceBlockOffsetTable) {
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -117,7 +117,7 @@
 
 // Here is the shared array type.
 
-class G1BlockOffsetSharedArray: public CHeapObj {
+class G1BlockOffsetSharedArray: public CHeapObj<mtGC> {
   friend class G1BlockOffsetArray;
   friend class G1BlockOffsetArrayContigSpace;
   friend class VMStructs;
@@ -159,14 +159,30 @@
            "right address out of range");
     assert(left  < right, "Heap addresses out of order");
     size_t num_cards = pointer_delta(right, left) >> LogN_words;
-    memset(&_offset_array[index_for(left)], offset, num_cards);
+    if (UseMemSetInBOT) {
+      memset(&_offset_array[index_for(left)], offset, num_cards);
+    } else {
+      size_t i = index_for(left);
+      const size_t end = i + num_cards;
+      for (; i < end; i++) {
+        _offset_array[i] = offset;
+      }
+    }
   }
 
   void set_offset_array(size_t left, size_t right, u_char offset) {
     assert(right < _vs.committed_size(), "right address out of range");
-    assert(left  <= right, "indexes out of order");
+    assert(left <= right, "indexes out of order");
     size_t num_cards = right - left + 1;
-    memset(&_offset_array[left], offset, num_cards);
+    if (UseMemSetInBOT) {
+      memset(&_offset_array[left], offset, num_cards);
+    } else {
+      size_t i = left;
+      const size_t end = i + num_cards;
+      for (; i < end; i++) {
+        _offset_array[i] = offset;
+      }
+    }
   }
 
   void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const {
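Both set_offset_array() overloads above gain a flag-guarded fallback that fills the array with an explicit loop instead of memset(); the sun4v warning added in g1CollectedHeap.cpp below explains why memset() cannot always be trusted. A minimal sketch of the same pattern, with use_memset standing in for the UseMemSetInBOT flag:

#include <cstddef>
#include <cstring>

static void fill_offsets(unsigned char* array, std::size_t from,
                         std::size_t num_cards, unsigned char offset,
                         bool use_memset) {
  if (use_memset) {
    std::memset(&array[from], offset, num_cards);   // fast path
  } else {
    // Byte-at-a-time fallback for platforms where memset misbehaves.
    for (std::size_t i = from, end = from + num_cards; i < end; i++) {
      array[i] = offset;
    }
  }
}
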
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -33,6 +33,8 @@
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
 #include "gc_implementation/g1/g1EvacFailure.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
+#include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
@@ -233,7 +235,7 @@
 bool YoungList::check_list_well_formed() {
   bool ret = true;
 
-  size_t length = 0;
+  uint length = 0;
   HeapRegion* curr = _head;
   HeapRegion* last = NULL;
   while (curr != NULL) {
@@ -252,7 +254,7 @@
 
   if (!ret) {
     gclog_or_tty->print_cr("### YOUNG LIST seems not well formed!");
-    gclog_or_tty->print_cr("###   list has %d entries, _length is %d",
+    gclog_or_tty->print_cr("###   list has %u entries, _length is %u",
                            length, _length);
   }
 
@@ -263,7 +265,7 @@
   bool ret = true;
 
   if (_length != 0) {
-    gclog_or_tty->print_cr("### YOUNG LIST should have 0 length, not %d",
+    gclog_or_tty->print_cr("### YOUNG LIST should have 0 length, not %u",
                   _length);
     ret = false;
   }
@@ -336,8 +338,7 @@
     _g1h->g1_policy()->add_region_to_incremental_cset_rhs(curr);
     young_index_in_cset += 1;
   }
-  assert((size_t) young_index_in_cset == _survivor_length,
-         "post-condition");
+  assert((uint) young_index_in_cset == _survivor_length, "post-condition");
   _g1h->g1_policy()->note_stop_adding_survivor_regions();
 
   _head   = _survivor_head;
@@ -368,16 +369,11 @@
     if (curr == NULL)
       gclog_or_tty->print_cr("  empty");
     while (curr != NULL) {
-      gclog_or_tty->print_cr("  [%08x-%08x], t: %08x, P: %08x, N: %08x, C: %08x, "
-                             "age: %4d, y: %d, surv: %d",
-                             curr->bottom(), curr->end(),
-                             curr->top(),
+      gclog_or_tty->print_cr("  "HR_FORMAT", P: "PTR_FORMAT "N: "PTR_FORMAT", age: %4d",
+                             HR_FORMAT_PARAMS(curr),
                              curr->prev_top_at_mark_start(),
                              curr->next_top_at_mark_start(),
-                             curr->top_at_conc_mark_count(),
-                             curr->age_in_surv_rate_group_cond(),
-                             curr->is_young(),
-                             curr->is_survivor());
+                             curr->age_in_surv_rate_group_cond());
       curr = curr->get_next_young_region();
     }
   }
@@ -532,7 +528,7 @@
     if (!_secondary_free_list.is_empty()) {
       if (G1ConcRegionFreeingVerbose) {
         gclog_or_tty->print_cr("G1ConcRegionFreeing [region alloc] : "
-                               "secondary_free_list has "SIZE_FORMAT" entries",
+                               "secondary_free_list has %u entries",
                                _secondary_free_list.length());
       }
       // It looks as if there are free regions available on the
@@ -618,12 +614,12 @@
   return res;
 }
 
-size_t G1CollectedHeap::humongous_obj_allocate_find_first(size_t num_regions,
-                                                          size_t word_size) {
+uint G1CollectedHeap::humongous_obj_allocate_find_first(uint num_regions,
+                                                        size_t word_size) {
   assert(isHumongous(word_size), "word_size should be humongous");
   assert(num_regions * HeapRegion::GrainWords >= word_size, "pre-condition");
 
-  size_t first = G1_NULL_HRS_INDEX;
+  uint first = G1_NULL_HRS_INDEX;
   if (num_regions == 1) {
     // Only one region to allocate, no need to go through the slower
    // path. The caller will attempt the expansion if this fails, so
@@ -649,7 +645,7 @@
     if (free_regions() >= num_regions) {
       first = _hrs.find_contiguous(num_regions);
       if (first != G1_NULL_HRS_INDEX) {
-        for (size_t i = first; i < first + num_regions; ++i) {
+        for (uint i = first; i < first + num_regions; ++i) {
           HeapRegion* hr = region_at(i);
           assert(hr->is_empty(), "sanity");
           assert(is_on_master_free_list(hr), "sanity");
@@ -663,15 +659,15 @@
 }
 
 HeapWord*
-G1CollectedHeap::humongous_obj_allocate_initialize_regions(size_t first,
-                                                           size_t num_regions,
+G1CollectedHeap::humongous_obj_allocate_initialize_regions(uint first,
+                                                           uint num_regions,
                                                            size_t word_size) {
   assert(first != G1_NULL_HRS_INDEX, "pre-condition");
   assert(isHumongous(word_size), "word_size should be humongous");
   assert(num_regions * HeapRegion::GrainWords >= word_size, "pre-condition");
 
   // Index of last region in the series + 1.
-  size_t last = first + num_regions;
+  uint last = first + num_regions;
 
   // We need to initialize the region(s) we just discovered. This is
   // a bit tricky given that it can happen concurrently with
@@ -682,7 +678,7 @@
   // a specific order.
 
   // The word size sum of all the regions we will allocate.
-  size_t word_size_sum = num_regions * HeapRegion::GrainWords;
+  size_t word_size_sum = (size_t) num_regions * HeapRegion::GrainWords;
   assert(word_size <= word_size_sum, "sanity");
 
   // This will be the "starts humongous" region.
@@ -721,7 +717,7 @@
   // Then, if there are any, we will set up the "continues
   // humongous" regions.
   HeapRegion* hr = NULL;
-  for (size_t i = first + 1; i < last; ++i) {
+  for (uint i = first + 1; i < last; ++i) {
     hr = region_at(i);
     hr->set_continuesHumongous(first_hr);
   }
@@ -767,7 +763,7 @@
   // last one) is actually used when we will free up the humongous
   // region in free_humongous_region().
   hr = NULL;
-  for (size_t i = first + 1; i < last; ++i) {
+  for (uint i = first + 1; i < last; ++i) {
     hr = region_at(i);
     if ((i + 1) == last) {
       // last continues humongous region
@@ -803,14 +799,14 @@
 
   verify_region_sets_optional();
 
-  size_t num_regions =
-         round_to(word_size, HeapRegion::GrainWords) / HeapRegion::GrainWords;
-  size_t x_size = expansion_regions();
-  size_t fs = _hrs.free_suffix();
-  size_t first = humongous_obj_allocate_find_first(num_regions, word_size);
+  size_t word_size_rounded = round_to(word_size, HeapRegion::GrainWords);
+  uint num_regions = (uint) (word_size_rounded / HeapRegion::GrainWords);
+  uint x_num = expansion_regions();
+  uint fs = _hrs.free_suffix();
+  uint first = humongous_obj_allocate_find_first(num_regions, word_size);
   if (first == G1_NULL_HRS_INDEX) {
     // The only thing we can do now is attempt expansion.
-    if (fs + x_size >= num_regions) {
+    if (fs + x_num >= num_regions) {
       // If the number of regions we're trying to allocate for this
       // object is at most the number of regions in the free suffix,
       // then the call to humongous_obj_allocate_find_first() above
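A worked example of the sizing arithmetic above, assuming a stand-in grain of 1M words: a 2.5M-word request rounds up to three regions, and expansion is attempted only when the free suffix plus expandable regions can cover that count.

#include <cstdio>

int main() {
  const unsigned long grain_words = 1024UL * 1024UL;   // stand-in GrainWords
  unsigned long word_size = 5UL * grain_words / 2UL;   // 2.5 regions' worth
  unsigned long rounded =
      ((word_size + grain_words - 1) / grain_words) * grain_words;  // round_to
  unsigned num_regions = (unsigned)(rounded / grain_words);
  std::printf("num_regions = %u\n", num_regions);      // prints 3
  return 0;
}
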
@@ -957,9 +953,18 @@
         }
         should_try_gc = false;
       } else {
-        // Read the GC count while still holding the Heap_lock.
-        gc_count_before = total_collections();
-        should_try_gc = true;
+        // The GCLocker may not be active but the GCLocker initiated
+        // GC may not yet have been performed (GCLocker::needs_gc()
+        // returns true). In this case we do not try this GC and
+        // wait until the GCLocker initiated GC is performed, and
+        // then retry the allocation.
+        if (GC_locker::needs_gc()) {
+          should_try_gc = false;
+        } else {
+          // Read the GC count while still holding the Heap_lock.
+          gc_count_before = total_collections();
+          should_try_gc = true;
+        }
       }
     }
 
@@ -980,6 +985,9 @@
         return NULL;
       }
     } else {
+      // The GCLocker is either active or the GCLocker initiated
+      // GC has not yet been performed. Stall until it is and
+      // then retry the allocation.
       GC_locker::stall_until_clear();
     }
 
@@ -1059,9 +1067,18 @@
       if (GC_locker::is_active_and_needs_gc()) {
         should_try_gc = false;
       } else {
-        // Read the GC count while still holding the Heap_lock.
-        gc_count_before = total_collections();
-        should_try_gc = true;
+        // The GCLocker may not be active but the GCLocker initiated
+        // GC may not yet have been performed (GCLocker::needs_gc()
+        // returns true). In this case we do not try this GC and
+        // wait until the GCLocker initiated GC is performed, and
+        // then retry the allocation.
+        if (GC_locker::needs_gc()) {
+          should_try_gc = false;
+        } else {
+          // Read the GC count while still holding the Heap_lock.
+          gc_count_before = total_collections();
+          should_try_gc = true;
+        }
       }
     }
 
@@ -1086,6 +1103,9 @@
         return NULL;
       }
     } else {
+      // The GCLocker is either active or the GCLocker initiated
+      // GC has not yet been performed. Stall until it is and
+      // then retry the allocation.
       GC_locker::stall_until_clear();
     }
 
@@ -1129,13 +1149,16 @@
 }
 
 class PostMCRemSetClearClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
   ModRefBarrierSet* _mr_bs;
 public:
-  PostMCRemSetClearClosure(ModRefBarrierSet* mr_bs) : _mr_bs(mr_bs) {}
+  PostMCRemSetClearClosure(G1CollectedHeap* g1h, ModRefBarrierSet* mr_bs) :
+    _g1h(g1h), _mr_bs(mr_bs) { }
   bool doHeapRegion(HeapRegion* r) {
-    r->reset_gc_time_stamp();
-    if (r->continuesHumongous())
+    if (r->continuesHumongous()) {
       return false;
+    }
+    _g1h->reset_gc_time_stamps(r);
     HeapRegionRemSet* hrrs = r->rem_set();
     if (hrrs != NULL) hrrs->clear();
     // You might think here that we could clear just the cards
@@ -1148,19 +1171,10 @@
   }
 };
 
-
-class PostMCRemSetInvalidateClosure: public HeapRegionClosure {
-  ModRefBarrierSet* _mr_bs;
-public:
-  PostMCRemSetInvalidateClosure(ModRefBarrierSet* mr_bs) : _mr_bs(mr_bs) {}
-  bool doHeapRegion(HeapRegion* r) {
-    if (r->continuesHumongous()) return false;
-    if (r->used_region().word_size() != 0) {
-      _mr_bs->invalidate(r->used_region(), true /*whole heap*/);
-    }
-    return false;
-  }
-};
+void G1CollectedHeap::clear_rsets_post_compaction() {
+  PostMCRemSetClearClosure rs_clear(this, mr_bs());
+  heap_region_iterate(&rs_clear);
+}
 
 class RebuildRSOutOfRegionClosure: public HeapRegionClosure {
   G1CollectedHeap*   _g1h;
@@ -1209,7 +1223,7 @@
       if (!hr->isHumongous()) {
         _hr_printer->post_compaction(hr, G1HRPrinter::Old);
       } else if (hr->startsHumongous()) {
-        if (hr->capacity() == HeapRegion::GrainBytes) {
+        if (hr->region_num() == 1) {
           // single humongous region
           _hr_printer->post_compaction(hr, G1HRPrinter::SingleHumongous);
         } else {
@@ -1227,6 +1241,36 @@
     : _hr_printer(hr_printer) { }
 };
 
+void G1CollectedHeap::print_hrs_post_compaction() {
+  PostCompactionPrinterClosure cl(hr_printer());
+  heap_region_iterate(&cl);
+}
+
+double G1CollectedHeap::verify(bool guard, const char* msg) {
+  double verify_time_ms = 0.0;
+
+  if (guard && total_collections() >= VerifyGCStartAt) {
+    double verify_start = os::elapsedTime();
+    HandleMark hm;  // Discard invalid handles created during verification
+    gclog_or_tty->print(msg);
+    prepare_for_verify();
+    Universe::verify(false /* silent */, VerifyOption_G1UsePrevMarking);
+    verify_time_ms = (os::elapsedTime() - verify_start) * 1000;
+  }
+
+  return verify_time_ms;
+}
+
+void G1CollectedHeap::verify_before_gc() {
+  double verify_time_ms = verify(VerifyBeforeGC, " VerifyBeforeGC:");
+  g1_policy()->phase_times()->record_verify_before_time_ms(verify_time_ms);
+}
+
+void G1CollectedHeap::verify_after_gc() {
+  double verify_time_ms = verify(VerifyAfterGC, " VerifyAfterGC:");
+  g1_policy()->phase_times()->record_verify_after_time_ms(verify_time_ms);
+}
+
 bool G1CollectedHeap::do_collection(bool explicit_gc,
                                     bool clear_all_soft_refs,
                                     size_t word_size) {
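verify() above factors the duplicated VerifyBeforeGC/VerifyAfterGC blocks into one guarded, timed helper whose result feeds the new phase-times recorder. A small sketch of the pattern, using std::chrono in place of os::elapsedTime():

#include <chrono>

template <typename Check>
double timed_verify(bool guard, Check check) {
  if (!guard) return 0.0;
  auto start = std::chrono::steady_clock::now();
  check();   // e.g. prepare_for_verify() followed by Universe::verify(...)
  std::chrono::duration<double, std::milli> elapsed =
      std::chrono::steady_clock::now() - start;
  return elapsed.count();   // milliseconds, ready for the phase recorder
}
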
@@ -1253,13 +1297,11 @@
     IsGCActiveMark x;
 
     // Timing
-    bool system_gc = (gc_cause() == GCCause::_java_lang_system_gc);
-    assert(!system_gc || explicit_gc, "invariant");
-    gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
-    TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    TraceTime t(system_gc ? "Full GC (System.gc())" : "Full GC",
-                PrintGC, true, gclog_or_tty);
-
+    assert(gc_cause() != GCCause::_java_lang_system_gc || explicit_gc, "invariant");
+    gclog_or_tty->date_stamp(G1Log::fine() && PrintGCDateStamps);
+    TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty);
+
+    TraceTime t(GCCauseString("Full GC", gc_cause()), G1Log::fine(), true, gclog_or_tty);
     TraceCollectorStats tcs(g1mm()->full_collection_counters());
     TraceMemoryManagerStats tms(true /* fullGC */, gc_cause());
 
@@ -1282,19 +1324,13 @@
 
     gc_prologue(true);
     increment_total_collections(true /* full gc */);
+    increment_old_marking_cycles_started();
 
     size_t g1h_prev_used = used();
     assert(used() == recalculate_used(), "Should be equal");
 
-    if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) {
-      HandleMark hm;  // Discard invalid handles created during verification
-      gclog_or_tty->print(" VerifyBeforeGC:");
-      prepare_for_verify();
-      Universe::verify(/* allow dirty */ true,
-                       /* silent      */ false,
-                       /* option      */ VerifyOption_G1UsePrevMarking);
-
-    }
+    verify_before_gc();
+
     pre_full_gc_dump();
 
     COMPILER2_PRESENT(DerivedPointerTable::clear());
@@ -1361,15 +1397,7 @@
 
     MemoryService::track_memory_usage();
 
-    if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
-      HandleMark hm;  // Discard invalid handles created during verification
-      gclog_or_tty->print(" VerifyAfterGC:");
-      prepare_for_verify();
-      Universe::verify(/* allow dirty */ false,
-                       /* silent      */ false,
-                       /* option      */ VerifyOption_G1UsePrevMarking);
-
-    }
+    verify_after_gc();
 
     assert(!ref_processor_stw()->discovery_enabled(), "Postcondition");
     ref_processor_stw()->verify_no_references_recorded();
@@ -1385,8 +1413,8 @@
     // Since everything potentially moved, we will clear all remembered
    // sets, and clear all cards.  Later we will rebuild remembered
     // sets. We will also reset the GC time stamps of the regions.
-    PostMCRemSetClearClosure rs_clear(mr_bs());
-    heap_region_iterate(&rs_clear);
+    clear_rsets_post_compaction();
+    check_gc_time_stamps();
 
     // Resize the heap if necessary.
     resize_if_necessary_after_full_collection(explicit_gc ? 0 : word_size);
@@ -1396,9 +1424,7 @@
       // that all the COMMIT / UNCOMMIT events are generated before
       // the end GC event.
 
-      PostCompactionPrinterClosure cl(hr_printer());
-      heap_region_iterate(&cl);
-
+      print_hrs_post_compaction();
       _hr_printer.end_gc(true /* full */, (size_t) total_collections());
     }
 
@@ -1444,7 +1470,7 @@
       heap_region_iterate(&rebuild_rs);
     }
 
-    if (PrintGC) {
+    if (G1Log::fine()) {
       print_size_transition(gclog_or_tty, g1h_prev_used, used(), capacity());
     }
 
@@ -1477,22 +1503,28 @@
     JavaThread::dirty_card_queue_set().abandon_logs();
     assert(!G1DeferredRSUpdate
            || (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any");
-  }
-
-  _young_list->reset_sampled_info();
-  // At this point there should be no regions in the
-  // entire heap tagged as young.
-  assert( check_young_list_empty(true /* check_heap */),
-    "young list should be empty at this point");
-
-  // Update the number of full collections that have been completed.
-  increment_full_collections_completed(false /* concurrent */);
-
-  _hrs.verify_optional();
-  verify_region_sets_optional();
-
-  print_heap_after_gc();
-  g1mm()->update_sizes();
+
+    _young_list->reset_sampled_info();
+    // At this point there should be no regions in the
+    // entire heap tagged as young.
+    assert( check_young_list_empty(true /* check_heap */),
+      "young list should be empty at this point");
+
+    // Update the number of full collections that have been completed.
+    increment_old_marking_cycles_completed(false /* concurrent */);
+
+    _hrs.verify_optional();
+    verify_region_sets_optional();
+
+    print_heap_after_gc();
+
+    // We must call G1MonitoringSupport::update_sizes() in the same scoping level
+    // as an active TraceMemoryManagerStats object (i.e. before the destructor for the
+    // TraceMemoryManagerStats is called) so that the G1 memory pools are updated
+    // before any GC notifications are raised.
+    g1mm()->update_sizes();
+  }
+
   post_full_gc_dump();
 
   return true;
@@ -1782,7 +1814,7 @@
     ReservedSpace::page_align_size_down(shrink_bytes);
   aligned_shrink_bytes = align_size_down(aligned_shrink_bytes,
                                          HeapRegion::GrainBytes);
-  size_t num_regions_deleted = 0;
+  uint num_regions_deleted = 0;
   MemRegion mr = _hrs.shrink_by(aligned_shrink_bytes, &num_regions_deleted);
   HeapWord* old_end = (HeapWord*) _g1_storage.high();
   assert(mr.end() == old_end, "post-condition");
@@ -1871,9 +1903,12 @@
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
   _retained_old_gc_alloc_region(NULL),
+  _survivor_plab_stats(YoungPLABSize, PLABWeight),
+  _old_plab_stats(OldPLABSize, PLABWeight),
   _expand_heap_after_alloc_failure(true),
   _surviving_young_words(NULL),
-  _full_collections_completed(0),
+  _old_marking_cycles_started(0),
+  _old_marking_cycles_completed(0),
   _in_cset_fast_test(NULL),
   _in_cset_fast_test_base(NULL),
   _dirty_cards_region_list(NULL),
@@ -1893,14 +1928,14 @@
   assert(n_rem_sets > 0, "Invariant.");
 
   HeapRegionRemSetIterator** iter_arr =
-    NEW_C_HEAP_ARRAY(HeapRegionRemSetIterator*, n_queues);
+    NEW_C_HEAP_ARRAY(HeapRegionRemSetIterator*, n_queues, mtGC);
   for (int i = 0; i < n_queues; i++) {
     iter_arr[i] = new HeapRegionRemSetIterator();
   }
   _rem_set_iterator = iter_arr;
 
-  _worker_cset_start_region = NEW_C_HEAP_ARRAY(HeapRegion*, n_queues);
-  _worker_cset_start_region_time_stamp = NEW_C_HEAP_ARRAY(unsigned int, n_queues);
+  _worker_cset_start_region = NEW_C_HEAP_ARRAY(HeapRegion*, n_queues, mtGC);
+  _worker_cset_start_region_time_stamp = NEW_C_HEAP_ARRAY(unsigned int, n_queues, mtGC);
 
   for (int i = 0; i < n_queues; i++) {
     RefToScanQueue* q = new RefToScanQueue();
@@ -1910,13 +1945,26 @@
 
   clear_cset_start_regions();
 
+  // Initialize the G1EvacuationFailureALot counters and flags.
+  NOT_PRODUCT(reset_evacuation_should_fail();)
+
   guarantee(_task_queues != NULL, "task_queues allocation failure.");
+#ifdef SPARC
+  // Issue a stern warning, but allow use for experimentation and debugging.
+  if (VM_Version::is_sun4v() && UseMemSetInBOT) {
+    assert(!FLAG_IS_DEFAULT(UseMemSetInBOT), "Error");
+    warning("Experimental flag -XX:+UseMemSetInBOT is known to cause instability"
+            " on sun4v; please understand that you are using at your own risk!");
+  }
+#endif
 }
 
 jint G1CollectedHeap::initialize() {
   CollectedHeap::pre_initialize();
   os::enable_vtime();
 
+  G1Log::init();
+
   // Necessary to satisfy locking discipline assertions.
 
   MutexLocker x(Heap_lock);
@@ -2003,7 +2051,7 @@
   _reserved.set_start((HeapWord*)heap_rs.base());
   _reserved.set_end((HeapWord*)(heap_rs.base() + heap_rs.size()));
 
-  _expansion_regions = max_byte_size/HeapRegion::GrainBytes;
+  _expansion_regions = (uint) (max_byte_size / HeapRegion::GrainBytes);
 
   // Create the gen rem set (and barrier set) for the entire reserved region.
   _rem_set = collector_policy()->create_rem_set(_reserved, 2);
@@ -2040,7 +2088,7 @@
 
   // 6843694 - ensure that the maximum region index can fit
   // in the remembered set structures.
-  const size_t max_region_idx = ((size_t)1 << (sizeof(RegionIdx_t)*BitsPerByte-1)) - 1;
+  const uint max_region_idx = (1U << (sizeof(RegionIdx_t)*BitsPerByte-1)) - 1;
   guarantee((max_regions() - 1) <= max_region_idx, "too many regions");
 
   size_t max_cards_per_region = ((size_t)1 << (sizeof(CardIdx_t)*BitsPerByte-1)) - 1;
@@ -2056,13 +2104,14 @@
   _g1h = this;
 
    _in_cset_fast_test_length = max_regions();
-   _in_cset_fast_test_base = NEW_C_HEAP_ARRAY(bool, _in_cset_fast_test_length);
+   _in_cset_fast_test_base =
+                   NEW_C_HEAP_ARRAY(bool, (size_t) _in_cset_fast_test_length, mtGC);
 
    // We're biasing _in_cset_fast_test to avoid subtracting the
    // beginning of the heap every time we want to index; basically
    // it's the same with what we do with the card table.
    _in_cset_fast_test = _in_cset_fast_test_base -
-                ((size_t) _g1_reserved.start() >> HeapRegion::LogOfHRGrainBytes);
+               ((uintx) _g1_reserved.start() >> HeapRegion::LogOfHRGrainBytes);
 
    // Clear the _cset_fast_test bitmap in anticipation of adding
    // regions to the incremental collection set for the first
@@ -2071,7 +2120,7 @@
 
   // Create the ConcurrentMark data structure and thread.
   // (Must do this late, so that "max_regions" is defined.)
-  _cm       = new ConcurrentMark(heap_rs, (int) max_regions());
+  _cm       = new ConcurrentMark(heap_rs, max_regions());
   _cmThread = _cm->cmThread();
 
   // Initialize the from_card cache structure of HeapRegionRemSet.
@@ -2236,6 +2285,51 @@
   return _g1_committed.byte_size();
 }
 
+void G1CollectedHeap::reset_gc_time_stamps(HeapRegion* hr) {
+  assert(!hr->continuesHumongous(), "pre-condition");
+  hr->reset_gc_time_stamp();
+  if (hr->startsHumongous()) {
+    uint first_index = hr->hrs_index() + 1;
+    uint last_index = hr->last_hc_index();
+    for (uint i = first_index; i < last_index; i += 1) {
+      HeapRegion* chr = region_at(i);
+      assert(chr->continuesHumongous(), "sanity");
+      chr->reset_gc_time_stamp();
+    }
+  }
+}
+
+#ifndef PRODUCT
+class CheckGCTimeStampsHRClosure : public HeapRegionClosure {
+private:
+  unsigned _gc_time_stamp;
+  bool _failures;
+
+public:
+  CheckGCTimeStampsHRClosure(unsigned gc_time_stamp) :
+    _gc_time_stamp(gc_time_stamp), _failures(false) { }
+
+  virtual bool doHeapRegion(HeapRegion* hr) {
+    unsigned region_gc_time_stamp = hr->get_gc_time_stamp();
+    if (_gc_time_stamp != region_gc_time_stamp) {
+      gclog_or_tty->print_cr("Region "HR_FORMAT" has GC time stamp = %d, "
+                             "expected %d", HR_FORMAT_PARAMS(hr),
+                             region_gc_time_stamp, _gc_time_stamp);
+      _failures = true;
+    }
+    return false;
+  }
+
+  bool failures() { return _failures; }
+};
+
+void G1CollectedHeap::check_gc_time_stamps() {
+  CheckGCTimeStampsHRClosure cl(_gc_time_stamp);
+  heap_region_iterate(&cl);
+  guarantee(!cl.failures(), "all GC time stamps should have been reset");
+}
+#endif // PRODUCT
+
 void G1CollectedHeap::iterate_dirty_card_closure(CardTableEntryClosure* cl,
                                                  DirtyCardQueue* into_cset_dcq,
                                                  bool concurrent,
@@ -2248,8 +2342,7 @@
   while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) {
     n_completed_buffers++;
   }
-  g1_policy()->record_update_rs_processed_buffers(worker_i,
-                                                  (double) n_completed_buffers);
+  g1_policy()->phase_times()->record_update_rs_processed_buffers(worker_i, n_completed_buffers);
   dcqs.clear_n_completed_buffers();
   assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!");
 }
@@ -2342,7 +2435,16 @@
 }
 #endif // !PRODUCT
 
-void G1CollectedHeap::increment_full_collections_completed(bool concurrent) {
+void G1CollectedHeap::increment_old_marking_cycles_started() {
+  assert(_old_marking_cycles_started == _old_marking_cycles_completed ||
+    _old_marking_cycles_started == _old_marking_cycles_completed + 1,
+    err_msg("Wrong marking cycle count (started: %d, completed: %d)",
+    _old_marking_cycles_started, _old_marking_cycles_completed));
+
+  _old_marking_cycles_started++;
+}
+
+void G1CollectedHeap::increment_old_marking_cycles_completed(bool concurrent) {
   MonitorLockerEx x(FullGCCount_lock, Mutex::_no_safepoint_check_flag);
 
   // We assume that if concurrent == true, then the caller is a
@@ -2350,11 +2452,6 @@
   // Set. If there's ever a cheap way to check this, we should add an
   // assert here.
 
-  // We have already incremented _total_full_collections at the start
-  // of the GC, so total_full_collections() represents how many full
-  // collections have been started.
-  unsigned int full_collections_started = total_full_collections();
-
   // Given that this method is called at the end of a Full GC or of a
   // concurrent cycle, and those can be nested (i.e., a Full GC can
   // interrupt a concurrent cycle), the number of full collections
@@ -2364,21 +2461,21 @@
 
   // This is the case for the inner caller, i.e. a Full GC.
   assert(concurrent ||
-         (full_collections_started == _full_collections_completed + 1) ||
-         (full_collections_started == _full_collections_completed + 2),
-         err_msg("for inner caller (Full GC): full_collections_started = %u "
-                 "is inconsistent with _full_collections_completed = %u",
-                 full_collections_started, _full_collections_completed));
+         (_old_marking_cycles_started == _old_marking_cycles_completed + 1) ||
+         (_old_marking_cycles_started == _old_marking_cycles_completed + 2),
+         err_msg("for inner caller (Full GC): _old_marking_cycles_started = %u "
+                 "is inconsistent with _old_marking_cycles_completed = %u",
+                 _old_marking_cycles_started, _old_marking_cycles_completed));
 
   // This is the case for the outer caller, i.e. the concurrent cycle.
   assert(!concurrent ||
-         (full_collections_started == _full_collections_completed + 1),
+         (_old_marking_cycles_started == _old_marking_cycles_completed + 1),
          err_msg("for outer caller (concurrent cycle): "
-                 "full_collections_started = %u "
-                 "is inconsistent with _full_collections_completed = %u",
-                 full_collections_started, _full_collections_completed));
-
-  _full_collections_completed += 1;
+                 "_old_marking_cycles_started = %u "
+                 "is inconsistent with _old_marking_cycles_completed = %u",
+                 _old_marking_cycles_started, _old_marking_cycles_completed));
+
+  _old_marking_cycles_completed += 1;
 
   // We need to clear the "in_progress" flag in the CM thread before
   // we wake up any waiters (especially when ExplicitInvokesConcurrent
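A toy model of the renamed counters and the invariant the asserts above encode: outside any cycle, started == completed; a Full GC nested inside a concurrent cycle can push started to completed + 2 before the inner caller finishes.

#include <cassert>

struct OldMarkingCycles {
  unsigned started;
  unsigned completed;
  OldMarkingCycles() : started(0), completed(0) { }

  void increment_started() {
    assert(started == completed || started == completed + 1);
    started++;
  }
  void increment_completed(bool concurrent) {
    if (concurrent) {   // outer caller: the concurrent cycle itself
      assert(started == completed + 1);
    } else {            // inner caller: a (possibly nested) Full GC
      assert(started == completed + 1 || started == completed + 2);
    }
    completed++;
  }
};
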
@@ -2414,7 +2511,7 @@
   assert_heap_not_locked();
 
   unsigned int gc_count_before;
-  unsigned int full_gc_count_before;
+  unsigned int old_marking_count_before;
   bool retry_gc;
 
   do {
@@ -2425,7 +2522,7 @@
 
       // Read the GC count while holding the Heap_lock
       gc_count_before = total_collections();
-      full_gc_count_before = total_full_collections();
+      old_marking_count_before = _old_marking_cycles_started;
     }
 
     if (should_do_concurrent_full_gc(cause)) {
@@ -2440,7 +2537,7 @@
 
       VMThread::execute(&op);
       if (!op.pause_succeeded()) {
-        if (full_gc_count_before == total_full_collections()) {
+        if (old_marking_count_before == _old_marking_cycles_started) {
           retry_gc = op.should_retry_gc();
         } else {
           // A Full GC happened while we were trying to schedule the
@@ -2468,7 +2565,7 @@
         VMThread::execute(&op);
       } else {
         // Schedule a Full GC.
-        VM_G1CollectFull op(gc_count_before, full_gc_count_before, cause);
+        VM_G1CollectFull op(gc_count_before, old_marking_count_before, cause);
         VMThread::execute(&op);
       }
     }
@@ -2499,7 +2596,7 @@
   IterateOopClosureRegionClosure(MemRegion mr, OopClosure* cl)
     : _mr(mr), _cl(cl) {}
   bool doHeapRegion(HeapRegion* r) {
-    if (! r->continuesHumongous()) {
+    if (!r->continuesHumongous()) {
       r->oop_iterate(_cl);
     }
     return false;
@@ -2570,17 +2667,12 @@
   _hrs.iterate(cl);
 }
 
-void G1CollectedHeap::heap_region_iterate_from(HeapRegion* r,
-                                               HeapRegionClosure* cl) const {
-  _hrs.iterate_from(r, cl);
-}
-
 void
 G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl,
-                                                 uint worker,
+                                                 uint worker_id,
                                                  uint no_of_par_workers,
                                                  jint claim_value) {
-  const size_t regions = n_regions();
+  const uint regions = n_regions();
   const uint max_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                              no_of_par_workers :
                              1);
@@ -2588,11 +2680,13 @@
          no_of_par_workers == workers()->total_workers(),
          "Non dynamic should use fixed number of workers");
   // try to spread out the starting points of the workers
-  const size_t start_index = regions / max_workers * (size_t) worker;
+  const HeapRegion* start_hr =
+                        start_region_for_worker(worker_id, no_of_par_workers);
+  const uint start_index = start_hr->hrs_index();
 
   // each worker will actually look at all regions
-  for (size_t count = 0; count < regions; ++count) {
-    const size_t index = (start_index + count) % regions;
+  for (uint count = 0; count < regions; ++count) {
+    const uint index = (start_index + count) % regions;
     assert(0 <= index && index < regions, "sanity");
     HeapRegion* r = region_at(index);
     // we'll ignore "continues humongous" regions (we'll process them
@@ -2614,7 +2708,7 @@
         // result, we might end up processing them twice. So, we'll do
         // them first (notice: most closures will ignore them anyway) and
         // then we'll do the "starts humongous" region.
-        for (size_t ch_index = index + 1; ch_index < regions; ++ch_index) {
+        for (uint ch_index = index + 1; ch_index < regions; ++ch_index) {
           HeapRegion* chr = region_at(ch_index);
 
           // if the region has already been claimed or it's not
@@ -2682,8 +2776,9 @@
 class CheckClaimValuesClosure : public HeapRegionClosure {
 private:
   jint _claim_value;
-  size_t _failures;
+  uint _failures;
   HeapRegion* _sh_region;
+
 public:
   CheckClaimValuesClosure(jint claim_value) :
     _claim_value(claim_value), _failures(0), _sh_region(NULL) { }
@@ -2711,9 +2806,7 @@
     }
     return false;
   }
-  size_t failures() {
-    return _failures;
-  }
+  uint failures() { return _failures; }
 };
 
 bool G1CollectedHeap::check_heap_region_claim_values(jint claim_value) {
@@ -2723,17 +2816,15 @@
 }
 
 class CheckClaimValuesInCSetHRClosure: public HeapRegionClosure {
-  jint   _claim_value;
-  size_t _failures;
+private:
+  jint _claim_value;
+  uint _failures;
 
 public:
   CheckClaimValuesInCSetHRClosure(jint claim_value) :
-    _claim_value(claim_value),
-    _failures(0) { }
-
-  size_t failures() {
-    return _failures;
-  }
+    _claim_value(claim_value), _failures(0) { }
+
+  uint failures() { return _failures; }
 
   bool doHeapRegion(HeapRegion* hr) {
     assert(hr->in_collection_set(), "how?");
@@ -2800,14 +2891,14 @@
 
   result = g1_policy()->collection_set();
   if (G1CollectedHeap::use_parallel_gc_threads()) {
-    size_t cs_size = g1_policy()->cset_region_length();
+    uint cs_size = g1_policy()->cset_region_length();
     uint active_workers = workers()->active_workers();
     assert(UseDynamicNumberOfGCThreads ||
              active_workers == workers()->total_workers(),
              "Unless dynamic should use total workers");
 
-    size_t end_ind   = (cs_size * worker_i) / active_workers;
-    size_t start_ind = 0;
+    uint end_ind   = (cs_size * worker_i) / active_workers;
+    uint start_ind = 0;
 
     if (worker_i > 0 &&
         _worker_cset_start_region_time_stamp[worker_i - 1] == gc_time_stamp) {
@@ -2817,7 +2908,7 @@
       result = _worker_cset_start_region[worker_i - 1];
     }
 
-    for (size_t i = start_ind; i < end_ind; i++) {
+    for (uint i = start_ind; i < end_ind; i++) {
       result = result->next_in_collection_set();
     }
   }
@@ -2833,6 +2924,17 @@
   return result;
 }
 
+HeapRegion* G1CollectedHeap::start_region_for_worker(uint worker_i,
+                                                     uint no_of_par_workers) {
+  uint worker_num =
+           G1CollectedHeap::use_parallel_gc_threads() ? no_of_par_workers : 1U;
+  assert(UseDynamicNumberOfGCThreads ||
+         no_of_par_workers == workers()->total_workers(),
+         "Non dynamic should use fixed number of workers");
+  const uint start_index = n_regions() * worker_i / worker_num;
+  return region_at(start_index);
+}
+
 void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) {
   HeapRegion* r = g1_policy()->collection_set();
   while (r != NULL) {
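start_region_for_worker() above replaces the inline start-index arithmetic: worker i of n begins its claim loop at region floor(regions * i / n) and then walks all regions modulo the total, spreading the workers' starting points. A small self-contained illustration:

#include <cstdio>

static unsigned start_index_for_worker(unsigned regions, unsigned worker_id,
                                       unsigned workers) {
  return regions * worker_id / workers;
}

int main() {
  const unsigned regions = 10, workers = 4;
  for (unsigned w = 0; w < workers; w++) {
    std::printf("worker %u starts at region %u\n", w,
                start_index_for_worker(regions, w, workers));
  }
  return 0;   // workers start at regions 0, 2, 5, 7
}
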
@@ -2946,6 +3048,51 @@
   g1_rem_set()->prepare_for_verify();
 }
 
+bool G1CollectedHeap::allocated_since_marking(oop obj, HeapRegion* hr,
+                                              VerifyOption vo) {
+  switch (vo) {
+  case VerifyOption_G1UsePrevMarking:
+    return hr->obj_allocated_since_prev_marking(obj);
+  case VerifyOption_G1UseNextMarking:
+    return hr->obj_allocated_since_next_marking(obj);
+  case VerifyOption_G1UseMarkWord:
+    return false;
+  default:
+    ShouldNotReachHere();
+  }
+  return false; // keep some compilers happy
+}
+
+HeapWord* G1CollectedHeap::top_at_mark_start(HeapRegion* hr, VerifyOption vo) {
+  switch (vo) {
+  case VerifyOption_G1UsePrevMarking: return hr->prev_top_at_mark_start();
+  case VerifyOption_G1UseNextMarking: return hr->next_top_at_mark_start();
+  case VerifyOption_G1UseMarkWord:    return NULL;
+  default:                            ShouldNotReachHere();
+  }
+  return NULL; // keep some compilers happy
+}
+
+bool G1CollectedHeap::is_marked(oop obj, VerifyOption vo) {
+  switch (vo) {
+  case VerifyOption_G1UsePrevMarking: return isMarkedPrev(obj);
+  case VerifyOption_G1UseNextMarking: return isMarkedNext(obj);
+  case VerifyOption_G1UseMarkWord:    return obj->is_gc_marked();
+  default:                            ShouldNotReachHere();
+  }
+  return false; // keep some compilers happy
+}
+
+const char* G1CollectedHeap::top_at_mark_start_str(VerifyOption vo) {
+  switch (vo) {
+  case VerifyOption_G1UsePrevMarking: return "PTAMS";
+  case VerifyOption_G1UseNextMarking: return "NTAMS";
+  case VerifyOption_G1UseMarkWord:    return "NONE";
+  default:                            ShouldNotReachHere();
+  }
+  return NULL; // keep some compilers happy
+}
+
 class VerifyLivenessOopClosure: public OopClosure {
   G1CollectedHeap* _g1h;
   VerifyOption _vo;
@@ -3033,17 +3180,15 @@
 
 class VerifyRegionClosure: public HeapRegionClosure {
 private:
-  bool         _allow_dirty;
-  bool         _par;
-  VerifyOption _vo;
-  bool         _failures;
+  bool             _par;
+  VerifyOption     _vo;
+  bool             _failures;
 public:
   // _vo == UsePrevMarking -> use "prev" marking information,
   // _vo == UseNextMarking -> use "next" marking information,
   // _vo == UseMarkWord    -> use mark word from object header.
-  VerifyRegionClosure(bool allow_dirty, bool par, VerifyOption vo)
-    : _allow_dirty(allow_dirty),
-      _par(par),
+  VerifyRegionClosure(bool par, VerifyOption vo)
+    : _par(par),
       _vo(vo),
       _failures(false) {}
 
@@ -3052,11 +3197,9 @@
   }
 
   bool doHeapRegion(HeapRegion* r) {
-    guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue,
-              "Should be unclaimed at verify points.");
     if (!r->continuesHumongous()) {
       bool failures = false;
-      r->verify(_allow_dirty, _vo, &failures);
+      r->verify(_vo, &failures);
       if (failures) {
         _failures = true;
       } else {
@@ -3124,7 +3267,6 @@
 class G1ParVerifyTask: public AbstractGangTask {
 private:
   G1CollectedHeap* _g1h;
-  bool             _allow_dirty;
   VerifyOption     _vo;
   bool             _failures;
 
@@ -3132,10 +3274,9 @@
   // _vo == UsePrevMarking -> use "prev" marking information,
   // _vo == UseNextMarking -> use "next" marking information,
   // _vo == UseMarkWord    -> use mark word from object header.
-  G1ParVerifyTask(G1CollectedHeap* g1h, bool allow_dirty, VerifyOption vo) :
+  G1ParVerifyTask(G1CollectedHeap* g1h, VerifyOption vo) :
     AbstractGangTask("Parallel verify task"),
     _g1h(g1h),
-    _allow_dirty(allow_dirty),
     _vo(vo),
     _failures(false) { }
 
@@ -3145,7 +3286,7 @@
 
   void work(uint worker_id) {
     HandleMark hm;
-    VerifyRegionClosure blk(_allow_dirty, true, _vo);
+    VerifyRegionClosure blk(true, _vo);
     _g1h->heap_region_par_iterate_chunked(&blk, worker_id,
                                           _g1h->workers()->active_workers(),
                                           HeapRegion::ParVerifyClaimValue);
@@ -3155,12 +3296,11 @@
   }
 };
 
-void G1CollectedHeap::verify(bool allow_dirty, bool silent) {
-  verify(allow_dirty, silent, VerifyOption_G1UsePrevMarking);
-}
-
-void G1CollectedHeap::verify(bool allow_dirty,
-                             bool silent,
+void G1CollectedHeap::verify(bool silent) {
+  verify(silent, VerifyOption_G1UsePrevMarking);
+}
+
+void G1CollectedHeap::verify(bool silent,
                              VerifyOption vo) {
   if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
     if (!silent) { gclog_or_tty->print("Roots (excluding permgen) "); }
@@ -3212,7 +3352,7 @@
       assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
              "sanity check");
 
-      G1ParVerifyTask task(this, allow_dirty, vo);
+      G1ParVerifyTask task(this, vo);
       assert(UseDynamicNumberOfGCThreads ||
         workers()->active_workers() == workers()->total_workers(),
         "If not dynamic should be using all the workers");
@@ -3234,7 +3374,7 @@
       assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
              "sanity check");
     } else {
-      VerifyRegionClosure blk(allow_dirty, false, vo);
+      VerifyRegionClosure blk(false, vo);
       heap_region_iterate(&blk);
       if (blk.failures()) {
         failures = true;
@@ -3284,12 +3424,12 @@
             _g1_storage.high_boundary());
   st->cr();
   st->print("  region size " SIZE_FORMAT "K, ", HeapRegion::GrainBytes / K);
-  size_t young_regions = _young_list->length();
-  st->print(SIZE_FORMAT " young (" SIZE_FORMAT "K), ",
-            young_regions, young_regions * HeapRegion::GrainBytes / K);
-  size_t survivor_regions = g1_policy()->recorded_survivor_regions();
-  st->print(SIZE_FORMAT " survivors (" SIZE_FORMAT "K)",
-            survivor_regions, survivor_regions * HeapRegion::GrainBytes / K);
+  uint young_regions = _young_list->length();
+  st->print("%u young (" SIZE_FORMAT "K), ", young_regions,
+            (size_t) young_regions * HeapRegion::GrainBytes / K);
+  uint survivor_regions = g1_policy()->recorded_survivor_regions();
+  st->print("%u survivors (" SIZE_FORMAT "K)", survivor_regions,
+            (size_t) survivor_regions * HeapRegion::GrainBytes / K);
   st->cr();
   perm()->as_gen()->print_on(st);
 }
@@ -3299,7 +3439,11 @@
 
   // Print the per-region information.
   st->cr();
-  st->print_cr("Heap Regions: (Y=young(eden), SU=young(survivor), HS=humongous(starts), HC=humongous(continues), CS=collection set, F=free, TS=gc time stamp, PTAMS=previous top-at-mark-start, NTAMS=next top-at-mark-start)");
+  st->print_cr("Heap Regions: (Y=young(eden), SU=young(survivor), "
+               "HS=humongous(starts), HC=humongous(continues), "
+               "CS=collection set, F=free, TS=gc time stamp, "
+               "PTAMS=previous top-at-mark-start, "
+               "NTAMS=next top-at-mark-start)");
   PrintRegionClosure blk(st);
   heap_region_iterate(&blk);
 }
@@ -3460,15 +3604,11 @@
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   size_t buffer_size = dcqs.buffer_size();
   size_t buffer_num = dcqs.completed_buffers_num();
-  return buffer_size * buffer_num + extra_cards;
-}
-
-size_t G1CollectedHeap::max_pending_card_num() {
-  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-  size_t buffer_size = dcqs.buffer_size();
-  size_t buffer_num  = dcqs.completed_buffers_num();
-  int thread_num  = Threads::number_of_threads();
-  return (buffer_num + thread_num) * buffer_size;
+
+  // PtrQueueSet::buffer_size() and PtrQueue::size() return sizes
+  // in bytes - not the number of 'entries'. We need to convert
+  // into a number of cards.
+  return (buffer_size * buffer_num + extra_cards) / oopSize;
 }
 
 size_t G1CollectedHeap::cards_scanned() {
@@ -3477,16 +3617,16 @@
 
 void
 G1CollectedHeap::setup_surviving_young_words() {
-  guarantee( _surviving_young_words == NULL, "pre-condition" );
-  size_t array_length = g1_policy()->young_cset_region_length();
-  _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, array_length);
+  assert(_surviving_young_words == NULL, "pre-condition");
+  uint array_length = g1_policy()->young_cset_region_length();
+  _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, (size_t) array_length, mtGC);
   if (_surviving_young_words == NULL) {
     vm_exit_out_of_memory(sizeof(size_t) * array_length,
                           "Not enough space for young surv words summary.");
   }
-  memset(_surviving_young_words, 0, array_length * sizeof(size_t));
+  memset(_surviving_young_words, 0, (size_t) array_length * sizeof(size_t));
 #ifdef ASSERT
-  for (size_t i = 0;  i < array_length; ++i) {
+  for (uint i = 0;  i < array_length; ++i) {
     assert( _surviving_young_words[i] == 0, "memset above" );
   }
 #endif // ASSERT
@@ -3495,15 +3635,16 @@
 void
 G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) {
   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
-  size_t array_length = g1_policy()->young_cset_region_length();
-  for (size_t i = 0; i < array_length; ++i)
+  uint array_length = g1_policy()->young_cset_region_length();
+  for (uint i = 0; i < array_length; ++i) {
     _surviving_young_words[i] += surv_young_words[i];
+  }
 }
 
 void
 G1CollectedHeap::cleanup_surviving_young_words() {
   guarantee( _surviving_young_words != NULL, "pre-condition" );
-  FREE_C_HEAP_ARRAY(size_t, _surviving_young_words);
+  FREE_C_HEAP_ARRAY(size_t, _surviving_young_words, mtGC);
   _surviving_young_words = NULL;
 }
 
@@ -3595,26 +3736,22 @@
 
   // Inner scope for scope based logging, timers, and stats collection
   {
-    char verbose_str[128];
-    sprintf(verbose_str, "GC pause ");
-    if (g1_policy()->gcs_are_young()) {
-      strcat(verbose_str, "(young)");
-    } else {
-      strcat(verbose_str, "(mixed)");
-    }
     if (g1_policy()->during_initial_mark_pause()) {
-      strcat(verbose_str, " (initial-mark)");
       // We are about to start a marking cycle, so we increment the
       // full collection counter.
-      increment_total_full_collections();
+      increment_old_marking_cycles_started();
     }
-
-    // if PrintGCDetails is on, we'll print long statistics information
+    // if the log level is "finer", we'll print long statistics information
     // in the collector policy code, so let's not print this as the output
     // is messy if we do.
-    gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
-    TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty);
+    gclog_or_tty->date_stamp(G1Log::fine() && PrintGCDateStamps);
+    TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty);
+
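+    // Capture the worker count and the pause start time up front; both
+    // feed the phase times reporting and the log output further down.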
+    int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                                workers()->active_workers() : 1);
+    double pause_start_sec = os::elapsedTime();
+    g1_policy()->phase_times()->note_gc_start(active_workers);
+    bool initial_mark_gc = g1_policy()->during_initial_mark_pause();
 
     TraceCollectorStats tcs(g1mm()->incremental_collection_counters());
     TraceMemoryManagerStats tms(false /* fullGC */, gc_cause());
@@ -3643,14 +3780,7 @@
       increment_total_collections(false /* full gc */);
       increment_gc_time_stamp();
 
-      if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) {
-        HandleMark hm;  // Discard invalid handles created during verification
-        gclog_or_tty->print(" VerifyBeforeGC:");
-        prepare_for_verify();
-        Universe::verify(/* allow dirty */ false,
-                         /* silent      */ false,
-                         /* option      */ VerifyOption_G1UsePrevMarking);
-      }
+      verify_before_gc();
 
       COMPILER2_PRESENT(DerivedPointerTable::clear());
 
@@ -3678,9 +3808,15 @@
         // before the start GC event.
         _hr_printer.start_gc(false /* full */, (size_t) total_collections());
 
+        // This timing is only used by the ergonomics to handle our pause target.
+        // It is unclear why this should not include the full pause. We will
+        // investigate this in CR 7178365.
+        //
+        // Preserving the old comment here in case it helps the investigation:
+        //
         // The elapsed time induced by the start time below deliberately elides
         // the possible verification above.
-        double start_time_sec = os::elapsedTime();
+        double sample_start_time_sec = os::elapsedTime();
         size_t start_used_bytes = used();
 
 #if YOUNG_LIST_VERBOSE
@@ -3689,7 +3825,7 @@
         g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty);
 #endif // YOUNG_LIST_VERBOSE
 
-        g1_policy()->record_collection_pause_start(start_time_sec,
+        g1_policy()->record_collection_pause_start(sample_start_time_sec,
                                                    start_used_bytes);
 
         double scan_wait_start = os::elapsedTime();
@@ -3698,11 +3834,12 @@
         // objects on them have been correctly scanned before we start
         // moving them during the GC.
         bool waited = _cm->root_regions()->wait_until_scan_finished();
+        double wait_time_ms = 0.0;
         if (waited) {
           double scan_wait_end = os::elapsedTime();
-          double wait_time_ms = (scan_wait_end - scan_wait_start) * 1000.0;
-          g1_policy()->record_root_region_scan_wait_time(wait_time_ms);
+          wait_time_ms = (scan_wait_end - scan_wait_start) * 1000.0;
         }
+        g1_policy()->phase_times()->record_root_region_scan_wait_time(wait_time_ms);
 
 #if YOUNG_LIST_VERBOSE
         gclog_or_tty->print_cr("\nAfter recording pause start.\nYoung_list:");
@@ -3856,12 +3993,12 @@
                                  true  /* verify_fingers */);
         _cm->note_end_of_gc();
 
-        double end_time_sec = os::elapsedTime();
-        double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
-        g1_policy()->record_pause_time_ms(pause_time_ms);
-        int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                                workers()->active_workers() : 1);
-        g1_policy()->record_collection_pause_end(active_workers);
+        // This timing is only used by the ergonomics to handle our pause target.
+        // It is unclear why this should not include the full pause. We will
+        // investigate this in CR 7178365.
+        double sample_end_time_sec = os::elapsedTime();
+        double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS;
+        g1_policy()->record_collection_pause_end(pause_time_ms);
 
         MemoryService::track_memory_usage();
 
@@ -3888,14 +4025,7 @@
         // scanning cards (see CR 7039627).
         increment_gc_time_stamp();
 
-        if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
-          HandleMark hm;  // Discard invalid handles created during verification
-          gclog_or_tty->print(" VerifyAfterGC:");
-          prepare_for_verify();
-          Universe::verify(/* allow dirty */ true,
-                           /* silent      */ false,
-                           /* option      */ VerifyOption_G1UsePrevMarking);
-        }
+        verify_after_gc();
 
         assert(!ref_processor_stw()->discovery_enabled(), "Postcondition");
         ref_processor_stw()->verify_no_references_recorded();
@@ -3909,9 +4039,6 @@
       // RETIRE events are generated before the end GC event.
       _hr_printer.end_gc(false /* full */, (size_t) total_collections());
 
-      // We have to do this after we decide whether to expand the heap or not.
-      g1_policy()->print_heap_transition();
-
       if (mark_in_progress()) {
         concurrent_mark()->update_g1_committed();
       }
@@ -3921,32 +4048,57 @@
 #endif
 
       gc_epilogue(false);
-    }
-
-    if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) {
-      gclog_or_tty->print_cr("Stopping after GC #%d", ExitAfterGCNum);
-      print_tracing_info();
-      vm_exit(-1);
+
+      if (G1Log::fine()) {
+        if (PrintGCTimeStamps) {
+          gclog_or_tty->stamp();
+          gclog_or_tty->print(": ");
+        }
+
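+        // Build the pause description for the log line, e.g.
+        // "GC pause (young) (initial-mark)".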
+        GCCauseString gc_cause_str = GCCauseString("GC pause", gc_cause())
+          .append(g1_policy()->gcs_are_young() ? " (young)" : " (mixed)")
+          .append(initial_mark_gc ? " (initial-mark)" : "");
+
+        double pause_time_sec = os::elapsedTime() - pause_start_sec;
+
+        if (G1Log::finer()) {
+          if (evacuation_failed()) {
+            gc_cause_str.append(" (to-space exhausted)");
+          }
+          gclog_or_tty->print_cr("[%s, %3.7f secs]", (const char*)gc_cause_str, pause_time_sec);
+          g1_policy()->phase_times()->note_gc_end();
+          g1_policy()->phase_times()->print(pause_time_sec);
+          g1_policy()->print_detailed_heap_transition();
+        } else {
+          if (evacuation_failed()) {
+            gc_cause_str.append("--");
+          }
+          gclog_or_tty->print("[%s", (const char*)gc_cause_str);
+          g1_policy()->print_heap_transition();
+          gclog_or_tty->print_cr(", %3.7f secs]", pause_time_sec);
+        }
+      }
     }
-  }
-
-  // The closing of the inner scope, immediately above, will complete
-  // the PrintGC logging output. The record_collection_pause_end() call
-  // above will complete the logging output of PrintGCDetails.
-  //
-  // It is not yet to safe, however, to tell the concurrent mark to
-  // start as we have some optional output below. We don't want the
-  // output from the concurrent mark thread interfering with this
-  // logging output either.
-
-  _hrs.verify_optional();
-  verify_region_sets_optional();
-
-  TASKQUEUE_STATS_ONLY(if (ParallelGCVerbose) print_taskqueue_stats());
-  TASKQUEUE_STATS_ONLY(reset_taskqueue_stats());
-
-  print_heap_after_gc();
-  g1mm()->update_sizes();
+
+    // It is not yet safe to tell the concurrent mark to
+    // start as we have some optional output below. We don't want the
+    // output from the concurrent mark thread interfering with this
+    // logging output either.
+
+    _hrs.verify_optional();
+    verify_region_sets_optional();
+
+    TASKQUEUE_STATS_ONLY(if (ParallelGCVerbose) print_taskqueue_stats());
+    TASKQUEUE_STATS_ONLY(reset_taskqueue_stats());
+
+    print_heap_after_gc();
+
+    // We must call G1MonitoringSupport::update_sizes() in the same scoping level
+    // as an active TraceMemoryManagerStats object (i.e. before the destructor for the
+    // TraceMemoryManagerStats is called) so that the G1 memory pools are updated
+    // before any GC notifications are raised.
+    g1mm()->update_sizes();
+  }
 
   if (G1SummarizeRSetStats &&
       (G1SummarizeRSetStatsPeriod > 0) &&
@@ -3977,17 +4129,22 @@
   size_t gclab_word_size;
   switch (purpose) {
     case GCAllocForSurvived:
-      gclab_word_size = YoungPLABSize;
+      gclab_word_size = _survivor_plab_stats.desired_plab_sz();
       break;
     case GCAllocForTenured:
-      gclab_word_size = OldPLABSize;
+      gclab_word_size = _old_plab_stats.desired_plab_sz();
       break;
     default:
       assert(false, "unknown GCAllocPurpose");
-      gclab_word_size = OldPLABSize;
+      gclab_word_size = _old_plab_stats.desired_plab_sz();
       break;
   }
-  return gclab_word_size;
+
+  // Prevent humongous PLAB sizes for two reasons:
+  // * PLABs are allocated using similar paths to oops, but should
+  //   never be in a humongous region
+  // * Allowing humongous PLABs needlessly churns the region free lists
+  return MIN2(_humongous_object_threshold_in_words, gclab_word_size);
 }
 
 void G1CollectedHeap::init_mutator_alloc_region() {
@@ -4043,6 +4200,11 @@
   // want either way so no reason to check explicitly for either
   // condition.
   _retained_old_gc_alloc_region = _old_gc_alloc_region.release();
+
+  if (ResizePLAB) {
+    _survivor_plab_stats.adjust_desired_plab_sz();
+    _old_plab_stats.adjust_desired_plab_sz();
+  }
 }
 
 void G1CollectedHeap::abandon_gc_alloc_regions() {
@@ -4054,7 +4216,7 @@
 void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) {
   _drain_in_progress = false;
   set_evac_failure_closure(cl);
-  _evac_failure_scan_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(40, true);
+  _evac_failure_scan_stack = new (ResourceObj::C_HEAP, mtGC) GrowableArray<oop>(40, true);
 }
 
 void G1CollectedHeap::finalize_for_evac_failure() {
@@ -4068,7 +4230,6 @@
 
 void G1CollectedHeap::remove_self_forwarding_pointers() {
   assert(check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
-  assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
 
   G1ParRemoveSelfForwardPtrsTask rsfp_task(this);
 
@@ -4086,7 +4247,6 @@
   reset_cset_heap_region_claim_values();
 
   assert(check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
-  assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
 
   // Now restore saved marks, if any.
   if (_objs_with_preserved_marks != NULL) {
@@ -4190,9 +4350,9 @@
     if (_objs_with_preserved_marks == NULL) {
       assert(_preserved_marks_of_objs == NULL, "Both or none.");
       _objs_with_preserved_marks =
-        new (ResourceObj::C_HEAP) GrowableArray<oop>(40, true);
+        new (ResourceObj::C_HEAP, mtGC) GrowableArray<oop>(40, true);
       _preserved_marks_of_objs =
-        new (ResourceObj::C_HEAP) GrowableArray<markOop>(40, true);
+        new (ResourceObj::C_HEAP, mtGC) GrowableArray<markOop>(40, true);
     }
     _objs_with_preserved_marks->push(obj);
     _preserved_marks_of_objs->push(m);
@@ -4248,16 +4408,16 @@
   // non-young regions (where the age is -1)
   // We also add a few elements at the beginning and at the end in
   // an attempt to eliminate cache contention
-  size_t real_length = 1 + _g1h->g1_policy()->young_cset_region_length();
-  size_t array_length = PADDING_ELEM_NUM +
-                        real_length +
-                        PADDING_ELEM_NUM;
-  _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length);
+  uint real_length = 1 + _g1h->g1_policy()->young_cset_region_length();
+  uint array_length = PADDING_ELEM_NUM +
+                      real_length +
+                      PADDING_ELEM_NUM;
+  _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length, mtGC);
   if (_surviving_young_words_base == NULL)
     vm_exit_out_of_memory(array_length * sizeof(size_t),
                           "Not enough space for young surv histo.");
   _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
-  memset(_surviving_young_words, 0, real_length * sizeof(size_t));
+  memset(_surviving_young_words, 0, (size_t) real_length * sizeof(size_t));
 
   _alloc_buffers[GCAllocForSurvived] = &_surviving_alloc_buffer;
   _alloc_buffers[GCAllocForTenured]  = &_tenured_alloc_buffer;
@@ -4355,7 +4515,8 @@
   _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()),
   _mark_in_progress(_g1->mark_in_progress()) { }
 
-void G1ParCopyHelper::mark_object(oop obj) {
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
+void G1ParCopyClosure<do_gen_barrier, barrier, do_mark_object>::mark_object(oop obj) {
 #ifdef ASSERT
   HeapRegion* hr = _g1->heap_region_containing(obj);
   assert(hr != NULL, "sanity");
@@ -4366,7 +4527,9 @@
   _cm->grayRoot(obj, (size_t) obj->size(), _worker_id);
 }
 
-void G1ParCopyHelper::mark_forwarded_object(oop from_obj, oop to_obj) {
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
+void G1ParCopyClosure<do_gen_barrier, barrier, do_mark_object>
+  ::mark_forwarded_object(oop from_obj, oop to_obj) {
 #ifdef ASSERT
   assert(from_obj->is_forwarded(), "from obj should be forwarded");
   assert(from_obj->forwardee() == to_obj, "to obj should be the forwardee");
@@ -4388,8 +4551,10 @@
   _cm->grayRoot(to_obj, (size_t) from_obj->size(), _worker_id);
 }
 
-oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
-  size_t    word_sz = old->size();
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
+oop G1ParCopyClosure<do_gen_barrier, barrier, do_mark_object>
+  ::copy_to_survivor_space(oop old) {
+  size_t word_sz = old->size();
   HeapRegion* from_region = _g1->heap_region_containing_raw(old);
   // +1 to make the -1 indexes valid...
   int       young_index = from_region->young_index_in_cset()+1;
@@ -4402,7 +4567,15 @@
   GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
                                                              word_sz);
   HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz);
-  oop       obj     = oop(obj_ptr);
+#ifndef PRODUCT
+  // Should this evacuation fail?
+  if (_g1->evacuation_should_fail()) {
+    if (obj_ptr != NULL) {
+      _par_scan_state->undo_allocation(alloc_purpose, obj_ptr, word_sz);
+      obj_ptr = NULL;
+    }
+  }
+#endif // !PRODUCT
 
   if (obj_ptr == NULL) {
     // This will either forward-to-self, or detect that someone else has
@@ -4411,6 +4584,8 @@
     return _g1->handle_evacuation_failure_par(cl, old);
   }
 
+  oop obj = oop(obj_ptr);
+
   // We're going to allocate linearly, so might as well prefetch ahead.
   Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
 
@@ -4457,8 +4632,8 @@
     } else {
       // No point in using the slower heap_region_containing() method,
       // given that we know obj is in the heap.
-      _scanner->set_region(_g1->heap_region_containing_raw(obj));
-      obj->oop_iterate_backwards(_scanner);
+      _scanner.set_region(_g1->heap_region_containing_raw(obj));
+      obj->oop_iterate_backwards(&_scanner);
     }
   } else {
     _par_scan_state->undo_allocation(alloc_purpose, obj_ptr, word_sz);
@@ -4673,76 +4848,141 @@
     if (worker_id >= _n_workers) return;  // no work needed this round
 
     double start_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->record_gc_worker_start_time(worker_id, start_time_ms);
-
-    ResourceMark rm;
-    HandleMark   hm;
-
-    ReferenceProcessor*             rp = _g1h->ref_processor_stw();
-
-    G1ParScanThreadState            pss(_g1h, worker_id);
-    G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, rp);
-    G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
-    G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, rp);
-
-    pss.set_evac_closure(&scan_evac_cl);
-    pss.set_evac_failure_closure(&evac_failure_cl);
-    pss.set_partial_scan_closure(&partial_scan_cl);
-
-    G1ParScanExtRootClosure        only_scan_root_cl(_g1h, &pss, rp);
-    G1ParScanPermClosure           only_scan_perm_cl(_g1h, &pss, rp);
-
-    G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss, rp);
-    G1ParScanAndMarkPermClosure    scan_mark_perm_cl(_g1h, &pss, rp);
-
-    OopClosure*                    scan_root_cl = &only_scan_root_cl;
-    OopsInHeapRegionClosure*       scan_perm_cl = &only_scan_perm_cl;
-
-    if (_g1h->g1_policy()->during_initial_mark_pause()) {
-      // We also need to mark copied objects.
-      scan_root_cl = &scan_mark_root_cl;
-      scan_perm_cl = &scan_mark_perm_cl;
-    }
-
-    G1ParPushHeapRSClosure          push_heap_rs_cl(_g1h, &pss);
-
-    pss.start_strong_roots();
-    _g1h->g1_process_strong_roots(/* not collecting perm */ false,
-                                  SharedHeap::SO_AllClasses,
-                                  scan_root_cl,
-                                  &push_heap_rs_cl,
-                                  scan_perm_cl,
-                                  worker_id);
-    pss.end_strong_roots();
+    _g1h->g1_policy()->phase_times()->record_gc_worker_start_time(worker_id, start_time_ms);
 
     {
-      double start = os::elapsedTime();
-      G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
-      evac.do_void();
-      double elapsed_ms = (os::elapsedTime()-start)*1000.0;
-      double term_ms = pss.term_time()*1000.0;
-      _g1h->g1_policy()->record_obj_copy_time(worker_id, elapsed_ms-term_ms);
-      _g1h->g1_policy()->record_termination(worker_id, term_ms, pss.term_attempts());
+      ResourceMark rm;
+      HandleMark   hm;
+
+      ReferenceProcessor*             rp = _g1h->ref_processor_stw();
+
+      G1ParScanThreadState            pss(_g1h, worker_id);
+      G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, rp);
+      G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
+      G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, rp);
+
+      pss.set_evac_closure(&scan_evac_cl);
+      pss.set_evac_failure_closure(&evac_failure_cl);
+      pss.set_partial_scan_closure(&partial_scan_cl);
+
+      G1ParScanExtRootClosure        only_scan_root_cl(_g1h, &pss, rp);
+      G1ParScanPermClosure           only_scan_perm_cl(_g1h, &pss, rp);
+
+      G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss, rp);
+      G1ParScanAndMarkPermClosure    scan_mark_perm_cl(_g1h, &pss, rp);
+
+      OopClosure*                    scan_root_cl = &only_scan_root_cl;
+      OopsInHeapRegionClosure*       scan_perm_cl = &only_scan_perm_cl;
+
+      if (_g1h->g1_policy()->during_initial_mark_pause()) {
+        // We also need to mark copied objects.
+        scan_root_cl = &scan_mark_root_cl;
+        scan_perm_cl = &scan_mark_perm_cl;
+      }
+
+      G1ParPushHeapRSClosure          push_heap_rs_cl(_g1h, &pss);
+
+      pss.start_strong_roots();
+      _g1h->g1_process_strong_roots(/* not collecting perm */ false,
+                                    SharedHeap::SO_AllClasses,
+                                    scan_root_cl,
+                                    &push_heap_rs_cl,
+                                    scan_perm_cl,
+                                    worker_id);
+      pss.end_strong_roots();
+
+      {
+        double start = os::elapsedTime();
+        G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
+        evac.do_void();
+        double elapsed_ms = (os::elapsedTime()-start)*1000.0;
+        double term_ms = pss.term_time()*1000.0;
+        _g1h->g1_policy()->phase_times()->add_obj_copy_time(worker_id, elapsed_ms-term_ms);
+        _g1h->g1_policy()->phase_times()->record_termination(worker_id, term_ms, pss.term_attempts());
+      }
+      _g1h->g1_policy()->record_thread_age_table(pss.age_table());
+      _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
+
+      if (ParallelGCVerbose) {
+        MutexLocker x(stats_lock());
+        pss.print_termination_stats(worker_id);
+      }
+
+      assert(pss.refs()->is_empty(), "should be empty");
+
+      // Close the inner scope so that the ResourceMark and HandleMark
+      // destructors are executed here and are included as part of the
+      // "GC Worker Time".
     }
-    _g1h->g1_policy()->record_thread_age_table(pss.age_table());
-    _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
-
-    // Clean up any par-expanded rem sets.
-    HeapRegionRemSet::par_cleanup();
-
-    if (ParallelGCVerbose) {
-      MutexLocker x(stats_lock());
-      pss.print_termination_stats(worker_id);
-    }
-
-    assert(pss.refs()->is_empty(), "should be empty");
+
     double end_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->record_gc_worker_end_time(worker_id, end_time_ms);
+    _g1h->g1_policy()->phase_times()->record_gc_worker_end_time(worker_id, end_time_ms);
   }
 };
 
 // *** Common G1 Evacuation Stuff
 
+// Closures that support the filtering of CodeBlobs scanned during
+// external root scanning.
+
+// Closure applied to reference fields in code blobs (specifically nmethods)
+// to determine whether an nmethod contains references that point into
+// the collection set. Used as a predicate when walking code roots so
+// that only nmethods that point into the collection set are added to the
+// 'marked' list.
+
+class G1FilteredCodeBlobToOopClosure : public CodeBlobToOopClosure {
+
+  class G1PointsIntoCSOopClosure : public OopClosure {
+    G1CollectedHeap* _g1;
+    bool _points_into_cs;
+  public:
+    G1PointsIntoCSOopClosure(G1CollectedHeap* g1) :
+      _g1(g1), _points_into_cs(false) { }
+
+    bool points_into_cs() const { return _points_into_cs; }
+
+    template <class T>
+    void do_oop_nv(T* p) {
+      if (!_points_into_cs) {
+        T heap_oop = oopDesc::load_heap_oop(p);
+        if (!oopDesc::is_null(heap_oop) &&
+            _g1->in_cset_fast_test(oopDesc::decode_heap_oop_not_null(heap_oop))) {
+          _points_into_cs = true;
+        }
+      }
+    }
+
+    virtual void do_oop(oop* p)        { do_oop_nv(p); }
+    virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+  };
+
+  G1CollectedHeap* _g1;
+
+public:
+  G1FilteredCodeBlobToOopClosure(G1CollectedHeap* g1, OopClosure* cl) :
+    CodeBlobToOopClosure(cl, true), _g1(g1) { }
+
+  virtual void do_code_blob(CodeBlob* cb) {
+    nmethod* nm = cb->as_nmethod_or_null();
+    if (nm != NULL && !(nm->test_oops_do_mark())) {
+      G1PointsIntoCSOopClosure predicate_cl(_g1);
+      nm->oops_do(&predicate_cl);
+
+      if (predicate_cl.points_into_cs()) {
+        // At least one of the reference fields or the oop relocations
+        // in the nmethod points into the collection set. We have to
+        // 'mark' this nmethod.
+        // Note: Revisit the following if CodeBlobToOopClosure::do_code_blob()
+        // or MarkingCodeBlobClosure::do_code_blob() change.
+        if (!nm->test_set_oops_do_mark()) {
+          do_newly_marked_nmethod(nm);
+        }
+      }
+    }
+  }
+};
+
 // This method is run in a GC worker.
 
 void
@@ -4764,7 +5004,7 @@
 
   // Walk the code cache w/o buffering, because StarTask cannot handle
   // unaligned oop locations.
-  CodeBlobToOopClosure eager_scan_code_roots(scan_non_heap_roots, /*do_marking=*/ true);
+  G1FilteredCodeBlobToOopClosure eager_scan_code_roots(this, scan_non_heap_roots);
 
   process_strong_roots(false, // no scoping; this is parallel code
                        collecting_perm_gen, so,
@@ -4785,28 +5025,29 @@
   buf_scan_non_heap_roots.done();
   buf_scan_perm.done();
 
-  double ext_roots_end = os::elapsedTime();
-
-  g1_policy()->reset_obj_copy_time(worker_i);
   double obj_copy_time_sec = buf_scan_perm.closure_app_seconds() +
                                 buf_scan_non_heap_roots.closure_app_seconds();
-  g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
+  g1_policy()->phase_times()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
 
   double ext_root_time_ms =
-    ((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0;
-
-  g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
+    ((os::elapsedTime() - ext_roots_start) - obj_copy_time_sec) * 1000.0;
+
+  g1_policy()->phase_times()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
 
   // During conc marking we have to filter the per-thread SATB buffers
   // to make sure we remove any oops into the CSet (which will show up
   // as implicitly live).
+  double satb_filtering_ms = 0.0;
   if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers)) {
     if (mark_in_progress()) {
+      double satb_filter_start = os::elapsedTime();
+
       JavaThread::satb_mark_queue_set().filter_thread_buffers();
+
+      satb_filtering_ms = (os::elapsedTime() - satb_filter_start) * 1000.0;
     }
   }
-  double satb_filtering_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
-  g1_policy()->record_satb_filtering_time(worker_i, satb_filtering_ms);
+  g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms);
 
   // Now scan the complement of the collection set.
   if (scan_rs != NULL) {
@@ -5306,7 +5547,7 @@
   assert(pss.refs()->is_empty(), "both queue and overflow should be empty");
 
   double ref_proc_time = os::elapsedTime() - ref_proc_start;
-  g1_policy()->record_ref_proc_time(ref_proc_time * 1000.0);
+  g1_policy()->phase_times()->record_ref_proc_time(ref_proc_time * 1000.0);
 }
 
 // Weak Reference processing during an evacuation pause (part 2).
@@ -5343,13 +5584,16 @@
   // and could significantly increase the pause time.
 
   double ref_enq_time = os::elapsedTime() - ref_enq_start;
-  g1_policy()->record_ref_enq_time(ref_enq_time * 1000.0);
+  g1_policy()->phase_times()->record_ref_enq_time(ref_enq_time * 1000.0);
 }
 
 void G1CollectedHeap::evacuate_collection_set() {
   _expand_heap_after_alloc_failure = true;
   set_evacuation_failed(false);
 
+  // Should G1EvacuationFailureALot be in effect for this GC?
+  NOT_PRODUCT(set_evacuation_failure_alot_for_current_gc();)
+
   g1_rem_set()->prepare_for_oops_into_collection_set_do();
   concurrent_g1_refine()->set_use_cache(false);
   concurrent_g1_refine()->clear_hot_cache_claimed_index();
@@ -5378,25 +5622,39 @@
   rem_set()->prepare_for_younger_refs_iterate(true);
 
   assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty");
-  double start_par = os::elapsedTime();
-
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    // The individual threads will set their evac-failure closures.
+  double start_par_time_sec = os::elapsedTime();
+  double end_par_time_sec;
+
+  {
     StrongRootsScope srs(this);
-    if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
-    // These tasks use ShareHeap::_process_strong_tasks
-    assert(UseDynamicNumberOfGCThreads ||
-           workers()->active_workers() == workers()->total_workers(),
-           "If not dynamic should be using all the  workers");
-    workers()->run_task(&g1_par_task);
-  } else {
-    StrongRootsScope srs(this);
-    g1_par_task.set_for_termination(n_workers);
-    g1_par_task.work(0);
-  }
-
-  double par_time = (os::elapsedTime() - start_par) * 1000.0;
-  g1_policy()->record_par_time(par_time);
+
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      // The individual threads will set their evac-failure closures.
+      if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
+      // These tasks use SharedHeap::_process_strong_tasks
+      assert(UseDynamicNumberOfGCThreads ||
+             workers()->active_workers() == workers()->total_workers(),
+             "If not dynamic should be using all the  workers");
+      workers()->run_task(&g1_par_task);
+    } else {
+      g1_par_task.set_for_termination(n_workers);
+      g1_par_task.work(0);
+    }
+    end_par_time_sec = os::elapsedTime();
+
+    // Closing the inner scope will execute the destructor
+    // for the StrongRootsScope object. We record the current
+    // elapsed time before closing the scope so that time
+    // taken for the SRS destructor is NOT included in the
+    // reported parallel time.
+  }
+
+  double par_time_ms = (end_par_time_sec - start_par_time_sec) * 1000.0;
+  g1_policy()->phase_times()->record_par_time(par_time_ms);
+
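+  // Anything between the end of the parallel phase and now (including the
+  // StrongRootsScope destructor) is reported as code root fixup time.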
+  double code_root_fixup_time_ms =
+        (os::elapsedTime() - end_par_time_sec) * 1000.0;
+  g1_policy()->phase_times()->record_code_root_fixup_time(code_root_fixup_time_ms);
 
   set_par_threads(0);
 
@@ -5427,11 +5685,11 @@
 
   if (evacuation_failed()) {
     remove_self_forwarding_pointers();
-    if (PrintGCDetails) {
-      gclog_or_tty->print(" (to-space overflow)");
-    } else if (PrintGC) {
-      gclog_or_tty->print("--");
-    }
+
+    // Reset the G1EvacuationFailureALot counters and flags
+    // Note: the values are reset only when an actual
+    // evacuation failure occurs.
+    NOT_PRODUCT(reset_evacuation_should_fail();)
   }
 
   // Enqueue any remaining references remaining on the STW
@@ -5501,19 +5759,18 @@
   size_t hr_capacity = hr->capacity();
   size_t hr_pre_used = 0;
   _humongous_set.remove_with_proxy(hr, humongous_proxy_set);
+  // We need to read this before we make the region non-humongous,
+  // otherwise the information will be gone.
+  uint last_index = hr->last_hc_index();
   hr->set_notHumongous();
   free_region(hr, &hr_pre_used, free_list, par);
 
-  size_t i = hr->hrs_index() + 1;
-  size_t num = 1;
-  while (i < n_regions()) {
+  uint i = hr->hrs_index() + 1;
+  while (i < last_index) {
     HeapRegion* curr_hr = region_at(i);
-    if (!curr_hr->continuesHumongous()) {
-      break;
-    }
+    assert(curr_hr->continuesHumongous(), "invariant");
     curr_hr->set_notHumongous();
     free_region(curr_hr, &hr_pre_used, free_list, par);
-    num += 1;
     i += 1;
   }
   assert(hr_pre_used == hr_used,
@@ -5621,7 +5878,6 @@
 
 void G1CollectedHeap::verify_dirty_young_regions() {
   verify_dirty_young_list(_young_list->first_region());
-  verify_dirty_young_list(_young_list->first_survivor_region());
 }
 #endif
 
@@ -5658,7 +5914,7 @@
   }
 
   double elapsed = os::elapsedTime() - start;
-  g1_policy()->record_clear_ct_time(elapsed * 1000.0);
+  g1_policy()->phase_times()->record_clear_ct_time(elapsed * 1000.0);
 }
 
 void G1CollectedHeap::free_collection_set(HeapRegion* cs_head) {
@@ -5714,7 +5970,7 @@
     if (cur->is_young()) {
       int index = cur->young_index_in_cset();
       assert(index != -1, "invariant");
-      assert((size_t) index < policy->young_cset_region_length(), "invariant");
+      assert((uint) index < policy->young_cset_region_length(), "invariant");
       size_t words_survived = _surviving_young_words[index];
       cur->record_surv_words_in_group(words_survived);
 
@@ -5767,8 +6023,8 @@
                                     NULL /* old_proxy_set */,
                                     NULL /* humongous_proxy_set */,
                                     false /* par */);
-  policy->record_young_free_cset_time_ms(young_time_ms);
-  policy->record_non_young_free_cset_time_ms(non_young_time_ms);
+  policy->phase_times()->record_young_free_cset_time_ms(young_time_ms);
+  policy->phase_times()->record_non_young_free_cset_time_ms(non_young_time_ms);
 }
 
 // This routine is similar to the above but does not record
@@ -6054,7 +6310,7 @@
 // Methods for the GC alloc regions
 
 HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size,
-                                                 size_t count,
+                                                 uint count,
                                                  GCAllocPurpose ap) {
   assert(FreeList_lock->owned_by_self(), "pre-condition");
 
@@ -6126,7 +6382,7 @@
   FreeRegionList*     _free_list;
   OldRegionSet*       _old_set;
   HumongousRegionSet* _humongous_set;
-  size_t              _region_count;
+  uint                _region_count;
 
 public:
   VerifyRegionListsClosure(OldRegionSet* old_set,
@@ -6135,7 +6391,7 @@
     _old_set(old_set), _humongous_set(humongous_set),
     _free_list(free_list), _region_count(0) { }
 
-  size_t region_count()      { return _region_count;      }
+  uint region_count() { return _region_count; }
 
   bool doHeapRegion(HeapRegion* hr) {
     _region_count += 1;
@@ -6157,7 +6413,7 @@
   }
 };
 
-HeapRegion* G1CollectedHeap::new_heap_region(size_t hrs_index,
+HeapRegion* G1CollectedHeap::new_heap_region(uint hrs_index,
                                              HeapWord* bottom) {
   HeapWord* end = bottom + HeapRegion::GrainWords;
   MemRegion mr(bottom, end);
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -33,7 +33,7 @@
 #include "gc_implementation/g1/heapRegionSeq.hpp"
 #include "gc_implementation/g1/heapRegionSets.hpp"
 #include "gc_implementation/shared/hSpaceCounters.hpp"
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "memory/barrierSet.hpp"
 #include "memory/memRegion.hpp"
 #include "memory/sharedHeap.hpp"
@@ -62,8 +62,8 @@
 class ConcurrentG1Refine;
 class GenerationCounters;
 
-typedef OverflowTaskQueue<StarTask>         RefToScanQueue;
-typedef GenericTaskQueueSet<RefToScanQueue> RefToScanQueueSet;
+typedef OverflowTaskQueue<StarTask, mtGC>         RefToScanQueue;
+typedef GenericTaskQueueSet<RefToScanQueue, mtGC> RefToScanQueueSet;
 
 typedef int RegionIdx_t;   // needs to hold [ 0..max_regions() )
 typedef int CardIdx_t;     // needs to hold [ 0..CardsPerRegion )
@@ -74,7 +74,7 @@
   GCAllocPurposeCount
 };
 
-class YoungList : public CHeapObj {
+class YoungList : public CHeapObj<mtGC> {
 private:
   G1CollectedHeap* _g1h;
 
@@ -85,8 +85,8 @@
 
   HeapRegion* _curr;
 
-  size_t      _length;
-  size_t      _survivor_length;
+  uint        _length;
+  uint        _survivor_length;
 
   size_t      _last_sampled_rs_lengths;
   size_t      _sampled_rs_lengths;
@@ -101,8 +101,8 @@
 
   void         empty_list();
   bool         is_empty() { return _length == 0; }
-  size_t       length() { return _length; }
-  size_t       survivor_length() { return _survivor_length; }
+  uint         length() { return _length; }
+  uint         survivor_length() { return _survivor_length; }
 
   // Currently we do not keep track of the used byte sum for the
   // young list and the survivors and it'd be quite a lot of work to
@@ -111,10 +111,10 @@
   // we'll report the more accurate information then.
   size_t       eden_used_bytes() {
     assert(length() >= survivor_length(), "invariant");
-    return (length() - survivor_length()) * HeapRegion::GrainBytes;
+    return (size_t) (length() - survivor_length()) * HeapRegion::GrainBytes;
   }
   size_t       survivor_used_bytes() {
-    return survivor_length() * HeapRegion::GrainBytes;
+    return (size_t) survivor_length() * HeapRegion::GrainBytes;
   }
 
   void rs_length_sampling_init();
@@ -199,7 +199,8 @@
   friend class OldGCAllocRegion;
 
   // Closures used in implementation.
-  friend class G1ParCopyHelper;
+  template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
+  friend class G1ParCopyClosure;
   friend class G1IsAliveClosure;
   friend class G1EvacuateFollowersClosure;
   friend class G1ParScanThreadState;
@@ -246,7 +247,7 @@
   MasterHumongousRegionSet  _humongous_set;
 
   // The number of regions we could create by expansion.
-  size_t _expansion_regions;
+  uint _expansion_regions;
 
   // The block offset table for the G1 heap.
   G1BlockOffsetSharedArray* _bot_shared;
@@ -277,10 +278,33 @@
   // survivor objects.
   SurvivorGCAllocRegion _survivor_gc_alloc_region;
 
+  // PLAB sizing policy for survivors.
+  PLABStats _survivor_plab_stats;
+
   // Alloc region used to satisfy allocation requests by the GC for
   // old objects.
   OldGCAllocRegion _old_gc_alloc_region;
 
+  // PLAB sizing policy for tenured objects.
+  PLABStats _old_plab_stats;
+
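+  // Return the PLAB statistics object for the given allocation purpose.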
+  PLABStats* stats_for_purpose(GCAllocPurpose purpose) {
+    PLABStats* stats = NULL;
+
+    switch (purpose) {
+    case GCAllocForSurvived:
+      stats = &_survivor_plab_stats;
+      break;
+    case GCAllocForTenured:
+      stats = &_old_plab_stats;
+      break;
+    default:
+      assert(false, "unrecognized GCAllocPurpose");
+    }
+
+    return stats;
+  }
+
   // The last old region we allocated to during the last GC.
   // Typically, it is not full so we should re-use it during the next GC.
   HeapRegion* _retained_old_gc_alloc_region;
@@ -313,7 +337,7 @@
   G1MonitoringSupport* _g1mm;
 
   // Determines PLAB size for a particular allocation purpose.
-  static size_t desired_plab_sz(GCAllocPurpose purpose);
+  size_t desired_plab_sz(GCAllocPurpose purpose);
 
   // Outside of GC pauses, the number of bytes used in all regions other
   // than the current allocation region.
@@ -338,7 +362,7 @@
   bool* _in_cset_fast_test_base;
 
   // The length of the _in_cset_fast_test_base array.
-  size_t _in_cset_fast_test_length;
+  uint _in_cset_fast_test_length;
 
   volatile unsigned _gc_time_stamp;
 
@@ -358,10 +382,13 @@
   // (c) cause == _g1_humongous_allocation
   bool should_do_concurrent_full_gc(GCCause::Cause cause);
 
-  // Keeps track of how many "full collections" (i.e., Full GCs or
-  // concurrent cycles) we have completed. The number of them we have
-  // started is maintained in _total_full_collections in CollectedHeap.
-  volatile unsigned int _full_collections_completed;
+  // Keeps track of how many "old marking cycles" (i.e., Full GCs or
+  // concurrent cycles) we have started.
+  volatile unsigned int _old_marking_cycles_started;
+
+  // Keeps track of how many "old marking cycles" (i.e., Full GCs or
+  // concurrent cycles) we have completed.
+  volatile unsigned int _old_marking_cycles_completed;
 
   // This is a non-product method that is helpful for testing. It is
   // called at the end of a GC and artificially expands the heap by
@@ -371,6 +398,17 @@
   // this method will be found dead by the marking cycle).
   void allocate_dummy_regions() PRODUCT_RETURN;
 
+  // Clear RSets after a compaction. It also resets the GC time stamps.
+  void clear_rsets_post_compaction();
+
+  // If the HR printer is active, dump the state of the regions in the
+  // heap after a compaction.
+  void print_hrs_post_compaction();
+
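+  // Verification helpers; see the verify_before_gc() / verify_after_gc()
+  // calls in the evacuation pause code.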
+  double verify(bool guard, const char* msg);
+  void verify_before_gc();
+  void verify_after_gc();
+
   // These are macros so that, if the assert fires, we get the correct
   // line number, file, etc.
 
@@ -457,14 +495,14 @@
   // length and remove them from the master free list. Return the
   // index of the first region or G1_NULL_HRS_INDEX if the search
   // was unsuccessful.
-  size_t humongous_obj_allocate_find_first(size_t num_regions,
-                                           size_t word_size);
+  uint humongous_obj_allocate_find_first(uint num_regions,
+                                         size_t word_size);
 
   // Initialize a contiguous set of free regions of length num_regions
   // and starting at index first so that they appear as a single
   // humongous region.
-  HeapWord* humongous_obj_allocate_initialize_regions(size_t first,
-                                                      size_t num_regions,
+  HeapWord* humongous_obj_allocate_initialize_regions(uint first,
+                                                      uint num_regions,
                                                       size_t word_size);
 
   // Attempt to allocate a humongous object of the given size. Return
@@ -573,7 +611,7 @@
                                    size_t allocated_bytes);
 
   // For GC alloc regions.
-  HeapRegion* new_gc_alloc_region(size_t word_size, size_t count,
+  HeapRegion* new_gc_alloc_region(size_t word_size, uint count,
                                   GCAllocPurpose ap);
   void retire_gc_alloc_region(HeapRegion* alloc_region,
                               size_t allocated_bytes, GCAllocPurpose ap);
@@ -640,7 +678,7 @@
   void register_region_with_in_cset_fast_test(HeapRegion* r) {
     assert(_in_cset_fast_test_base != NULL, "sanity");
     assert(r->in_collection_set(), "invariant");
-    size_t index = r->hrs_index();
+    uint index = r->hrs_index();
     assert(index < _in_cset_fast_test_length, "invariant");
     assert(!_in_cset_fast_test_base[index], "invariant");
     _in_cset_fast_test_base[index] = true;
@@ -654,7 +692,7 @@
     if (_g1_committed.contains((HeapWord*) obj)) {
       // no need to subtract the bottom of the heap from obj,
       // _in_cset_fast_test is biased
-      size_t index = ((size_t) obj) >> HeapRegion::LogOfHRGrainBytes;
+      uintx index = (uintx) obj >> HeapRegion::LogOfHRGrainBytes;
       bool ret = _in_cset_fast_test[index];
       // let's make sure the result is consistent with what the slower
       // test returns
@@ -669,11 +707,15 @@
   void clear_cset_fast_test() {
     assert(_in_cset_fast_test_base != NULL, "sanity");
     memset(_in_cset_fast_test_base, false,
-        _in_cset_fast_test_length * sizeof(bool));
+           (size_t) _in_cset_fast_test_length * sizeof(bool));
   }
 
+  // This is called at the start of either a concurrent cycle or a Full
+  // GC to update the number of old marking cycles started.
+  void increment_old_marking_cycles_started();
+
   // This is called at the end of either a concurrent cycle or a Full
-  // GC to update the number of full collections completed. Those two
+  // GC to update the number of old marking cycles completed. Those two
   // can happen in a nested fashion, i.e., we start a concurrent
   // cycle, a Full GC happens half-way through it which ends first,
   // and then the cycle notices that a Full GC happened and ends
@@ -682,14 +724,14 @@
   // false, the caller is the inner caller in the nesting (i.e., the
   // Full GC). If concurrent is true, the caller is the outer caller
   // in this nesting (i.e., the concurrent cycle). Further nesting is
-  // not currently supported. The end of the this call also notifies
+  // not currently supported. The end of this call also notifies
   // the FullGCCount_lock in case a Java thread is waiting for a full
   // GC to happen (e.g., it called System.gc() with
   // +ExplicitGCInvokesConcurrent).
-  void increment_full_collections_completed(bool concurrent);
+  void increment_old_marking_cycles_completed(bool concurrent);
 
-  unsigned int full_collections_completed() {
-    return _full_collections_completed;
+  unsigned int old_marking_cycles_completed() {
+    return _old_marking_cycles_completed;
   }
 
   G1HRPrinter* hr_printer() { return &_hr_printer; }
@@ -873,6 +915,39 @@
   oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj);
   void handle_evacuation_failure_common(oop obj, markOop m);
 
+#ifndef PRODUCT
+  // Support for forcing evacuation failures. Analogous to
+  // PromotionFailureALot for the other collectors.
+
+  // Records whether G1EvacuationFailureALot should be in effect
+  // for the current GC
+  bool _evacuation_failure_alot_for_current_gc;
+
+  // Used to record the GC number for interval checking when
+  // determining whether G1EvacuationFailureALot is in effect
+  // for the current GC.
+  size_t _evacuation_failure_alot_gc_number;
+
+  // Count of the number of evacuations between failures.
+  volatile size_t _evacuation_failure_alot_count;
+
+  // Set whether G1EvacuationFailureALot should be in effect
+  // for the current GC (based upon the type of GC and which
+  // command line flags are set).
+  inline bool evacuation_failure_alot_for_gc_type(bool gcs_are_young,
+                                                  bool during_initial_mark,
+                                                  bool during_marking);
+
+  inline void set_evacuation_failure_alot_for_current_gc();
+
+  // Return true if it's time to cause an evacuation failure.
+  inline bool evacuation_should_fail();
+
+  // Reset the G1EvacuationFailureALot counters.  Should be called at
+  // the end of an evacuation pause in which an evacuation failure occurred.
+  inline void reset_evacuation_should_fail();
+#endif // !PRODUCT
+
   // ("Weak") Reference processing support.
   //
   // G1 has 2 instances of the reference processor class. One
@@ -1053,11 +1128,18 @@
     clear_cset_start_regions();
   }
 
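+  // Non-product sanity check of the per-region GC time stamps.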
+  void check_gc_time_stamps() PRODUCT_RETURN;
+
   void increment_gc_time_stamp() {
     ++_gc_time_stamp;
     OrderAccess::fence();
   }
 
+  // Reset the given region's GC timestamp. If it's a starts humongous
+  // region, also reset the GC timestamps of its corresponding
+  // continues humongous regions.
+  void reset_gc_time_stamps(HeapRegion* hr);
+
   void iterate_dirty_card_closure(CardTableEntryClosure* cl,
                                   DirtyCardQueue* into_cset_dcq,
                                   bool concurrent, int worker_i);
@@ -1100,23 +1182,23 @@
   }
 
   // The total number of regions in the heap.
-  size_t n_regions() { return _hrs.length(); }
+  uint n_regions() { return _hrs.length(); }
 
   // The max number of regions in the heap.
-  size_t max_regions() { return _hrs.max_length(); }
+  uint max_regions() { return _hrs.max_length(); }
 
   // The number of regions that are completely free.
-  size_t free_regions() { return _free_list.length(); }
+  uint free_regions() { return _free_list.length(); }
 
   // The number of regions that are not completely free.
-  size_t used_regions() { return n_regions() - free_regions(); }
+  uint used_regions() { return n_regions() - free_regions(); }
 
   // The number of regions available for "regular" expansion.
-  size_t expansion_regions() { return _expansion_regions; }
+  uint expansion_regions() { return _expansion_regions; }
 
   // Factory method for HeapRegion instances. It will return NULL if
   // the allocation fails.
-  HeapRegion* new_heap_region(size_t hrs_index, HeapWord* bottom);
+  HeapRegion* new_heap_region(uint hrs_index, HeapWord* bottom);
 
   void verify_not_dirty_region(HeapRegion* hr) PRODUCT_RETURN;
   void verify_dirty_region(HeapRegion* hr) PRODUCT_RETURN;
@@ -1294,13 +1376,8 @@
   // iteration early if the "doHeapRegion" method returns "true".
   void heap_region_iterate(HeapRegionClosure* blk) const;
 
-  // Iterate over heap regions starting with r (or the first region if "r"
-  // is NULL), in address order, terminating early if the "doHeapRegion"
-  // method returns "true".
-  void heap_region_iterate_from(HeapRegion* r, HeapRegionClosure* blk) const;
-
   // Return the region with the given index. It assumes the index is valid.
-  HeapRegion* region_at(size_t index) const { return _hrs.at(index); }
+  HeapRegion* region_at(uint index) const { return _hrs.at(index); }
 
   // Divide the heap region sequence into "chunks" of some size (the number
   // of regions divided by the number of parallel threads times some
@@ -1343,6 +1420,11 @@
   // starting region for iterating over the current collection set.
   HeapRegion* start_cset_region_for_worker(int worker_i);
 
+  // This is a convenience method that is used by the
+  // HeapRegionIterator classes to calculate the starting region for
+  // each worker so that they do not all start from the same region.
+  HeapRegion* start_region_for_worker(uint worker_i, uint no_of_par_workers);
+
   // Iterate over the regions (if any) in the current collection set.
   void collection_set_iterate(HeapRegionClosure* blk);
 
@@ -1503,10 +1585,10 @@
   // Currently there is only one place where this is called with
   // vo == UseMarkWord, which is to verify the marking during a
   // full GC.
-  void verify(bool allow_dirty, bool silent, VerifyOption vo);
+  void verify(bool silent, VerifyOption vo);
 
   // Override; it uses the "prev" marking information
-  virtual void verify(bool allow_dirty, bool silent);
+  virtual void verify(bool silent);
   virtual void print_on(outputStream* st) const;
   virtual void print_extended_on(outputStream* st) const;
 
@@ -1550,24 +1632,6 @@
   bool isMarkedPrev(oop obj) const;
   bool isMarkedNext(oop obj) const;
 
-  // vo == UsePrevMarking -> use "prev" marking information,
-  // vo == UseNextMarking -> use "next" marking information,
-  // vo == UseMarkWord    -> use mark word from object header
-  bool is_obj_dead_cond(const oop obj,
-                        const HeapRegion* hr,
-                        const VerifyOption vo) const {
-
-    switch (vo) {
-      case VerifyOption_G1UsePrevMarking:
-        return is_obj_dead(obj, hr);
-      case VerifyOption_G1UseNextMarking:
-        return is_obj_ill(obj, hr);
-      default:
-        assert(vo == VerifyOption_G1UseMarkWord, "must be");
-        return !obj->is_gc_marked();
-    }
-  }
-
   // Determine if an object is dead, given the object and also
   // the region to which the object belongs. An object is dead
   // iff a) it was not allocated since the last mark and b) it
@@ -1579,15 +1643,6 @@
       !isMarkedPrev(obj);
   }
 
-  // This is used when copying an object to survivor space.
-  // If the object is marked live, then we mark the copy live.
-  // If the object is allocated since the start of this mark
-  // cycle, then we mark the copy live.
-  // If the object has been around since the previous mark
-  // phase, and hasn't been marked yet during this phase,
-  // then we don't mark it, we just wait for the
-  // current marking cycle to get to it.
-
   // This function returns true when an object has been
   // around since the previous marking and hasn't yet
   // been marked during this marking.
@@ -1605,23 +1660,6 @@
   // Added: if the object is in the permanent gen, it isn't dead.
   // Added: if the object is NULL, it isn't dead.
 
-  // vo == UsePrevMarking -> use "prev" marking information,
-  // vo == UseNextMarking -> use "next" marking information,
-  // vo == UseMarkWord    -> use mark word from object header
-  bool is_obj_dead_cond(const oop obj,
-                        const VerifyOption vo) const {
-
-    switch (vo) {
-      case VerifyOption_G1UsePrevMarking:
-        return is_obj_dead(obj);
-      case VerifyOption_G1UseNextMarking:
-        return is_obj_ill(obj);
-      default:
-        assert(vo == VerifyOption_G1UseMarkWord, "must be");
-        return !obj->is_gc_marked();
-    }
-  }
-
   bool is_obj_dead(const oop obj) const {
     const HeapRegion* hr = heap_region_containing(obj);
     if (hr == NULL) {
@@ -1644,6 +1682,42 @@
     else return is_obj_ill(obj, hr);
   }
 
+  // The methods below are here for convenience and dispatch the
+  // appropriate method depending on the value of the given VerifyOption
+  // parameter. The options for that parameter are:
+  //
+  // vo == UsePrevMarking -> use "prev" marking information,
+  // vo == UseNextMarking -> use "next" marking information,
+  // vo == UseMarkWord    -> use mark word from object header
+
+  bool is_obj_dead_cond(const oop obj,
+                        const HeapRegion* hr,
+                        const VerifyOption vo) const {
+    switch (vo) {
+    case VerifyOption_G1UsePrevMarking: return is_obj_dead(obj, hr);
+    case VerifyOption_G1UseNextMarking: return is_obj_ill(obj, hr);
+    case VerifyOption_G1UseMarkWord:    return !obj->is_gc_marked();
+    default:                            ShouldNotReachHere();
+    }
+    return false; // keep some compilers happy
+  }
+
+  bool is_obj_dead_cond(const oop obj,
+                        const VerifyOption vo) const {
+    switch (vo) {
+    case VerifyOption_G1UsePrevMarking: return is_obj_dead(obj);
+    case VerifyOption_G1UseNextMarking: return is_obj_ill(obj);
+    case VerifyOption_G1UseMarkWord:    return !obj->is_gc_marked();
+    default:                            ShouldNotReachHere();
+    }
+    return false; // keep some compilers happy
+  }
+
+  bool allocated_since_marking(oop obj, HeapRegion* hr, VerifyOption vo);
+  HeapWord* top_at_mark_start(HeapRegion* hr, VerifyOption vo);
+  bool is_marked(oop obj, VerifyOption vo);
+  const char* top_at_mark_start_str(VerifyOption vo);
+
   // The following is just to alert the verification code
   // that a full collection has occurred and that the
   // remembered sets are no longer up to date.
@@ -1669,209 +1743,12 @@
   void stop_conc_gc_threads();
 
   size_t pending_card_num();
-  size_t max_pending_card_num();
   size_t cards_scanned();
 
 protected:
   size_t _max_heap_capacity;
 };
 
-#define use_local_bitmaps         1
-#define verify_local_bitmaps      0
-#define oop_buffer_length       256
-
-#ifndef PRODUCT
-class GCLabBitMap;
-class GCLabBitMapClosure: public BitMapClosure {
-private:
-  ConcurrentMark* _cm;
-  GCLabBitMap*    _bitmap;
-
-public:
-  GCLabBitMapClosure(ConcurrentMark* cm,
-                     GCLabBitMap* bitmap) {
-    _cm     = cm;
-    _bitmap = bitmap;
-  }
-
-  virtual bool do_bit(size_t offset);
-};
-#endif // !PRODUCT
-
-class GCLabBitMap: public BitMap {
-private:
-  ConcurrentMark* _cm;
-
-  int       _shifter;
-  size_t    _bitmap_word_covers_words;
-
-  // beginning of the heap
-  HeapWord* _heap_start;
-
-  // this is the actual start of the GCLab
-  HeapWord* _real_start_word;
-
-  // this is the actual end of the GCLab
-  HeapWord* _real_end_word;
-
-  // this is the first word, possibly located before the actual start
-  // of the GCLab, that corresponds to the first bit of the bitmap
-  HeapWord* _start_word;
-
-  // size of a GCLab in words
-  size_t _gclab_word_size;
-
-  static int shifter() {
-    return MinObjAlignment - 1;
-  }
-
-  // how many heap words does a single bitmap word corresponds to?
-  static size_t bitmap_word_covers_words() {
-    return BitsPerWord << shifter();
-  }
-
-  size_t gclab_word_size() const {
-    return _gclab_word_size;
-  }
-
-  // Calculates actual GCLab size in words
-  size_t gclab_real_word_size() const {
-    return bitmap_size_in_bits(pointer_delta(_real_end_word, _start_word))
-           / BitsPerWord;
-  }
-
-  static size_t bitmap_size_in_bits(size_t gclab_word_size) {
-    size_t bits_in_bitmap = gclab_word_size >> shifter();
-    // We are going to ensure that the beginning of a word in this
-    // bitmap also corresponds to the beginning of a word in the
-    // global marking bitmap. To handle the case where a GCLab
-    // starts from the middle of the bitmap, we need to add enough
-    // space (i.e. up to a bitmap word) to ensure that we have
-    // enough bits in the bitmap.
-    return bits_in_bitmap + BitsPerWord - 1;
-  }
-public:
-  GCLabBitMap(HeapWord* heap_start, size_t gclab_word_size)
-    : BitMap(bitmap_size_in_bits(gclab_word_size)),
-      _cm(G1CollectedHeap::heap()->concurrent_mark()),
-      _shifter(shifter()),
-      _bitmap_word_covers_words(bitmap_word_covers_words()),
-      _heap_start(heap_start),
-      _gclab_word_size(gclab_word_size),
-      _real_start_word(NULL),
-      _real_end_word(NULL),
-      _start_word(NULL) {
-    guarantee(false, "GCLabBitMap::GCLabBitmap(): don't call this any more");
-  }
-
-  inline unsigned heapWordToOffset(HeapWord* addr) {
-    unsigned offset = (unsigned) pointer_delta(addr, _start_word) >> _shifter;
-    assert(offset < size(), "offset should be within bounds");
-    return offset;
-  }
-
-  inline HeapWord* offsetToHeapWord(size_t offset) {
-    HeapWord* addr =  _start_word + (offset << _shifter);
-    assert(_real_start_word <= addr && addr < _real_end_word, "invariant");
-    return addr;
-  }
-
-  bool fields_well_formed() {
-    bool ret1 = (_real_start_word == NULL) &&
-                (_real_end_word == NULL) &&
-                (_start_word == NULL);
-    if (ret1)
-      return true;
-
-    bool ret2 = _real_start_word >= _start_word &&
-      _start_word < _real_end_word &&
-      (_real_start_word + _gclab_word_size) == _real_end_word &&
-      (_start_word + _gclab_word_size + _bitmap_word_covers_words)
-                                                              > _real_end_word;
-    return ret2;
-  }
-
-  inline bool mark(HeapWord* addr) {
-    guarantee(use_local_bitmaps, "invariant");
-    assert(fields_well_formed(), "invariant");
-
-    if (addr >= _real_start_word && addr < _real_end_word) {
-      assert(!isMarked(addr), "should not have already been marked");
-
-      // first mark it on the bitmap
-      at_put(heapWordToOffset(addr), true);
-
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  inline bool isMarked(HeapWord* addr) {
-    guarantee(use_local_bitmaps, "invariant");
-    assert(fields_well_formed(), "invariant");
-
-    return at(heapWordToOffset(addr));
-  }
-
-  void set_buffer(HeapWord* start) {
-    guarantee(false, "set_buffer(): don't call this any more");
-
-    guarantee(use_local_bitmaps, "invariant");
-    clear();
-
-    assert(start != NULL, "invariant");
-    _real_start_word = start;
-    _real_end_word   = start + _gclab_word_size;
-
-    size_t diff =
-      pointer_delta(start, _heap_start) % _bitmap_word_covers_words;
-    _start_word = start - diff;
-
-    assert(fields_well_formed(), "invariant");
-  }
-
-#ifndef PRODUCT
-  void verify() {
-    // verify that the marks have been propagated
-    GCLabBitMapClosure cl(_cm, this);
-    iterate(&cl);
-  }
-#endif // PRODUCT
-
-  void retire() {
-    guarantee(false, "retire(): don't call this any more");
-
-    guarantee(use_local_bitmaps, "invariant");
-    assert(fields_well_formed(), "invariant");
-
-    if (_start_word != NULL) {
-      CMBitMap*       mark_bitmap = _cm->nextMarkBitMap();
-
-      // this means that the bitmap was set up for the GCLab
-      assert(_real_start_word != NULL && _real_end_word != NULL, "invariant");
-
-      mark_bitmap->mostly_disjoint_range_union(this,
-                                0, // always start from the start of the bitmap
-                                _start_word,
-                                gclab_real_word_size());
-      _cm->grayRegionIfNecessary(MemRegion(_real_start_word, _real_end_word));
-
-#ifndef PRODUCT
-      if (use_local_bitmaps && verify_local_bitmaps)
-        verify();
-#endif // PRODUCT
-    } else {
-      assert(_real_start_word == NULL && _real_end_word == NULL, "invariant");
-    }
-  }
-
-  size_t bitmap_size_in_words() const {
-    return (bitmap_size_in_bits(gclab_word_size()) + BitsPerWord - 1) / BitsPerWord;
-  }
-
-};
-
 class G1ParGCAllocBuffer: public ParGCAllocBuffer {
 private:
   bool        _retired;
@@ -1960,7 +1837,7 @@
   G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num);
 
   ~G1ParScanThreadState() {
-    FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base);
+    FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base, mtGC);
   }
 
   RefToScanQueue*   refs()            { return _refs;             }
@@ -1993,19 +1870,19 @@
   }
 
   HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) {
-
     HeapWord* obj = NULL;
     size_t gclab_word_size = _g1h->desired_plab_sz(purpose);
     if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) {
       G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose);
-      assert(gclab_word_size == alloc_buf->word_sz(),
-             "dynamic resizing is not supported");
       add_to_alloc_buffer_waste(alloc_buf->words_remaining());
-      alloc_buf->retire(false, false);
+      alloc_buf->flush_stats_and_retire(_g1h->stats_for_purpose(purpose),
+                                        false /* end_of_gc */,
+                                        false /* retain */);
 
       HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size);
       if (buf == NULL) return NULL; // Let caller handle allocation failure.
       // Otherwise.
+      alloc_buf->set_word_size(gclab_word_size);
       alloc_buf->set_buf(buf);
 
       obj = alloc_buf->allocate(word_sz);
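
The guard at the top of allocate_slow() bounds buffer waste: a fresh GC allocation buffer is provisioned only when the requested size is below the allowed waste percentage of a full buffer, otherwise the caller allocates the object directly. A toy restatement of that check, with simplified names:

  #include <cstddef>

  // Refill the per-thread allocation buffer only when the request is
  // below waste_pct percent of a full buffer, so that retiring the old
  // buffer cannot waste too much of it.
  bool should_refill_buffer_toy(size_t word_sz, size_t buffer_word_sz,
                                unsigned waste_pct) {
    return word_sz * 100 < buffer_word_sz * waste_pct;
  }
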
@@ -2090,7 +1967,9 @@
     for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
       size_t waste = _alloc_buffers[ap]->words_remaining();
       add_to_alloc_buffer_waste(waste);
-      _alloc_buffers[ap]->retire(true, false);
+      _alloc_buffers[ap]->flush_stats_and_retire(_g1h->stats_for_purpose((GCAllocPurpose)ap),
+                                                 true /* end_of_gc */,
+                                                 false /* retain */);
     }
   }
 
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -138,7 +138,7 @@
   return _task_queues->queue(i);
 }
 
-inline  bool G1CollectedHeap::isMarkedPrev(oop obj) const {
+inline bool G1CollectedHeap::isMarkedPrev(oop obj) const {
   return _cm->prevMarkBitMap()->isMarked((HeapWord *)obj);
 }
 
@@ -146,4 +146,77 @@
   return _cm->nextMarkBitMap()->isMarked((HeapWord *)obj);
 }
 
+#ifndef PRODUCT
+// Support for G1EvacuationFailureALot
+
+inline bool
+G1CollectedHeap::evacuation_failure_alot_for_gc_type(bool gcs_are_young,
+                                                     bool during_initial_mark,
+                                                     bool during_marking) {
+  bool res = false;
+  if (during_marking) {
+    res |= G1EvacuationFailureALotDuringConcMark;
+  }
+  if (during_initial_mark) {
+    res |= G1EvacuationFailureALotDuringInitialMark;
+  }
+  if (gcs_are_young) {
+    res |= G1EvacuationFailureALotDuringYoungGC;
+  } else {
+    // GCs are mixed
+    res |= G1EvacuationFailureALotDuringMixedGC;
+  }
+  return res;
+}
+
+inline void
+G1CollectedHeap::set_evacuation_failure_alot_for_current_gc() {
+  if (G1EvacuationFailureALot) {
+    // Note we can't assert that _evacuation_failure_alot_for_current_gc
+    // is clear here. It may have been set during a previous GC but that GC
+    // did not copy enough objects (i.e. G1EvacuationFailureALotCount) to
+    // trigger an evacuation failure and clear the flags and counts.
+
+    // Check if we have gone over the interval.
+    const size_t gc_num = total_collections();
+    const size_t elapsed_gcs = gc_num - _evacuation_failure_alot_gc_number;
+
+    _evacuation_failure_alot_for_current_gc = (elapsed_gcs >= G1EvacuationFailureALotInterval);
+
+    // Now check if G1EvacuationFailureALot is enabled for the current GC type.
+    const bool gcs_are_young = g1_policy()->gcs_are_young();
+    const bool during_im = g1_policy()->during_initial_mark_pause();
+    const bool during_marking = mark_in_progress();
+
+    _evacuation_failure_alot_for_current_gc &=
+      evacuation_failure_alot_for_gc_type(gcs_are_young,
+                                          during_im,
+                                          during_marking);
+  }
+}
+
+inline bool
+G1CollectedHeap::evacuation_should_fail() {
+  if (!G1EvacuationFailureALot || !_evacuation_failure_alot_for_current_gc) {
+    return false;
+  }
+  // G1EvacuationFailureALot is in effect for the current GC.
+  // Access to _evacuation_failure_alot_count is not atomic;
+  // the value does not have to be exact.
+  if (++_evacuation_failure_alot_count < G1EvacuationFailureALotCount) {
+    return false;
+  }
+  _evacuation_failure_alot_count = 0;
+  return true;
+}
+
+inline void G1CollectedHeap::reset_evacuation_should_fail() {
+  if (G1EvacuationFailureALot) {
+    _evacuation_failure_alot_gc_number = total_collections();
+    _evacuation_failure_alot_count = 0;
+    _evacuation_failure_alot_for_current_gc = false;
+  }
+}
+#endif  // #ifndef PRODUCT
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1COLLECTEDHEAP_INLINE_HPP
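
Taken together, the three inline methods above form an interval gate plus a per-GC copy counter. A self-contained toy model of the same control flow (invented names; the per-GC-type flag filtering is omitted):

  // Arm failure injection every 'interval' GCs; once armed, report a
  // failure after every 'count_target' copied objects in that GC.
  struct EvacFailureALotToy {
    unsigned interval, count_target;
    unsigned gc_number, armed_at_gc, count;
    bool armed;

    EvacFailureALotToy(unsigned i, unsigned c)
      : interval(i), count_target(c),
        gc_number(0), armed_at_gc(0), count(0), armed(false) {}

    void start_gc() {       // cf. set_evacuation_failure_alot_for_current_gc()
      gc_number++;
      armed = (gc_number - armed_at_gc) >= interval;
    }
    bool should_fail() {    // cf. evacuation_should_fail()
      if (!armed) return false;
      if (++count < count_target) return false;
      count = 0;
      return true;
    }
    void reset() {          // cf. reset_evacuation_should_fail()
      armed_at_gc = gc_number;
      count = 0;
      armed = false;
    }
  };
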
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -29,6 +29,8 @@
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
+#include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/shared/gcPolicyCounters.hpp"
 #include "runtime/arguments.hpp"
@@ -76,96 +78,18 @@
   1.0, 0.7, 0.7, 0.5, 0.5, 0.42, 0.42, 0.30
 };
 
-// Help class for avoiding interleaved logging
-class LineBuffer: public StackObj {
-
-private:
-  static const int BUFFER_LEN = 1024;
-  static const int INDENT_CHARS = 3;
-  char _buffer[BUFFER_LEN];
-  int _indent_level;
-  int _cur;
-
-  void vappend(const char* format, va_list ap) {
-    int res = vsnprintf(&_buffer[_cur], BUFFER_LEN - _cur, format, ap);
-    if (res != -1) {
-      _cur += res;
-    } else {
-      DEBUG_ONLY(warning("buffer too small in LineBuffer");)
-      _buffer[BUFFER_LEN -1] = 0;
-      _cur = BUFFER_LEN; // vsnprintf above should not add to _buffer if we are called again
-    }
-  }
-
-public:
-  explicit LineBuffer(int indent_level): _indent_level(indent_level), _cur(0) {
-    for (; (_cur < BUFFER_LEN && _cur < (_indent_level * INDENT_CHARS)); _cur++) {
-      _buffer[_cur] = ' ';
-    }
-  }
-
-#ifndef PRODUCT
-  ~LineBuffer() {
-    assert(_cur == _indent_level * INDENT_CHARS, "pending data in buffer - append_and_print_cr() not called?");
-  }
-#endif
-
-  void append(const char* format, ...) {
-    va_list ap;
-    va_start(ap, format);
-    vappend(format, ap);
-    va_end(ap);
-  }
-
-  void append_and_print_cr(const char* format, ...) {
-    va_list ap;
-    va_start(ap, format);
-    vappend(format, ap);
-    va_end(ap);
-    gclog_or_tty->print_cr("%s", _buffer);
-    _cur = _indent_level * INDENT_CHARS;
-  }
-};
-
 G1CollectorPolicy::G1CollectorPolicy() :
   _parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads()
                         ? ParallelGCThreads : 1),
 
   _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
-  _all_pause_times_ms(new NumberSeq()),
   _stop_world_start(0.0),
-  _all_stop_world_times_ms(new NumberSeq()),
-  _all_yield_times_ms(new NumberSeq()),
-
-  _summary(new Summary()),
-
-  _cur_clear_ct_time_ms(0.0),
-  _mark_closure_time_ms(0.0),
-  _root_region_scan_wait_time_ms(0.0),
-
-  _cur_ref_proc_time_ms(0.0),
-  _cur_ref_enq_time_ms(0.0),
-
-#ifndef PRODUCT
-  _min_clear_cc_time_ms(-1.0),
-  _max_clear_cc_time_ms(-1.0),
-  _cur_clear_cc_time_ms(0.0),
-  _cum_clear_cc_time_ms(0.0),
-  _num_cc_clears(0L),
-#endif
-
-  _aux_num(10),
-  _all_aux_times_ms(new NumberSeq[_aux_num]),
-  _cur_aux_start_times_ms(new double[_aux_num]),
-  _cur_aux_times_ms(new double[_aux_num]),
-  _cur_aux_times_set(new bool[_aux_num]),
 
   _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
   _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
 
   _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _prev_collection_pause_end_ms(0.0),
-  _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
   _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
   _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -185,25 +109,16 @@
   _pause_time_target_ms((double) MaxGCPauseMillis),
 
   _gcs_are_young(true),
-  _young_pause_num(0),
-  _mixed_pause_num(0),
 
   _during_marking(false),
   _in_marking_window(false),
   _in_marking_window_im(false),
 
-  _known_garbage_ratio(0.0),
-  _known_garbage_bytes(0),
-
-  _young_gc_eff_seq(new TruncatedSeq(TruncatedSeqLength)),
-
   _recent_prev_end_times_for_all_gcs_sec(
                                 new TruncatedSeq(NumPrevPausesForHeuristics)),
 
   _recent_avg_pause_time_ratio(0.0),
 
-  _all_full_gc_times_ms(new NumberSeq()),
-
   _initiate_conc_mark_if_possible(false),
   _during_initial_mark_pause(false),
   _last_young_gc(false),
@@ -277,32 +192,10 @@
   _recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime());
   _prev_collection_pause_end_ms = os::elapsedTime() * 1000.0;
 
-  _par_last_gc_worker_start_times_ms = new double[_parallel_gc_threads];
-  _par_last_ext_root_scan_times_ms = new double[_parallel_gc_threads];
-  _par_last_satb_filtering_times_ms = new double[_parallel_gc_threads];
-
-  _par_last_update_rs_times_ms = new double[_parallel_gc_threads];
-  _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads];
-
-  _par_last_scan_rs_times_ms = new double[_parallel_gc_threads];
-
-  _par_last_obj_copy_times_ms = new double[_parallel_gc_threads];
+  _phase_times = new G1GCPhaseTimes(_parallel_gc_threads);
 
-  _par_last_termination_times_ms = new double[_parallel_gc_threads];
-  _par_last_termination_attempts = new double[_parallel_gc_threads];
-  _par_last_gc_worker_end_times_ms = new double[_parallel_gc_threads];
-  _par_last_gc_worker_times_ms = new double[_parallel_gc_threads];
-  _par_last_gc_worker_other_times_ms = new double[_parallel_gc_threads];
+  int index = MIN2(_parallel_gc_threads - 1, 7);
 
-  int index;
-  if (ParallelGCThreads == 0)
-    index = 0;
-  else if (ParallelGCThreads > 8)
-    index = 7;
-  else
-    index = ParallelGCThreads - 1;
-
-  _pending_card_diff_seq->add(0.0);
   _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
   _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]);
   _young_cards_per_entry_ratio_seq->add(
@@ -431,31 +324,36 @@
   }
 
   if (FLAG_IS_CMDLINE(NewSize)) {
-     _min_desired_young_length = MAX2((size_t) 1, NewSize / HeapRegion::GrainBytes);
+    _min_desired_young_length = MAX2((uint) (NewSize / HeapRegion::GrainBytes),
+                                     1U);
     if (FLAG_IS_CMDLINE(MaxNewSize)) {
-      _max_desired_young_length = MAX2((size_t) 1, MaxNewSize / HeapRegion::GrainBytes);
+      _max_desired_young_length =
+                             MAX2((uint) (MaxNewSize / HeapRegion::GrainBytes),
+                                  1U);
       _sizer_kind = SizerMaxAndNewSize;
       _adaptive_size = _min_desired_young_length == _max_desired_young_length;
     } else {
       _sizer_kind = SizerNewSizeOnly;
     }
   } else if (FLAG_IS_CMDLINE(MaxNewSize)) {
-    _max_desired_young_length = MAX2((size_t) 1, MaxNewSize / HeapRegion::GrainBytes);
+    _max_desired_young_length =
+                             MAX2((uint) (MaxNewSize / HeapRegion::GrainBytes),
+                                  1U);
     _sizer_kind = SizerMaxNewSizeOnly;
   }
 }
 
-size_t G1YoungGenSizer::calculate_default_min_length(size_t new_number_of_heap_regions) {
-  size_t default_value = (new_number_of_heap_regions * G1DefaultMinNewGenPercent) / 100;
-  return MAX2((size_t)1, default_value);
+uint G1YoungGenSizer::calculate_default_min_length(uint new_number_of_heap_regions) {
+  uint default_value = (new_number_of_heap_regions * G1DefaultMinNewGenPercent) / 100;
+  return MAX2(1U, default_value);
 }
 
-size_t G1YoungGenSizer::calculate_default_max_length(size_t new_number_of_heap_regions) {
-  size_t default_value = (new_number_of_heap_regions * G1DefaultMaxNewGenPercent) / 100;
-  return MAX2((size_t)1, default_value);
+uint G1YoungGenSizer::calculate_default_max_length(uint new_number_of_heap_regions) {
+  uint default_value = (new_number_of_heap_regions * G1DefaultMaxNewGenPercent) / 100;
+  return MAX2(1U, default_value);
 }
 
-void G1YoungGenSizer::heap_size_changed(size_t new_number_of_heap_regions) {
+void G1YoungGenSizer::heap_size_changed(uint new_number_of_heap_regions) {
   assert(new_number_of_heap_regions > 0, "Heap must be initialized");
 
   switch (_sizer_kind) {
@@ -512,16 +410,16 @@
   _gc_policy_counters = new GCPolicyCounters("GarbageFirst", 1, 3);
 }
 
-bool G1CollectorPolicy::predict_will_fit(size_t young_length,
+bool G1CollectorPolicy::predict_will_fit(uint young_length,
                                          double base_time_ms,
-                                         size_t base_free_regions,
+                                         uint base_free_regions,
                                          double target_pause_time_ms) {
   if (young_length >= base_free_regions) {
     // end condition 1: not enough space for the young regions
     return false;
   }
 
-  double accum_surv_rate = accum_yg_surv_rate_pred((int)(young_length - 1));
+  double accum_surv_rate = accum_yg_surv_rate_pred((int) young_length - 1);
   size_t bytes_to_copy =
                (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes);
   double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy);
@@ -533,7 +431,7 @@
   }
 
   size_t free_bytes =
-                  (base_free_regions - young_length) * HeapRegion::GrainBytes;
+                   (base_free_regions - young_length) * HeapRegion::GrainBytes;
   if ((2.0 * sigma()) * (double) bytes_to_copy > (double) free_bytes) {
     // end condition 3: out-of-space (conservatively!)
     return false;
@@ -543,25 +441,25 @@
   return true;
 }
 
-void G1CollectorPolicy::record_new_heap_size(size_t new_number_of_regions) {
+void G1CollectorPolicy::record_new_heap_size(uint new_number_of_regions) {
   // re-calculate the necessary reserve
   double reserve_regions_d = (double) new_number_of_regions * _reserve_factor;
   // We use ceiling so that if reserve_regions_d is > 0.0 (but
   // smaller than 1.0) we'll get 1.
-  _reserve_regions = (size_t) ceil(reserve_regions_d);
+  _reserve_regions = (uint) ceil(reserve_regions_d);
 
   _young_gen_sizer->heap_size_changed(new_number_of_regions);
 }
 
-size_t G1CollectorPolicy::calculate_young_list_desired_min_length(
-                                                     size_t base_min_length) {
-  size_t desired_min_length = 0;
+uint G1CollectorPolicy::calculate_young_list_desired_min_length(
+                                                       uint base_min_length) {
+  uint desired_min_length = 0;
   if (adaptive_young_list_length()) {
     if (_alloc_rate_ms_seq->num() > 3) {
       double now_sec = os::elapsedTime();
       double when_ms = _mmu_tracker->when_max_gc_sec(now_sec) * 1000.0;
       double alloc_rate_ms = predict_alloc_rate_ms();
-      desired_min_length = (size_t) ceil(alloc_rate_ms * when_ms);
+      desired_min_length = (uint) ceil(alloc_rate_ms * when_ms);
     } else {
       // otherwise we don't have enough info to make the prediction
     }
@@ -571,7 +469,7 @@
   return MAX2(_young_gen_sizer->min_desired_young_length(), desired_min_length);
 }
 
-size_t G1CollectorPolicy::calculate_young_list_desired_max_length() {
+uint G1CollectorPolicy::calculate_young_list_desired_max_length() {
   // Here, we might want to also take into account any additional
   // constraints (i.e., user-defined minimum bound). Currently, we
   // effectively don't set this bound.
@@ -588,11 +486,11 @@
   // Calculate the absolute and desired min bounds.
 
   // This is how many young regions we already have (currently: the survivors).
-  size_t base_min_length = recorded_survivor_regions();
+  uint base_min_length = recorded_survivor_regions();
   // This is the absolute minimum young length, which ensures that we
   // can allocate one eden region in the worst-case.
-  size_t absolute_min_length = base_min_length + 1;
-  size_t desired_min_length =
+  uint absolute_min_length = base_min_length + 1;
+  uint desired_min_length =
                      calculate_young_list_desired_min_length(base_min_length);
   if (desired_min_length < absolute_min_length) {
     desired_min_length = absolute_min_length;
@@ -601,16 +499,16 @@
   // Calculate the absolute and desired max bounds.
 
   // We will try our best not to "eat" into the reserve.
-  size_t absolute_max_length = 0;
+  uint absolute_max_length = 0;
   if (_free_regions_at_end_of_collection > _reserve_regions) {
     absolute_max_length = _free_regions_at_end_of_collection - _reserve_regions;
   }
-  size_t desired_max_length = calculate_young_list_desired_max_length();
+  uint desired_max_length = calculate_young_list_desired_max_length();
   if (desired_max_length > absolute_max_length) {
     desired_max_length = absolute_max_length;
   }
 
-  size_t young_list_target_length = 0;
+  uint young_list_target_length = 0;
   if (adaptive_young_list_length()) {
     if (gcs_are_young()) {
       young_list_target_length =
@@ -648,11 +546,11 @@
   update_max_gc_locker_expansion();
 }
 
-size_t
+uint
 G1CollectorPolicy::calculate_young_list_target_length(size_t rs_lengths,
-                                                   size_t base_min_length,
-                                                   size_t desired_min_length,
-                                                   size_t desired_max_length) {
+                                                     uint base_min_length,
+                                                     uint desired_min_length,
+                                                     uint desired_max_length) {
   assert(adaptive_young_list_length(), "pre-condition");
   assert(gcs_are_young(), "only call this for young GCs");
 
@@ -667,9 +565,9 @@
   // will be reflected in the predictions by the
   // survivor_regions_evac_time prediction.
   assert(desired_min_length > base_min_length, "invariant");
-  size_t min_young_length = desired_min_length - base_min_length;
+  uint min_young_length = desired_min_length - base_min_length;
   assert(desired_max_length > base_min_length, "invariant");
-  size_t max_young_length = desired_max_length - base_min_length;
+  uint max_young_length = desired_max_length - base_min_length;
 
   double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
   double survivor_regions_evac_time = predict_survivor_regions_evac_time();
@@ -679,8 +577,8 @@
   double base_time_ms =
     predict_base_elapsed_time_ms(pending_cards, scanned_cards) +
     survivor_regions_evac_time;
-  size_t available_free_regions = _free_regions_at_end_of_collection;
-  size_t base_free_regions = 0;
+  uint available_free_regions = _free_regions_at_end_of_collection;
+  uint base_free_regions = 0;
   if (available_free_regions > _reserve_regions) {
     base_free_regions = available_free_regions - _reserve_regions;
   }
@@ -717,9 +615,9 @@
       // the new max. This way we maintain the loop invariants.
 
       assert(min_young_length < max_young_length, "invariant");
-      size_t diff = (max_young_length - min_young_length) / 2;
+      uint diff = (max_young_length - min_young_length) / 2;
       while (diff > 0) {
-        size_t young_length = min_young_length + diff;
+        uint young_length = min_young_length + diff;
         if (predict_will_fit(young_length, base_time_ms,
                              base_free_regions, target_pause_time_ms)) {
           min_young_length = young_length;
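
The loop above is a binary search over a monotone predicate: if a given young length fits within the pause target, every shorter length fits too. A standalone toy version of the same search (hypothetical signature):

  // Find the largest value in [lo, hi) for which fits() holds, given
  // that lo fits, hi does not, and fits() is monotone between them.
  unsigned max_fitting_length_toy(unsigned lo, unsigned hi,
                                  bool (*fits)(unsigned)) {
    unsigned diff = (hi - lo) / 2;
    while (diff > 0) {
      unsigned mid = lo + diff;
      if (fits(mid)) {
        lo = mid;  // mid fits: raise the known-good minimum
      } else {
        hi = mid;  // mid does not fit: lower the known-bad maximum
      }
      diff = (hi - lo) / 2;
    }
    return lo;
  }
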
@@ -757,7 +655,7 @@
   for (HeapRegion * r = _recorded_survivor_head;
        r != NULL && r != _recorded_survivor_tail->get_next_young_region();
        r = r->get_next_young_region()) {
-    survivor_regions_evac_time += predict_region_elapsed_time_ms(r, true);
+    survivor_regions_evac_time += predict_region_elapsed_time_ms(r, gcs_are_young());
   }
   return survivor_regions_evac_time;
 }
@@ -839,7 +737,7 @@
 #endif // PRODUCT
 
 void G1CollectorPolicy::record_full_collection_start() {
-  _cur_collection_start_sec = os::elapsedTime();
+  _full_collection_start_sec = os::elapsedTime();
   // Release the future to-space so that it is available to compact into.
   _g1->set_full_collection();
 }
@@ -848,10 +746,10 @@
   // Consider this like a collection pause for the purposes of allocation
   // since last pause.
   double end_sec = os::elapsedTime();
-  double full_gc_time_sec = end_sec - _cur_collection_start_sec;
+  double full_gc_time_sec = end_sec - _full_collection_start_sec;
   double full_gc_time_ms = full_gc_time_sec * 1000.0;
 
-  _all_full_gc_times_ms->add(full_gc_time_ms);
+  _trace_gen1_time_data.record_full_collection(full_gc_time_ms);
 
   update_recent_gc_times(end_sec, full_gc_time_ms);
 
@@ -863,8 +761,6 @@
   _last_young_gc = false;
   clear_initiate_conc_mark_if_possible();
   clear_during_initial_mark_pause();
-  _known_garbage_bytes = 0;
-  _known_garbage_ratio = 0.0;
   _in_marking_window = false;
   _in_marking_window_im = false;
 
@@ -877,7 +773,7 @@
   // Reset survivors SurvRateGroup.
   _survivor_surv_rate_group->reset();
   update_young_list_target_length();
-  _collectionSetChooser->clearMarkedHeapRegions();
+  _collectionSetChooser->clear();
 }
 
 void G1CollectorPolicy::record_stop_world_start() {
@@ -886,12 +782,6 @@
 
 void G1CollectorPolicy::record_collection_pause_start(double start_time_sec,
                                                       size_t start_used) {
-  if (PrintGCDetails) {
-    gclog_or_tty->stamp(PrintGCTimeStamps);
-    gclog_or_tty->print("[GC pause");
-    gclog_or_tty->print(" (%s)", gcs_are_young() ? "young" : "mixed");
-  }
-
   // We only need to do this here as the policy will only be applied
   // to the GC we're about to start. So, no point in calculating this
   // every time we calculate / recalculate the target young length.
@@ -902,16 +792,15 @@
                  _g1->used(), _g1->recalculate_used()));
 
   double s_w_t_ms = (start_time_sec - _stop_world_start) * 1000.0;
-  _all_stop_world_times_ms->add(s_w_t_ms);
+  _trace_gen0_time_data.record_start_collection(s_w_t_ms);
   _stop_world_start = 0.0;
 
-  _cur_collection_start_sec = start_time_sec;
+  phase_times()->record_cur_collection_start_sec(start_time_sec);
   _cur_collection_pause_used_at_start_bytes = start_used;
   _cur_collection_pause_used_regions_at_start = _g1->used_regions();
   _pending_cards = _g1->pending_card_num();
-  _max_pending_cards = _g1->max_pending_card_num();
 
-  _bytes_in_collection_set_before_gc = 0;
+  _collection_set_bytes_used_before = 0;
   _bytes_copied_during_gc = 0;
 
   YoungList* young_list = _g1->young_list();
@@ -919,38 +808,6 @@
   _survivor_bytes_before_gc = young_list->survivor_used_bytes();
   _capacity_before_gc = _g1->capacity();
 
-#ifdef DEBUG
-  // initialise these to something well known so that we can spot
-  // if they are not set properly
-
-  for (int i = 0; i < _parallel_gc_threads; ++i) {
-    _par_last_gc_worker_start_times_ms[i] = -1234.0;
-    _par_last_ext_root_scan_times_ms[i] = -1234.0;
-    _par_last_satb_filtering_times_ms[i] = -1234.0;
-    _par_last_update_rs_times_ms[i] = -1234.0;
-    _par_last_update_rs_processed_buffers[i] = -1234.0;
-    _par_last_scan_rs_times_ms[i] = -1234.0;
-    _par_last_obj_copy_times_ms[i] = -1234.0;
-    _par_last_termination_times_ms[i] = -1234.0;
-    _par_last_termination_attempts[i] = -1234.0;
-    _par_last_gc_worker_end_times_ms[i] = -1234.0;
-    _par_last_gc_worker_times_ms[i] = -1234.0;
-    _par_last_gc_worker_other_times_ms[i] = -1234.0;
-  }
-#endif
-
-  for (int i = 0; i < _aux_num; ++i) {
-    _cur_aux_times_ms[i] = 0.0;
-    _cur_aux_times_set[i] = false;
-  }
-
-  // This is initialized to zero here and is set during
-  // the evacuation pause if marking is in progress.
-  _cur_satb_drain_time_ms = 0.0;
-  // This is initialized to zero here and is set during the evacuation
-  // pause if we actually waited for the root region scanning to finish.
-  _root_region_scan_wait_time_ms = 0.0;
-
   _last_gc_was_young = false;
 
   // do that for any other surv rate groups
@@ -995,129 +852,8 @@
 void G1CollectorPolicy::record_concurrent_pause() {
   if (_stop_world_start > 0.0) {
     double yield_ms = (os::elapsedTime() - _stop_world_start) * 1000.0;
-    _all_yield_times_ms->add(yield_ms);
-  }
-}
-
-void G1CollectorPolicy::record_concurrent_pause_end() {
-}
-
-template<class T>
-T sum_of(T* sum_arr, int start, int n, int N) {
-  T sum = (T)0;
-  for (int i = 0; i < n; i++) {
-    int j = (start + i) % N;
-    sum += sum_arr[j];
-  }
-  return sum;
-}
-
-void G1CollectorPolicy::print_par_stats(int level,
-                                        const char* str,
-                                        double* data) {
-  double min = data[0], max = data[0];
-  double total = 0.0;
-  LineBuffer buf(level);
-  buf.append("[%s (ms):", str);
-  for (uint i = 0; i < no_of_gc_threads(); ++i) {
-    double val = data[i];
-    if (val < min)
-      min = val;
-    if (val > max)
-      max = val;
-    total += val;
-    buf.append("  %3.1lf", val);
-  }
-  buf.append_and_print_cr("");
-  double avg = total / (double) no_of_gc_threads();
-  buf.append_and_print_cr(" Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf, Diff: %5.1lf]",
-    avg, min, max, max - min);
-}
-
-void G1CollectorPolicy::print_par_sizes(int level,
-                                        const char* str,
-                                        double* data) {
-  double min = data[0], max = data[0];
-  double total = 0.0;
-  LineBuffer buf(level);
-  buf.append("[%s :", str);
-  for (uint i = 0; i < no_of_gc_threads(); ++i) {
-    double val = data[i];
-    if (val < min)
-      min = val;
-    if (val > max)
-      max = val;
-    total += val;
-    buf.append(" %d", (int) val);
+    _trace_gen0_time_data.record_yield_time(yield_ms);
   }
-  buf.append_and_print_cr("");
-  double avg = total / (double) no_of_gc_threads();
-  buf.append_and_print_cr(" Sum: %d, Avg: %d, Min: %d, Max: %d, Diff: %d]",
-    (int)total, (int)avg, (int)min, (int)max, (int)max - (int)min);
-}
-
-void G1CollectorPolicy::print_stats(int level,
-                                    const char* str,
-                                    double value) {
-  LineBuffer(level).append_and_print_cr("[%s: %5.1lf ms]", str, value);
-}
-
-void G1CollectorPolicy::print_stats(int level,
-                                    const char* str,
-                                    int value) {
-  LineBuffer(level).append_and_print_cr("[%s: %d]", str, value);
-}
-
-double G1CollectorPolicy::avg_value(double* data) {
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    double ret = 0.0;
-    for (uint i = 0; i < no_of_gc_threads(); ++i) {
-      ret += data[i];
-    }
-    return ret / (double) no_of_gc_threads();
-  } else {
-    return data[0];
-  }
-}
-
-double G1CollectorPolicy::max_value(double* data) {
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    double ret = data[0];
-    for (uint i = 1; i < no_of_gc_threads(); ++i) {
-      if (data[i] > ret) {
-        ret = data[i];
-      }
-    }
-    return ret;
-  } else {
-    return data[0];
-  }
-}
-
-double G1CollectorPolicy::sum_of_values(double* data) {
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    double sum = 0.0;
-    for (uint i = 0; i < no_of_gc_threads(); i++) {
-      sum += data[i];
-    }
-    return sum;
-  } else {
-    return data[0];
-  }
-}
-
-double G1CollectorPolicy::max_sum(double* data1, double* data2) {
-  double ret = data1[0] + data2[0];
-
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    for (uint i = 1; i < no_of_gc_threads(); ++i) {
-      double data = data1[i] + data2[i];
-      if (data > ret) {
-        ret = data;
-      }
-    }
-  }
-  return ret;
 }
 
 bool G1CollectorPolicy::need_to_start_conc_mark(const char* source, size_t alloc_word_size) {
@@ -1167,10 +903,8 @@
 // Anything below that is considered to be zero
 #define MIN_TIMER_GRANULARITY 0.0000001
 
-void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
+void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms) {
   double end_time_sec = os::elapsedTime();
-  double elapsed_ms = _last_pause_time_ms;
-  bool parallel = G1CollectedHeap::use_parallel_gc_threads();
   assert(_cur_collection_pause_used_regions_at_start >= cset_region_length(),
          "otherwise, the subtraction below does not make sense");
   size_t rs_size =
@@ -1179,7 +913,6 @@
   assert(cur_used_bytes == _g1->recalculate_used(), "It should!");
   bool last_pause_included_initial_mark = false;
   bool update_stats = !_g1->evacuation_failed();
-  set_no_of_gc_threads(no_of_gc_threads);
 
 #ifndef PRODUCT
   if (G1YoungSurvRateVerbose) {
@@ -1199,24 +932,9 @@
     set_initiate_conc_mark_if_possible();
   }
 
-  _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0,
+  _mmu_tracker->add_pause(end_time_sec - pause_time_ms/1000.0,
                           end_time_sec, false);
 
-  // This assert is exempted when we're doing parallel collection pauses,
-  // because the fragmentation caused by the parallel GC allocation buffers
-  // can lead to more memory being used during collection than was used
-  // before. Best leave this out until the fragmentation problem is fixed.
-  // Pauses in which evacuation failed can also lead to negative
-  // collections, since no space is reclaimed from a region containing an
-  // object whose evacuation failed.
-  // Further, we're now always doing parallel collection.  But I'm still
-  // leaving this here as a placeholder for a more precise assertion later.
-  // (DLD, 10/05.)
-  assert((true || parallel) // Always using GC LABs now.
-         || _g1->evacuation_failed()
-         || _cur_collection_pause_used_at_start_bytes >= cur_used_bytes,
-         "Negative collection");
-
   size_t freed_bytes =
     _cur_collection_pause_used_at_start_bytes - cur_used_bytes;
   size_t surviving_bytes = _collection_set_bytes_used_before - freed_bytes;
@@ -1225,103 +943,11 @@
     (double)surviving_bytes/
     (double)_collection_set_bytes_used_before;
 
-  // These values are used to update the summary information that is
-  // displayed when TraceGen0Time is enabled, and are output as part
-  // of the PrintGCDetails output, in the non-parallel case.
-
-  double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
-  double satb_filtering_time = avg_value(_par_last_satb_filtering_times_ms);
-  double update_rs_time = avg_value(_par_last_update_rs_times_ms);
-  double update_rs_processed_buffers =
-    sum_of_values(_par_last_update_rs_processed_buffers);
-  double scan_rs_time = avg_value(_par_last_scan_rs_times_ms);
-  double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
-  double termination_time = avg_value(_par_last_termination_times_ms);
-
-  double known_time = ext_root_scan_time +
-                      satb_filtering_time +
-                      update_rs_time +
-                      scan_rs_time +
-                      obj_copy_time;
-
-  double other_time_ms = elapsed_ms;
-
-  // Subtract the SATB drain time. It's initialized to zero at the
-  // start of the pause and is updated during the pause if marking
-  // is in progress.
-  other_time_ms -= _cur_satb_drain_time_ms;
-
-  // Subtract the root region scanning wait time. It's initialized to
-  // zero at the start of the pause.
-  other_time_ms -= _root_region_scan_wait_time_ms;
-
-  if (parallel) {
-    other_time_ms -= _cur_collection_par_time_ms;
-  } else {
-    other_time_ms -= known_time;
-  }
-
-  // Subtract the time taken to clean the card table from the
-  // current value of "other time"
-  other_time_ms -= _cur_clear_ct_time_ms;
-
-  // Subtract the time spent completing marking in the collection
-  // set. Note if marking is not in progress during the pause
-  // the value of _mark_closure_time_ms will be zero.
-  other_time_ms -= _mark_closure_time_ms;
-
-  // TraceGen0Time and TraceGen1Time summary info updating.
-  _all_pause_times_ms->add(elapsed_ms);
-
   if (update_stats) {
-    _summary->record_total_time_ms(elapsed_ms);
-    _summary->record_other_time_ms(other_time_ms);
-
-    MainBodySummary* body_summary = _summary->main_body_summary();
-    assert(body_summary != NULL, "should not be null!");
-
-    // This will be non-zero iff marking is currently in progress (i.e.
-    // _g1->mark_in_progress() == true) and the currrent pause was not
-    // an initial mark pause. Since the body_summary items are NumberSeqs,
-    // however, they have to be consistent and updated in lock-step with
-    // each other. Therefore we unconditionally record the SATB drain
-    // time - even if it's zero.
-    body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
-    body_summary->record_root_region_scan_wait_time_ms(
-                                               _root_region_scan_wait_time_ms);
-
-    body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
-    body_summary->record_satb_filtering_time_ms(satb_filtering_time);
-    body_summary->record_update_rs_time_ms(update_rs_time);
-    body_summary->record_scan_rs_time_ms(scan_rs_time);
-    body_summary->record_obj_copy_time_ms(obj_copy_time);
-
-    if (parallel) {
-      body_summary->record_parallel_time_ms(_cur_collection_par_time_ms);
-      body_summary->record_termination_time_ms(termination_time);
-
-      double parallel_known_time = known_time + termination_time;
-      double parallel_other_time = _cur_collection_par_time_ms - parallel_known_time;
-      body_summary->record_parallel_other_time_ms(parallel_other_time);
-    }
-
-    body_summary->record_mark_closure_time_ms(_mark_closure_time_ms);
-    body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
-
-    // We exempt parallel collection from this check because Alloc Buffer
-    // fragmentation can produce negative collections.  Same with evac
-    // failure.
-    // Further, we're now always doing parallel collection.  But I'm still
-    // leaving this here as a placeholder for a more precise assertion later.
-    // (DLD, 10/05.
-    assert((true || parallel)
-           || _g1->evacuation_failed()
-           || surviving_bytes <= _collection_set_bytes_used_before,
-           "Or else negative collection!");
-
+    _trace_gen0_time_data.record_end_collection(pause_time_ms, phase_times());
     // this is where we update the allocation rate of the application
     double app_time_ms =
-      (_cur_collection_start_sec * 1000.0 - _prev_collection_pause_end_ms);
+      (phase_times()->cur_collection_start_sec() * 1000.0 - _prev_collection_pause_end_ms);
     if (app_time_ms < MIN_TIMER_GRANULARITY) {
       // This usually happens due to the timer not having the required
       // granularity. Some Linuxes are the usual culprits.
@@ -1336,13 +962,13 @@
     // given that humongous object allocations do not really affect
     // either the pause's duration nor when the next pause will take
     // place we can safely ignore them here.
-    size_t regions_allocated = eden_cset_region_length();
+    uint regions_allocated = eden_cset_region_length();
     double alloc_rate_ms = (double) regions_allocated / app_time_ms;
     _alloc_rate_ms_seq->add(alloc_rate_ms);
 
     double interval_ms =
       (end_time_sec - _recent_prev_end_times_for_all_gcs_sec->oldest()) * 1000.0;
-    update_recent_gc_times(end_time_sec, elapsed_ms);
+    update_recent_gc_times(end_time_sec, pause_time_ms);
     _recent_avg_pause_time_ratio = _recent_gc_times_ms->sum()/interval_ms;
     if (recent_avg_pause_time_ratio() < 0.0 ||
         (recent_avg_pause_time_ratio() - 1.0 > 0.0)) {
@@ -1369,106 +995,6 @@
       }
     }
   }
-
-  for (int i = 0; i < _aux_num; ++i) {
-    if (_cur_aux_times_set[i]) {
-      _all_aux_times_ms[i].add(_cur_aux_times_ms[i]);
-    }
-  }
-
-  // PrintGCDetails output
-  if (PrintGCDetails) {
-    bool print_marking_info =
-      _g1->mark_in_progress() && !last_pause_included_initial_mark;
-
-    gclog_or_tty->print_cr("%s, %1.8lf secs]",
-                           (last_pause_included_initial_mark) ? " (initial-mark)" : "",
-                           elapsed_ms / 1000.0);
-
-    if (_root_region_scan_wait_time_ms > 0.0) {
-      print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
-    }
-    if (parallel) {
-      print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
-      print_par_stats(2, "GC Worker Start", _par_last_gc_worker_start_times_ms);
-      print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
-      if (print_marking_info) {
-        print_par_stats(2, "SATB Filtering", _par_last_satb_filtering_times_ms);
-      }
-      print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
-      print_par_sizes(3, "Processed Buffers", _par_last_update_rs_processed_buffers);
-      print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms);
-      print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
-      print_par_stats(2, "Termination", _par_last_termination_times_ms);
-      print_par_sizes(3, "Termination Attempts", _par_last_termination_attempts);
-      print_par_stats(2, "GC Worker End", _par_last_gc_worker_end_times_ms);
-
-      for (int i = 0; i < _parallel_gc_threads; i++) {
-        _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] - _par_last_gc_worker_start_times_ms[i];
-
-        double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
-                                   _par_last_satb_filtering_times_ms[i] +
-                                   _par_last_update_rs_times_ms[i] +
-                                   _par_last_scan_rs_times_ms[i] +
-                                   _par_last_obj_copy_times_ms[i] +
-                                   _par_last_termination_times_ms[i];
-
-        _par_last_gc_worker_other_times_ms[i] = _cur_collection_par_time_ms - worker_known_time;
-      }
-      print_par_stats(2, "GC Worker", _par_last_gc_worker_times_ms);
-      print_par_stats(2, "GC Worker Other", _par_last_gc_worker_other_times_ms);
-    } else {
-      print_stats(1, "Ext Root Scanning", ext_root_scan_time);
-      if (print_marking_info) {
-        print_stats(1, "SATB Filtering", satb_filtering_time);
-      }
-      print_stats(1, "Update RS", update_rs_time);
-      print_stats(2, "Processed Buffers", (int)update_rs_processed_buffers);
-      print_stats(1, "Scan RS", scan_rs_time);
-      print_stats(1, "Object Copying", obj_copy_time);
-    }
-    if (print_marking_info) {
-      print_stats(1, "Complete CSet Marking", _mark_closure_time_ms);
-    }
-    print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
-#ifndef PRODUCT
-    print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms);
-    print_stats(1, "Cum Clear CC", _cum_clear_cc_time_ms);
-    print_stats(1, "Min Clear CC", _min_clear_cc_time_ms);
-    print_stats(1, "Max Clear CC", _max_clear_cc_time_ms);
-    if (_num_cc_clears > 0) {
-      print_stats(1, "Avg Clear CC", _cum_clear_cc_time_ms / ((double)_num_cc_clears));
-    }
-#endif
-    print_stats(1, "Other", other_time_ms);
-    print_stats(2, "Choose CSet",
-                   (_recorded_young_cset_choice_time_ms +
-                    _recorded_non_young_cset_choice_time_ms));
-    print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
-    print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
-    print_stats(2, "Free CSet",
-                   (_recorded_young_free_cset_time_ms +
-                    _recorded_non_young_free_cset_time_ms));
-
-    for (int i = 0; i < _aux_num; ++i) {
-      if (_cur_aux_times_set[i]) {
-        char buffer[96];
-        sprintf(buffer, "Aux%d", i);
-        print_stats(1, buffer, _cur_aux_times_ms[i]);
-      }
-    }
-  }
-
-  // Update the efficiency-since-mark vars.
-  double proc_ms = elapsed_ms * (double) _parallel_gc_threads;
-  if (elapsed_ms < MIN_TIMER_GRANULARITY) {
-    // This usually happens due to the timer not having the required
-    // granularity. Some Linuxes are the usual culprits.
-    // We'll just set it to something (arbitrarily) small.
-    proc_ms = 1.0;
-  }
-  double cur_efficiency = (double) freed_bytes / proc_ms;
-
   bool new_in_marking_window = _in_marking_window;
   bool new_in_marking_window_im = false;
   if (during_initial_mark_pause()) {
@@ -1503,24 +1029,13 @@
     }
   }
 
-  if (_last_gc_was_young && !_during_marking) {
-    _young_gc_eff_seq->add(cur_efficiency);
-  }
-
   _short_lived_surv_rate_group->start_adding_regions();
   // do that for any other surv rate groups
 
   if (update_stats) {
-    double pause_time_ms = elapsed_ms;
-
-    size_t diff = 0;
-    if (_max_pending_cards >= _pending_cards)
-      diff = _max_pending_cards - _pending_cards;
-    _pending_card_diff_seq->add((double) diff);
-
     double cost_per_card_ms = 0.0;
     if (_pending_cards > 0) {
-      cost_per_card_ms = update_rs_time / (double) _pending_cards;
+      cost_per_card_ms = phase_times()->average_last_update_rs_time() / (double) _pending_cards;
       _cost_per_card_ms_seq->add(cost_per_card_ms);
     }
 
@@ -1528,7 +1043,7 @@
 
     double cost_per_entry_ms = 0.0;
     if (cards_scanned > 10) {
-      cost_per_entry_ms = scan_rs_time / (double) cards_scanned;
+      cost_per_entry_ms = phase_times()->average_last_scan_rs_time() / (double) cards_scanned;
       if (_last_gc_was_young) {
         _cost_per_entry_ms_seq->add(cost_per_entry_ms);
       } else {
@@ -1568,7 +1083,7 @@
     size_t copied_bytes = surviving_bytes;
     double cost_per_byte_ms = 0.0;
     if (copied_bytes > 0) {
-      cost_per_byte_ms = obj_copy_time / (double) copied_bytes;
+      cost_per_byte_ms = phase_times()->average_last_obj_copy_time() / (double) copied_bytes;
       if (_in_marking_window) {
         _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
       } else {
@@ -1577,22 +1092,22 @@
     }
 
     double all_other_time_ms = pause_time_ms -
-      (update_rs_time + scan_rs_time + obj_copy_time +
-       _mark_closure_time_ms + termination_time);
+      (phase_times()->average_last_update_rs_time() + phase_times()->average_last_scan_rs_time()
+      + phase_times()->average_last_obj_copy_time() + phase_times()->average_last_termination_time());
 
     double young_other_time_ms = 0.0;
     if (young_cset_region_length() > 0) {
       young_other_time_ms =
-        _recorded_young_cset_choice_time_ms +
-        _recorded_young_free_cset_time_ms;
+        phase_times()->young_cset_choice_time_ms() +
+        phase_times()->young_free_cset_time_ms();
       _young_other_cost_per_region_ms_seq->add(young_other_time_ms /
                                           (double) young_cset_region_length());
     }
     double non_young_other_time_ms = 0.0;
     if (old_cset_region_length() > 0) {
       non_young_other_time_ms =
-        _recorded_non_young_cset_choice_time_ms +
-        _recorded_non_young_free_cset_time_ms;
+        phase_times()->non_young_cset_choice_time_ms() +
+        phase_times()->non_young_free_cset_time_ms();
 
       _non_young_other_cost_per_region_ms_seq->add(non_young_other_time_ms /
                                             (double) old_cset_region_length());
@@ -1603,9 +1118,9 @@
     _constant_other_time_ms_seq->add(constant_other_time_ms);
 
     double survival_ratio = 0.0;
-    if (_bytes_in_collection_set_before_gc > 0) {
+    if (_collection_set_bytes_used_before > 0) {
       survival_ratio = (double) _bytes_copied_during_gc /
-                                   (double) _bytes_in_collection_set_before_gc;
+                                   (double) _collection_set_bytes_used_before;
     }
 
     _pending_cards_seq->add((double) _pending_cards);
@@ -1619,18 +1134,23 @@
 
   // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
   double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
-  adjust_concurrent_refinement(update_rs_time, update_rs_processed_buffers, update_rs_time_goal_ms);
+  adjust_concurrent_refinement(phase_times()->average_last_update_rs_time(),
+                               phase_times()->sum_last_update_rs_processed_buffers(), update_rs_time_goal_ms);
 
-  assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end.");
+  _collectionSetChooser->verify();
 }
 
-#define EXT_SIZE_FORMAT "%d%s"
+#define EXT_SIZE_FORMAT "%.1f%s"
 #define EXT_SIZE_PARAMS(bytes)                                  \
-  byte_size_in_proper_unit((bytes)),                            \
+  byte_size_in_proper_unit((double)(bytes)),                    \
   proper_unit_for_byte_size((bytes))
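
The switch from "%d%s" to "%.1f%s" (plus the (double) cast) lets heap transition lines report fractional sizes such as "1.5G" rather than truncating to "1G". A toy equivalent of the two helpers combined (the real ones live elsewhere in HotSpot's utility code):

  #include <cstdio>

  // Scale a byte count to the largest unit under 1024 and print it
  // with one decimal place, e.g. 1610612736 bytes -> "1.5G".
  static void print_ext_size_toy(double bytes) {
    static const char* units[] = { "B", "K", "M", "G", "T" };
    int u = 0;
    while (bytes >= 1024.0 && u < 4) { bytes /= 1024.0; u++; }
    printf("%.1f%s", bytes, units[u]);
  }
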
 
 void G1CollectorPolicy::print_heap_transition() {
-  if (PrintGCDetails) {
+  _g1->print_size_transition(gclog_or_tty,
+    _cur_collection_pause_used_at_start_bytes, _g1->used(), _g1->capacity());
+}
+
+void G1CollectorPolicy::print_detailed_heap_transition() {
     YoungList* young_list = _g1->young_list();
     size_t eden_bytes = young_list->eden_used_bytes();
     size_t survivor_bytes = young_list->survivor_used_bytes();
@@ -1657,11 +1177,6 @@
       EXT_SIZE_PARAMS(capacity));
 
     _prev_eden_capacity = eden_capacity;
-  } else if (PrintGC) {
-    _g1->print_size_transition(gclog_or_tty,
-                               _cur_collection_pause_used_at_start_bytes,
-                               _g1->used(), _g1->capacity());
-  }
 }
 
 void G1CollectorPolicy::adjust_concurrent_refinement(double update_rs_time,
@@ -1706,37 +1221,11 @@
 }
 
 double
-G1CollectorPolicy::
-predict_young_collection_elapsed_time_ms(size_t adjustment) {
-  guarantee( adjustment == 0 || adjustment == 1, "invariant" );
-
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  size_t young_num = g1h->young_list()->length();
-  if (young_num == 0)
-    return 0.0;
-
-  young_num += adjustment;
-  size_t pending_cards = predict_pending_cards();
-  size_t rs_lengths = g1h->young_list()->sampled_rs_lengths() +
-                      predict_rs_length_diff();
-  size_t card_num;
-  if (gcs_are_young()) {
-    card_num = predict_young_card_num(rs_lengths);
-  } else {
-    card_num = predict_non_young_card_num(rs_lengths);
-  }
-  size_t young_byte_size = young_num * HeapRegion::GrainBytes;
-  double accum_yg_surv_rate =
-    _short_lived_surv_rate_group->accum_surv_rate(adjustment);
-
-  size_t bytes_to_copy =
-    (size_t) (accum_yg_surv_rate * (double) HeapRegion::GrainBytes);
-
+G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards,
+                                                size_t scanned_cards) {
   return
     predict_rs_update_time_ms(pending_cards) +
-    predict_rs_scan_time_ms(card_num) +
-    predict_object_copy_time_ms(bytes_to_copy) +
-    predict_young_other_time_ms(young_num) +
+    predict_rs_scan_time_ms(scanned_cards) +
     predict_constant_other_time_ms();
 }
 
@@ -1752,41 +1241,7 @@
   return predict_base_elapsed_time_ms(pending_cards, card_num);
 }
 
-double
-G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards,
-                                                size_t scanned_cards) {
-  return
-    predict_rs_update_time_ms(pending_cards) +
-    predict_rs_scan_time_ms(scanned_cards) +
-    predict_constant_other_time_ms();
-}
-
-double
-G1CollectorPolicy::predict_region_elapsed_time_ms(HeapRegion* hr,
-                                                  bool young) {
-  size_t rs_length = hr->rem_set()->occupied();
-  size_t card_num;
-  if (gcs_are_young()) {
-    card_num = predict_young_card_num(rs_length);
-  } else {
-    card_num = predict_non_young_card_num(rs_length);
-  }
-  size_t bytes_to_copy = predict_bytes_to_copy(hr);
-
-  double region_elapsed_time_ms =
-    predict_rs_scan_time_ms(card_num) +
-    predict_object_copy_time_ms(bytes_to_copy);
-
-  if (young)
-    region_elapsed_time_ms += predict_young_other_time_ms(1);
-  else
-    region_elapsed_time_ms += predict_non_young_other_time_ms(1);
-
-  return region_elapsed_time_ms;
-}
-
-size_t
-G1CollectorPolicy::predict_bytes_to_copy(HeapRegion* hr) {
+size_t G1CollectorPolicy::predict_bytes_to_copy(HeapRegion* hr) {
   size_t bytes_to_copy;
   if (hr->is_marked())
     bytes_to_copy = hr->max_live_bytes();
@@ -1799,9 +1254,38 @@
   return bytes_to_copy;
 }
 
+double
+G1CollectorPolicy::predict_region_elapsed_time_ms(HeapRegion* hr,
+                                                  bool for_young_gc) {
+  size_t rs_length = hr->rem_set()->occupied();
+  size_t card_num;
+
+  // The predicted number of cards depends on which type of GC
+  // we're predicting for.
+  if (for_young_gc) {
+    card_num = predict_young_card_num(rs_length);
+  } else {
+    card_num = predict_non_young_card_num(rs_length);
+  }
+  size_t bytes_to_copy = predict_bytes_to_copy(hr);
+
+  double region_elapsed_time_ms =
+    predict_rs_scan_time_ms(card_num) +
+    predict_object_copy_time_ms(bytes_to_copy);
+
+  // The prediction of the "other" time for this region is based
+  // upon the region type and NOT the GC type.
+  if (hr->is_young()) {
+    region_elapsed_time_ms += predict_young_other_time_ms(1);
+  } else {
+    region_elapsed_time_ms += predict_non_young_other_time_ms(1);
+  }
+  return region_elapsed_time_ms;
+}
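+// Callers combine this per-region prediction with the base-time prediction;
+// a sketch of the pattern used in finalize_cset():
+//   double total_ms = predict_base_elapsed_time_ms(_pending_cards)
+//                   + predict_region_elapsed_time_ms(hr, gcs_are_young());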
+
 void
-G1CollectorPolicy::init_cset_region_lengths(size_t eden_cset_region_length,
-                                          size_t survivor_cset_region_length) {
+G1CollectorPolicy::init_cset_region_lengths(uint eden_cset_region_length,
+                                            uint survivor_cset_region_length) {
   _eden_cset_region_length     = eden_cset_region_length;
   _survivor_cset_region_length = survivor_cset_region_length;
   _old_cset_region_length      = 0;
@@ -1855,198 +1339,9 @@
   }
 }
 
-class CountCSClosure: public HeapRegionClosure {
-  G1CollectorPolicy* _g1_policy;
-public:
-  CountCSClosure(G1CollectorPolicy* g1_policy) :
-    _g1_policy(g1_policy) {}
-  bool doHeapRegion(HeapRegion* r) {
-    _g1_policy->_bytes_in_collection_set_before_gc += r->used();
-    return false;
-  }
-};
-
-void G1CollectorPolicy::count_CS_bytes_used() {
-  CountCSClosure cs_closure(this);
-  _g1->collection_set_iterate(&cs_closure);
-}
-
-void G1CollectorPolicy::print_summary(int level,
-                                      const char* str,
-                                      NumberSeq* seq) const {
-  double sum = seq->sum();
-  LineBuffer(level + 1).append_and_print_cr("%-24s = %8.2lf s (avg = %8.2lf ms)",
-                str, sum / 1000.0, seq->avg());
-}
-
-void G1CollectorPolicy::print_summary_sd(int level,
-                                         const char* str,
-                                         NumberSeq* seq) const {
-  print_summary(level, str, seq);
-  LineBuffer(level + 6).append_and_print_cr("(num = %5d, std dev = %8.2lf ms, max = %8.2lf ms)",
-                seq->num(), seq->sd(), seq->maximum());
-}
-
-void G1CollectorPolicy::check_other_times(int level,
-                                        NumberSeq* other_times_ms,
-                                        NumberSeq* calc_other_times_ms) const {
-  bool should_print = false;
-  LineBuffer buf(level + 2);
-
-  double max_sum = MAX2(fabs(other_times_ms->sum()),
-                        fabs(calc_other_times_ms->sum()));
-  double min_sum = MIN2(fabs(other_times_ms->sum()),
-                        fabs(calc_other_times_ms->sum()));
-  double sum_ratio = max_sum / min_sum;
-  if (sum_ratio > 1.1) {
-    should_print = true;
-    buf.append_and_print_cr("## CALCULATED OTHER SUM DOESN'T MATCH RECORDED ###");
-  }
-
-  double max_avg = MAX2(fabs(other_times_ms->avg()),
-                        fabs(calc_other_times_ms->avg()));
-  double min_avg = MIN2(fabs(other_times_ms->avg()),
-                        fabs(calc_other_times_ms->avg()));
-  double avg_ratio = max_avg / min_avg;
-  if (avg_ratio > 1.1) {
-    should_print = true;
-    buf.append_and_print_cr("## CALCULATED OTHER AVG DOESN'T MATCH RECORDED ###");
-  }
-
-  if (other_times_ms->sum() < -0.01) {
-    buf.append_and_print_cr("## RECORDED OTHER SUM IS NEGATIVE ###");
-  }
-
-  if (other_times_ms->avg() < -0.01) {
-    buf.append_and_print_cr("## RECORDED OTHER AVG IS NEGATIVE ###");
-  }
-
-  if (calc_other_times_ms->sum() < -0.01) {
-    should_print = true;
-    buf.append_and_print_cr("## CALCULATED OTHER SUM IS NEGATIVE ###");
-  }
-
-  if (calc_other_times_ms->avg() < -0.01) {
-    should_print = true;
-    buf.append_and_print_cr("## CALCULATED OTHER AVG IS NEGATIVE ###");
-  }
-
-  if (should_print)
-    print_summary(level, "Other(Calc)", calc_other_times_ms);
-}
-
-void G1CollectorPolicy::print_summary(PauseSummary* summary) const {
-  bool parallel = G1CollectedHeap::use_parallel_gc_threads();
-  MainBodySummary*    body_summary = summary->main_body_summary();
-  if (summary->get_total_seq()->num() > 0) {
-    print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq());
-    if (body_summary != NULL) {
-      print_summary(1, "Root Region Scan Wait", body_summary->get_root_region_scan_wait_seq());
-      if (parallel) {
-        print_summary(1, "Parallel Time", body_summary->get_parallel_seq());
-        print_summary(2, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
-        print_summary(2, "SATB Filtering", body_summary->get_satb_filtering_seq());
-        print_summary(2, "Update RS", body_summary->get_update_rs_seq());
-        print_summary(2, "Scan RS", body_summary->get_scan_rs_seq());
-        print_summary(2, "Object Copy", body_summary->get_obj_copy_seq());
-        print_summary(2, "Termination", body_summary->get_termination_seq());
-        print_summary(2, "Parallel Other", body_summary->get_parallel_other_seq());
-        {
-          NumberSeq* other_parts[] = {
-            body_summary->get_ext_root_scan_seq(),
-            body_summary->get_satb_filtering_seq(),
-            body_summary->get_update_rs_seq(),
-            body_summary->get_scan_rs_seq(),
-            body_summary->get_obj_copy_seq(),
-            body_summary->get_termination_seq()
-          };
-          NumberSeq calc_other_times_ms(body_summary->get_parallel_seq(),
-                                        6, other_parts);
-          check_other_times(2, body_summary->get_parallel_other_seq(),
-                            &calc_other_times_ms);
-        }
-      } else {
-        print_summary(1, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
-        print_summary(1, "SATB Filtering", body_summary->get_satb_filtering_seq());
-        print_summary(1, "Update RS", body_summary->get_update_rs_seq());
-        print_summary(1, "Scan RS", body_summary->get_scan_rs_seq());
-        print_summary(1, "Object Copy", body_summary->get_obj_copy_seq());
-      }
-    }
-    print_summary(1, "Mark Closure", body_summary->get_mark_closure_seq());
-    print_summary(1, "Clear CT", body_summary->get_clear_ct_seq());
-    print_summary(1, "Other", summary->get_other_seq());
-    {
-      if (body_summary != NULL) {
-        NumberSeq calc_other_times_ms;
-        if (parallel) {
-          // parallel
-          NumberSeq* other_parts[] = {
-            body_summary->get_satb_drain_seq(),
-            body_summary->get_root_region_scan_wait_seq(),
-            body_summary->get_parallel_seq(),
-            body_summary->get_clear_ct_seq()
-          };
-          calc_other_times_ms = NumberSeq(summary->get_total_seq(),
-                                          4, other_parts);
-        } else {
-          // serial
-          NumberSeq* other_parts[] = {
-            body_summary->get_satb_drain_seq(),
-            body_summary->get_root_region_scan_wait_seq(),
-            body_summary->get_update_rs_seq(),
-            body_summary->get_ext_root_scan_seq(),
-            body_summary->get_satb_filtering_seq(),
-            body_summary->get_scan_rs_seq(),
-            body_summary->get_obj_copy_seq()
-          };
-          calc_other_times_ms = NumberSeq(summary->get_total_seq(),
-                                          7, other_parts);
-        }
-        check_other_times(1,  summary->get_other_seq(), &calc_other_times_ms);
-      }
-    }
-  } else {
-    LineBuffer(1).append_and_print_cr("none");
-  }
-  LineBuffer(0).append_and_print_cr("");
-}
-
 void G1CollectorPolicy::print_tracing_info() const {
-  if (TraceGen0Time) {
-    gclog_or_tty->print_cr("ALL PAUSES");
-    print_summary_sd(0, "Total", _all_pause_times_ms);
-    gclog_or_tty->print_cr("");
-    gclog_or_tty->print_cr("");
-    gclog_or_tty->print_cr("   Young GC Pauses: %8d", _young_pause_num);
-    gclog_or_tty->print_cr("   Mixed GC Pauses: %8d", _mixed_pause_num);
-    gclog_or_tty->print_cr("");
-
-    gclog_or_tty->print_cr("EVACUATION PAUSES");
-    print_summary(_summary);
-
-    gclog_or_tty->print_cr("MISC");
-    print_summary_sd(0, "Stop World", _all_stop_world_times_ms);
-    print_summary_sd(0, "Yields", _all_yield_times_ms);
-    for (int i = 0; i < _aux_num; ++i) {
-      if (_all_aux_times_ms[i].num() > 0) {
-        char buffer[96];
-        sprintf(buffer, "Aux%d", i);
-        print_summary_sd(0, buffer, &_all_aux_times_ms[i]);
-      }
-    }
-  }
-  if (TraceGen1Time) {
-    if (_all_full_gc_times_ms->num() > 0) {
-      gclog_or_tty->print("\n%4d full_gcs: total time = %8.2f s",
-                 _all_full_gc_times_ms->num(),
-                 _all_full_gc_times_ms->sum() / 1000.0);
-      gclog_or_tty->print_cr(" (avg = %8.2fms).", _all_full_gc_times_ms->avg());
-      gclog_or_tty->print_cr("                     [std. dev = %8.2f ms, max = %8.2f ms]",
-                    _all_full_gc_times_ms->sd(),
-                    _all_full_gc_times_ms->maximum());
-    }
-  }
+  _trace_gen0_time_data.print();
+  _trace_gen1_time_data.print();
 }
 
 void G1CollectorPolicy::print_yg_surv_rate_info() const {
@@ -2068,7 +1363,7 @@
 }
 #endif // PRODUCT
 
-size_t G1CollectorPolicy::max_regions(int purpose) {
+uint G1CollectorPolicy::max_regions(int purpose) {
   switch (purpose) {
     case GCAllocForSurvived:
       return _max_survivor_regions;
@@ -2081,13 +1376,13 @@
 }
 
 void G1CollectorPolicy::update_max_gc_locker_expansion() {
-  size_t expansion_region_num = 0;
+  uint expansion_region_num = 0;
   if (GCLockerEdenExpansionPercent > 0) {
     double perc = (double) GCLockerEdenExpansionPercent / 100.0;
     double expansion_region_num_d = perc * (double) _young_list_target_length;
     // We use ceiling so that if expansion_region_num_d is > 0.0 (but
     // less than 1.0) we'll get 1.
-    expansion_region_num = (size_t) ceil(expansion_region_num_d);
+    expansion_region_num = (uint) ceil(expansion_region_num_d);
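+    // E.g. (hypothetical numbers): GCLockerEdenExpansionPercent = 5 and a
+    // target length of 10 regions give 0.5, which ceil() rounds up to 1.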
   } else {
     assert(expansion_region_num == 0, "sanity");
   }
@@ -2101,34 +1396,12 @@
                  (double) _young_list_target_length / (double) SurvivorRatio;
   // We use ceiling so that if max_survivor_regions_d is > 0.0 (but
   // smaller than 1.0) we'll get 1.
-  _max_survivor_regions = (size_t) ceil(max_survivor_regions_d);
+  _max_survivor_regions = (uint) ceil(max_survivor_regions_d);
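+  // E.g. (hypothetical numbers): a young target of 60 regions with
+  // SurvivorRatio = 8 gives 60 / 8 = 7.5, which ceil() rounds up to 8.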
 
   _tenuring_threshold = _survivors_age_table.compute_tenuring_threshold(
         HeapRegion::GrainWords * _max_survivor_regions);
 }
 
-#ifndef PRODUCT
-class HRSortIndexIsOKClosure: public HeapRegionClosure {
-  CollectionSetChooser* _chooser;
-public:
-  HRSortIndexIsOKClosure(CollectionSetChooser* chooser) :
-    _chooser(chooser) {}
-
-  bool doHeapRegion(HeapRegion* r) {
-    if (!r->continuesHumongous()) {
-      assert(_chooser->regionProperlyOrdered(r), "Ought to be.");
-    }
-    return false;
-  }
-};
-
-bool G1CollectorPolicy::assertMarkedBytesDataOK() {
-  HRSortIndexIsOKClosure cl(_collectionSetChooser);
-  _g1->heap_region_iterate(&cl);
-  return true;
-}
-#endif
-
 bool G1CollectorPolicy::force_initial_mark_if_outside_cycle(
                                                      GCCause::Cause gc_cause) {
   bool during_cycle = _g1->concurrent_mark()->cmThread()->during_cycle();
@@ -2226,8 +1499,8 @@
       // We will skip any region that's currently used as an old GC
       // alloc region (we should not consider those for collection
       // before we fill them up).
-      if (_hrSorted->shouldAdd(r) && !_g1h->is_old_gc_alloc_region(r)) {
-        _hrSorted->addMarkedHeapRegion(r);
+      if (_hrSorted->should_add(r) && !_g1h->is_old_gc_alloc_region(r)) {
+        _hrSorted->add_region(r);
       }
     }
     return false;
@@ -2236,130 +1509,75 @@
 
 class ParKnownGarbageHRClosure: public HeapRegionClosure {
   G1CollectedHeap* _g1h;
-  CollectionSetChooser* _hrSorted;
-  jint _marked_regions_added;
-  size_t _reclaimable_bytes_added;
-  jint _chunk_size;
-  jint _cur_chunk_idx;
-  jint _cur_chunk_end; // Cur chunk [_cur_chunk_idx, _cur_chunk_end)
-  int _worker;
-  int _invokes;
-
-  void get_new_chunk() {
-    _cur_chunk_idx = _hrSorted->getParMarkedHeapRegionChunk(_chunk_size);
-    _cur_chunk_end = _cur_chunk_idx + _chunk_size;
-  }
-  void add_region(HeapRegion* r) {
-    if (_cur_chunk_idx == _cur_chunk_end) {
-      get_new_chunk();
-    }
-    assert(_cur_chunk_idx < _cur_chunk_end, "postcondition");
-    _hrSorted->setMarkedHeapRegion(_cur_chunk_idx, r);
-    _marked_regions_added++;
-    _reclaimable_bytes_added += r->reclaimable_bytes();
-    _cur_chunk_idx++;
-  }
+  CSetChooserParUpdater _cset_updater;
 
 public:
   ParKnownGarbageHRClosure(CollectionSetChooser* hrSorted,
-                           jint chunk_size,
-                           int worker) :
-      _g1h(G1CollectedHeap::heap()),
-      _hrSorted(hrSorted), _chunk_size(chunk_size), _worker(worker),
-      _marked_regions_added(0), _reclaimable_bytes_added(0),
-      _cur_chunk_idx(0), _cur_chunk_end(0), _invokes(0) { }
+                           uint chunk_size) :
+    _g1h(G1CollectedHeap::heap()),
+    _cset_updater(hrSorted, true /* parallel */, chunk_size) { }
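+  // (CSetChooserParUpdater encapsulates the chunked, parallel addition of
+  // regions to the chooser, replacing the hand-rolled chunk-claiming logic
+  // removed above.)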
 
   bool doHeapRegion(HeapRegion* r) {
-    // We only include humongous regions in collection
-    // sets when concurrent mark shows that their contained object is
-    // unreachable.
-    _invokes++;
-
     // Do we have any marking information for this region?
     if (r->is_marked()) {
       // We will skip any region that's currently used as an old GC
       // alloc region (we should not consider those for collection
       // before we fill them up).
-      if (_hrSorted->shouldAdd(r) && !_g1h->is_old_gc_alloc_region(r)) {
-        add_region(r);
+      if (_cset_updater.should_add(r) && !_g1h->is_old_gc_alloc_region(r)) {
+        _cset_updater.add_region(r);
       }
     }
     return false;
   }
-  jint marked_regions_added() { return _marked_regions_added; }
-  size_t reclaimable_bytes_added() { return _reclaimable_bytes_added; }
-  int invokes() { return _invokes; }
 };
 
 class ParKnownGarbageTask: public AbstractGangTask {
   CollectionSetChooser* _hrSorted;
-  jint _chunk_size;
+  uint _chunk_size;
   G1CollectedHeap* _g1;
 public:
-  ParKnownGarbageTask(CollectionSetChooser* hrSorted, jint chunk_size) :
+  ParKnownGarbageTask(CollectionSetChooser* hrSorted, uint chunk_size) :
     AbstractGangTask("ParKnownGarbageTask"),
     _hrSorted(hrSorted), _chunk_size(chunk_size),
     _g1(G1CollectedHeap::heap()) { }
 
   void work(uint worker_id) {
-    ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted,
-                                               _chunk_size,
-                                               worker_id);
+    ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted, _chunk_size);
+
     // Back to zero for the claim value.
     _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, worker_id,
                                          _g1->workers()->active_workers(),
                                          HeapRegion::InitialClaimValue);
-    jint regions_added = parKnownGarbageCl.marked_regions_added();
-    size_t reclaimable_bytes_added =
-                                   parKnownGarbageCl.reclaimable_bytes_added();
-    _hrSorted->updateTotals(regions_added, reclaimable_bytes_added);
-    if (G1PrintParCleanupStats) {
-      gclog_or_tty->print_cr("     Thread %d called %d times, added %d regions to list.",
-                 worker_id, parKnownGarbageCl.invokes(), regions_added);
-    }
   }
 };
 
 void
 G1CollectorPolicy::record_concurrent_mark_cleanup_end(int no_of_gc_threads) {
-  double start_sec;
-  if (G1PrintParCleanupStats) {
-    start_sec = os::elapsedTime();
-  }
+  _collectionSetChooser->clear();
 
-  _collectionSetChooser->clearMarkedHeapRegions();
-  double clear_marked_end_sec;
-  if (G1PrintParCleanupStats) {
-    clear_marked_end_sec = os::elapsedTime();
-    gclog_or_tty->print_cr("  clear marked regions: %8.3f ms.",
-                           (clear_marked_end_sec - start_sec) * 1000.0);
-  }
-
+  uint region_num = _g1->n_regions();
   if (G1CollectedHeap::use_parallel_gc_threads()) {
-    const size_t OverpartitionFactor = 4;
-    size_t WorkUnit;
+    const uint OverpartitionFactor = 4;
+    uint WorkUnit;
     // The use of MinChunkSize = 8 in the original code
     // causes some assertion failures when the total number of
     // regions is less than 8.  The code here tries to fix that.
     // Should the original code also be fixed?
     if (no_of_gc_threads > 0) {
-      const size_t MinWorkUnit =
-        MAX2(_g1->n_regions() / no_of_gc_threads, (size_t) 1U);
-      WorkUnit =
-        MAX2(_g1->n_regions() / (no_of_gc_threads * OverpartitionFactor),
-             MinWorkUnit);
+      const uint MinWorkUnit = MAX2(region_num / no_of_gc_threads, 1U);
+      WorkUnit = MAX2(region_num / (no_of_gc_threads * OverpartitionFactor),
+                      MinWorkUnit);
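+      // E.g. (hypothetical numbers): 1000 regions and 8 threads give
+      // MinWorkUnit = 1000 / 8 = 125 and WorkUnit = MAX2(1000 / 32, 125)
+      // = 125, i.e. each worker claims roughly one chunk.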
     } else {
       assert(no_of_gc_threads > 0,
         "The active gc workers should be greater than 0");
       // In a product build do something reasonable to avoid a crash.
-      const size_t MinWorkUnit =
-        MAX2(_g1->n_regions() / ParallelGCThreads, (size_t) 1U);
+      const uint MinWorkUnit = MAX2(region_num / (uint) ParallelGCThreads, 1U);
       WorkUnit =
-        MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor),
+        MAX2(region_num / (uint) (ParallelGCThreads * OverpartitionFactor),
              MinWorkUnit);
     }
-    _collectionSetChooser->prepareForAddMarkedHeapRegionsPar(_g1->n_regions(),
-                                                             WorkUnit);
+    _collectionSetChooser->prepare_for_par_region_addition(region_num,
+                                                           WorkUnit);
-    ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser,
-                                            (int) WorkUnit);
+    ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser, WorkUnit);
     _g1->workers()->run_task(&parKnownGarbageTask);
@@ -2370,20 +1588,10 @@
     KnownGarbageClosure knownGarbagecl(_collectionSetChooser);
     _g1->heap_region_iterate(&knownGarbagecl);
   }
-  double known_garbage_end_sec;
-  if (G1PrintParCleanupStats) {
-    known_garbage_end_sec = os::elapsedTime();
-    gclog_or_tty->print_cr("  compute known garbage: %8.3f ms.",
-                      (known_garbage_end_sec - clear_marked_end_sec) * 1000.0);
-  }
 
-  _collectionSetChooser->sortMarkedHeapRegions();
+  _collectionSetChooser->sort_regions();
+
   double end_sec = os::elapsedTime();
-  if (G1PrintParCleanupStats) {
-    gclog_or_tty->print_cr("  sorting: %8.3f ms.",
-                           (end_sec - known_garbage_end_sec) * 1000.0);
-  }
-
   double elapsed_time_ms = (end_sec - _mark_cleanup_start_sec) * 1000.0;
   _concurrent_mark_cleanup_times_ms->add(elapsed_time_ms);
   _cur_mark_stop_world_time_ms += elapsed_time_ms;
@@ -2469,7 +1677,7 @@
   // retiring the current allocation region) or a concurrent
   // refine thread (RSet sampling).
 
-  double region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true);
+  double region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, gcs_are_young());
   size_t used_bytes = hr->used();
   _inc_cset_recorded_rs_lengths += rs_length;
   _inc_cset_predicted_elapsed_time_ms += region_elapsed_time_ms;
@@ -2504,7 +1712,7 @@
   _inc_cset_recorded_rs_lengths_diffs += rs_lengths_diff;
 
   double old_elapsed_time_ms = hr->predicted_elapsed_time_ms();
-  double new_region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true);
+  double new_region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, gcs_are_young());
   double elapsed_ms_diff = new_region_elapsed_time_ms - old_elapsed_time_ms;
   _inc_cset_predicted_elapsed_time_ms_diffs += elapsed_ms_diff;
 
@@ -2581,16 +1789,10 @@
   while (csr != NULL) {
     HeapRegion* next = csr->next_in_collection_set();
     assert(csr->in_collection_set(), "bad CS");
-    st->print_cr("  [%08x-%08x], t: %08x, P: %08x, N: %08x, C: %08x, "
-                 "age: %4d, y: %d, surv: %d",
-                        csr->bottom(), csr->end(),
-                        csr->top(),
-                        csr->prev_top_at_mark_start(),
-                        csr->next_top_at_mark_start(),
-                        csr->top_at_conc_mark_count(),
-                        csr->age_in_surv_rate_group_cond(),
-                        csr->is_young(),
-                        csr->is_survivor());
+    st->print_cr("  "HR_FORMAT", P: "PTR_FORMAT "N: "PTR_FORMAT", age: %4d",
+                 HR_FORMAT_PARAMS(csr),
+                 csr->prev_top_at_mark_start(), csr->next_top_at_mark_start(),
+                 csr->age_in_surv_rate_group_cond());
     csr = next;
   }
 }
@@ -2599,16 +1801,16 @@
 bool G1CollectorPolicy::next_gc_should_be_mixed(const char* true_action_str,
                                                 const char* false_action_str) {
   CollectionSetChooser* cset_chooser = _collectionSetChooser;
-  if (cset_chooser->isEmpty()) {
+  if (cset_chooser->is_empty()) {
     ergo_verbose0(ErgoMixedGCs,
                   false_action_str,
                   ergo_format_reason("candidate old regions not available"));
     return false;
   }
-  size_t reclaimable_bytes = cset_chooser->remainingReclaimableBytes();
+  size_t reclaimable_bytes = cset_chooser->remaining_reclaimable_bytes();
   size_t capacity_bytes = _g1->capacity();
   double perc = (double) reclaimable_bytes * 100.0 / (double) capacity_bytes;
-  double threshold = (double) G1OldReclaimableThresholdPercent;
+  double threshold = (double) G1HeapWastePercent;
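+  // E.g. (hypothetical numbers): 200 MB reclaimable in a 4 GB heap is ~4.9%;
+  // with G1HeapWastePercent = 10 that is below the threshold, so no further
+  // mixed GCs are started.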
   if (perc < threshold) {
     ergo_verbose4(ErgoMixedGCs,
               false_action_str,
@@ -2616,7 +1818,7 @@
               ergo_format_region("candidate old regions")
               ergo_format_byte_perc("reclaimable")
               ergo_format_perc("threshold"),
-              cset_chooser->remainingRegions(),
+              cset_chooser->remaining_regions(),
               reclaimable_bytes, perc, threshold);
     return false;
   }
@@ -2627,14 +1829,13 @@
                 ergo_format_region("candidate old regions")
                 ergo_format_byte_perc("reclaimable")
                 ergo_format_perc("threshold"),
-                cset_chooser->remainingRegions(),
+                cset_chooser->remaining_regions(),
                 reclaimable_bytes, perc, threshold);
   return true;
 }
 
 void G1CollectorPolicy::finalize_cset(double target_pause_time_ms) {
-  // Set this here - in case we're not doing young collections.
-  double non_young_start_time_sec = os::elapsedTime();
+  double young_start_time_sec = os::elapsedTime();
 
   YoungList* young_list = _g1->young_list();
   finalize_incremental_cset_building();
@@ -2648,33 +1849,31 @@
   double predicted_pause_time_ms = base_time_ms;
   double time_remaining_ms = target_pause_time_ms - base_time_ms;
 
-  ergo_verbose3(ErgoCSetConstruction | ErgoHigh,
+  ergo_verbose4(ErgoCSetConstruction | ErgoHigh,
                 "start choosing CSet",
+                ergo_format_size("_pending_cards")
                 ergo_format_ms("predicted base time")
                 ergo_format_ms("remaining time")
                 ergo_format_ms("target pause time"),
-                base_time_ms, time_remaining_ms, target_pause_time_ms);
+                _pending_cards, base_time_ms, time_remaining_ms, target_pause_time_ms);
 
-  HeapRegion* hr;
-  double young_start_time_sec = os::elapsedTime();
-
-  _collection_set_bytes_used_before = 0;
   _last_gc_was_young = gcs_are_young() ? true : false;
 
   if (_last_gc_was_young) {
-    ++_young_pause_num;
+    _trace_gen0_time_data.increment_young_collection_count();
   } else {
-    ++_mixed_pause_num;
+    _trace_gen0_time_data.increment_mixed_collection_count();
   }
 
   // The young list is laid out so that the survivor regions from the
   // previous pause are appended to the RHS of the young list, i.e.
   //   [Newly Young Regions ++ Survivors from last pause].
 
-  size_t survivor_region_length = young_list->survivor_length();
-  size_t eden_region_length = young_list->length() - survivor_region_length;
+  uint survivor_region_length = young_list->survivor_length();
+  uint eden_region_length = young_list->length() - survivor_region_length;
   init_cset_region_lengths(eden_region_length, survivor_region_length);
-  hr = young_list->first_survivor_region();
+
+  HeapRegion* hr = young_list->first_survivor_region();
   while (hr != NULL) {
     assert(hr->is_survivor(), "badly formed young list");
     hr->set_young();
@@ -2702,20 +1901,20 @@
   set_recorded_rs_lengths(_inc_cset_recorded_rs_lengths);
 
   double young_end_time_sec = os::elapsedTime();
-  _recorded_young_cset_choice_time_ms =
-    (young_end_time_sec - young_start_time_sec) * 1000.0;
+  phase_times()->record_young_cset_choice_time_ms((young_end_time_sec - young_start_time_sec) * 1000.0);
 
-  // We are doing young collections so reset this.
-  non_young_start_time_sec = young_end_time_sec;
+  // Set the start of the non-young choice time.
+  double non_young_start_time_sec = young_end_time_sec;
 
   if (!gcs_are_young()) {
     CollectionSetChooser* cset_chooser = _collectionSetChooser;
-    assert(cset_chooser->verify(), "CSet Chooser verification - pre");
-    const size_t min_old_cset_length = cset_chooser->calcMinOldCSetLength();
-    const size_t max_old_cset_length = cset_chooser->calcMaxOldCSetLength();
+    cset_chooser->verify();
+    const uint min_old_cset_length = cset_chooser->calc_min_old_cset_length();
+    const uint max_old_cset_length = cset_chooser->calc_max_old_cset_length();
 
-    size_t expensive_region_num = 0;
+    uint expensive_region_num = 0;
     bool check_time_remaining = adaptive_young_list_length();
+
     HeapRegion* hr = cset_chooser->peek();
     while (hr != NULL) {
       if (old_cset_region_length() >= max_old_cset_length) {
@@ -2729,7 +1928,7 @@
         break;
       }
 
-      double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
+      double predicted_time_ms = predict_region_elapsed_time_ms(hr, gcs_are_young());
       if (check_time_remaining) {
         if (predicted_time_ms > time_remaining_ms) {
           // Too expensive for the current CSet.
@@ -2799,13 +1998,11 @@
                     time_remaining_ms);
     }
 
-    assert(cset_chooser->verify(), "CSet Chooser verification - post");
+    cset_chooser->verify();
   }
 
   stop_incremental_cset_building();
 
-  count_CS_bytes_used();
-
   ergo_verbose5(ErgoCSetConstruction,
                 "finish choosing CSet",
                 ergo_format_region("eden")
@@ -2818,6 +2015,129 @@
                 predicted_pause_time_ms, target_pause_time_ms);
 
   double non_young_end_time_sec = os::elapsedTime();
-  _recorded_non_young_cset_choice_time_ms =
-    (non_young_end_time_sec - non_young_start_time_sec) * 1000.0;
+  phase_times()->record_non_young_cset_choice_time_ms((non_young_end_time_sec - non_young_start_time_sec) * 1000.0);
+}
+
+void TraceGen0TimeData::record_start_collection(double time_to_stop_the_world_ms) {
+  if (TraceGen0Time) {
+    _all_stop_world_times_ms.add(time_to_stop_the_world_ms);
+  }
+}
+
+void TraceGen0TimeData::record_yield_time(double yield_time_ms) {
+  if (TraceGen0Time) {
+    _all_yield_times_ms.add(yield_time_ms);
+  }
+}
+
+void TraceGen0TimeData::record_end_collection(double pause_time_ms, G1GCPhaseTimes* phase_times) {
+  if (TraceGen0Time) {
+    _total.add(pause_time_ms);
+    _other.add(pause_time_ms - phase_times->accounted_time_ms());
+    _root_region_scan_wait.add(phase_times->root_region_scan_wait_time_ms());
+    _parallel.add(phase_times->cur_collection_par_time_ms());
+    _ext_root_scan.add(phase_times->average_last_ext_root_scan_time());
+    _satb_filtering.add(phase_times->average_last_satb_filtering_times_ms());
+    _update_rs.add(phase_times->average_last_update_rs_time());
+    _scan_rs.add(phase_times->average_last_scan_rs_time());
+    _obj_copy.add(phase_times->average_last_obj_copy_time());
+    _termination.add(phase_times->average_last_termination_time());
+
+    double parallel_known_time = phase_times->average_last_ext_root_scan_time() +
+      phase_times->average_last_satb_filtering_times_ms() +
+      phase_times->average_last_update_rs_time() +
+      phase_times->average_last_scan_rs_time() +
+      phase_times->average_last_obj_copy_time() +
+      phase_times->average_last_termination_time();
+
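+    // "Parallel Other" is the residual: parallel time not attributed to
+    // any of the sub-phases tracked above.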
+    double parallel_other_time = phase_times->cur_collection_par_time_ms() - parallel_known_time;
+    _parallel_other.add(parallel_other_time);
+    _clear_ct.add(phase_times->cur_clear_ct_time_ms());
+  }
+}
+
+void TraceGen0TimeData::increment_young_collection_count() {
+  if (TraceGen0Time) {
+    ++_young_pause_num;
+  }
+}
+
+void TraceGen0TimeData::increment_mixed_collection_count() {
+  if (TraceGen0Time) {
+    ++_mixed_pause_num;
+  }
+}
+
+void TraceGen0TimeData::print_summary(const char* str,
+                                      const NumberSeq* seq) const {
+  double sum = seq->sum();
+  gclog_or_tty->print_cr("%-27s = %8.2lf s (avg = %8.2lf ms)",
+                str, sum / 1000.0, seq->avg());
 }
+
+void TraceGen0TimeData::print_summary_sd(const char* str,
+                                         const NumberSeq* seq) const {
+  print_summary(str, seq);
+  gclog_or_tty->print_cr("%+45s = %5d, std dev = %8.2lf ms, max = %8.2lf ms)",
+                "(num", seq->num(), seq->sd(), seq->maximum());
+}
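+// Illustrative output from the two methods above (values are made up):
+//    Total                      =     2.50 s (avg =     5.00 ms)
+//                                        (num =   500, std dev =     1.20 ms, max =    15.00 ms)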
+
+void TraceGen0TimeData::print() const {
+  if (!TraceGen0Time) {
+    return;
+  }
+
+  gclog_or_tty->print_cr("ALL PAUSES");
+  print_summary_sd("   Total", &_total);
+  gclog_or_tty->print_cr("");
+  gclog_or_tty->print_cr("");
+  gclog_or_tty->print_cr("   Young GC Pauses: %8d", _young_pause_num);
+  gclog_or_tty->print_cr("   Mixed GC Pauses: %8d", _mixed_pause_num);
+  gclog_or_tty->print_cr("");
+
+  gclog_or_tty->print_cr("EVACUATION PAUSES");
+
+  if (_young_pause_num == 0 && _mixed_pause_num == 0) {
+    gclog_or_tty->print_cr("none");
+  } else {
+    print_summary_sd("   Evacuation Pauses", &_total);
+    print_summary("      Root Region Scan Wait", &_root_region_scan_wait);
+    print_summary("      Parallel Time", &_parallel);
+    print_summary("         Ext Root Scanning", &_ext_root_scan);
+    print_summary("         SATB Filtering", &_satb_filtering);
+    print_summary("         Update RS", &_update_rs);
+    print_summary("         Scan RS", &_scan_rs);
+    print_summary("         Object Copy", &_obj_copy);
+    print_summary("         Termination", &_termination);
+    print_summary("         Parallel Other", &_parallel_other);
+    print_summary("      Clear CT", &_clear_ct);
+    print_summary("      Other", &_other);
+  }
+  gclog_or_tty->print_cr("");
+
+  gclog_or_tty->print_cr("MISC");
+  print_summary_sd("   Stop World", &_all_stop_world_times_ms);
+  print_summary_sd("   Yields", &_all_yield_times_ms);
+}
+
+void TraceGen1TimeData::record_full_collection(double full_gc_time_ms) {
+  if (TraceGen1Time) {
+    _all_full_gc_times.add(full_gc_time_ms);
+  }
+}
+
+void TraceGen1TimeData::print() const {
+  if (!TraceGen1Time) {
+    return;
+  }
+
+  if (_all_full_gc_times.num() > 0) {
+    gclog_or_tty->print("\n%4d full_gcs: total time = %8.2f s",
+      _all_full_gc_times.num(),
+      _all_full_gc_times.sum() / 1000.0);
+    gclog_or_tty->print_cr(" (avg = %8.2fms).", _all_full_gc_times.avg());
+    gclog_or_tty->print_cr("                     [std. dev = %8.2f ms, max = %8.2f ms]",
+      _all_full_gc_times.sd(),
+      _all_full_gc_times.maximum());
+  }
+}
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -36,52 +36,52 @@
 
 class HeapRegion;
 class CollectionSetChooser;
+class G1GCPhaseTimes;
 
-// Yes, this is a bit unpleasant... but it saves replicating the same thing
-// over and over again and introducing subtle problems through small typos and
-// cutting and pasting mistakes. The macros below introduces a number
-// sequnce into the following two classes and the methods that access it.
+// TraceGen0Time collects data on _both_ young and mixed evacuation pauses
+// (the latter may contain non-young regions - i.e. regions that are
+// technically in Gen1) while TraceGen1Time collects data about full GCs.
+class TraceGen0TimeData : public CHeapObj<mtGC> {
+ private:
+  unsigned  _young_pause_num;
+  unsigned  _mixed_pause_num;
+
+  NumberSeq _all_stop_world_times_ms;
+  NumberSeq _all_yield_times_ms;
 
-#define define_num_seq(name)                                                  \
-private:                                                                      \
-  NumberSeq _all_##name##_times_ms;                                           \
-public:                                                                       \
-  void record_##name##_time_ms(double ms) {                                   \
-    _all_##name##_times_ms.add(ms);                                           \
-  }                                                                           \
-  NumberSeq* get_##name##_seq() {                                             \
-    return &_all_##name##_times_ms;                                           \
-  }
+  NumberSeq _total;
+  NumberSeq _other;
+  NumberSeq _root_region_scan_wait;
+  NumberSeq _parallel;
+  NumberSeq _ext_root_scan;
+  NumberSeq _satb_filtering;
+  NumberSeq _update_rs;
+  NumberSeq _scan_rs;
+  NumberSeq _obj_copy;
+  NumberSeq _termination;
+  NumberSeq _parallel_other;
+  NumberSeq _clear_ct;
 
-class MainBodySummary;
-
-class PauseSummary: public CHeapObj {
-  define_num_seq(total)
-    define_num_seq(other)
+  void print_summary(const char* str, const NumberSeq* seq) const;
+  void print_summary_sd(const char* str, const NumberSeq* seq) const;
 
 public:
-  virtual MainBodySummary*    main_body_summary()    { return NULL; }
+  TraceGen0TimeData() : _young_pause_num(0), _mixed_pause_num(0) { }
+  void record_start_collection(double time_to_stop_the_world_ms);
+  void record_yield_time(double yield_time_ms);
+  void record_end_collection(double pause_time_ms, G1GCPhaseTimes* phase_times);
+  void increment_young_collection_count();
+  void increment_mixed_collection_count();
+  void print() const;
 };
 
-class MainBodySummary: public CHeapObj {
-  define_num_seq(satb_drain) // optional
-  define_num_seq(root_region_scan_wait)
-  define_num_seq(parallel) // parallel only
-    define_num_seq(ext_root_scan)
-    define_num_seq(satb_filtering)
-    define_num_seq(update_rs)
-    define_num_seq(scan_rs)
-    define_num_seq(obj_copy)
-    define_num_seq(termination) // parallel only
-    define_num_seq(parallel_other) // parallel only
-  define_num_seq(mark_closure)
-  define_num_seq(clear_ct)
-};
+class TraceGen1TimeData : public CHeapObj<mtGC> {
+ private:
+  NumberSeq _all_full_gc_times;
 
-class Summary: public PauseSummary,
-               public MainBodySummary {
-public:
-  virtual MainBodySummary*    main_body_summary()    { return this; }
+ public:
+  void record_full_collection(double full_gc_time_ms);
+  void print() const;
 };
 
 // There are three command line options related to the young gen size:
@@ -120,7 +120,7 @@
 //
 // NewSize and MaxNewSize override NewRatio. So, NewRatio is ignored if it is
 // combined with either NewSize or MaxNewSize. (A warning message is printed.)
-class G1YoungGenSizer : public CHeapObj {
+class G1YoungGenSizer : public CHeapObj<mtGC> {
 private:
   enum SizerKind {
     SizerDefaults,
@@ -130,19 +130,19 @@
     SizerNewRatio
   };
   SizerKind _sizer_kind;
-  size_t _min_desired_young_length;
-  size_t _max_desired_young_length;
+  uint _min_desired_young_length;
+  uint _max_desired_young_length;
   bool _adaptive_size;
-  size_t calculate_default_min_length(size_t new_number_of_heap_regions);
-  size_t calculate_default_max_length(size_t new_number_of_heap_regions);
+  uint calculate_default_min_length(uint new_number_of_heap_regions);
+  uint calculate_default_max_length(uint new_number_of_heap_regions);
 
 public:
   G1YoungGenSizer();
-  void heap_size_changed(size_t new_number_of_heap_regions);
-  size_t min_desired_young_length() {
+  void heap_size_changed(uint new_number_of_heap_regions);
+  uint min_desired_young_length() {
     return _min_desired_young_length;
   }
-  size_t max_desired_young_length() {
+  uint max_desired_young_length() {
     return _max_desired_young_length;
   }
   bool adaptive_young_list_length() {
@@ -175,23 +175,9 @@
 
   CollectionSetChooser* _collectionSetChooser;
 
-  double _cur_collection_start_sec;
+  double _full_collection_start_sec;
   size_t _cur_collection_pause_used_at_start_bytes;
-  size_t _cur_collection_pause_used_regions_at_start;
-  double _cur_collection_par_time_ms;
-  double _cur_satb_drain_time_ms;
-  double _cur_clear_ct_time_ms;
-  double _cur_ref_proc_time_ms;
-  double _cur_ref_enq_time_ms;
-
-#ifndef PRODUCT
-  // Card Table Count Cache stats
-  double _min_clear_cc_time_ms;         // min
-  double _max_clear_cc_time_ms;         // max
-  double _cur_clear_cc_time_ms;         // clearing time during current pause
-  double _cum_clear_cc_time_ms;         // cummulative clearing time
-  jlong  _num_cc_clears;                // number of times the card count cache has been cleared
-#endif
+  uint   _cur_collection_pause_used_regions_at_start;
 
   // These exclude marking times.
   TruncatedSeq* _recent_gc_times_ms;
@@ -199,53 +185,24 @@
   TruncatedSeq* _concurrent_mark_remark_times_ms;
   TruncatedSeq* _concurrent_mark_cleanup_times_ms;
 
-  Summary*           _summary;
-
-  NumberSeq* _all_pause_times_ms;
-  NumberSeq* _all_full_gc_times_ms;
-  double _stop_world_start;
-  NumberSeq* _all_stop_world_times_ms;
-  NumberSeq* _all_yield_times_ms;
-
-  int        _aux_num;
-  NumberSeq* _all_aux_times_ms;
-  double*    _cur_aux_start_times_ms;
-  double*    _cur_aux_times_ms;
-  bool*      _cur_aux_times_set;
+  TraceGen0TimeData _trace_gen0_time_data;
+  TraceGen1TimeData _trace_gen1_time_data;
 
-  double* _par_last_gc_worker_start_times_ms;
-  double* _par_last_ext_root_scan_times_ms;
-  double* _par_last_satb_filtering_times_ms;
-  double* _par_last_update_rs_times_ms;
-  double* _par_last_update_rs_processed_buffers;
-  double* _par_last_scan_rs_times_ms;
-  double* _par_last_obj_copy_times_ms;
-  double* _par_last_termination_times_ms;
-  double* _par_last_termination_attempts;
-  double* _par_last_gc_worker_end_times_ms;
-  double* _par_last_gc_worker_times_ms;
-
-  // Each workers 'other' time i.e. the elapsed time of the parallel
-  // phase of the pause minus the sum of the individual sub-phase
-  // times for a given worker thread.
-  double* _par_last_gc_worker_other_times_ms;
+  double _stop_world_start;
 
   // indicates whether we are in young or mixed GC mode
   bool _gcs_are_young;
 
-  size_t _young_list_target_length;
-  size_t _young_list_fixed_length;
+  uint _young_list_target_length;
+  uint _young_list_fixed_length;
   size_t _prev_eden_capacity; // used for logging
 
   // The max number of regions we can extend the eden by while the GC
   // locker is active. This should be >= _young_list_target_length.
-  size_t _young_list_max_length;
+  uint _young_list_max_length;
 
   bool                  _last_gc_was_young;
 
-  unsigned              _young_pause_num;
-  unsigned              _mixed_pause_num;
-
   bool                  _during_marking;
   bool                  _in_marking_window;
   bool                  _in_marking_window_im;
@@ -257,7 +214,7 @@
   double                _gc_overhead_perc;
 
   double _reserve_factor;
-  size_t _reserve_regions;
+  uint _reserve_regions;
 
   bool during_marking() {
     return _during_marking;
@@ -271,7 +228,6 @@
   TruncatedSeq* _alloc_rate_ms_seq;
   double        _prev_collection_pause_end_ms;
 
-  TruncatedSeq* _pending_card_diff_seq;
   TruncatedSeq* _rs_length_diff_seq;
   TruncatedSeq* _cost_per_card_ms_seq;
   TruncatedSeq* _young_cards_per_entry_ratio_seq;
@@ -288,36 +244,27 @@
 
   TruncatedSeq* _cost_per_byte_ms_during_cm_seq;
 
-  TruncatedSeq* _young_gc_eff_seq;
-
   G1YoungGenSizer* _young_gen_sizer;
 
-  size_t _eden_cset_region_length;
-  size_t _survivor_cset_region_length;
-  size_t _old_cset_region_length;
+  uint _eden_cset_region_length;
+  uint _survivor_cset_region_length;
+  uint _old_cset_region_length;
 
-  void init_cset_region_lengths(size_t eden_cset_region_length,
-                                size_t survivor_cset_region_length);
+  void init_cset_region_lengths(uint eden_cset_region_length,
+                                uint survivor_cset_region_length);
 
-  size_t eden_cset_region_length()     { return _eden_cset_region_length;     }
-  size_t survivor_cset_region_length() { return _survivor_cset_region_length; }
-  size_t old_cset_region_length()      { return _old_cset_region_length;      }
+  uint eden_cset_region_length()     { return _eden_cset_region_length;     }
+  uint survivor_cset_region_length() { return _survivor_cset_region_length; }
+  uint old_cset_region_length()      { return _old_cset_region_length;      }
 
-  size_t _free_regions_at_end_of_collection;
+  uint _free_regions_at_end_of_collection;
 
   size_t _recorded_rs_lengths;
   size_t _max_rs_lengths;
-
-  double _recorded_young_free_cset_time_ms;
-  double _recorded_non_young_free_cset_time_ms;
-
   double _sigma;
 
   size_t _rs_lengths_prediction;
 
-  size_t _known_garbage_bytes;
-  double _known_garbage_ratio;
-
   double sigma() { return _sigma; }
 
   // A function that prevents us putting too much stock in small sample
@@ -345,10 +292,8 @@
   void set_no_of_gc_threads(uintx v) { _no_of_gc_threads = v; }
 
   double _pause_time_target_ms;
-  double _recorded_young_cset_choice_time_ms;
-  double _recorded_non_young_cset_choice_time_ms;
+
   size_t _pending_cards;
-  size_t _max_pending_cards;
 
 public:
   // Accessors
@@ -378,28 +323,6 @@
     _max_rs_lengths = rs_lengths;
   }
 
-  size_t predict_pending_card_diff() {
-    double prediction = get_new_neg_prediction(_pending_card_diff_seq);
-    if (prediction < 0.00001) {
-      return 0;
-    } else {
-      return (size_t) prediction;
-    }
-  }
-
-  size_t predict_pending_cards() {
-    size_t max_pending_card_num = _g1->max_pending_card_num();
-    size_t diff = predict_pending_card_diff();
-    size_t prediction;
-    if (diff > max_pending_card_num) {
-      prediction = max_pending_card_num;
-    } else {
-      prediction = max_pending_card_num - diff;
-    }
-
-    return prediction;
-  }
-
   size_t predict_rs_length_diff() {
     return (size_t) get_new_prediction(_rs_length_diff_seq);
   }
@@ -488,31 +411,18 @@
            get_new_prediction(_non_young_other_cost_per_region_ms_seq);
   }
 
-  double predict_young_collection_elapsed_time_ms(size_t adjustment);
   double predict_base_elapsed_time_ms(size_t pending_cards);
   double predict_base_elapsed_time_ms(size_t pending_cards,
                                       size_t scanned_cards);
   size_t predict_bytes_to_copy(HeapRegion* hr);
-  double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
+  double predict_region_elapsed_time_ms(HeapRegion* hr, bool for_young_gc);
 
   void set_recorded_rs_lengths(size_t rs_lengths);
 
-  size_t cset_region_length()       { return young_cset_region_length() +
-                                             old_cset_region_length(); }
-  size_t young_cset_region_length() { return eden_cset_region_length() +
-                                             survivor_cset_region_length(); }
-
-  void record_young_free_cset_time_ms(double time_ms) {
-    _recorded_young_free_cset_time_ms = time_ms;
-  }
-
-  void record_non_young_free_cset_time_ms(double time_ms) {
-    _recorded_non_young_free_cset_time_ms = time_ms;
-  }
-
-  double predict_young_gc_eff() {
-    return get_new_neg_prediction(_young_gc_eff_seq);
-  }
+  uint cset_region_length()       { return young_cset_region_length() +
+                                           old_cset_region_length(); }
+  uint young_cset_region_length() { return eden_cset_region_length() +
+                                           survivor_cset_region_length(); }
 
   double predict_survivor_regions_evac_time();
 
@@ -523,20 +433,6 @@
     // also call it on any more surv rate groups
   }
 
-  void set_known_garbage_bytes(size_t known_garbage_bytes) {
-    _known_garbage_bytes = known_garbage_bytes;
-    size_t heap_bytes = _g1->capacity();
-    _known_garbage_ratio = (double) _known_garbage_bytes / (double) heap_bytes;
-  }
-
-  void decrease_known_garbage_bytes(size_t known_garbage_bytes) {
-    guarantee( _known_garbage_bytes >= known_garbage_bytes, "invariant" );
-
-    _known_garbage_bytes -= known_garbage_bytes;
-    size_t heap_bytes = _g1->capacity();
-    _known_garbage_ratio = (double) _known_garbage_bytes / (double) heap_bytes;
-  }
-
   G1MMUTracker* mmu_tracker() {
     return _mmu_tracker;
   }
@@ -575,34 +471,6 @@
   }
 
 private:
-  void print_stats(int level, const char* str, double value);
-  void print_stats(int level, const char* str, int value);
-
-  void print_par_stats(int level, const char* str, double* data);
-  void print_par_sizes(int level, const char* str, double* data);
-
-  void check_other_times(int level,
-                         NumberSeq* other_times_ms,
-                         NumberSeq* calc_other_times_ms) const;
-
-  void print_summary (PauseSummary* stats) const;
-
-  void print_summary (int level, const char* str, NumberSeq* seq) const;
-  void print_summary_sd (int level, const char* str, NumberSeq* seq) const;
-
-  double avg_value (double* data);
-  double max_value (double* data);
-  double sum_of_values (double* data);
-  double max_sum (double* data1, double* data2);
-
-  double _last_pause_time_ms;
-
-  size_t _bytes_in_collection_set_before_gc;
-  size_t _bytes_copied_during_gc;
-
-  // Used to count used bytes in CS.
-  friend class CountCSClosure;
-
   // Statistics kept per GC stoppage, pause or full.
   TruncatedSeq* _recent_prev_end_times_for_all_gcs_sec;
 
@@ -616,9 +484,13 @@
 
   // The number of bytes in the collection set before the pause. Set from
   // the incrementally built collection set at the start of an evacuation
-  // pause.
+  // pause, and incremented in finalize_cset() when adding old regions
+  // (if any) to the collection set.
   size_t _collection_set_bytes_used_before;
 
+  // The number of bytes copied during the GC.
+  size_t _bytes_copied_during_gc;
+
   // The associated information that is maintained while the incremental
   // collection set is being built with young regions. Used to populate
   // the recorded info for the evacuation pause.
@@ -670,6 +542,8 @@
   // Stash a pointer to the g1 heap.
   G1CollectedHeap* _g1;
 
+  G1GCPhaseTimes* _phase_times;
+
   // The ratio of gc time to elapsed time, computed over recent pauses.
   double _recent_avg_pause_time_ratio;
 
@@ -709,8 +583,6 @@
   double _cur_mark_stop_world_time_ms;
   double _mark_remark_start_sec;
   double _mark_cleanup_start_sec;
-  double _mark_closure_time_ms;
-  double _root_region_scan_wait_time_ms;
 
   // Update the young list target length either by setting it to the
   // desired fixed value or by calculating it using G1's pause
@@ -722,12 +594,12 @@
   // Calculate and return the minimum desired young list target
   // length. This is the minimum desired young list length according
   // to the user's inputs.
-  size_t calculate_young_list_desired_min_length(size_t base_min_length);
+  uint calculate_young_list_desired_min_length(uint base_min_length);
 
   // Calculate and return the maximum desired young list target
   // length. This is the maximum desired young list length according
   // to the user's inputs.
-  size_t calculate_young_list_desired_max_length();
+  uint calculate_young_list_desired_max_length();
 
   // Calculate and return the maximum young list target length that
   // can fit into the pause time goal. The parameters are: rs_lengths
@@ -735,21 +607,18 @@
   // be, base_min_length is the already existing number of regions in
   // the young list, min_length and max_length are the desired min and
   // max young list length according to the user's inputs.
-  size_t calculate_young_list_target_length(size_t rs_lengths,
-                                            size_t base_min_length,
-                                            size_t desired_min_length,
-                                            size_t desired_max_length);
+  uint calculate_young_list_target_length(size_t rs_lengths,
+                                          uint base_min_length,
+                                          uint desired_min_length,
+                                          uint desired_max_length);
 
   // Check whether a given young length (young_length) fits into the
   // given target pause time and whether the prediction for the amount
   // of objects to be copied for the given length will fit into the
   // given free space (expressed by base_free_regions).  It is used by
   // calculate_young_list_target_length().
-  bool predict_will_fit(size_t young_length, double base_time_ms,
-                        size_t base_free_regions, double target_pause_time_ms);
-
-  // Count the number of bytes used in the CS.
-  void count_CS_bytes_used();
+  bool predict_will_fit(uint young_length, double base_time_ms,
+                        uint base_free_regions, double target_pause_time_ms);
 
 public:
 
@@ -761,21 +630,15 @@
     return CollectorPolicy::G1CollectorPolicyKind;
   }
 
+  G1GCPhaseTimes* phase_times() const { return _phase_times; }
+
   // Check the current value of the young list RSet lengths and
   // compare it against the last prediction. If the current value is
   // higher, recalculate the young list target length prediction.
   void revise_young_list_target_length_if_necessary();
 
-  size_t bytes_in_collection_set() {
-    return _bytes_in_collection_set_before_gc;
-  }
-
-  unsigned calc_gc_alloc_time_stamp() {
-    return _all_pause_times_ms->num() + 1;
-  }
-
   // This should be called after the heap is resized.
-  void record_new_heap_size(size_t new_number_of_regions);
+  void record_new_heap_size(uint new_number_of_regions);
 
   void init();
 
@@ -809,14 +672,6 @@
   void record_concurrent_mark_init_end(double
                                            mark_init_elapsed_time_ms);
 
-  void record_mark_closure_time(double mark_closure_time_ms) {
-    _mark_closure_time_ms = mark_closure_time_ms;
-  }
-
-  void record_root_region_scan_wait_time(double time_ms) {
-    _root_region_scan_wait_time_ms = time_ms;
-  }
-
   void record_concurrent_mark_remark_start();
   void record_concurrent_mark_remark_end();
 
@@ -825,110 +680,15 @@
   void record_concurrent_mark_cleanup_completed();
 
   void record_concurrent_pause();
-  void record_concurrent_pause_end();
 
-  void record_collection_pause_end(int no_of_gc_threads);
+  void record_collection_pause_end(double pause_time);
   void print_heap_transition();
+  void print_detailed_heap_transition();
 
   // Record the fact that a full collection occurred.
   void record_full_collection_start();
   void record_full_collection_end();
 
-  void record_gc_worker_start_time(int worker_i, double ms) {
-    _par_last_gc_worker_start_times_ms[worker_i] = ms;
-  }
-
-  void record_ext_root_scan_time(int worker_i, double ms) {
-    _par_last_ext_root_scan_times_ms[worker_i] = ms;
-  }
-
-  void record_satb_filtering_time(int worker_i, double ms) {
-    _par_last_satb_filtering_times_ms[worker_i] = ms;
-  }
-
-  void record_satb_drain_time(double ms) {
-    assert(_g1->mark_in_progress(), "shouldn't be here otherwise");
-    _cur_satb_drain_time_ms = ms;
-  }
-
-  void record_update_rs_time(int thread, double ms) {
-    _par_last_update_rs_times_ms[thread] = ms;
-  }
-
-  void record_update_rs_processed_buffers (int thread,
-                                           double processed_buffers) {
-    _par_last_update_rs_processed_buffers[thread] = processed_buffers;
-  }
-
-  void record_scan_rs_time(int thread, double ms) {
-    _par_last_scan_rs_times_ms[thread] = ms;
-  }
-
-  void reset_obj_copy_time(int thread) {
-    _par_last_obj_copy_times_ms[thread] = 0.0;
-  }
-
-  void reset_obj_copy_time() {
-    reset_obj_copy_time(0);
-  }
-
-  void record_obj_copy_time(int thread, double ms) {
-    _par_last_obj_copy_times_ms[thread] += ms;
-  }
-
-  void record_termination(int thread, double ms, size_t attempts) {
-    _par_last_termination_times_ms[thread] = ms;
-    _par_last_termination_attempts[thread] = (double) attempts;
-  }
-
-  void record_gc_worker_end_time(int worker_i, double ms) {
-    _par_last_gc_worker_end_times_ms[worker_i] = ms;
-  }
-
-  void record_pause_time_ms(double ms) {
-    _last_pause_time_ms = ms;
-  }
-
-  void record_clear_ct_time(double ms) {
-    _cur_clear_ct_time_ms = ms;
-  }
-
-  void record_par_time(double ms) {
-    _cur_collection_par_time_ms = ms;
-  }
-
-  void record_aux_start_time(int i) {
-    guarantee(i < _aux_num, "should be within range");
-    _cur_aux_start_times_ms[i] = os::elapsedTime() * 1000.0;
-  }
-
-  void record_aux_end_time(int i) {
-    guarantee(i < _aux_num, "should be within range");
-    double ms = os::elapsedTime() * 1000.0 - _cur_aux_start_times_ms[i];
-    _cur_aux_times_set[i] = true;
-    _cur_aux_times_ms[i] += ms;
-  }
-
-  void record_ref_proc_time(double ms) {
-    _cur_ref_proc_time_ms = ms;
-  }
-
-  void record_ref_enq_time(double ms) {
-    _cur_ref_enq_time_ms = ms;
-  }
-
-#ifndef PRODUCT
-  void record_cc_clear_time(double ms) {
-    if (_min_clear_cc_time_ms < 0.0 || ms <= _min_clear_cc_time_ms)
-      _min_clear_cc_time_ms = ms;
-    if (_max_clear_cc_time_ms < 0.0 || ms >= _max_clear_cc_time_ms)
-      _max_clear_cc_time_ms = ms;
-    _cur_clear_cc_time_ms = ms;
-    _cum_clear_cc_time_ms += ms;
-    _num_cc_clears++;
-  }
-#endif
-
   // Record how much space we copied during a GC. This is typically
   // called when a GC alloc region is being retired.
   void record_bytes_copied_during_gc(size_t bytes) {
@@ -940,10 +700,9 @@
     return _bytes_copied_during_gc;
   }
 
-  // Determine whether the next GC should be mixed. Called to determine
-  // whether to start mixed GCs or whether to carry on doing mixed
-  // GCs. The two action strings are used in the ergo output when the
-  // method returns true or false.
+  // Determine whether there are candidate regions so that the
+  // next GC should be mixed. The two action strings are used
+  // in the ergo output when the method returns true or false.
   bool next_gc_should_be_mixed(const char* true_action_str,
                                const char* false_action_str);
 
@@ -1034,12 +793,6 @@
   // exceeded the desired limit, return an amount to expand by.
   size_t expansion_amount();
 
-#ifndef PRODUCT
-  // Check any appropriate marked bytes info, asserting false if
-  // something's wrong, else returning "true".
-  bool assertMarkedBytesDataOK();
-#endif
-
   // Print tracing information.
   void print_tracing_info() const;
 
@@ -1056,18 +809,18 @@
   }
 
   bool is_young_list_full() {
-    size_t young_list_length = _g1->young_list()->length();
-    size_t young_list_target_length = _young_list_target_length;
+    uint young_list_length = _g1->young_list()->length();
+    uint young_list_target_length = _young_list_target_length;
     return young_list_length >= young_list_target_length;
   }
 
   bool can_expand_young_list() {
-    size_t young_list_length = _g1->young_list()->length();
-    size_t young_list_max_length = _young_list_max_length;
+    uint young_list_length = _g1->young_list()->length();
+    uint young_list_max_length = _young_list_max_length;
     return young_list_length < young_list_max_length;
   }
 
-  size_t young_list_max_length() {
+  uint young_list_max_length() {
     return _young_list_max_length;
   }
 
@@ -1082,19 +835,6 @@
     return _young_gen_sizer->adaptive_young_list_length();
   }
 
-  inline double get_gc_eff_factor() {
-    double ratio = _known_garbage_ratio;
-
-    double square = ratio * ratio;
-    // square = square * square;
-    double ret = square * 9.0 + 1.0;
-#if 0
-    gclog_or_tty->print_cr("ratio = %1.2lf, ret = %1.2lf", ratio, ret);
-#endif // 0
-    guarantee(0.0 <= ret && ret < 10.0, "invariant!");
-    return ret;
-  }
-
 private:
   //
   // Survivor regions policy.
@@ -1105,7 +845,7 @@
   int _tenuring_threshold;
 
   // The limit on the number of regions allocated for survivors.
-  size_t _max_survivor_regions;
+  uint _max_survivor_regions;
 
   // For reporting purposes.
   size_t _eden_bytes_before_gc;
@@ -1113,7 +853,7 @@
   size_t _capacity_before_gc;
 
   // The number of survivor regions after a collection.
-  size_t _recorded_survivor_regions;
+  uint _recorded_survivor_regions;
   // List of survivor regions.
   HeapRegion* _recorded_survivor_head;
   HeapRegion* _recorded_survivor_tail;
@@ -1135,9 +875,9 @@
     return purpose == GCAllocForSurvived;
   }
 
-  static const size_t REGIONS_UNLIMITED = ~(size_t)0;
+  static const uint REGIONS_UNLIMITED = (uint) -1;
 
-  size_t max_regions(int purpose);
+  uint max_regions(int purpose);
 
   // The limit on regions for a particular purpose is reached.
   void note_alloc_region_limit_reached(int purpose) {
@@ -1154,7 +894,7 @@
     _survivor_surv_rate_group->stop_adding_regions();
   }
 
-  void record_survivor_regions(size_t      regions,
+  void record_survivor_regions(uint regions,
                                HeapRegion* head,
                                HeapRegion* tail) {
     _recorded_survivor_regions = regions;
@@ -1162,12 +902,11 @@
     _recorded_survivor_tail    = tail;
   }
 
-  size_t recorded_survivor_regions() {
+  uint recorded_survivor_regions() {
     return _recorded_survivor_regions;
   }
 
-  void record_thread_age_table(ageTable* age_table)
-  {
+  void record_thread_age_table(ageTable* age_table) {
     _survivors_age_table.merge_par(age_table);
   }
 
--- a/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -120,11 +120,12 @@
 
 // Single parameter format strings
 #define ergo_format_str(_name_)      ", " _name_ ": %s"
-#define ergo_format_region(_name_)   ", " _name_ ": "SIZE_FORMAT" regions"
+#define ergo_format_region(_name_)   ", " _name_ ": %u regions"
 #define ergo_format_byte(_name_)     ", " _name_ ": "SIZE_FORMAT" bytes"
 #define ergo_format_double(_name_)   ", " _name_ ": %1.2f"
 #define ergo_format_perc(_name_)     ", " _name_ ": %1.2f %%"
 #define ergo_format_ms(_name_)       ", " _name_ ": %1.2f ms"
+#define ergo_format_size(_name_)     ", " _name_ ": "SIZE_FORMAT
 
 // Double parameter format strings
 #define ergo_format_byte_perc(_name_)                                   \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
+#include "gc_implementation/g1/g1Log.hpp"
+
+// Helper class for avoiding interleaved logging
+class LineBuffer: public StackObj {
+
+private:
+  static const int BUFFER_LEN = 1024;
+  static const int INDENT_CHARS = 3;
+  char _buffer[BUFFER_LEN];
+  int _indent_level;
+  int _cur;
+
+  void vappend(const char* format, va_list ap) {
+    int res = vsnprintf(&_buffer[_cur], BUFFER_LEN - _cur, format, ap);
+    if (res != -1) {
+      _cur += res;
+    } else {
+      DEBUG_ONLY(warning("buffer too small in LineBuffer");)
+      _buffer[BUFFER_LEN - 1] = 0;
+      _cur = BUFFER_LEN; // the buffer is full; any further append writes nothing
+    }
+  }
+
+public:
+  explicit LineBuffer(int indent_level): _indent_level(indent_level), _cur(0) {
+    for (; (_cur < BUFFER_LEN && _cur < (_indent_level * INDENT_CHARS)); _cur++) {
+      _buffer[_cur] = ' ';
+    }
+  }
+
+#ifndef PRODUCT
+  ~LineBuffer() {
+    assert(_cur == _indent_level * INDENT_CHARS, "pending data in buffer - append_and_print_cr() not called?");
+  }
+#endif
+
+  void append(const char* format, ...) {
+    va_list ap;
+    va_start(ap, format);
+    vappend(format, ap);
+    va_end(ap);
+  }
+
+  void append_and_print_cr(const char* format, ...) {
+    va_list ap;
+    va_start(ap, format);
+    vappend(format, ap);
+    va_end(ap);
+    gclog_or_tty->print_cr("%s", _buffer);
+    _cur = _indent_level * INDENT_CHARS;
+  }
+};
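For orientation, here is a minimal standalone analogue of the line-buffering pattern above (plain C++ outside the VM; DemoLineBuffer and main() are illustrative names, not part of this change). The point is that a whole line is composed in a private buffer and emitted with a single print call, so lines produced by concurrent GC workers cannot interleave mid-line:

#include <cstdarg>
#include <cstdio>

class DemoLineBuffer {
  static const int BUFFER_LEN = 1024;
  static const int INDENT_CHARS = 3;
  char _buffer[BUFFER_LEN];
  int  _indent_level;
  int  _cur;

  void vappend(const char* format, va_list ap) {
    int res = vsnprintf(&_buffer[_cur], BUFFER_LEN - _cur, format, ap);
    if (res >= 0 && res < BUFFER_LEN - _cur) {
      _cur += res;                      // appended in full
    } else {                            // truncated (or error): mark buffer full
      _buffer[BUFFER_LEN - 1] = '\0';
      _cur = BUFFER_LEN;                // later appends write nothing
    }
  }

public:
  explicit DemoLineBuffer(int indent_level)
      : _indent_level(indent_level), _cur(0) {
    for (; _cur < BUFFER_LEN && _cur < _indent_level * INDENT_CHARS; _cur++) {
      _buffer[_cur] = ' ';              // pre-fill the indentation
    }
  }

  void append(const char* format, ...) {
    va_list ap;
    va_start(ap, format);
    vappend(format, ap);
    va_end(ap);
  }

  void append_and_print_cr(const char* format, ...) {
    va_list ap;
    va_start(ap, format);
    vappend(format, ap);
    va_end(ap);
    printf("%s\n", _buffer);            // one call: no interleaving mid-line
    _cur = _indent_level * INDENT_CHARS; // keep the indent for reuse
  }
};

int main() {
  DemoLineBuffer buf(2);
  buf.append("[%s:", "Update RS (ms)");
  buf.append(" Min: %.1f, Max: %.1f", 1.2, 3.4);
  buf.append_and_print_cr("]");
  return 0;
}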
+
+template <class T>
+void WorkerDataArray<T>::print(int level, const char* title) {
+  if (_length == 1) {
+    // No need for min, max, average and sum for only one worker
+    LineBuffer buf(level);
+    buf.append("[%s:  ", title);
+    buf.append(_print_format, _data[0]);
+    buf.append_and_print_cr("]");
+    return;
+  }
+
+  T min = _data[0];
+  T max = _data[0];
+  T sum = 0;
+
+  LineBuffer buf(level);
+  buf.append("[%s:", title);
+  for (uint i = 0; i < _length; ++i) {
+    T val = _data[i];
+    min = MIN2(val, min);
+    max = MAX2(val, max);
+    sum += val;
+    if (G1Log::finest()) {
+      buf.append("  ");
+      buf.append(_print_format, val);
+    }
+  }
+
+  if (G1Log::finest()) {
+    buf.append_and_print_cr("");
+  }
+
+  double avg = (double)sum / (double)_length;
+  buf.append(" Min: ");
+  buf.append(_print_format, min);
+  buf.append(", Avg: ");
+  buf.append("%.1lf", avg); // Always print average as a double
+  buf.append(", Max: ");
+  buf.append(_print_format, max);
+  buf.append(", Diff: ");
+  buf.append(_print_format, max - min);
+  if (_print_sum) {
+    // For things like the start and end times the sum is not that relevant.
+    buf.append(", Sum: ");
+    buf.append(_print_format, sum);
+  }
+  buf.append_and_print_cr("]");
+}
+
+#ifdef ASSERT
+
+template <class T>
+void WorkerDataArray<T>::reset() {
+  for (uint i = 0; i < _length; i++) {
+    _data[i] = (T)-1;
+  }
+}
+
+template <class T>
+void WorkerDataArray<T>::verify() {
+  for (uint i = 0; i < _length; i++) {
+    assert(_data[i] >= (T)0, err_msg("Invalid data for worker %d", i));
+  }
+}
+
+#endif
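In debug builds these helpers implement a simple sentinel protocol: reset() marks every slot as unset with (T)-1, each worker then fills in its own slot, and verify() checks that no slot was left unset. A trimmed illustration (plain C++, invented values):

#include <cassert>

int main() {
  const int n = 4;
  double data[n];
  for (int i = 0; i < n; i++) data[i] = -1.0;          // reset(): mark unset
  for (int i = 0; i < n; i++) data[i] = i * 1.5;       // workers record a value
  for (int i = 0; i < n; i++) assert(data[i] >= 0.0);  // verify(): all slots set
  return 0;
}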
+
+G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
+  _max_gc_threads(max_gc_threads),
+  _min_clear_cc_time_ms(-1.0),
+  _max_clear_cc_time_ms(-1.0),
+  _cur_clear_cc_time_ms(0.0),
+  _cum_clear_cc_time_ms(0.0),
+  _num_cc_clears(0L),
+  _last_gc_worker_start_times_ms(_max_gc_threads, "%.1lf", false),
+  _last_ext_root_scan_times_ms(_max_gc_threads, "%.1lf"),
+  _last_satb_filtering_times_ms(_max_gc_threads, "%.1lf"),
+  _last_update_rs_times_ms(_max_gc_threads, "%.1lf"),
+  _last_update_rs_processed_buffers(_max_gc_threads, "%d"),
+  _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"),
+  _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"),
+  _last_termination_times_ms(_max_gc_threads, "%.1lf"),
+  _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
+  _last_gc_worker_end_times_ms(_max_gc_threads, "%.1lf", false),
+  _last_gc_worker_times_ms(_max_gc_threads, "%.1lf"),
+  _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf")
+{
+  assert(max_gc_threads > 0, "Must have some GC threads");
+}
+
+void G1GCPhaseTimes::note_gc_start(uint active_gc_threads) {
+  assert(active_gc_threads > 0, "The number of threads must be > 0");
+  assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max number of threads");
+  _active_gc_threads = active_gc_threads;
+
+  _last_gc_worker_start_times_ms.reset();
+  _last_ext_root_scan_times_ms.reset();
+  _last_satb_filtering_times_ms.reset();
+  _last_update_rs_times_ms.reset();
+  _last_update_rs_processed_buffers.reset();
+  _last_scan_rs_times_ms.reset();
+  _last_obj_copy_times_ms.reset();
+  _last_termination_times_ms.reset();
+  _last_termination_attempts.reset();
+  _last_gc_worker_end_times_ms.reset();
+  _last_gc_worker_times_ms.reset();
+  _last_gc_worker_other_times_ms.reset();
+}
+
+void G1GCPhaseTimes::note_gc_end() {
+  _last_gc_worker_start_times_ms.verify();
+  _last_ext_root_scan_times_ms.verify();
+  _last_satb_filtering_times_ms.verify();
+  _last_update_rs_times_ms.verify();
+  _last_update_rs_processed_buffers.verify();
+  _last_scan_rs_times_ms.verify();
+  _last_obj_copy_times_ms.verify();
+  _last_termination_times_ms.verify();
+  _last_termination_attempts.verify();
+  _last_gc_worker_end_times_ms.verify();
+
+  for (uint i = 0; i < _active_gc_threads; i++) {
+    double worker_time = _last_gc_worker_end_times_ms.get(i) - _last_gc_worker_start_times_ms.get(i);
+    _last_gc_worker_times_ms.set(i, worker_time);
+
+    double worker_known_time = _last_ext_root_scan_times_ms.get(i) +
+      _last_satb_filtering_times_ms.get(i) +
+      _last_update_rs_times_ms.get(i) +
+      _last_scan_rs_times_ms.get(i) +
+      _last_obj_copy_times_ms.get(i) +
+      _last_termination_times_ms.get(i);
+
+    double worker_other_time = worker_time - worker_known_time;
+    _last_gc_worker_other_times_ms.set(i, worker_other_time);
+  }
+
+  _last_gc_worker_times_ms.verify();
+  _last_gc_worker_other_times_ms.verify();
+}
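In other words, per-worker "other" time is simply elapsed worker time minus the individually timed phases. With invented numbers: a worker that starts at 100.0 ms and ends at 180.0 ms ran for 80 ms; if ext root scanning (10), SATB filtering (2), Update RS (15), Scan RS (20), object copy (25) and termination (3) sum to 75 ms, the remaining 5 ms is reported as "GC Worker Other".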
+
+void G1GCPhaseTimes::print_stats(int level, const char* str, double value) {
+  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value);
+}
+
+void G1GCPhaseTimes::print_stats(int level, const char* str, double value, int workers) {
+  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: %d]", str, value, workers);
+}
+
+double G1GCPhaseTimes::accounted_time_ms() {
+  // Sum the individually timed parts of the pause; print() subtracts
+  // this total from the pause time and reports the remainder as "Other".
+
+  // The root region scan wait time is initialized to zero at the start
+  // of the pause, so it only contributes if we actually had to wait.
+  double misc_time_ms = _root_region_scan_wait_time_ms;
+
+  // The parallel (STW worker) phase.
+  misc_time_ms += _cur_collection_par_time_ms;
+
+  // The time taken to fix up roots in generated code.
+  misc_time_ms += _cur_collection_code_root_fixup_time_ms;
+
+  // The time taken to clean the card table.
+  misc_time_ms += _cur_clear_ct_time_ms;
+
+  return misc_time_ms;
+}
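With invented numbers: a 50 ms pause that spent 2 ms waiting on root region scanning, 40 ms in the parallel phase, 1 ms on code root fixup and 2 ms clearing the card table accounts for 45 ms, so print() below reports the remaining 50 - 45 = 5 ms under "Other".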
+
+void G1GCPhaseTimes::print(double pause_time_sec) {
+  if (_root_region_scan_wait_time_ms > 0.0) {
+    print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
+  }
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
+    _last_gc_worker_start_times_ms.print(2, "GC Worker Start (ms)");
+    _last_ext_root_scan_times_ms.print(2, "Ext Root Scanning (ms)");
+    if (_last_satb_filtering_times_ms.sum() > 0.0) {
+      _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
+    }
+    _last_update_rs_times_ms.print(2, "Update RS (ms)");
+    _last_update_rs_processed_buffers.print(3, "Processed Buffers");
+    _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
+    _last_obj_copy_times_ms.print(2, "Object Copy (ms)");
+    _last_termination_times_ms.print(2, "Termination (ms)");
+    if (G1Log::finest()) {
+      _last_termination_attempts.print(3, "Termination Attempts");
+    }
+    _last_gc_worker_other_times_ms.print(2, "GC Worker Other (ms)");
+    _last_gc_worker_times_ms.print(2, "GC Worker Total (ms)");
+    _last_gc_worker_end_times_ms.print(2, "GC Worker End (ms)");
+  } else {
+    _last_ext_root_scan_times_ms.print(1, "Ext Root Scanning (ms)");
+    if (_last_satb_filtering_times_ms.sum() > 0.0) {
+      _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)");
+    }
+    _last_update_rs_times_ms.print(1, "Update RS (ms)");
+    _last_update_rs_processed_buffers.print(2, "Processed Buffers");
+    _last_scan_rs_times_ms.print(1, "Scan RS (ms)");
+    _last_obj_copy_times_ms.print(1, "Object Copy (ms)");
+  }
+  print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
+  print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
+  if (Verbose && G1Log::finest()) {
+    print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms);
+    print_stats(1, "Cum Clear CC", _cum_clear_cc_time_ms);
+    print_stats(1, "Min Clear CC", _min_clear_cc_time_ms);
+    print_stats(1, "Max Clear CC", _max_clear_cc_time_ms);
+    if (_num_cc_clears > 0) {
+      print_stats(1, "Avg Clear CC", _cum_clear_cc_time_ms / ((double)_num_cc_clears));
+    }
+  }
+  double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms();
+  print_stats(1, "Other", misc_time_ms);
+  if (_cur_verify_before_time_ms > 0.0) {
+    print_stats(2, "Verify Before", _cur_verify_before_time_ms);
+  }
+  print_stats(2, "Choose CSet",
+    (_recorded_young_cset_choice_time_ms +
+    _recorded_non_young_cset_choice_time_ms));
+  print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
+  print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
+  print_stats(2, "Free CSet",
+    (_recorded_young_free_cset_time_ms +
+    _recorded_non_young_free_cset_time_ms));
+  if (_cur_verify_after_time_ms > 0.0) {
+    print_stats(2, "Verify After", _cur_verify_after_time_ms);
+  }
+}
+
+void G1GCPhaseTimes::record_cc_clear_time_ms(double ms) {
+  if (!(Verbose && G1Log::finest())) {
+    return;
+  }
+
+  if (_min_clear_cc_time_ms < 0.0 || ms <= _min_clear_cc_time_ms) {
+    _min_clear_cc_time_ms = ms;
+  }
+  if (_max_clear_cc_time_ms < 0.0 || ms >= _max_clear_cc_time_ms) {
+    _max_clear_cc_time_ms = ms;
+  }
+  _cur_clear_cc_time_ms = ms;
+  _cum_clear_cc_time_ms += ms;
+  _num_cc_clears++;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
+
+#include "memory/allocation.hpp"
+#include "gc_interface/gcCause.hpp"
+
+template <class T>
+class WorkerDataArray : public CHeapObj<mtGC> {
+  T*          _data;
+  uint        _length;
+  const char* _print_format;
+  bool        _print_sum;
+
+  // The sum and average are cached so that they only have to be
+  // calculated once. This is not done in an MT-safe way. It is
+  // intended to allow single-threaded code to call sum() and average()
+  // multiple times in any order without having to worry about the cost.
+  bool   _has_new_data;
+  T      _sum;
+  double _average;
+
+ public:
+  WorkerDataArray(uint length, const char* print_format, bool print_sum = true) :
+    _length(length), _print_format(print_format), _print_sum(print_sum), _has_new_data(true) {
+    assert(length > 0, "Must have some workers to store data for");
+    _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
+  }
+
+  ~WorkerDataArray() {
+    FREE_C_HEAP_ARRAY(T, _data, mtGC);
+  }
+
+  void set(uint worker_i, T value) {
+    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
+    assert(_data[worker_i] == (T)-1, err_msg("Overwriting data for worker %d", worker_i));
+    _data[worker_i] = value;
+    _has_new_data = true;
+  }
+
+  T get(uint worker_i) {
+    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
+    assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i));
+    return _data[worker_i];
+  }
+
+  void add(uint worker_i, T value) {
+    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
+    assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i));
+    _data[worker_i] += value;
+    _has_new_data = true;
+  }
+
+  double average() {
+    if (_has_new_data) {
+      calculate_totals();
+    }
+    return _average;
+  }
+
+  T sum() {
+    if (_has_new_data) {
+      calculate_totals();
+    }
+    return _sum;
+  }
+
+  void print(int level, const char* title);
+
+  void reset() PRODUCT_RETURN;
+  void verify() PRODUCT_RETURN;
+
+ private:
+
+  void calculate_totals() {
+    _sum = (T)0;
+    for (uint i = 0; i < _length; ++i) {
+      _sum += _data[i];
+    }
+    _average = (double)_sum / (double)_length;
+    _has_new_data = false;
+  }
+};
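A standalone sketch of the caching idea above: sum and average are computed lazily and memoized until the next write invalidates them (plain C++; std::vector stands in for the VM's C-heap array, and all names here are invented for illustration):

#include <cassert>
#include <cstdio>
#include <vector>

template <class T>
class DemoWorkerData {
  std::vector<T> _data;
  bool   _has_new_data;
  T      _sum;
  double _average;

  void calculate_totals() {
    _sum = (T)0;
    for (size_t i = 0; i < _data.size(); ++i) {
      _sum += _data[i];
    }
    _average = (double)_sum / (double)_data.size();
    _has_new_data = false;
  }

public:
  explicit DemoWorkerData(size_t length)
      : _data(length, (T)0), _has_new_data(true), _sum((T)0), _average(0.0) {
    assert(length > 0 && "must have some workers");
  }

  void set(size_t worker_i, T value) {
    assert(worker_i < _data.size() && "worker index out of range");
    _data[worker_i] = value;
    _has_new_data = true;        // invalidate the cached totals
  }

  T sum()          { if (_has_new_data) calculate_totals(); return _sum; }
  double average() { if (_has_new_data) calculate_totals(); return _average; }
};

int main() {
  DemoWorkerData<double> times(4);
  times.set(0, 10.0);
  times.set(1, 12.0);
  times.set(2, 11.0);
  times.set(3, 13.0);
  printf("sum = %.1f, avg = %.1f\n", times.sum(), times.average());
  return 0;
}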
+
+class G1GCPhaseTimes : public CHeapObj<mtGC> {
+
+ private:
+  uint _active_gc_threads;
+  uint _max_gc_threads;
+
+  WorkerDataArray<double> _last_gc_worker_start_times_ms;
+  WorkerDataArray<double> _last_ext_root_scan_times_ms;
+  WorkerDataArray<double> _last_satb_filtering_times_ms;
+  WorkerDataArray<double> _last_update_rs_times_ms;
+  WorkerDataArray<int>    _last_update_rs_processed_buffers;
+  WorkerDataArray<double> _last_scan_rs_times_ms;
+  WorkerDataArray<double> _last_obj_copy_times_ms;
+  WorkerDataArray<double> _last_termination_times_ms;
+  WorkerDataArray<size_t> _last_termination_attempts;
+  WorkerDataArray<double> _last_gc_worker_end_times_ms;
+  WorkerDataArray<double> _last_gc_worker_times_ms;
+  WorkerDataArray<double> _last_gc_worker_other_times_ms;
+
+  double _cur_collection_par_time_ms;
+  double _cur_collection_code_root_fixup_time_ms;
+
+  double _cur_clear_ct_time_ms;
+  double _cur_ref_proc_time_ms;
+  double _cur_ref_enq_time_ms;
+
+  // Card Table Count Cache stats
+  double _min_clear_cc_time_ms;         // min
+  double _max_clear_cc_time_ms;         // max
+  double _cur_clear_cc_time_ms;         // clearing time during current pause
+  double _cum_clear_cc_time_ms;         // cumulative clearing time
+  jlong  _num_cc_clears;                // number of times the card count cache has been cleared
+
+  double _cur_collection_start_sec;
+  double _root_region_scan_wait_time_ms;
+
+  double _recorded_young_cset_choice_time_ms;
+  double _recorded_non_young_cset_choice_time_ms;
+
+  double _recorded_young_free_cset_time_ms;
+  double _recorded_non_young_free_cset_time_ms;
+
+  double _cur_verify_before_time_ms;
+  double _cur_verify_after_time_ms;
+
+  // Helper methods for detailed logging
+  void print_stats(int level, const char* str, double value);
+  void print_stats(int level, const char* str, double value, int workers);
+
+ public:
+  G1GCPhaseTimes(uint max_gc_threads);
+  void note_gc_start(uint active_gc_threads);
+  void note_gc_end();
+  void print(double pause_time_sec);
+
+  void record_gc_worker_start_time(uint worker_i, double ms) {
+    _last_gc_worker_start_times_ms.set(worker_i, ms);
+  }
+
+  void record_ext_root_scan_time(uint worker_i, double ms) {
+    _last_ext_root_scan_times_ms.set(worker_i, ms);
+  }
+
+  void record_satb_filtering_time(uint worker_i, double ms) {
+    _last_satb_filtering_times_ms.set(worker_i, ms);
+  }
+
+  void record_update_rs_time(uint worker_i, double ms) {
+    _last_update_rs_times_ms.set(worker_i, ms);
+  }
+
+  void record_update_rs_processed_buffers(uint worker_i, int processed_buffers) {
+    _last_update_rs_processed_buffers.set(worker_i, processed_buffers);
+  }
+
+  void record_scan_rs_time(uint worker_i, double ms) {
+    _last_scan_rs_times_ms.set(worker_i, ms);
+  }
+
+  void record_obj_copy_time(uint worker_i, double ms) {
+    _last_obj_copy_times_ms.set(worker_i, ms);
+  }
+
+  void add_obj_copy_time(uint worker_i, double ms) {
+    _last_obj_copy_times_ms.add(worker_i, ms);
+  }
+
+  void record_termination(uint worker_i, double ms, size_t attempts) {
+    _last_termination_times_ms.set(worker_i, ms);
+    _last_termination_attempts.set(worker_i, attempts);
+  }
+
+  void record_gc_worker_end_time(uint worker_i, double ms) {
+    _last_gc_worker_end_times_ms.set(worker_i, ms);
+  }
+
+  void record_clear_ct_time(double ms) {
+    _cur_clear_ct_time_ms = ms;
+  }
+
+  void record_par_time(double ms) {
+    _cur_collection_par_time_ms = ms;
+  }
+
+  void record_code_root_fixup_time(double ms) {
+    _cur_collection_code_root_fixup_time_ms = ms;
+  }
+
+  void record_ref_proc_time(double ms) {
+    _cur_ref_proc_time_ms = ms;
+  }
+
+  void record_ref_enq_time(double ms) {
+    _cur_ref_enq_time_ms = ms;
+  }
+
+  void record_root_region_scan_wait_time(double time_ms) {
+    _root_region_scan_wait_time_ms = time_ms;
+  }
+
+  void record_cc_clear_time_ms(double ms);
+
+  void record_young_free_cset_time_ms(double time_ms) {
+    _recorded_young_free_cset_time_ms = time_ms;
+  }
+
+  void record_non_young_free_cset_time_ms(double time_ms) {
+    _recorded_non_young_free_cset_time_ms = time_ms;
+  }
+
+  void record_young_cset_choice_time_ms(double time_ms) {
+    _recorded_young_cset_choice_time_ms = time_ms;
+  }
+
+  void record_non_young_cset_choice_time_ms(double time_ms) {
+    _recorded_non_young_cset_choice_time_ms = time_ms;
+  }
+
+  void record_cur_collection_start_sec(double time_sec) {
+    _cur_collection_start_sec = time_sec;
+  }
+
+  void record_verify_before_time_ms(double time_ms) {
+    _cur_verify_before_time_ms = time_ms;
+  }
+
+  void record_verify_after_time_ms(double time_ms) {
+    _cur_verify_after_time_ms = time_ms;
+  }
+
+  double accounted_time_ms();
+
+  double cur_collection_start_sec() {
+    return _cur_collection_start_sec;
+  }
+
+  double cur_collection_par_time_ms() {
+    return _cur_collection_par_time_ms;
+  }
+
+  double cur_clear_ct_time_ms() {
+    return _cur_clear_ct_time_ms;
+  }
+
+  double root_region_scan_wait_time_ms() {
+    return _root_region_scan_wait_time_ms;
+  }
+
+  double young_cset_choice_time_ms() {
+    return _recorded_young_cset_choice_time_ms;
+  }
+
+  double young_free_cset_time_ms() {
+    return _recorded_young_free_cset_time_ms;
+  }
+
+  double non_young_cset_choice_time_ms() {
+    return _recorded_non_young_cset_choice_time_ms;
+  }
+
+  double non_young_free_cset_time_ms() {
+    return _recorded_non_young_free_cset_time_ms;
+  }
+
+  double average_last_update_rs_time() {
+    return _last_update_rs_times_ms.average();
+  }
+
+  int sum_last_update_rs_processed_buffers() {
+    return _last_update_rs_processed_buffers.sum();
+  }
+
+  double average_last_scan_rs_time() {
+    return _last_scan_rs_times_ms.average();
+  }
+
+  double average_last_obj_copy_time() {
+    return _last_obj_copy_times_ms.average();
+  }
+
+  double average_last_termination_time() {
+    return _last_termination_times_ms.average();
+  }
+
+  double average_last_ext_root_scan_time() {
+    return _last_ext_root_scan_times_ms.average();
+  }
+
+  double average_last_satb_filtering_times_ms() {
+    return _last_satb_filtering_times_ms.average();
+  }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1Log.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/g1_globals.hpp"
+#include "gc_implementation/g1/g1Log.hpp"
+#include "runtime/globals.hpp"
+
+G1Log::LogLevel G1Log::_level = G1Log::LevelNone;
+
+// If G1LogLevel has not been set, the values of PrintGC and
+// PrintGCDetails determine the logging level:
+// - PrintGC maps to "fine".
+// - PrintGCDetails maps to "finer".
+void G1Log::init() {
+  if (G1LogLevel != NULL && G1LogLevel[0] != '\0') {
+    if (strncmp("none", G1LogLevel, 4) == 0 && G1LogLevel[4] == '\0') {
+      _level = LevelNone;
+    } else if (strncmp("fine", G1LogLevel, 4) == 0 && G1LogLevel[4] == '\0') {
+      _level = LevelFine;
+    } else if (strncmp("finer", G1LogLevel, 5) == 0 && G1LogLevel[5] == '\0') {
+      _level = LevelFiner;
+    } else if (strncmp("finest", G1LogLevel, 6) == 0 && G1LogLevel[6] == '\0') {
+      _level = LevelFinest;
+    } else {
+      warning("Unknown logging level '%s', should be one of 'fine', 'finer' or 'finest'.", G1LogLevel);
+    }
+  } else {
+    if (PrintGCDetails) {
+      _level = LevelFiner;
+    } else if (PrintGC) {
+      _level = LevelFine;
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1Log.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1LOG_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1LOG_HPP
+
+#include "memory/allocation.hpp"
+
+class G1Log : public AllStatic {
+  typedef enum {
+    LevelNone,
+    LevelFine,
+    LevelFiner,
+    LevelFinest
+  } LogLevel;
+
+  static LogLevel _level;
+
+ public:
+  inline static bool fine() {
+    return _level >= LevelFine;
+  }
+
+  inline static bool finer() {
+    return _level >= LevelFiner;
+  }
+
+  inline static bool finest() {
+    return _level == LevelFinest;
+  }
+
+  static void init();
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1LOG_HPP
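A minimal standalone analogue of the G1Log gating pattern, including the fallback from the PrintGC/PrintGCDetails flags (plain C++; demo_init and the other names are invented for this sketch, not VM code):

#include <cstdio>
#include <cstring>

enum DemoLogLevel { LevelNone, LevelFine, LevelFiner, LevelFinest };

static DemoLogLevel g_level = LevelNone;

static void demo_init(const char* s, bool print_gc, bool print_gc_details) {
  if (s != NULL && s[0] != '\0') {
    if      (strcmp(s, "none")   == 0) g_level = LevelNone;
    else if (strcmp(s, "fine")   == 0) g_level = LevelFine;
    else if (strcmp(s, "finer")  == 0) g_level = LevelFiner;
    else if (strcmp(s, "finest") == 0) g_level = LevelFinest;
    else fprintf(stderr, "Unknown logging level '%s'\n", s);
  } else if (print_gc_details) {  // PrintGCDetails maps to "finer"
    g_level = LevelFiner;
  } else if (print_gc) {          // PrintGC maps to "fine"
    g_level = LevelFine;
  }
}

static bool demo_fine()   { return g_level >= LevelFine; }
static bool demo_finest() { return g_level == LevelFinest; }

int main() {
  demo_init("finer", false, false);
  if (demo_fine())   printf("[GC pause summary]\n");    // printed
  if (demo_finest()) printf("[per-worker details]\n");  // skipped
  return 0;
}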
--- a/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -34,7 +34,7 @@
 /***** ALL TIMES ARE IN SECS!!!!!!! *****/
 
 // this is the "interface"
-class G1MMUTracker: public CHeapObj {
+class G1MMUTracker: public CHeapObj<mtGC> {
 protected:
   double          _time_slice;
   double          _max_gc_time; // this is per time slice
--- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -29,6 +29,7 @@
 #include "classfile/vmSymbols.hpp"
 #include "code/codeCache.hpp"
 #include "code/icBuffer.hpp"
+#include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
 #include "memory/gcLocker.hpp"
 #include "memory/genCollectedHeap.hpp"
@@ -126,7 +127,7 @@
 void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,
                                     bool clear_all_softrefs) {
   // Recursively traverse all live objects and mark them
-  TraceTime tm("phase 1", PrintGC && Verbose, true, gclog_or_tty);
+  TraceTime tm("phase 1", G1Log::fine() && Verbose, true, gclog_or_tty);
   GenMarkSweep::trace(" 1");
 
   SharedHeap* sh = SharedHeap::heap();
@@ -192,8 +193,7 @@
     // fail. At the end of the GC, the original mark word values
     // (including hash values) are restored to the appropriate
     // objects.
-    Universe::heap()->verify(/* allow dirty */ true,
-                             /* silent      */ false,
+    Universe::heap()->verify(/* silent      */ false,
                              /* option      */ VerifyOption_G1UseMarkWord);
 
     G1CollectedHeap* g1h = G1CollectedHeap::heap();
@@ -262,18 +262,6 @@
   }
 };
 
-// Finds the first HeapRegion.
-class FindFirstRegionClosure: public HeapRegionClosure {
-  HeapRegion* _a_region;
-public:
-  FindFirstRegionClosure() : _a_region(NULL) {}
-  bool doHeapRegion(HeapRegion* r) {
-    _a_region = r;
-    return true;
-  }
-  HeapRegion* result() { return _a_region; }
-};
-
 void G1MarkSweep::mark_sweep_phase2() {
   // Now all live objects are marked, compute the new object addresses.
 
@@ -291,12 +279,11 @@
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   Generation* pg = g1h->perm_gen();
 
-  TraceTime tm("phase 2", PrintGC && Verbose, true, gclog_or_tty);
+  TraceTime tm("phase 2", G1Log::fine() && Verbose, true, gclog_or_tty);
   GenMarkSweep::trace("2");
 
-  FindFirstRegionClosure cl;
-  g1h->heap_region_iterate(&cl);
-  HeapRegion *r = cl.result();
+  // find the first region
+  HeapRegion* r = g1h->region_at(0);
   CompactibleSpace* sp = r;
   if (r->isHumongous() && oop(r->bottom())->is_gc_marked()) {
     sp = r->next_compaction_space();
@@ -335,7 +322,7 @@
   Generation* pg = g1h->perm_gen();
 
   // Adjust the pointers to reflect the new locations
-  TraceTime tm("phase 3", PrintGC && Verbose, true, gclog_or_tty);
+  TraceTime tm("phase 3", G1Log::fine() && Verbose, true, gclog_or_tty);
   GenMarkSweep::trace("3");
 
   SharedHeap* sh = SharedHeap::heap();
@@ -399,7 +386,7 @@
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   Generation* pg = g1h->perm_gen();
 
-  TraceTime tm("phase 4", PrintGC && Verbose, true, gclog_or_tty);
+  TraceTime tm("phase 4", G1Log::fine() && Verbose, true, gclog_or_tty);
   GenMarkSweep::trace("4");
 
   pg->compact();
@@ -408,7 +395,3 @@
   g1h->heap_region_iterate(&blk);
 
 }
-
-// Local Variables: ***
-// c-indentation-style: gnu ***
-// End: ***
--- a/src/share/vm/gc_implementation/g1/g1MonitoringSupport.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1MonitoringSupport.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -44,7 +44,9 @@
                G1MonitoringSupport::pad_capacity(0, 3) /* min_capacity */,
                G1MonitoringSupport::pad_capacity(g1mm->young_gen_max(), 3),
                G1MonitoringSupport::pad_capacity(0, 3) /* curr_capacity */) {
-  update_all();
+  if (UsePerfData) {
+    update_all();
+  }
 }
 
 G1OldGenerationCounters::G1OldGenerationCounters(G1MonitoringSupport* g1mm,
@@ -53,7 +55,9 @@
                G1MonitoringSupport::pad_capacity(0) /* min_capacity */,
                G1MonitoringSupport::pad_capacity(g1mm->old_gen_max()),
                G1MonitoringSupport::pad_capacity(0) /* curr_capacity */) {
-  update_all();
+  if (UsePerfData) {
+    update_all();
+  }
 }
 
 void G1YoungGenerationCounters::update_all() {
@@ -149,10 +153,6 @@
     pad_capacity(0) /* max_capacity */,
     pad_capacity(0) /* init_capacity */,
     _young_collection_counters);
-  // Given that this survivor space is not used, we update it here
-  // once to reflect that its used space is 0 so that we don't have to
-  // worry about updating it again later.
-  _from_counters->update_used(0);
 
   //  name "generation.0.space.2"
   // See _old_space_counters for additional counters
@@ -160,6 +160,13 @@
     pad_capacity(overall_reserved()) /* max_capacity */,
     pad_capacity(survivor_space_committed()) /* init_capacity */,
     _young_collection_counters);
+
+  if (UsePerfData) {
+    // Given that this survivor space is not used, we update it here
+    // once to reflect that its used space is 0 so that we don't have to
+    // worry about updating it again later.
+    _from_counters->update_used(0);
+  }
 }
 
 void G1MonitoringSupport::recalculate_sizes() {
@@ -170,19 +177,19 @@
   // values we read here are possible (i.e., at a STW phase at the end
   // of a GC).
 
-  size_t young_list_length = g1->young_list()->length();
-  size_t survivor_list_length = g1->g1_policy()->recorded_survivor_regions();
+  uint young_list_length = g1->young_list()->length();
+  uint survivor_list_length = g1->g1_policy()->recorded_survivor_regions();
   assert(young_list_length >= survivor_list_length, "invariant");
-  size_t eden_list_length = young_list_length - survivor_list_length;
+  uint eden_list_length = young_list_length - survivor_list_length;
   // Max length includes any potential extensions to the young gen
   // we'll do when the GC locker is active.
-  size_t young_list_max_length = g1->g1_policy()->young_list_max_length();
+  uint young_list_max_length = g1->g1_policy()->young_list_max_length();
   assert(young_list_max_length >= survivor_list_length, "invariant");
-  size_t eden_list_max_length = young_list_max_length - survivor_list_length;
+  uint eden_list_max_length = young_list_max_length - survivor_list_length;
 
   _overall_used = g1->used_unlocked();
-  _eden_used = eden_list_length * HeapRegion::GrainBytes;
-  _survivor_used = survivor_list_length * HeapRegion::GrainBytes;
+  _eden_used = (size_t) eden_list_length * HeapRegion::GrainBytes;
+  _survivor_used = (size_t) survivor_list_length * HeapRegion::GrainBytes;
   _young_region_num = young_list_length;
   _old_used = subtract_up_to_zero(_overall_used, _eden_used + _survivor_used);
 
@@ -200,7 +207,7 @@
   committed -= _survivor_committed + _old_committed;
 
   // Next, calculate and remove the committed size for the eden.
-  _eden_committed = eden_list_max_length * HeapRegion::GrainBytes;
+  _eden_committed = (size_t) eden_list_max_length * HeapRegion::GrainBytes;
   // Somewhat defensive: be robust in case there are inaccuracies in
   // the calculations
   _eden_committed = MIN2(_eden_committed, committed);
@@ -230,10 +237,10 @@
   // When a new eden region is allocated, only the eden_used size is
   // affected (since we have recalculated everything else at the last GC).
 
-  size_t young_region_num = g1h()->young_list()->length();
+  uint young_region_num = g1h()->young_list()->length();
   if (young_region_num > _young_region_num) {
-    size_t diff = young_region_num - _young_region_num;
-    _eden_used += diff * HeapRegion::GrainBytes;
+    uint diff = young_region_num - _young_region_num;
+    _eden_used += (size_t) diff * HeapRegion::GrainBytes;
     // Somewhat defensive: cap the eden used size to make sure it
     // never exceeds the committed size.
     _eden_used = MIN2(_eden_used, _eden_committed);
--- a/src/share/vm/gc_implementation/g1/g1MonitoringSupport.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1MonitoringSupport.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -112,7 +112,7 @@
 // do which is important as we want to keep the eden region allocation
 // path as low-overhead as possible.
 
-class G1MonitoringSupport : public CHeapObj {
+class G1MonitoringSupport : public CHeapObj<mtGC> {
   friend class VMStructs;
 
   G1CollectedHeap* _g1h;
@@ -147,7 +147,7 @@
   size_t _overall_committed;
   size_t _overall_used;
 
-  size_t _young_region_num;
+  uint   _young_region_num;
   size_t _young_gen_committed;
   size_t _eden_committed;
   size_t _eden_used;
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -118,9 +118,11 @@
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
 };
 
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
+class G1ParCopyClosure : public G1ParClosureSuper {
+  G1ParScanClosure _scanner;
+  template <class T> void do_oop_work(T* p);
 
-class G1ParCopyHelper : public G1ParClosureSuper {
-  G1ParScanClosure *_scanner;
 protected:
   // Mark the object if it's not already marked. This is used to mark
   // objects pointed to by roots that are guaranteed not to move
@@ -135,22 +137,10 @@
   oop copy_to_survivor_space(oop obj);
 
 public:
-  G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
-                  G1ParScanClosure *scanner) :
-    G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
-};
-
-template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
-class G1ParCopyClosure : public G1ParCopyHelper {
-  G1ParScanClosure _scanner;
-
-  template <class T> void do_oop_work(T* p);
-
-public:
   G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
                    ReferenceProcessor* rp) :
       _scanner(g1, par_scan_state, rp),
-      G1ParCopyHelper(g1, par_scan_state, &_scanner) {
+      G1ParClosureSuper(g1, par_scan_state) {
     assert(_ref_processor == NULL, "sanity");
   }
 
@@ -207,7 +197,6 @@
   HeapWord* _r_bottom;
   HeapWord* _r_end;
   OopClosure* _oc;
-  int _out_of_region;
 public:
   FilterOutOfRegionClosure(HeapRegion* r, OopClosure* oc);
   template <class T> void do_oop_nv(T* p);
@@ -215,7 +204,6 @@
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
   bool apply_to_weak_ref_discovered_field() { return true; }
   bool do_header() { return false; }
-  int out_of_region() { return _out_of_region; }
 };
 
 // Closure for iterating over object fields during concurrent marking
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -29,31 +29,22 @@
 #include "gc_implementation/g1/g1CollectedHeap.hpp"
 #include "gc_implementation/g1/g1OopClosures.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
+#include "gc_implementation/g1/heapRegionRemSet.hpp"
 
 /*
  * This really ought to be an inline function, but apparently the C++
  * compiler sometimes sees fit to ignore inline declarations.  Sigh.
  */
 
-// This must a ifdef'ed because the counting it controls is in a
-// perf-critical inner loop.
-#define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0
-
 template <class T>
 inline void FilterIntoCSClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop) &&
       _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) {
     _oc->do_oop(p);
-#if FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT
-    if (_dcto_cl != NULL)
-      _dcto_cl->incr_count();
-#endif
   }
 }
 
-#define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0
-
 template <class T>
 inline void FilterOutOfRegionClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
@@ -61,9 +52,6 @@
     HeapWord* obj_hw = (HeapWord*)oopDesc::decode_heap_oop_not_null(heap_oop);
     if (obj_hw < _r_bottom || obj_hw >= _r_end) {
       _oc->do_oop(p);
-#if FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT
-      _out_of_region++;
-#endif
     }
   }
 }
@@ -182,6 +170,7 @@
 #endif // ASSERT
 
   assert(_from != NULL, "from region must be non-NULL");
+  assert(_from->is_in_reserved(p), "p is not in from");
 
   HeapRegion* to = _g1->heap_region_containing(obj);
   if (to != NULL && _from != to) {
@@ -212,14 +201,16 @@
       // or processed (if an evacuation failure occurs) at the end
       // of the collection.
       // See G1RemSet::cleanup_after_oops_into_collection_set_do().
-    } else {
-      // We either don't care about pushing references that point into the
-      // collection set (i.e. we're not during an evacuation pause) _or_
-      // the reference doesn't point into the collection set. Either way
-      // we add the reference directly to the RSet of the region containing
-      // the referenced object.
-      _g1_rem_set->par_write_ref(_from, p, _worker_i);
+      return;
     }
+
+    // We either don't care about pushing references that point into the
+    // collection set (i.e. we're not during an evacuation pause) _or_
+    // the reference doesn't point into the collection set. Either way
+    // we add the reference directly to the RSet of the region containing
+    // the referenced object.
+    assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
+    to->rem_set()->add_reference(p, _worker_i);
   }
 }
 
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -29,6 +29,7 @@
 #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
@@ -75,7 +76,7 @@
 {
   _seq_task = new SubTasksDone(NumSeqTasks);
   guarantee(n_workers() > 0, "There should be some workers");
-  _cset_rs_update_cl = NEW_C_HEAP_ARRAY(OopsInHeapRegionClosure*, n_workers());
+  _cset_rs_update_cl = NEW_C_HEAP_ARRAY(OopsInHeapRegionClosure*, n_workers(), mtGC);
   for (uint i = 0; i < n_workers(); i++) {
     _cset_rs_update_cl[i] = NULL;
   }
@@ -86,7 +87,7 @@
   for (uint i = 0; i < n_workers(); i++) {
     assert(_cset_rs_update_cl[i] == NULL, "it should be");
   }
-  FREE_C_HEAP_ARRAY(OopsInHeapRegionClosure*, _cset_rs_update_cl);
+  FREE_C_HEAP_ARRAY(OopsInHeapRegionClosure*, _cset_rs_update_cl, mtGC);
 }
 
 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
@@ -224,7 +225,7 @@
   assert( _cards_scanned != NULL, "invariant" );
   _cards_scanned[worker_i] = scanRScl.cards_done();
 
-  _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
+  _g1p->phase_times()->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
 }
 
 // Closure used for updating RSets and recording references that
@@ -276,65 +277,9 @@
     guarantee(cl.n() == 0, "Card table should be clean.");
   }
 
-  _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
+  _g1p->phase_times()->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
 }
 
-class CountRSSizeClosure: public HeapRegionClosure {
-  size_t _n;
-  size_t _tot;
-  size_t _max;
-  HeapRegion* _max_r;
-  enum {
-    N = 20,
-    MIN = 6
-  };
-  int _histo[N];
-public:
-  CountRSSizeClosure() : _n(0), _tot(0), _max(0), _max_r(NULL) {
-    for (int i = 0; i < N; i++) _histo[i] = 0;
-  }
-  bool doHeapRegion(HeapRegion* r) {
-    if (!r->continuesHumongous()) {
-      size_t occ = r->rem_set()->occupied();
-      _n++;
-      _tot += occ;
-      if (occ > _max) {
-        _max = occ;
-        _max_r = r;
-      }
-      // Fit it into a histo bin.
-      int s = 1 << MIN;
-      int i = 0;
-      while (occ > (size_t) s && i < (N-1)) {
-        s = s << 1;
-        i++;
-      }
-      _histo[i]++;
-    }
-    return false;
-  }
-  size_t n() { return _n; }
-  size_t tot() { return _tot; }
-  size_t mx() { return _max; }
-  HeapRegion* mxr() { return _max_r; }
-  void print_histo() {
-    int mx = N;
-    while (mx >= 0) {
-      if (_histo[mx-1] > 0) break;
-      mx--;
-    }
-    gclog_or_tty->print_cr("Number of regions with given RS sizes:");
-    gclog_or_tty->print_cr("           <= %8d   %8d", 1 << MIN, _histo[0]);
-    for (int i = 1; i < mx-1; i++) {
-      gclog_or_tty->print_cr("  %8d  - %8d   %8d",
-                    (1 << (MIN + i - 1)) + 1,
-                    1 << (MIN + i),
-                    _histo[i]);
-    }
-    gclog_or_tty->print_cr("            > %8d   %8d", (1 << (MIN+mx-2))+1, _histo[mx-1]);
-  }
-};
-
 void G1RemSet::cleanupHRRS() {
   HeapRegionRemSet::cleanup();
 }
@@ -348,17 +293,6 @@
     _cg1r->clear_and_record_card_counts();
   }
 
-  // Make this into a command-line flag...
-  if (G1RSCountHisto && (ParallelGCThreads == 0 || worker_i == 0)) {
-    CountRSSizeClosure count_cl;
-    _g1->heap_region_iterate(&count_cl);
-    gclog_or_tty->print_cr("Avg of %d RS counts is %f, max is %d, "
-                  "max region is " PTR_FORMAT,
-                  count_cl.n(), (float)count_cl.tot()/(float)count_cl.n(),
-                  count_cl.mx(), count_cl.mxr());
-    count_cl.print_histo();
-  }
-
   // We cache the value of 'oc' closure into the appropriate slot in the
   // _cset_rs_update_cl for this worker
   assert(worker_i < (int)n_workers(), "sanity");
@@ -390,13 +324,13 @@
   if (G1UseParallelRSetUpdating || (worker_i == 0)) {
     updateRS(&into_cset_dcq, worker_i);
   } else {
-    _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
-    _g1p->record_update_rs_time(worker_i, 0.0);
+    _g1p->phase_times()->record_update_rs_processed_buffers(worker_i, 0);
+    _g1p->phase_times()->record_update_rs_time(worker_i, 0.0);
   }
   if (G1UseParallelRSetScanning || (worker_i == 0)) {
     scanRS(oc, worker_i);
   } else {
-    _g1p->record_scan_rs_time(worker_i, 0.0);
+    _g1p->phase_times()->record_scan_rs_time(worker_i, 0.0);
   }
 
   // We now clear the cached values of _cset_rs_update_cl for this worker
@@ -416,7 +350,7 @@
     // _seq_task->set_n_termination((int)n_workers());
   }
   guarantee( _cards_scanned == NULL, "invariant" );
-  _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
+  _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers(), mtGC);
   for (uint i = 0; i < n_workers(); ++i) {
     _cards_scanned[i] = 0;
   }
@@ -487,7 +421,7 @@
   for (uint i = 0; i < n_workers(); ++i) {
     _total_cards_scanned += _cards_scanned[i];
   }
-  FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
+  FREE_C_HEAP_ARRAY(size_t, _cards_scanned, mtGC);
   _cards_scanned = NULL;
   // Cleanup after copy
   _g1->set_refine_cte_cl_concurrency(true);
@@ -567,8 +501,6 @@
 }
 
 
-static IntHistogram out_of_histo(50, 50);
-
 
 G1TriggerClosure::G1TriggerClosure() :
   _triggered(false) { }
@@ -670,7 +602,6 @@
       sdcq->enqueue(card_ptr);
     }
   } else {
-    out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
     _conc_refine_cards++;
   }
 
@@ -861,11 +792,6 @@
   card_repeat_count.print_on(gclog_or_tty);
 #endif
 
-  if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) {
-    gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: ");
-    gclog_or_tty->print_cr("  # of CS ptrs --> # of cards with that number.");
-    out_of_histo.print_on(gclog_or_tty);
-  }
   gclog_or_tty->print_cr("\n Concurrent RS processed %d cards",
                          _conc_refine_cards);
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
@@ -888,21 +814,24 @@
 
   HRRSStatsIter blk;
   g1->heap_region_iterate(&blk);
-  gclog_or_tty->print_cr("  Total heap region rem set sizes = " SIZE_FORMAT "K."
-                         "  Max = " SIZE_FORMAT "K.",
+  gclog_or_tty->print_cr("  Total heap region rem set sizes = "SIZE_FORMAT"K."
+                         "  Max = "SIZE_FORMAT"K.",
                          blk.total_mem_sz()/K, blk.max_mem_sz()/K);
-  gclog_or_tty->print_cr("  Static structures = " SIZE_FORMAT "K,"
-                         " free_lists = " SIZE_FORMAT "K.",
-                         HeapRegionRemSet::static_mem_size()/K,
-                         HeapRegionRemSet::fl_mem_size()/K);
-  gclog_or_tty->print_cr("    %d occupied cards represented.",
+  gclog_or_tty->print_cr("  Static structures = "SIZE_FORMAT"K,"
+                         " free_lists = "SIZE_FORMAT"K.",
+                         HeapRegionRemSet::static_mem_size() / K,
+                         HeapRegionRemSet::fl_mem_size() / K);
+  gclog_or_tty->print_cr("    "SIZE_FORMAT" occupied cards represented.",
                          blk.occupied());
-  gclog_or_tty->print_cr("    Max sz region = [" PTR_FORMAT ", " PTR_FORMAT " )"
-                         ", cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.",
-                         blk.max_mem_sz_region()->bottom(), blk.max_mem_sz_region()->end(),
-                         (blk.max_mem_sz_region()->rem_set()->mem_size() + K - 1)/K,
-                         (blk.max_mem_sz_region()->rem_set()->occupied() + K - 1)/K);
-  gclog_or_tty->print_cr("    Did %d coarsenings.", HeapRegionRemSet::n_coarsenings());
+  HeapRegion* max_mem_sz_region = blk.max_mem_sz_region();
+  HeapRegionRemSet* rem_set = max_mem_sz_region->rem_set();
+  gclog_or_tty->print_cr("    Max size region = "HR_FORMAT", "
+                         "size = "SIZE_FORMAT "K, occupied = "SIZE_FORMAT"K.",
+                         HR_FORMAT_PARAMS(max_mem_sz_region),
+                         (rem_set->mem_size() + K - 1)/K,
+                         (rem_set->occupied() + K - 1)/K);
+  gclog_or_tty->print_cr("    Did %d coarsenings.",
+                         HeapRegionRemSet::n_coarsenings());
 }
 
 void G1RemSet::prepare_for_verify() {
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -36,7 +36,7 @@
 // external heap references into it.  Uses a mod ref bs to track updates,
 // so that they can be used to update the individual region remsets.
 
-class G1RemSet: public CHeapObj {
+class G1RemSet: public CHeapObj<mtGC> {
 protected:
   G1CollectedHeap* _g1;
   unsigned _conc_refine_cards;
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -26,7 +26,6 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1_GLOBALS_HPP
 
 #include "runtime/globals.hpp"
-
 //
 // Defines all globals flags used by the garbage-first compiler.
 //
@@ -54,6 +53,9 @@
   develop(bool, G1TraceMarkStackOverflow, false,                            \
           "If true, extra debugging code for CM restart for ovflw.")        \
                                                                             \
+  develop(bool, G1TraceHeapRegionRememberedSet, false,                      \
+          "Enables heap region remembered set debug logs")                  \
+                                                                            \
   diagnostic(bool, G1SummarizeConcMark, false,                              \
           "Summarize concurrent mark info")                                 \
                                                                             \
@@ -69,9 +71,6 @@
   diagnostic(bool, G1TraceConcRefinement, false,                            \
           "Trace G1 concurrent refinement")                                 \
                                                                             \
-  product(intx, G1MarkRegionStackSize, 1024 * 1024,                         \
-          "Size of the region stack for concurrent marking.")               \
-                                                                            \
   product(double, G1ConcMarkStepDurationMillis, 10.0,                       \
           "Target duration of individual concurrent marking steps "         \
           "in milliseconds.")                                               \
@@ -131,9 +130,6 @@
             "Prints the liveness information for all regions in the heap "  \
             "at the end of a marking cycle.")                               \
                                                                             \
-  develop(bool, G1PrintParCleanupStats, false,                              \
-          "When true, print extra stats about parallel cleanup.")           \
-                                                                            \
   product(intx, G1UpdateBufferSize, 256,                                    \
           "Size of an update buffer")                                       \
                                                                             \
@@ -291,29 +287,59 @@
           "The number of times we'll force an overflow during "             \
           "concurrent marking")                                             \
                                                                             \
-  develop(uintx, G1DefaultMinNewGenPercent, 20,                             \
+  experimental(uintx, G1DefaultMinNewGenPercent, 20,                        \
           "Percentage (0-100) of the heap size to use as minimum "          \
           "young gen size.")                                                \
                                                                             \
-  develop(uintx, G1DefaultMaxNewGenPercent, 80,                             \
+  experimental(uintx, G1DefaultMaxNewGenPercent, 80,                        \
           "Percentage (0-100) of the heap size to use as maximum "          \
           "young gen size.")                                                \
                                                                             \
-  develop(uintx, G1OldCSetRegionLiveThresholdPercent, 95,                   \
+  experimental(uintx, G1OldCSetRegionLiveThresholdPercent, 90,              \
           "Threshold for regions to be added to the collection set. "       \
-          "Regions with more live bytes that this will not be collected.")  \
+          "Regions with more live bytes than this will not be collected.")  \
+                                                                            \
+  product(uintx, G1HeapWastePercent, 5,                                     \
+          "Amount of space, expressed as a percentage of the heap size, "   \
+          "that G1 is willing not to collect to avoid expensive GCs.")      \
+                                                                            \
+  product(uintx, G1MixedGCCountTarget, 4,                                   \
+          "The target number of mixed GCs after a marking cycle.")          \
+                                                                            \
+  experimental(uintx, G1OldCSetRegionThresholdPercent, 10,                  \
+          "An upper bound for the number of old CSet regions expressed "    \
+          "as a percentage of the heap size.")                              \
+                                                                            \
+  experimental(ccstr, G1LogLevel, NULL,                                     \
+          "Log level for G1 logging: fine, finer, finest")                  \
+                                                                            \
+  notproduct(bool, G1EvacuationFailureALot, false,                          \
+          "Force use of evacuation failure handling during certain "        \
+          "evacuation pauses")                                              \
                                                                             \
-  develop(uintx, G1OldReclaimableThresholdPercent, 1,                       \
-          "Threshold for the remaining old reclaimable bytes, expressed "   \
-          "as a percentage of the heap size. If the old reclaimable bytes " \
-          "are under this we will not collect them with more mixed GCs.")   \
+  develop(uintx, G1EvacuationFailureALotCount, 1000,                        \
+          "Number of successful evacuations between evacuation failures "   \
+          "occurring at object copying")                                    \
+                                                                            \
+  develop(uintx, G1EvacuationFailureALotInterval, 5,                        \
+          "Total collections between forced triggering of evacuation "      \
+          "failures")                                                       \
+                                                                            \
+  develop(bool, G1EvacuationFailureALotDuringConcMark, true,                \
+          "Force use of evacuation failure handling during evacuation "     \
+          "pauses when marking is in progress")                             \
                                                                             \
-  develop(uintx, G1MaxMixedGCNum, 4,                                        \
-          "The maximum desired number of mixed GCs after a marking cycle.") \
+  develop(bool, G1EvacuationFailureALotDuringInitialMark, true,             \
+          "Force use of evacuation failure handling during initial mark "   \
+          "evacuation pauses")                                              \
                                                                             \
-  develop(uintx, G1OldCSetRegionThresholdPercent, 10,                       \
-          "An upper bound for the number of old CSet regions expressed "    \
-          "as a percentage of the heap size.")
+  develop(bool, G1EvacuationFailureALotDuringYoungGC, true,                 \
+          "Force use of evacuation failure handling during young "          \
+          "evacuation pauses")                                              \
+                                                                            \
+  develop(bool, G1EvacuationFailureALotDuringMixedGC, true,                 \
+          "Force use of evacuation failure handling during mixed "          \
+          "evacuation pauses")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
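For readers unfamiliar with this file's shape: the flag table is an X-macro. Each develop/product/experimental/... row invokes whichever parameter macro G1_FLAGS was handed for that flag kind, so the one table can expand into declarations, definitions, or registration code. A minimal sketch of the mechanism (hypothetical flags):

    // Passing DECLARE_FLAG for every kind turns the table into extern
    // declarations; a DEFINE_FLAG macro could emit storage instead.
    #define MY_FLAGS(develop, product, experimental)                      \
      develop(bool, TraceSomething, false, "debug-build-only trace flag") \
      product(unsigned, SomeBufferSize, 256, "product-build tunable")     \
      experimental(unsigned, SomePercent, 10, "unlocked experimental")

    #define DECLARE_FLAG(type, name, value, doc) extern type name;
    MY_FLAGS(DECLARE_FLAG, DECLARE_FLAG, DECLARE_FLAG)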
 
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -44,14 +44,11 @@
                                  CardTableModRefBS::PrecisionStyle precision,
                                  FilterKind fk) :
   ContiguousSpaceDCTOC(hr, cl, precision, NULL),
-  _hr(hr), _fk(fk), _g1(g1)
-{ }
+  _hr(hr), _fk(fk), _g1(g1) { }
 
 FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r,
                                                    OopClosure* oc) :
-  _r_bottom(r->bottom()), _r_end(r->end()),
-  _oc(oc), _out_of_region(0)
-{}
+  _r_bottom(r->bottom()), _r_end(r->end()), _oc(oc) { }
 
 class VerifyLiveClosure: public OopClosure {
 private:
@@ -334,7 +331,7 @@
 
   guarantee(GrainWords == 0, "we should only set it once");
   GrainWords = GrainBytes >> LogHeapWordSize;
-  guarantee((size_t)(1 << LogOfHRGrainWords) == GrainWords, "sanity");
+  guarantee((size_t) 1 << LogOfHRGrainWords == GrainWords, "sanity");
 
   guarantee(CardsPerRegion == 0, "we should only set it once");
   CardsPerRegion = GrainBytes >> CardTableModRefBS::card_shift;
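A worked instance of these relations, assuming 1M regions on a 64-bit VM with the usual 512-byte cards:

    //   GrainBytes        = 1 << 20         = 1048576
    //   GrainWords        = GrainBytes >> 3 = 131072  (8-byte heap words)
    //   LogOfHRGrainWords = 20 - 3 = 17, and (size_t)1 << 17 == 131072
    //   CardsPerRegion    = GrainBytes >> 9 = 2048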
@@ -370,7 +367,6 @@
     _claimed = InitialClaimValue;
   }
   zero_marked_bytes();
-  set_sort_index(-1);
 
   _offsets.resize(HeapRegion::GrainWords);
   init_top_at_mark_start();
@@ -388,10 +384,17 @@
 }
 
 void HeapRegion::calc_gc_efficiency() {
+  // GC efficiency is the ratio of how much space would be
+  // reclaimed over how long we predict it would take to reclaim it.
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   G1CollectorPolicy* g1p = g1h->g1_policy();
-  _gc_efficiency = (double) reclaimable_bytes() /
-                            g1p->predict_region_elapsed_time_ms(this, false);
+
+  // Retrieve a prediction of the elapsed time for this region for
+  // a mixed gc because the region will only be evacuated during a
+  // mixed gc.
+  double region_elapsed_time_ms =
+    g1p->predict_region_elapsed_time_ms(this, false /* for_young_gc */);
+  _gc_efficiency = (double) reclaimable_bytes() / region_elapsed_time_ms;
 }
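In short, the policy ranks candidate regions by reclaimable bytes per millisecond of predicted evacuation time. A toy version of the calculation with made-up inputs:

    #include <cstddef>

    // gc_efficiency(786432, 12.5) == 62914.56 bytes/ms; a region that
    // scores higher is collected earlier in the mixed phase.
    static double gc_efficiency(std::size_t reclaimable_bytes,
                                double predicted_ms) {
      return (double) reclaimable_bytes / predicted_ms;
    }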
 
 void HeapRegion::set_startsHumongous(HeapWord* new_top, HeapWord* new_end) {
@@ -482,17 +485,16 @@
 #endif // _MSC_VER
 
 
-HeapRegion::
-HeapRegion(size_t hrs_index, G1BlockOffsetSharedArray* sharedOffsetArray,
-           MemRegion mr, bool is_zeroed)
-  : G1OffsetTableContigSpace(sharedOffsetArray, mr, is_zeroed),
+HeapRegion::HeapRegion(uint hrs_index,
+                       G1BlockOffsetSharedArray* sharedOffsetArray,
+                       MemRegion mr, bool is_zeroed) :
+    G1OffsetTableContigSpace(sharedOffsetArray, mr, is_zeroed),
     _hrs_index(hrs_index),
     _humongous_type(NotHumongous), _humongous_start_region(NULL),
     _in_collection_set(false),
     _next_in_special_set(NULL), _orig_end(NULL),
     _claimed(InitialClaimValue), _evacuation_failed(false),
-    _prev_marked_bytes(0), _next_marked_bytes(0), _sort_index(-1),
-    _gc_efficiency(0.0),
+    _prev_marked_bytes(0), _next_marked_bytes(0), _gc_efficiency(0.0),
     _young_type(NotYoung), _next_young_region(NULL),
     _next_dirty_cards_region(NULL), _next(NULL), _pending_removal(false),
 #ifdef ASSERT
@@ -512,40 +514,21 @@
   _rem_set =  new HeapRegionRemSet(sharedOffsetArray, this);
 
   assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant.");
-  // In case the region is allocated during a pause, note the top.
-  // We haven't done any counting on a brand new region.
-  _top_at_conc_mark_count = bottom();
 }
 
-class NextCompactionHeapRegionClosure: public HeapRegionClosure {
-  const HeapRegion* _target;
-  bool _target_seen;
-  HeapRegion* _last;
-  CompactibleSpace* _res;
-public:
-  NextCompactionHeapRegionClosure(const HeapRegion* target) :
-    _target(target), _target_seen(false), _res(NULL) {}
-  bool doHeapRegion(HeapRegion* cur) {
-    if (_target_seen) {
-      if (!cur->isHumongous()) {
-        _res = cur;
-        return true;
-      }
-    } else if (cur == _target) {
-      _target_seen = true;
+CompactibleSpace* HeapRegion::next_compaction_space() const {
+  // We're not using an iterator because it would wrap around when
+  // it reaches the last region, which is not what we want here.
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  uint index = hrs_index() + 1;
+  while (index < g1h->n_regions()) {
+    HeapRegion* hr = g1h->region_at(index);
+    if (!hr->isHumongous()) {
+      return hr;
     }
-    return false;
+    index += 1;
   }
-  CompactibleSpace* result() { return _res; }
-};
-
-CompactibleSpace* HeapRegion::next_compaction_space() const {
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  // cast away const-ness
-  HeapRegion* r = (HeapRegion*) this;
-  NextCompactionHeapRegionClosure blk(r);
-  g1h->heap_region_iterate_from(r, &blk);
-  return blk.result();
+  return NULL;
 }
 
 void HeapRegion::save_marks() {
@@ -587,14 +570,12 @@
     // we find to be self-forwarded on the next bitmap. So all
     // objects need to be below NTAMS.
     _next_top_at_mark_start = top();
-    set_top_at_conc_mark_count(bottom());
     _next_marked_bytes = 0;
   } else if (during_conc_mark) {
     // During concurrent mark, all objects in the CSet (including
     // the ones we find to be self-forwarded) are implicitly live.
     // So all objects need to be above NTAMS.
     _next_top_at_mark_start = bottom();
-    set_top_at_conc_mark_count(bottom());
     _next_marked_bytes = 0;
   }
 }
@@ -779,16 +760,15 @@
   G1OffsetTableContigSpace::print_on(st);
 }
 
-void HeapRegion::verify(bool allow_dirty) const {
+void HeapRegion::verify() const {
   bool dummy = false;
-  verify(allow_dirty, VerifyOption_G1UsePrevMarking, /* failures */ &dummy);
+  verify(VerifyOption_G1UsePrevMarking, /* failures */ &dummy);
 }
 
 // This really ought to be commoned up into OffsetTableContigSpace somehow.
 // We would need a mechanism to make that code skip dead objects.
 
-void HeapRegion::verify(bool allow_dirty,
-                        VerifyOption vo,
+void HeapRegion::verify(VerifyOption vo,
                         bool* failures) const {
   G1CollectedHeap* g1 = G1CollectedHeap::heap();
   *failures = false;
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -52,12 +52,18 @@
 class HeapRegion;
 class HeapRegionSetBase;
 
-#define HR_FORMAT SIZE_FORMAT":(%s)["PTR_FORMAT","PTR_FORMAT","PTR_FORMAT"]"
+#define HR_FORMAT "%u:(%s)["PTR_FORMAT","PTR_FORMAT","PTR_FORMAT"]"
 #define HR_FORMAT_PARAMS(_hr_) \
                 (_hr_)->hrs_index(), \
-                (_hr_)->is_survivor() ? "S" : (_hr_)->is_young() ? "E" : "-", \
+                (_hr_)->is_survivor() ? "S" : (_hr_)->is_young() ? "E" : \
+                (_hr_)->startsHumongous() ? "HS" : \
+                (_hr_)->continuesHumongous() ? "HC" : \
+                !(_hr_)->is_empty() ? "O" : "F", \
                 (_hr_)->bottom(), (_hr_)->top(), (_hr_)->end()
 
+// sentinel value for hrs_index
+#define G1_NULL_HRS_INDEX ((uint) -1)
+
 // A dirty card to oop closure for heap regions. It
 // knows how to get the G1 heap and how to use the bitmap
 // in the concurrent marker used by G1 to filter remembered
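For reference, the HR_FORMAT/HR_FORMAT_PARAMS pair above renders a region as index, type letter, and bottom/top/end, e.g. (hypothetical addresses):

    12:(O)[0x00000000f0000000,0x00000000f0080000,0x00000000f0100000]

where the letter is S (survivor), E (eden), HS/HC (starts/continues humongous), O (old, non-empty) or F (free).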
@@ -170,6 +176,7 @@
   virtual HeapWord* saved_mark_word() const;
   virtual void set_saved_mark();
   void reset_gc_time_stamp() { _gc_time_stamp = 0; }
+  unsigned get_gc_time_stamp() { return _gc_time_stamp; }
 
   // See the comment above in the declaration of _pre_dummy_top for an
   // explanation of what it is.
@@ -235,7 +242,7 @@
 
  protected:
   // The index of this region in the heap region sequence.
-  size_t  _hrs_index;
+  uint  _hrs_index;
 
   HumongousType _humongous_type;
   // For a humongous region, region in which it starts.
@@ -278,12 +285,8 @@
   size_t _prev_marked_bytes;    // Bytes known to be live via last completed marking.
   size_t _next_marked_bytes;    // Bytes known to be live via in-progress marking.
 
-  // See "sort_index" method.  -1 means is not in the array.
-  int _sort_index;
-
-  // <PREDICTION>
+  // The calculated GC efficiency of the region.
   double _gc_efficiency;
-  // </PREDICTION>
 
   enum YoungType {
     NotYoung,                   // a region is not young
@@ -307,9 +310,6 @@
   // If a collection pause is in progress, this is the top at the start
   // of that pause.
 
-  // We've counted the marked bytes of objects below here.
-  HeapWord* _top_at_conc_mark_count;
-
   void init_top_at_mark_start() {
     assert(_prev_marked_bytes == 0 &&
            _next_marked_bytes == 0,
@@ -317,7 +317,6 @@
     HeapWord* bot = bottom();
     _prev_top_at_mark_start = bot;
     _next_top_at_mark_start = bot;
-    _top_at_conc_mark_count = bot;
   }
 
   void set_young_type(YoungType new_type) {
@@ -342,7 +341,7 @@
 
  public:
   // If "is_zeroed" is "true", the region "mr" can be assumed to contain zeros.
-  HeapRegion(size_t hrs_index,
+  HeapRegion(uint hrs_index,
              G1BlockOffsetSharedArray* sharedOffsetArray,
              MemRegion mr, bool is_zeroed);
 
@@ -373,10 +372,9 @@
     ScrubRemSetClaimValue      = 3,
     ParVerifyClaimValue        = 4,
     RebuildRSClaimValue        = 5,
-    CompleteMarkCSetClaimValue = 6,
-    ParEvacFailureClaimValue   = 7,
-    AggregateCountClaimValue   = 8,
-    VerifyCountClaimValue      = 9
+    ParEvacFailureClaimValue   = 6,
+    AggregateCountClaimValue   = 7,
+    VerifyCountClaimValue      = 8
   };
 
   inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
@@ -390,7 +388,7 @@
 
   // If this region is a member of a HeapRegionSeq, the index in that
   // sequence, otherwise -1.
-  size_t hrs_index() const { return _hrs_index; }
+  uint hrs_index() const { return _hrs_index; }
 
   // The number of bytes marked live in the region in the last marking phase.
   size_t marked_bytes()    { return _prev_marked_bytes; }
@@ -445,6 +443,25 @@
     return _humongous_start_region;
   }
 
+  // Return the number of distinct regions that are covered by this region:
+  // 1 if the region is not humongous, the number of regions in the
+  // humongous series otherwise.
+  uint region_num() const {
+    if (!isHumongous()) {
+      return 1U;
+    } else {
+      assert(startsHumongous(), "doesn't make sense on HC regions");
+      assert(capacity() % HeapRegion::GrainBytes == 0, "sanity");
+      return (uint) (capacity() >> HeapRegion::LogOfHRGrainBytes);
+    }
+  }
+
+  // Return the index + 1 of the last HC region that's associated
+  // with this HS region.
+  uint last_hc_index() const {
+    assert(startsHumongous(), "don't call this otherwise");
+    return hrs_index() + region_num();
+  }
+
   // Same as Space::is_in_reserved, but will use the original size of the region.
   // The original size is different only for start humongous regions. They get
   // their _end set up to be the end of the last continues region of the
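To make region_num() and last_hc_index() concrete (hypothetical numbers): a 3.5M humongous object in a heap of 1M regions occupies one HS region plus three HC regions, so for the HS region:

    //   capacity()      = 4 * GrainBytes  (its _end spans the series)
    //   region_num()    = capacity() >> LogOfHRGrainBytes = 4
    //   last_hc_index() = hrs_index() + 4  (one past the last HC region)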
@@ -627,32 +644,9 @@
   // last mark phase ended.
   bool is_marked() { return _prev_top_at_mark_start != bottom(); }
 
-  // If "is_marked()" is true, then this is the index of the region in
-  // an array constructed at the end of marking of the regions in a
-  // "desirability" order.
-  int sort_index() {
-    return _sort_index;
-  }
-  void set_sort_index(int i) {
-    _sort_index = i;
-  }
-
-  void init_top_at_conc_mark_count() {
-    _top_at_conc_mark_count = bottom();
-  }
-
-  void set_top_at_conc_mark_count(HeapWord *cur) {
-    assert(bottom() <= cur && cur <= end(), "Sanity.");
-    _top_at_conc_mark_count = cur;
-  }
-
-  HeapWord* top_at_conc_mark_count() {
-    return _top_at_conc_mark_count;
-  }
-
   void reset_during_compaction() {
-    guarantee( isHumongous() && startsHumongous(),
-               "should only be called for humongous regions");
+    assert(isHumongous() && startsHumongous(),
+           "should only be called for starts humongous regions");
 
     zero_marked_bytes();
     init_top_at_mark_start();
@@ -745,7 +739,6 @@
     _evacuation_failed = b;
 
     if (b) {
-      init_top_at_conc_mark_count();
       _next_marked_bytes = 0;
     }
   }
@@ -804,7 +797,7 @@
   virtual void oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl);
   SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DECL)
 
-  CompactibleSpace* next_compaction_space() const;
+  virtual CompactibleSpace* next_compaction_space() const;
 
   virtual void reset_after_compaction();
 
@@ -824,10 +817,10 @@
   // Currently there is only one place where this is called with
   // vo == UseMarkWord, which is to verify the marking during a
   // full GC.
-  void verify(bool allow_dirty, VerifyOption vo, bool *failures) const;
+  void verify(VerifyOption vo, bool *failures) const;
 
   // Override; it uses the "prev" marking information
-  virtual void verify(bool allow_dirty) const;
+  virtual void verify() const;
 };
 
 // HeapRegionClosure is used for iterating over regions.
--- a/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -56,7 +56,6 @@
 }
 
 inline void HeapRegion::note_start_of_marking() {
-  init_top_at_conc_mark_count();
   _next_marked_bytes = 0;
   _next_top_at_mark_start = top();
 }
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,102 +30,54 @@
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "memory/allocation.hpp"
 #include "memory/space.inline.hpp"
+#include "oops/oop.inline.hpp"
 #include "utilities/bitMap.inline.hpp"
 #include "utilities/globalDefinitions.hpp"
 
-#define HRRS_VERBOSE 0
-
-#define PRT_COUNT_OCCUPIED 1
-
-// OtherRegionsTable
-
-class PerRegionTable: public CHeapObj {
+class PerRegionTable: public CHeapObj<mtGC> {
   friend class OtherRegionsTable;
   friend class HeapRegionRemSetIterator;
 
   HeapRegion*     _hr;
   BitMap          _bm;
-#if PRT_COUNT_OCCUPIED
   jint            _occupied;
-#endif
-  PerRegionTable* _next_free;
+
+  // next pointer for free/allocated 'all' list
+  PerRegionTable* _next;
 
-  PerRegionTable* next_free() { return _next_free; }
-  void set_next_free(PerRegionTable* prt) { _next_free = prt; }
+  // prev pointer for the allocated 'all' list
+  PerRegionTable* _prev;
 
+  // next pointer in collision list
+  PerRegionTable* _collision_list_next;
 
+  // Global free list of PRTs
   static PerRegionTable* _free_list;
 
-#ifdef _MSC_VER
-  // For some reason even though the classes are marked as friend they are unable
-  // to access CardsPerRegion when private/protected. Only the windows c++ compiler
-  // says this Sun CC and linux gcc don't have a problem with access when private
-
-  public:
-
-#endif // _MSC_VER
-
 protected:
   // We need access in order to union things into the base table.
   BitMap* bm() { return &_bm; }
 
-#if PRT_COUNT_OCCUPIED
   void recount_occupied() {
     _occupied = (jint) bm()->count_one_bits();
   }
-#endif
 
   PerRegionTable(HeapRegion* hr) :
     _hr(hr),
-#if PRT_COUNT_OCCUPIED
     _occupied(0),
-#endif
-    _bm(HeapRegion::CardsPerRegion, false /* in-resource-area */)
+    _bm(HeapRegion::CardsPerRegion, false /* in-resource-area */),
+    _collision_list_next(NULL), _next(NULL), _prev(NULL)
   {}
 
-  static void free(PerRegionTable* prt) {
-    while (true) {
-      PerRegionTable* fl = _free_list;
-      prt->set_next_free(fl);
-      PerRegionTable* res =
-        (PerRegionTable*)
-        Atomic::cmpxchg_ptr(prt, &_free_list, fl);
-      if (res == fl) return;
-    }
-    ShouldNotReachHere();
-  }
-
-  static PerRegionTable* alloc(HeapRegion* hr) {
-    PerRegionTable* fl = _free_list;
-    while (fl != NULL) {
-      PerRegionTable* nxt = fl->next_free();
-      PerRegionTable* res =
-        (PerRegionTable*)
-        Atomic::cmpxchg_ptr(nxt, &_free_list, fl);
-      if (res == fl) {
-        fl->init(hr);
-        return fl;
-      } else {
-        fl = _free_list;
-      }
-    }
-    assert(fl == NULL, "Loop condition.");
-    return new PerRegionTable(hr);
-  }
-
   void add_card_work(CardIdx_t from_card, bool par) {
     if (!_bm.at(from_card)) {
       if (par) {
         if (_bm.par_at_put(from_card, 1)) {
-#if PRT_COUNT_OCCUPIED
           Atomic::inc(&_occupied);
-#endif
         }
       } else {
         _bm.at_put(from_card, 1);
-#if PRT_COUNT_OCCUPIED
         _occupied++;
-#endif
       }
     }
   }
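Note how the parallel path increments _occupied only when par_at_put() reports that this thread actually flipped the bit, which keeps the count exact under concurrency. A standalone sketch of that idea using standard atomics (not the HotSpot BitMap API):

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    // Atomically set a bit; return true only for the thread that changed
    // it from 0 to 1, so an occupancy counter is bumped exactly once per
    // newly-set card.
    static bool par_set_bit(std::atomic<std::uint64_t>* words,
                            std::size_t bit) {
      std::uint64_t mask = std::uint64_t(1) << (bit & 63);
      std::uint64_t old  =
        words[bit >> 6].fetch_or(mask, std::memory_order_relaxed);
      return (old & mask) == 0;
    }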
@@ -134,10 +86,13 @@
     // Must make this robust in case "from" is not in "_hr", because of
     // concurrency.
 
-#if HRRS_VERBOSE
-    gclog_or_tty->print_cr("    PRT::Add_reference_work(" PTR_FORMAT "->" PTR_FORMAT").",
-                           from, *from);
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print_cr("    PRT::Add_reference_work(" PTR_FORMAT "->" PTR_FORMAT").",
+                             from,
+                             UseCompressedOops
+                             ? oopDesc::load_decode_heap_oop((narrowOop*)from)
+                             : oopDesc::load_decode_heap_oop((oop*)from));
+    }
 
     HeapRegion* loc_hr = hr();
     // If the test below fails, then this table was reused concurrently
@@ -162,23 +117,20 @@
 
   HeapRegion* hr() const { return _hr; }
 
-#if PRT_COUNT_OCCUPIED
   jint occupied() const {
     // Overkill, but if we ever need it...
     // guarantee(_occupied == _bm.count_one_bits(), "Check");
     return _occupied;
   }
-#else
-  jint occupied() const {
-    return _bm.count_one_bits();
-  }
-#endif
 
-  void init(HeapRegion* hr) {
+  void init(HeapRegion* hr, bool clear_links_to_all_list) {
+    if (clear_links_to_all_list) {
+      set_next(NULL);
+      set_prev(NULL);
+    }
     _hr = hr;
-#if PRT_COUNT_OCCUPIED
+    _collision_list_next = NULL;
     _occupied = 0;
-#endif
     _bm.clear();
   }
 
@@ -194,9 +146,7 @@
     HeapWord* hr_bot = hr()->bottom();
     size_t hr_first_card_index = ctbs->index_for(hr_bot);
     bm()->set_intersection_at_offset(*card_bm, hr_first_card_index);
-#if PRT_COUNT_OCCUPIED
     recount_occupied();
-#endif
   }
 
   void add_card(CardIdx_t from_card_index) {
@@ -218,16 +168,6 @@
     return sizeof(this) + _bm.size_in_words() * HeapWordSize;
   }
 
-  static size_t fl_mem_size() {
-    PerRegionTable* cur = _free_list;
-    size_t res = 0;
-    while (cur != NULL) {
-      res += sizeof(PerRegionTable);
-      cur = cur->next_free();
-    }
-    return res;
-  }
-
   // Requires "from" to be in "hr()".
   bool contains_reference(OopOrNarrowOopStar from) const {
     assert(hr()->is_in_reserved(from), "Precondition.");
@@ -235,273 +175,86 @@
                                     CardTableModRefBS::card_size);
     return _bm.at(card_ind);
   }
-};
 
-PerRegionTable* PerRegionTable::_free_list = NULL;
-
-
-#define COUNT_PAR_EXPANDS 0
-
-#if COUNT_PAR_EXPANDS
-static jint n_par_expands = 0;
-static jint n_par_contracts = 0;
-static jint par_expand_list_len = 0;
-static jint max_par_expand_list_len = 0;
-
-static void print_par_expand() {
-  Atomic::inc(&n_par_expands);
-  Atomic::inc(&par_expand_list_len);
-  if (par_expand_list_len > max_par_expand_list_len) {
-    max_par_expand_list_len = par_expand_list_len;
-  }
-  if ((n_par_expands % 10) == 0) {
-    gclog_or_tty->print_cr("\n\n%d par expands: %d contracts, "
-                  "len = %d, max_len = %d\n.",
-                  n_par_expands, n_par_contracts, par_expand_list_len,
-                  max_par_expand_list_len);
-  }
-}
-#endif
-
-class PosParPRT: public PerRegionTable {
-  PerRegionTable** _par_tables;
-
-  enum SomePrivateConstants {
-    ReserveParTableExpansion = 1
-  };
-
-  void par_contract() {
-    assert(_par_tables != NULL, "Precondition.");
-    int n = HeapRegionRemSet::num_par_rem_sets()-1;
-    for (int i = 0; i < n; i++) {
-      _par_tables[i]->union_bitmap_into(bm());
-      PerRegionTable::free(_par_tables[i]);
-      _par_tables[i] = NULL;
-    }
-#if PRT_COUNT_OCCUPIED
-    // We must recount the "occupied."
-    recount_occupied();
-#endif
-    FREE_C_HEAP_ARRAY(PerRegionTable*, _par_tables);
-    _par_tables = NULL;
-#if COUNT_PAR_EXPANDS
-    Atomic::inc(&n_par_contracts);
-    Atomic::dec(&par_expand_list_len);
-#endif
-  }
-
-  static PerRegionTable** _par_table_fl;
-
-  PosParPRT* _next;
-
-  static PosParPRT* _free_list;
-
-  PerRegionTable** par_tables() const {
-    assert(uintptr_t(NULL) == 0, "Assumption.");
-    if (uintptr_t(_par_tables) <= ReserveParTableExpansion)
-      return NULL;
-    else
-      return _par_tables;
-  }
-
-  PosParPRT* _next_par_expanded;
-  PosParPRT* next_par_expanded() { return _next_par_expanded; }
-  void set_next_par_expanded(PosParPRT* ppprt) { _next_par_expanded = ppprt; }
-  static PosParPRT* _par_expanded_list;
-
-public:
-
-  PosParPRT(HeapRegion* hr) : PerRegionTable(hr), _par_tables(NULL) {}
-
-  jint occupied() const {
-    jint res = PerRegionTable::occupied();
-    if (par_tables() != NULL) {
-      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
-        res += par_tables()[i]->occupied();
+  // Bulk-free the PRTs from prt to last, assuming that they are
+  // linked together using their _next field.
+  static void bulk_free(PerRegionTable* prt, PerRegionTable* last) {
+    while (true) {
+      PerRegionTable* fl = _free_list;
+      last->set_next(fl);
+      PerRegionTable* res = (PerRegionTable*) Atomic::cmpxchg_ptr(prt, &_free_list, fl);
+      if (res == fl) {
+        return;
       }
     }
-    return res;
-  }
-
-  void init(HeapRegion* hr) {
-    PerRegionTable::init(hr);
-    _next = NULL;
-    if (par_tables() != NULL) {
-      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
-        par_tables()[i]->init(hr);
-      }
-    }
-  }
-
-  static void free(PosParPRT* prt) {
-    while (true) {
-      PosParPRT* fl = _free_list;
-      prt->set_next(fl);
-      PosParPRT* res =
-        (PosParPRT*)
-        Atomic::cmpxchg_ptr(prt, &_free_list, fl);
-      if (res == fl) return;
-    }
     ShouldNotReachHere();
   }
 
-  static PosParPRT* alloc(HeapRegion* hr) {
-    PosParPRT* fl = _free_list;
+  static void free(PerRegionTable* prt) {
+    bulk_free(prt, prt);
+  }
+
+  // Returns an initialized PerRegionTable instance.
+  static PerRegionTable* alloc(HeapRegion* hr) {
+    PerRegionTable* fl = _free_list;
     while (fl != NULL) {
-      PosParPRT* nxt = fl->next();
-      PosParPRT* res =
-        (PosParPRT*)
+      PerRegionTable* nxt = fl->next();
+      PerRegionTable* res =
+        (PerRegionTable*)
         Atomic::cmpxchg_ptr(nxt, &_free_list, fl);
       if (res == fl) {
-        fl->init(hr);
+        fl->init(hr, true);
         return fl;
       } else {
         fl = _free_list;
       }
     }
     assert(fl == NULL, "Loop condition.");
-    return new PosParPRT(hr);
-  }
-
-  PosParPRT* next() const { return _next; }
-  void set_next(PosParPRT* nxt) { _next = nxt; }
-  PosParPRT** next_addr() { return &_next; }
-
-  bool should_expand(int tid) {
-    // Given that we now defer RSet updates for after a GC we don't
-    // really need to expand the tables any more. This code should be
-    // cleaned up in the future (see CR 6921087).
-    return false;
+    return new PerRegionTable(hr);
   }
 
-  void par_expand() {
-    int n = HeapRegionRemSet::num_par_rem_sets()-1;
-    if (n <= 0) return;
-    if (_par_tables == NULL) {
-      PerRegionTable* res =
-        (PerRegionTable*)
-        Atomic::cmpxchg_ptr((PerRegionTable*)ReserveParTableExpansion,
-                            &_par_tables, NULL);
-      if (res != NULL) return;
-      // Otherwise, we reserved the right to do the expansion.
+  PerRegionTable* next() const { return _next; }
+  void set_next(PerRegionTable* next) { _next = next; }
+  PerRegionTable* prev() const { return _prev; }
+  void set_prev(PerRegionTable* prev) { _prev = prev; }
 
-      PerRegionTable** ptables = NEW_C_HEAP_ARRAY(PerRegionTable*, n);
-      for (int i = 0; i < n; i++) {
-        PerRegionTable* ptable = PerRegionTable::alloc(hr());
-        ptables[i] = ptable;
-      }
-      // Here we do not need an atomic.
-      _par_tables = ptables;
-#if COUNT_PAR_EXPANDS
-      print_par_expand();
-#endif
-      // We must put this table on the expanded list.
-      PosParPRT* exp_head = _par_expanded_list;
-      while (true) {
-        set_next_par_expanded(exp_head);
-        PosParPRT* res =
-          (PosParPRT*)
-          Atomic::cmpxchg_ptr(this, &_par_expanded_list, exp_head);
-        if (res == exp_head) return;
-        // Otherwise.
-        exp_head = res;
-      }
-      ShouldNotReachHere();
-    }
+  // Accessor and modification routines for the pointer to the
+  // singly linked collision list that links the PRTs within the
+  // OtherRegionsTable::_fine_grain_regions hash table.
+  //
+  // It might be useful to also make the collision list doubly linked
+  // to avoid iteration over the collision list during scrubbing/deletion.
+  // OTOH there might not be many collisions.
+
+  PerRegionTable* collision_list_next() const {
+    return _collision_list_next;
   }
 
-  void add_reference(OopOrNarrowOopStar from, int tid) {
-    // Expand if necessary.
-    PerRegionTable** pt = par_tables();
-    if (pt != NULL) {
-      // We always have to assume that mods to table 0 are in parallel,
-      // because of the claiming scheme in parallel expansion.  A thread
-      // with tid != 0 that finds the table to be NULL, but doesn't succeed
-      // in claiming the right of expanding it, will end up in the else
-      // clause of the above if test.  That thread could be delayed, and a
-      // thread 0 add reference could see the table expanded, and come
-      // here.  Both threads would be adding in parallel.  But we get to
-      // not use atomics for tids > 0.
-      if (tid == 0) {
-        PerRegionTable::add_reference(from);
-      } else {
-        pt[tid-1]->seq_add_reference(from);
-      }
-    } else {
-      // Not expanded -- add to the base table.
-      PerRegionTable::add_reference(from);
-    }
+  void set_collision_list_next(PerRegionTable* next) {
+    _collision_list_next = next;
   }
 
-  void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) {
-    assert(_par_tables == NULL, "Precondition");
-    PerRegionTable::scrub(ctbs, card_bm);
-  }
-
-  size_t mem_size() const {
-    size_t res =
-      PerRegionTable::mem_size() + sizeof(this) - sizeof(PerRegionTable);
-    if (_par_tables != NULL) {
-      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
-        res += _par_tables[i]->mem_size();
-      }
-    }
-    return res;
+  PerRegionTable** collision_list_next_addr() {
+    return &_collision_list_next;
   }
 
   static size_t fl_mem_size() {
-    PosParPRT* cur = _free_list;
+    PerRegionTable* cur = _free_list;
     size_t res = 0;
     while (cur != NULL) {
-      res += sizeof(PosParPRT);
+      res += sizeof(PerRegionTable);
       cur = cur->next();
     }
     return res;
   }
-
-  bool contains_reference(OopOrNarrowOopStar from) const {
-    if (PerRegionTable::contains_reference(from)) return true;
-    if (_par_tables != NULL) {
-      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
-        if (_par_tables[i]->contains_reference(from)) return true;
-      }
-    }
-    return false;
-  }
-
-  static void par_contract_all();
 };
 
-void PosParPRT::par_contract_all() {
-  PosParPRT* hd = _par_expanded_list;
-  while (hd != NULL) {
-    PosParPRT* nxt = hd->next_par_expanded();
-    PosParPRT* res =
-      (PosParPRT*)
-      Atomic::cmpxchg_ptr(nxt, &_par_expanded_list, hd);
-    if (res == hd) {
-      // We claimed the right to contract this table.
-      hd->set_next_par_expanded(NULL);
-      hd->par_contract();
-      hd = _par_expanded_list;
-    } else {
-      hd = res;
-    }
-  }
-}
-
-PosParPRT* PosParPRT::_free_list = NULL;
-PosParPRT* PosParPRT::_par_expanded_list = NULL;
-
-jint OtherRegionsTable::_cache_probes = 0;
-jint OtherRegionsTable::_cache_hits = 0;
+PerRegionTable* PerRegionTable::_free_list = NULL;
 
 size_t OtherRegionsTable::_max_fine_entries = 0;
 size_t OtherRegionsTable::_mod_max_fine_entries_mask = 0;
-#if SAMPLE_FOR_EVICTION
 size_t OtherRegionsTable::_fine_eviction_stride = 0;
 size_t OtherRegionsTable::_fine_eviction_sample_size = 0;
-#endif
 
 OtherRegionsTable::OtherRegionsTable(HeapRegion* hr) :
   _g1h(G1CollectedHeap::heap()),
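The bulk_free()/alloc() pair above is a lock-free free list: a Treiber stack pushed and popped with Atomic::cmpxchg_ptr. A self-contained sketch of the same discipline in standard C++, with Node standing in for PerRegionTable:

    #include <atomic>

    struct Node { Node* next; };

    static std::atomic<Node*> free_list_head{nullptr};

    // Push a pre-linked chain [first..last] with one CAS loop, as in
    // PerRegionTable::bulk_free().
    static void bulk_free(Node* first, Node* last) {
      Node* head = free_list_head.load();
      do {
        last->next = head;
      } while (!free_list_head.compare_exchange_weak(head, first));
    }

    // Pop one node, or allocate fresh when the list is empty, as in
    // PerRegionTable::alloc(). (The classic ABA caveat of CAS-based pops
    // is glossed over in this sketch.)
    static Node* alloc() {
      Node* head = free_list_head.load();
      while (head != nullptr) {
        if (free_list_head.compare_exchange_weak(head, head->next)) {
          return head;
        }
        // on failure compare_exchange_weak reloaded 'head'; retry
      }
      return new Node();
    }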
@@ -510,35 +263,98 @@
   _coarse_map(G1CollectedHeap::heap()->max_regions(),
               false /* in-resource-area */),
   _fine_grain_regions(NULL),
+  _first_all_fine_prts(NULL), _last_all_fine_prts(NULL),
   _n_fine_entries(0), _n_coarse_entries(0),
-#if SAMPLE_FOR_EVICTION
   _fine_eviction_start(0),
-#endif
   _sparse_table(hr)
 {
-  typedef PosParPRT* PosParPRTPtr;
+  typedef PerRegionTable* PerRegionTablePtr;
+
   if (_max_fine_entries == 0) {
     assert(_mod_max_fine_entries_mask == 0, "Both or none.");
     size_t max_entries_log = (size_t)log2_long((jlong)G1RSetRegionEntries);
-    _max_fine_entries = (size_t)(1 << max_entries_log);
+    _max_fine_entries = (size_t)1 << max_entries_log;
     _mod_max_fine_entries_mask = _max_fine_entries - 1;
-#if SAMPLE_FOR_EVICTION
+
     assert(_fine_eviction_sample_size == 0
            && _fine_eviction_stride == 0, "All init at same time.");
     _fine_eviction_sample_size = MAX2((size_t)4, max_entries_log);
     _fine_eviction_stride = _max_fine_entries / _fine_eviction_sample_size;
-#endif
   }
-  _fine_grain_regions = new PosParPRTPtr[_max_fine_entries];
-  if (_fine_grain_regions == NULL)
+
+  _fine_grain_regions = new PerRegionTablePtr[_max_fine_entries];
+
+  if (_fine_grain_regions == NULL) {
     vm_exit_out_of_memory(sizeof(void*)*_max_fine_entries,
                           "Failed to allocate _fine_grain_entries.");
+  }
+
   for (size_t i = 0; i < _max_fine_entries; i++) {
     _fine_grain_regions[i] = NULL;
   }
 }
 
-int** OtherRegionsTable::_from_card_cache = NULL;
+void OtherRegionsTable::link_to_all(PerRegionTable* prt) {
+  // We always append to the beginning of the list for convenience;
+  // the order of entries in this list does not matter.
+  if (_first_all_fine_prts != NULL) {
+    assert(_first_all_fine_prts->prev() == NULL, "invariant");
+    _first_all_fine_prts->set_prev(prt);
+    prt->set_next(_first_all_fine_prts);
+  } else {
+    // this is the first element we insert. Adjust the "last" pointer
+    _last_all_fine_prts = prt;
+    assert(prt->next() == NULL, "just checking");
+  }
+  // the new element is always the first element without a predecessor
+  prt->set_prev(NULL);
+  _first_all_fine_prts = prt;
+
+  assert(prt->prev() == NULL, "just checking");
+  assert(_first_all_fine_prts == prt, "just checking");
+  assert((_first_all_fine_prts == NULL && _last_all_fine_prts == NULL) ||
+         (_first_all_fine_prts != NULL && _last_all_fine_prts != NULL),
+         "just checking");
+  assert(_last_all_fine_prts == NULL || _last_all_fine_prts->next() == NULL,
+         "just checking");
+  assert(_first_all_fine_prts == NULL || _first_all_fine_prts->prev() == NULL,
+         "just checking");
+}
+
+void OtherRegionsTable::unlink_from_all(PerRegionTable* prt) {
+  if (prt->prev() != NULL) {
+    assert(_first_all_fine_prts != prt, "just checking");
+    prt->prev()->set_next(prt->next());
+    // removing the last element in the list?
+    if (_last_all_fine_prts == prt) {
+      _last_all_fine_prts = prt->prev();
+    }
+  } else {
+    assert(_first_all_fine_prts == prt, "just checking");
+    _first_all_fine_prts = prt->next();
+    // list is empty now?
+    if (_first_all_fine_prts == NULL) {
+      _last_all_fine_prts = NULL;
+    }
+  }
+
+  if (prt->next() != NULL) {
+    prt->next()->set_prev(prt->prev());
+  }
+
+  prt->set_next(NULL);
+  prt->set_prev(NULL);
+
+  assert((_first_all_fine_prts == NULL && _last_all_fine_prts == NULL) ||
+         (_first_all_fine_prts != NULL && _last_all_fine_prts != NULL),
+         "just checking");
+  assert(_last_all_fine_prts == NULL || _last_all_fine_prts->next() == NULL,
+         "just checking");
+  assert(_first_all_fine_prts == NULL || _first_all_fine_prts->prev() == NULL,
+         "just checking");
+}
+
+int**  OtherRegionsTable::_from_card_cache = NULL;
 size_t OtherRegionsTable::_from_card_cache_max_regions = 0;
 size_t OtherRegionsTable::_from_card_cache_mem_size = 0;
 
@@ -546,9 +362,9 @@
   _from_card_cache_max_regions = max_regions;
 
   int n_par_rs = HeapRegionRemSet::num_par_rem_sets();
-  _from_card_cache = NEW_C_HEAP_ARRAY(int*, n_par_rs);
+  _from_card_cache = NEW_C_HEAP_ARRAY(int*, n_par_rs, mtGC);
   for (int i = 0; i < n_par_rs; i++) {
-    _from_card_cache[i] = NEW_C_HEAP_ARRAY(int, max_regions);
+    _from_card_cache[i] = NEW_C_HEAP_ARRAY(int, max_regions, mtGC);
     for (size_t j = 0; j < max_regions; j++) {
       _from_card_cache[i][j] = -1;  // An invalid value.
     }
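The from-card cache initialized above is a duplicate filter: each (worker, region) slot remembers the last card added, so back-to-back references from the same card short-circuit before the tables are touched (-1 marks an empty slot). A minimal sketch of the lookup (hypothetical helper):

    #include <cstddef>

    // Returns true when 'card' repeats this worker's previous add for
    // the region, i.e. the reference can be dropped early.
    static bool from_card_cache_hit(int** cache, int tid,
                                    std::size_t region, int card) {
      if (cache[tid][region] == card) {
        return true;
      }
      cache[tid][region] = card;  // remember for next time
      return false;
    }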
@@ -577,40 +393,28 @@
 #endif
 
 void OtherRegionsTable::add_reference(OopOrNarrowOopStar from, int tid) {
-  size_t cur_hrs_ind = hr()->hrs_index();
+  size_t cur_hrs_ind = (size_t) hr()->hrs_index();
 
-#if HRRS_VERBOSE
-  gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").",
-                                                  from,
-                                                  UseCompressedOops
-                                                  ? oopDesc::load_decode_heap_oop((narrowOop*)from)
-                                                  : oopDesc::load_decode_heap_oop((oop*)from));
-#endif
+  if (G1TraceHeapRegionRememberedSet) {
+    gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").",
+                                                    from,
+                                                    UseCompressedOops
+                                                    ? oopDesc::load_decode_heap_oop((narrowOop*)from)
+                                                    : oopDesc::load_decode_heap_oop((oop*)from));
+  }
 
   int from_card = (int)(uintptr_t(from) >> CardTableModRefBS::card_shift);
 
-#if HRRS_VERBOSE
-  gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = %d)",
-                hr()->bottom(), from_card,
-                _from_card_cache[tid][cur_hrs_ind]);
-#endif
+  if (G1TraceHeapRegionRememberedSet) {
+    gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = %d)",
+                  hr()->bottom(), from_card,
+                  _from_card_cache[tid][cur_hrs_ind]);
+  }
 
-#define COUNT_CACHE 0
-#if COUNT_CACHE
-  jint p = Atomic::add(1, &_cache_probes);
-  if ((p % 10000) == 0) {
-    jint hits = _cache_hits;
-    gclog_or_tty->print_cr("%d/%d = %5.2f%% RS cache hits.",
-                  _cache_hits, p, 100.0* (float)hits/(float)p);
-  }
-#endif
   if (from_card == _from_card_cache[tid][cur_hrs_ind]) {
-#if HRRS_VERBOSE
-    gclog_or_tty->print_cr("  from-card cache hit.");
-#endif
-#if COUNT_CACHE
-    Atomic::inc(&_cache_hits);
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print_cr("  from-card cache hit.");
+    }
     assert(contains_reference(from), "We just added it!");
     return;
   } else {
@@ -623,16 +427,16 @@
 
   // If the region is already coarsened, return.
   if (_coarse_map.at(from_hrs_ind)) {
-#if HRRS_VERBOSE
-    gclog_or_tty->print_cr("  coarse map hit.");
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print_cr("  coarse map hit.");
+    }
     assert(contains_reference(from), "We just added it!");
     return;
   }
 
   // Otherwise find a per-region table to add it to.
   size_t ind = from_hrs_ind & _mod_max_fine_entries_mask;
-  PosParPRT* prt = find_region_table(ind, from_hr);
+  PerRegionTable* prt = find_region_table(ind, from_hr);
   if (prt == NULL) {
     MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
     // Confirm that it's really not there...
@@ -649,36 +453,39 @@
           _sparse_table.add_card(from_hrs_ind, card_index)) {
         if (G1RecordHRRSOops) {
           HeapRegionRemSet::record(hr(), from);
-#if HRRS_VERBOSE
-          gclog_or_tty->print("   Added card " PTR_FORMAT " to region "
-                              "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
-                              align_size_down(uintptr_t(from),
-                                              CardTableModRefBS::card_size),
-                              hr()->bottom(), from);
-#endif
+          if (G1TraceHeapRegionRememberedSet) {
+            gclog_or_tty->print("   Added card " PTR_FORMAT " to region "
+                                "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
+                                align_size_down(uintptr_t(from),
+                                                CardTableModRefBS::card_size),
+                                hr()->bottom(), from);
+          }
         }
-#if HRRS_VERBOSE
-        gclog_or_tty->print_cr("   added card to sparse table.");
-#endif
+        if (G1TraceHeapRegionRememberedSet) {
+          gclog_or_tty->print_cr("   added card to sparse table.");
+        }
         assert(contains_reference_locked(from), "We just added it!");
         return;
       } else {
-#if HRRS_VERBOSE
-        gclog_or_tty->print_cr("   [tid %d] sparse table entry "
-                      "overflow(f: %d, t: %d)",
-                      tid, from_hrs_ind, cur_hrs_ind);
-#endif
+        if (G1TraceHeapRegionRememberedSet) {
+          gclog_or_tty->print_cr("   [tid %d] sparse table entry "
+                        "overflow(f: %d, t: %d)",
+                        tid, from_hrs_ind, cur_hrs_ind);
+        }
       }
 
       if (_n_fine_entries == _max_fine_entries) {
         prt = delete_region_table();
+        // There is no need to clear the links to the 'all' list here:
+        // prt will be reused immediately, i.e. remain in the 'all' list.
+        prt->init(from_hr, false /* clear_links_to_all_list */);
       } else {
-        prt = PosParPRT::alloc(from_hr);
+        prt = PerRegionTable::alloc(from_hr);
+        link_to_all(prt);
       }
-      prt->init(from_hr);
 
-      PosParPRT* first_prt = _fine_grain_regions[ind];
-      prt->set_next(first_prt);  // XXX Maybe move to init?
+      PerRegionTable* first_prt = _fine_grain_regions[ind];
+      prt->set_collision_list_next(first_prt);
       _fine_grain_regions[ind] = prt;
       _n_fine_entries++;
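Stepping back, add_reference() escalates through the representations this file maintains, roughly:

    //   1. from-card cache: drop immediate duplicates (see above);
    //   2. coarse map:      if the from-region's bit is set, the whole
    //                       region will be scanned anyway -- nothing to do;
    //   3. sparse table:    record a handful of card indices per region;
    //   4. fine PRT:        on sparse overflow, switch to a full card
    //                       bitmap, evicting (and coarsening) the most
    //                       occupied PRT when all fine slots are in use.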
 
@@ -704,71 +511,42 @@
   // OtherRegionsTable for why this is OK.
   assert(prt != NULL, "Inv");
 
-  if (prt->should_expand(tid)) {
-    MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
-    HeapRegion* prt_hr = prt->hr();
-    if (prt_hr == from_hr) {
-      // Make sure the table still corresponds to the same region
-      prt->par_expand();
-      prt->add_reference(from, tid);
-    }
-    // else: The table has been concurrently coarsened, evicted, and
-    // the table data structure re-used for another table. So, we
-    // don't need to add the reference any more given that the table
-    // has been coarsened and the whole region will be scanned anyway.
-  } else {
-    prt->add_reference(from, tid);
-  }
+  prt->add_reference(from);
+
   if (G1RecordHRRSOops) {
     HeapRegionRemSet::record(hr(), from);
-#if HRRS_VERBOSE
-    gclog_or_tty->print("Added card " PTR_FORMAT " to region "
-                        "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
-                        align_size_down(uintptr_t(from),
-                                        CardTableModRefBS::card_size),
-                        hr()->bottom(), from);
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print("Added card " PTR_FORMAT " to region "
+                          "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
+                          align_size_down(uintptr_t(from),
+                                          CardTableModRefBS::card_size),
+                          hr()->bottom(), from);
+    }
   }
   assert(contains_reference(from), "We just added it!");
 }
 
-PosParPRT*
+PerRegionTable*
 OtherRegionsTable::find_region_table(size_t ind, HeapRegion* hr) const {
   assert(0 <= ind && ind < _max_fine_entries, "Preconditions.");
-  PosParPRT* prt = _fine_grain_regions[ind];
+  PerRegionTable* prt = _fine_grain_regions[ind];
   while (prt != NULL && prt->hr() != hr) {
-    prt = prt->next();
+    prt = prt->collision_list_next();
   }
   // Loop postcondition is the method postcondition.
   return prt;
 }
 
-
-#define DRT_CENSUS 0
-
-#if DRT_CENSUS
-static const int HistoSize = 6;
-static int global_histo[HistoSize] = { 0, 0, 0, 0, 0, 0 };
-static int coarsenings = 0;
-static int occ_sum = 0;
-#endif
-
 jint OtherRegionsTable::_n_coarsenings = 0;
 
-PosParPRT* OtherRegionsTable::delete_region_table() {
-#if DRT_CENSUS
-  int histo[HistoSize] = { 0, 0, 0, 0, 0, 0 };
-  const int histo_limits[] = { 1, 4, 16, 64, 256, 2048 };
-#endif
-
+PerRegionTable* OtherRegionsTable::delete_region_table() {
   assert(_m.owned_by_self(), "Precondition");
   assert(_n_fine_entries == _max_fine_entries, "Precondition");
-  PosParPRT* max = NULL;
+  PerRegionTable* max = NULL;
   jint max_occ = 0;
-  PosParPRT** max_prev;
+  PerRegionTable** max_prev;
   size_t max_ind;
 
-#if SAMPLE_FOR_EVICTION
   size_t i = _fine_eviction_start;
   for (size_t k = 0; k < _fine_eviction_sample_size; k++) {
     size_t ii = i;
@@ -778,8 +556,8 @@
       if (ii == _max_fine_entries) ii = 0;
       guarantee(ii != i, "We must find one.");
     }
-    PosParPRT** prev = &_fine_grain_regions[ii];
-    PosParPRT* cur = *prev;
+    PerRegionTable** prev = &_fine_grain_regions[ii];
+    PerRegionTable* cur = *prev;
     while (cur != NULL) {
       jint cur_occ = cur->occupied();
       if (max == NULL || cur_occ > max_occ) {
@@ -788,74 +566,37 @@
         max_ind = i;
         max_occ = cur_occ;
       }
-      prev = cur->next_addr();
-      cur = cur->next();
+      prev = cur->collision_list_next_addr();
+      cur = cur->collision_list_next();
     }
     i = i + _fine_eviction_stride;
     if (i >= _n_fine_entries) i = i - _n_fine_entries;
   }
+
   _fine_eviction_start++;
-  if (_fine_eviction_start >= _n_fine_entries)
+
+  if (_fine_eviction_start >= _n_fine_entries) {
     _fine_eviction_start -= _n_fine_entries;
-#else
-  for (int i = 0; i < _max_fine_entries; i++) {
-    PosParPRT** prev = &_fine_grain_regions[i];
-    PosParPRT* cur = *prev;
-    while (cur != NULL) {
-      jint cur_occ = cur->occupied();
-#if DRT_CENSUS
-      for (int k = 0; k < HistoSize; k++) {
-        if (cur_occ <= histo_limits[k]) {
-          histo[k]++; global_histo[k]++; break;
-        }
-      }
-#endif
-      if (max == NULL || cur_occ > max_occ) {
-        max = cur;
-        max_prev = prev;
-        max_ind = i;
-        max_occ = cur_occ;
-      }
-      prev = cur->next_addr();
-      cur = cur->next();
-    }
   }
-#endif
-  // XXX
+
   guarantee(max != NULL, "Since _n_fine_entries > 0");
-#if DRT_CENSUS
-  gclog_or_tty->print_cr("In a coarsening: histo of occs:");
-  for (int k = 0; k < HistoSize; k++) {
-    gclog_or_tty->print_cr("  <= %4d: %5d.", histo_limits[k], histo[k]);
-  }
-  coarsenings++;
-  occ_sum += max_occ;
-  if ((coarsenings % 100) == 0) {
-    gclog_or_tty->print_cr("\ncoarsenings = %d; global summary:", coarsenings);
-    for (int k = 0; k < HistoSize; k++) {
-      gclog_or_tty->print_cr("  <= %4d: %5d.", histo_limits[k], global_histo[k]);
-    }
-    gclog_or_tty->print_cr("Avg occ of deleted region = %6.2f.",
-                  (float)occ_sum/(float)coarsenings);
-  }
-#endif
 
   // Set the corresponding coarse bit.
-  size_t max_hrs_index = max->hr()->hrs_index();
+  size_t max_hrs_index = (size_t) max->hr()->hrs_index();
   if (!_coarse_map.at(max_hrs_index)) {
     _coarse_map.at_put(max_hrs_index, true);
     _n_coarse_entries++;
-#if 0
-    gclog_or_tty->print("Coarsened entry in region [" PTR_FORMAT "...] "
-               "for region [" PTR_FORMAT "...] (%d coarse entries).\n",
-               hr()->bottom(),
-               max->hr()->bottom(),
-               _n_coarse_entries);
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print("Coarsened entry in region [" PTR_FORMAT "...] "
+                 "for region [" PTR_FORMAT "...] (%d coarse entries).\n",
+                 hr()->bottom(),
+                 max->hr()->bottom(),
+                 _n_coarse_entries);
+    }
   }
 
   // Unsplice.
-  *max_prev = max->next();
+  *max_prev = max->collision_list_next();
   Atomic::inc(&_n_coarsenings);
   _n_fine_entries--;
   return max;
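Rather than scanning every bucket, delete_region_table() samples _fine_eviction_sample_size buckets spaced _fine_eviction_stride apart, rotating the start point on each call, and evicts the most occupied PRT it saw while setting the coarse bit for its from-region. A toy walk with hypothetical sizes:

    // With _max_fine_entries = 128, sample size 8 and stride 16, a call
    // starting at 3 inspects buckets 3, 19, 35, ..., 115; the next call
    // starts at 4, so successive evictions sweep the whole table.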
@@ -866,50 +607,59 @@
 void OtherRegionsTable::scrub(CardTableModRefBS* ctbs,
                               BitMap* region_bm, BitMap* card_bm) {
   // First eliminate garbage regions from the coarse map.
-  if (G1RSScrubVerbose)
-    gclog_or_tty->print_cr("Scrubbing region "SIZE_FORMAT":",
-                           hr()->hrs_index());
+  if (G1RSScrubVerbose) {
+    gclog_or_tty->print_cr("Scrubbing region %u:", hr()->hrs_index());
+  }
 
   assert(_coarse_map.size() == region_bm->size(), "Precondition");
-  if (G1RSScrubVerbose)
-    gclog_or_tty->print("   Coarse map: before = %d...", _n_coarse_entries);
+  if (G1RSScrubVerbose) {
+    gclog_or_tty->print("   Coarse map: before = "SIZE_FORMAT"...",
+                        _n_coarse_entries);
+  }
   _coarse_map.set_intersection(*region_bm);
   _n_coarse_entries = _coarse_map.count_one_bits();
-  if (G1RSScrubVerbose)
-    gclog_or_tty->print_cr("   after = %d.", _n_coarse_entries);
+  if (G1RSScrubVerbose) {
+    gclog_or_tty->print_cr("   after = "SIZE_FORMAT".", _n_coarse_entries);
+  }
 
   // Now do the fine-grained maps.
   for (size_t i = 0; i < _max_fine_entries; i++) {
-    PosParPRT* cur = _fine_grain_regions[i];
-    PosParPRT** prev = &_fine_grain_regions[i];
+    PerRegionTable* cur = _fine_grain_regions[i];
+    PerRegionTable** prev = &_fine_grain_regions[i];
     while (cur != NULL) {
-      PosParPRT* nxt = cur->next();
+      PerRegionTable* nxt = cur->collision_list_next();
       // If the entire region is dead, eliminate.
-      if (G1RSScrubVerbose)
-        gclog_or_tty->print_cr("     For other region "SIZE_FORMAT":",
+      if (G1RSScrubVerbose) {
+        gclog_or_tty->print_cr("     For other region %u:",
                                cur->hr()->hrs_index());
-      if (!region_bm->at(cur->hr()->hrs_index())) {
+      }
+      if (!region_bm->at((size_t) cur->hr()->hrs_index())) {
         *prev = nxt;
-        cur->set_next(NULL);
+        cur->set_collision_list_next(NULL);
         _n_fine_entries--;
-        if (G1RSScrubVerbose)
+        if (G1RSScrubVerbose) {
           gclog_or_tty->print_cr("          deleted via region map.");
-        PosParPRT::free(cur);
+        }
+        unlink_from_all(cur);
+        PerRegionTable::free(cur);
       } else {
         // Do fine-grain elimination.
-        if (G1RSScrubVerbose)
+        if (G1RSScrubVerbose) {
           gclog_or_tty->print("          occ: before = %4d.", cur->occupied());
+        }
         cur->scrub(ctbs, card_bm);
-        if (G1RSScrubVerbose)
+        if (G1RSScrubVerbose) {
           gclog_or_tty->print_cr("          after = %4d.", cur->occupied());
+        }
         // Did that empty the table completely?
         if (cur->occupied() == 0) {
           *prev = nxt;
-          cur->set_next(NULL);
+          cur->set_collision_list_next(NULL);
           _n_fine_entries--;
-          PosParPRT::free(cur);
+          unlink_from_all(cur);
+          PerRegionTable::free(cur);
         } else {
-          prev = cur->next_addr();
+          prev = cur->collision_list_next_addr();
         }
       }
       cur = nxt;
@@ -932,13 +682,15 @@
 
 size_t OtherRegionsTable::occ_fine() const {
   size_t sum = 0;
-  for (size_t i = 0; i < _max_fine_entries; i++) {
-    PosParPRT* cur = _fine_grain_regions[i];
-    while (cur != NULL) {
-      sum += cur->occupied();
-      cur = cur->next();
-    }
+
+  size_t num = 0;
+  PerRegionTable* cur = _first_all_fine_prts;
+  while (cur != NULL) {
+    sum += cur->occupied();
+    cur = cur->next();
+    num++;
   }
+  guarantee(num == _n_fine_entries, "just checking");
   return sum;
 }
 
@@ -954,14 +706,12 @@
   // Cast away const in this case.
   MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
   size_t sum = 0;
-  for (size_t i = 0; i < _max_fine_entries; i++) {
-    PosParPRT* cur = _fine_grain_regions[i];
-    while (cur != NULL) {
-      sum += cur->mem_size();
-      cur = cur->next();
-    }
+  PerRegionTable* cur = _first_all_fine_prts;
+  while (cur != NULL) {
+    sum += cur->mem_size();
+    cur = cur->next();
   }
-  sum += (sizeof(PosParPRT*) * _max_fine_entries);
+  sum += (sizeof(PerRegionTable*) * _max_fine_entries);
   sum += (_coarse_map.size_in_words() * HeapWordSize);
   sum += (_sparse_table.mem_size());
   sum += sizeof(*this) - sizeof(_sparse_table); // Avoid double counting above.
@@ -973,26 +723,28 @@
 }
 
 size_t OtherRegionsTable::fl_mem_size() {
-  return PerRegionTable::fl_mem_size() + PosParPRT::fl_mem_size();
+  return PerRegionTable::fl_mem_size();
 }
 
 void OtherRegionsTable::clear_fcc() {
+  size_t hrs_idx = hr()->hrs_index();
   for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
-    _from_card_cache[i][hr()->hrs_index()] = -1;
+    _from_card_cache[i][hrs_idx] = -1;
   }
 }
 
 void OtherRegionsTable::clear() {
   MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
-  for (size_t i = 0; i < _max_fine_entries; i++) {
-    PosParPRT* cur = _fine_grain_regions[i];
-    while (cur != NULL) {
-      PosParPRT* nxt = cur->next();
-      PosParPRT::free(cur);
-      cur = nxt;
-    }
-    _fine_grain_regions[i] = NULL;
+  // if there are no entries, skip this step
+  if (_first_all_fine_prts != NULL) {
+    guarantee(_first_all_fine_prts != NULL && _last_all_fine_prts != NULL, "just checking");
+    PerRegionTable::bulk_free(_first_all_fine_prts, _last_all_fine_prts);
+    memset(_fine_grain_regions, 0, _max_fine_entries * sizeof(_fine_grain_regions[0]));
+  } else {
+    guarantee(_first_all_fine_prts == NULL && _last_all_fine_prts == NULL, "just checking");
   }
+
+  _first_all_fine_prts = _last_all_fine_prts = NULL;
   _sparse_table.clear();
   _coarse_map.clear();
   _n_fine_entries = 0;
@@ -1003,7 +755,7 @@
 
 void OtherRegionsTable::clear_incoming_entry(HeapRegion* from_hr) {
   MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
-  size_t hrs_ind = from_hr->hrs_index();
+  size_t hrs_ind = (size_t) from_hr->hrs_index();
   size_t ind = hrs_ind & _mod_max_fine_entries_mask;
   if (del_single_region_table(ind, from_hr)) {
     assert(!_coarse_map.at(hrs_ind), "Inv");
@@ -1011,7 +763,7 @@
     _coarse_map.par_at_put(hrs_ind, 0);
   }
   // Check to see if any of the fcc entries come from here.
-  size_t hr_ind = hr()->hrs_index();
+  size_t hr_ind = (size_t) hr()->hrs_index();
   for (int tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) {
     int fcc_ent = _from_card_cache[tid][hr_ind];
     if (fcc_ent != -1) {
@@ -1028,16 +780,17 @@
 bool OtherRegionsTable::del_single_region_table(size_t ind,
                                                 HeapRegion* hr) {
   assert(0 <= ind && ind < _max_fine_entries, "Preconditions.");
-  PosParPRT** prev_addr = &_fine_grain_regions[ind];
-  PosParPRT* prt = *prev_addr;
+  PerRegionTable** prev_addr = &_fine_grain_regions[ind];
+  PerRegionTable* prt = *prev_addr;
   while (prt != NULL && prt->hr() != hr) {
-    prev_addr = prt->next_addr();
-    prt = prt->next();
+    prev_addr = prt->collision_list_next_addr();
+    prt = prt->collision_list_next();
   }
   if (prt != NULL) {
     assert(prt->hr() == hr, "Loop postcondition.");
-    *prev_addr = prt->next();
-    PosParPRT::free(prt);
+    *prev_addr = prt->collision_list_next();
+    unlink_from_all(prt);
+    PerRegionTable::free(prt);
     _n_fine_entries--;
     return true;
   } else {
@@ -1058,7 +811,7 @@
   // Is this region in the coarse map?
   if (_coarse_map.at(hr_ind)) return true;
 
-  PosParPRT* prt = find_region_table(hr_ind & _mod_max_fine_entries_mask,
+  PerRegionTable* prt = find_region_table(hr_ind & _mod_max_fine_entries_mask,
                                      hr);
   if (prt != NULL) {
     return prt->contains_reference(from);
@@ -1099,7 +852,8 @@
 void HeapRegionRemSet::setup_remset_size() {
   // Setup sparse and fine-grain tables sizes.
   // table_size = base * (log(region_size / 1M) + 1)
-  int region_size_log_mb = MAX2((int)HeapRegion::LogOfHRGrainBytes - (int)LOG_M, 0);
+  const int LOG_M = 20;
+  int region_size_log_mb = MAX2(HeapRegion::LogOfHRGrainBytes - LOG_M, 0);
   if (FLAG_IS_DEFAULT(G1RSetSparseRegionEntries)) {
     G1RSetSparseRegionEntries = G1RSetSparseRegionEntriesBase * (region_size_log_mb + 1);
   }
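
For reference, a back-of-the-envelope sketch of the sizing rule above. The 4 MB region size and a base of 4 are assumed illustrative inputs, not values taken from this changeset:

    // table_size = base * (log(region_size / 1M) + 1), as in setup_remset_size()
    int sparse_entries_for(int log_region_bytes, int base) {
      const int LOG_M = 20;  // log2(1M), matching the constant in the hunk above
      int region_size_log_mb = log_region_bytes > LOG_M ? log_region_bytes - LOG_M : 0;
      return base * (region_size_log_mb + 1);
    }
    // sparse_entries_for(22, 4) == 12: a 4 MB region gets a 12-entry sparse table.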
@@ -1137,7 +891,6 @@
       G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index);
     gclog_or_tty->print_cr("  Card " PTR_FORMAT, card_start);
   }
-  // XXX
   if (iter.n_yielded() != occupied()) {
     gclog_or_tty->print_cr("Yielded disagrees with occupied:");
     gclog_or_tty->print_cr("  %6d yielded (%6d coarse, %6d fine).",
@@ -1155,10 +908,6 @@
   SparsePRT::cleanup_all();
 }
 
-void HeapRegionRemSet::par_cleanup() {
-  PosParPRT::par_contract_all();
-}
-
 void HeapRegionRemSet::clear() {
   _other_regions.clear();
   assert(occupied() == 0, "Should be clear.");
@@ -1223,7 +972,7 @@
     if ((size_t)_coarse_cur_region_index < _coarse_map->size()) {
       _coarse_cur_region_cur_card = 0;
       HeapWord* r_bot =
-        _g1h->region_at(_coarse_cur_region_index)->bottom();
+        _g1h->region_at((uint) _coarse_cur_region_index)->bottom();
       _cur_region_card_offset = _bosa->index_for(r_bot);
     } else {
       return false;
@@ -1253,7 +1002,7 @@
   while (!fine_has_next()) {
     if (_cur_region_cur_card == (size_t) HeapRegion::CardsPerRegion) {
       _cur_region_cur_card = 0;
-      _fine_cur_prt = _fine_cur_prt->next();
+      _fine_cur_prt = _fine_cur_prt->collision_list_next();
     }
     if (_fine_cur_prt == NULL) {
       fine_find_next_non_null_prt();
@@ -1325,9 +1074,9 @@
            && _recorded_cards == NULL
            && _recorded_regions == NULL,
            "Inv");
-    _recorded_oops    = NEW_C_HEAP_ARRAY(OopOrNarrowOopStar, MaxRecorded);
-    _recorded_cards   = NEW_C_HEAP_ARRAY(HeapWord*,          MaxRecorded);
-    _recorded_regions = NEW_C_HEAP_ARRAY(HeapRegion*,        MaxRecorded);
+    _recorded_oops    = NEW_C_HEAP_ARRAY(OopOrNarrowOopStar, MaxRecorded, mtGC);
+    _recorded_cards   = NEW_C_HEAP_ARRAY(HeapWord*,          MaxRecorded, mtGC);
+    _recorded_regions = NEW_C_HEAP_ARRAY(HeapRegion*,        MaxRecorded, mtGC);
   }
   if (_n_recorded == MaxRecorded) {
     gclog_or_tty->print_cr("Filled up 'recorded' (%d).", MaxRecorded);
@@ -1348,8 +1097,8 @@
     assert(_n_recorded_events == 0
            && _recorded_event_index == NULL,
            "Inv");
-    _recorded_events = NEW_C_HEAP_ARRAY(Event, MaxRecordedEvents);
-    _recorded_event_index = NEW_C_HEAP_ARRAY(int, MaxRecordedEvents);
+    _recorded_events = NEW_C_HEAP_ARRAY(Event, MaxRecordedEvents, mtGC);
+    _recorded_event_index = NEW_C_HEAP_ARRAY(int, MaxRecordedEvents, mtGC);
   }
   if (_n_recorded_events == MaxRecordedEvents) {
     gclog_or_tty->print_cr("Filled up 'recorded_events' (%d).", MaxRecordedEvents);
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,7 +35,7 @@
 class G1BlockOffsetSharedArray;
 class HeapRegion;
 class HeapRegionRemSetIterator;
-class PosParPRT;
+class PerRegionTable;
 class SparsePRT;
 
 // Essentially a wrapper around SparsePRTCleanupTask. See
@@ -79,15 +79,22 @@
   size_t      _n_coarse_entries;
   static jint _n_coarsenings;
 
-  PosParPRT** _fine_grain_regions;
-  size_t      _n_fine_entries;
+  PerRegionTable** _fine_grain_regions;
+  size_t           _n_fine_entries;
 
-#define SAMPLE_FOR_EVICTION 1
-#if SAMPLE_FOR_EVICTION
+  // The fine grain remembered sets are doubly linked together using
+  // their 'next' and 'prev' fields.
+  // This allows fast bulk freeing of all the fine grain remembered
+  // set entries, and fast finding of all of them without iterating
+  // over the _fine_grain_regions table.
+  PerRegionTable * _first_all_fine_prts;
+  PerRegionTable * _last_all_fine_prts;
+
+  // Used to sample a subset of the fine grain PRTs to determine which
+  // PRT to evict and coarsen.
   size_t        _fine_eviction_start;
   static size_t _fine_eviction_stride;
   static size_t _fine_eviction_sample_size;
-#endif
 
   SparsePRT   _sparse_table;
 
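
To make the new comment concrete, here is a sketch of how link_to_all()/unlink_from_all() would maintain the doubly-linked "all" list. Field names follow this diff; the set_prev()/prev() accessors are assumed for illustration, not quoted from the changeset:

    void OtherRegionsTable::link_to_all(PerRegionTable* prt) {
      prt->set_next(_first_all_fine_prts);        // push at the head
      prt->set_prev(NULL);
      if (_first_all_fine_prts != NULL) {
        _first_all_fine_prts->set_prev(prt);
      } else {
        _last_all_fine_prts = prt;                // first element is also the last
      }
      _first_all_fine_prts = prt;
    }

    void OtherRegionsTable::unlink_from_all(PerRegionTable* prt) {
      if (prt->prev() != NULL) {
        prt->prev()->set_next(prt->next());
      } else {
        _first_all_fine_prts = prt->next();       // prt was the head
      }
      if (prt->next() != NULL) {
        prt->next()->set_prev(prt->prev());
      } else {
        _last_all_fine_prts = prt->prev();        // prt was the tail
      }
      prt->set_next(NULL);
      prt->set_prev(NULL);
    }

With both head and tail at hand, clear() can pass the whole chain to PerRegionTable::bulk_free() in one call instead of walking every bucket of _fine_grain_regions.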
@@ -98,26 +105,28 @@
   // Requires "prt" to be the first element of the bucket list appropriate
   // for "hr".  If this list contains an entry for "hr", return it,
   // otherwise return "NULL".
-  PosParPRT* find_region_table(size_t ind, HeapRegion* hr) const;
+  PerRegionTable* find_region_table(size_t ind, HeapRegion* hr) const;
 
-  // Find, delete, and return a candidate PosParPRT, if any exists,
+  // Find, delete, and return a candidate PerRegionTable, if any exists,
   // adding the deleted region to the coarse bitmap.  Requires the caller
   // to hold _m, and the fine-grain table to be full.
-  PosParPRT* delete_region_table();
+  PerRegionTable* delete_region_table();
 
   // If a PRT for "hr" is in the bucket list indicated by "ind" (which must
   // be the correct index for "hr"), delete it and return true; else return
   // false.
   bool del_single_region_table(size_t ind, HeapRegion* hr);
 
-  static jint _cache_probes;
-  static jint _cache_hits;
-
   // Indexed by thread X heap region, to minimize thread contention.
   static int** _from_card_cache;
   static size_t _from_card_cache_max_regions;
   static size_t _from_card_cache_mem_size;
 
+  // link/add the given fine grain remembered set into the "all" list
+  void link_to_all(PerRegionTable * prt);
+  // unlink/remove the given fine grain remembered set from the "all" list
+  void unlink_from_all(PerRegionTable * prt);
+
 public:
   OtherRegionsTable(HeapRegion* hr);
 
@@ -127,10 +136,6 @@
   // sense.
   void add_reference(OopOrNarrowOopStar from, int tid);
 
-  void add_reference(OopOrNarrowOopStar from) {
-    return add_reference(from, 0);
-  }
-
   // Removes any entries shown by the given bitmaps to contain only dead
   // objects.
   void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
@@ -173,7 +178,7 @@
   static void print_from_card_cache();
 };
 
-class HeapRegionRemSet : public CHeapObj {
+class HeapRegionRemSet : public CHeapObj<mtGC> {
   friend class VMStructs;
   friend class HeapRegionRemSetIterator;
 
@@ -233,14 +238,12 @@
 
   static jint n_coarsenings() { return OtherRegionsTable::n_coarsenings(); }
 
-  /* Used in the sequential case.  Returns "true" iff this addition causes
-     the size limit to be reached. */
+  // Used in the sequential case.
   void add_reference(OopOrNarrowOopStar from) {
-    _other_regions.add_reference(from);
+    _other_regions.add_reference(from, 0);
   }
 
-  /* Used in the parallel case.  Returns "true" iff this addition causes
-     the size limit to be reached. */
+  // Used in the parallel case.
   void add_reference(OopOrNarrowOopStar from, int tid) {
     _other_regions.add_reference(from, tid);
   }
@@ -253,15 +256,6 @@
   // entries for this region in other remsets.
   void clear();
 
-  // Forget any entries due to pointers from "from_hr".
-  void clear_incoming_entry(HeapRegion* from_hr) {
-    _other_regions.clear_incoming_entry(from_hr);
-  }
-
-#if 0
-  virtual void cleanup() = 0;
-#endif
-
   // Attempt to claim the region.  Returns true iff this call caused an
   // atomic transition from Unclaimed to Claimed.
   bool claim_iter();
@@ -290,12 +284,6 @@
   // Initialize the given iterator to iterate over this rem set.
   void init_iterator(HeapRegionRemSetIterator* iter) const;
 
-#if 0
-  // Apply the "do_card" method to the start address of every card in the
-  // rem set.  Returns false if some application of the closure aborted.
-  virtual bool card_iterate(CardClosure* iter) = 0;
-#endif
-
   // The actual # of bytes this hr_remset takes up.
   size_t mem_size() {
     return _other_regions.mem_size()
@@ -322,20 +310,17 @@
   void print() const;
 
   // Called during a stop-world phase to perform any deferred cleanups.
-  // The second version may be called by parallel threads after then finish
-  // collection work.
   static void cleanup();
-  static void par_cleanup();
 
   // Declare the heap size (in # of regions) to the HeapRegionRemSet(s).
   // (Uses it to initialize from_card_cache).
-  static void init_heap(size_t max_regions) {
-    OtherRegionsTable::init_from_card_cache(max_regions);
+  static void init_heap(uint max_regions) {
+    OtherRegionsTable::init_from_card_cache((size_t) max_regions);
   }
 
   // Declares that only regions i s.t. 0 <= i < new_n_regs are in use.
-  static void shrink_heap(size_t new_n_regs) {
-    OtherRegionsTable::shrink_from_card_cache(new_n_regs);
+  static void shrink_heap(uint new_n_regs) {
+    OtherRegionsTable::shrink_from_card_cache((size_t) new_n_regs);
   }
 
 #ifndef PRODUCT
@@ -360,14 +345,14 @@
 #endif
 };
 
-class HeapRegionRemSetIterator : public CHeapObj {
+class HeapRegionRemSetIterator : public CHeapObj<mtGC> {
 
   // The region over which we're iterating.
   const HeapRegionRemSet* _hrrs;
 
   // Local caching of HRRS fields.
   const BitMap*             _coarse_map;
-  PosParPRT**               _fine_grain_regions;
+  PerRegionTable**          _fine_grain_regions;
 
   G1BlockOffsetSharedArray* _bosa;
   G1CollectedHeap*          _g1h;
@@ -404,8 +389,9 @@
 
   // Index of bucket-list we're working on.
   int _fine_array_index;
+
   // Per Region Table we're doing within current bucket list.
-  PosParPRT* _fine_cur_prt;
+  PerRegionTable* _fine_cur_prt;
 
   /* SparsePRT::*/ SparsePRTIter _sparse_iter;
 
@@ -435,12 +421,4 @@
   }
 };
 
-#if 0
-class CardClosure: public Closure {
-public:
-  virtual void do_card(HeapWord* card_start) = 0;
-};
-
-#endif
-
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONREMSET_HPP
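
A hypothetical pair of call sites for the two add_reference() overloads above (worker_id is an assumed variable name):

    hrrs->add_reference(from);             // sequential path, forwards tid 0
    hrrs->add_reference(from, worker_id);  // parallel path, one tid per GC worker

Routing the sequential case through the tid-taking overload removes the duplicated entry point that the old OtherRegionsTable::add_reference(from) provided.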
--- a/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,16 +31,15 @@
 
 // Private
 
-size_t HeapRegionSeq::find_contiguous_from(size_t from, size_t num) {
-  size_t len = length();
+uint HeapRegionSeq::find_contiguous_from(uint from, uint num) {
+  uint len = length();
   assert(num > 1, "use this only for sequences of length 2 or greater");
   assert(from <= len,
-         err_msg("from: "SIZE_FORMAT" should be valid and <= than "SIZE_FORMAT,
-                 from, len));
+         err_msg("from: %u should be valid and <= than %u", from, len));
 
-  size_t curr = from;
-  size_t first = G1_NULL_HRS_INDEX;
-  size_t num_so_far = 0;
+  uint curr = from;
+  uint first = G1_NULL_HRS_INDEX;
+  uint num_so_far = 0;
   while (curr < len && num_so_far < num) {
     if (at(curr)->is_empty()) {
       if (first == G1_NULL_HRS_INDEX) {
@@ -60,7 +59,7 @@
     // we found enough space for the humongous object
     assert(from <= first && first < len, "post-condition");
     assert(first < curr && (curr - first) == num, "post-condition");
-    for (size_t i = first; i < first + num; ++i) {
+    for (uint i = first; i < first + num; ++i) {
       assert(at(i)->is_empty(), "post-condition");
     }
     return first;
@@ -73,10 +72,10 @@
 // Public
 
 void HeapRegionSeq::initialize(HeapWord* bottom, HeapWord* end,
-                               size_t max_length) {
-  assert((size_t) bottom % HeapRegion::GrainBytes == 0,
+                               uint max_length) {
+  assert((uintptr_t) bottom % HeapRegion::GrainBytes == 0,
          "bottom should be heap region aligned");
-  assert((size_t) end % HeapRegion::GrainBytes == 0,
+  assert((uintptr_t) end % HeapRegion::GrainBytes == 0,
          "end should be heap region aligned");
 
   _length = 0;
@@ -87,9 +86,9 @@
   _allocated_length = 0;
   _max_length = max_length;
 
-  _regions = NEW_C_HEAP_ARRAY(HeapRegion*, max_length);
-  memset(_regions, 0, max_length * sizeof(HeapRegion*));
-  _regions_biased = _regions - ((size_t) bottom >> _region_shift);
+  _regions = NEW_C_HEAP_ARRAY(HeapRegion*, max_length, mtGC);
+  memset(_regions, 0, (size_t) max_length * sizeof(HeapRegion*));
+  _regions_biased = _regions - ((uintx) bottom >> _region_shift);
 
   assert(&_regions[0] == &_regions_biased[addr_to_index_biased(bottom)],
          "bottom should be included in the region with index 0");
@@ -105,7 +104,7 @@
   assert(_heap_bottom <= next_bottom, "invariant");
   while (next_bottom < new_end) {
     assert(next_bottom < _heap_end, "invariant");
-    size_t index = length();
+    uint index = length();
 
     assert(index < _max_length, "otherwise we cannot expand further");
     if (index == 0) {
@@ -139,9 +138,9 @@
   return MemRegion(old_end, next_bottom);
 }
 
-size_t HeapRegionSeq::free_suffix() {
-  size_t res = 0;
-  size_t index = length();
+uint HeapRegionSeq::free_suffix() {
+  uint res = 0;
+  uint index = length();
   while (index > 0) {
     index -= 1;
     if (!at(index)->is_empty()) {
@@ -152,27 +151,24 @@
   return res;
 }
 
-size_t HeapRegionSeq::find_contiguous(size_t num) {
+uint HeapRegionSeq::find_contiguous(uint num) {
   assert(num > 1, "use this only for sequences of length 2 or greater");
   assert(_next_search_index <= length(),
-         err_msg("_next_search_indeex: "SIZE_FORMAT" "
-                 "should be valid and <= than "SIZE_FORMAT,
+         err_msg("_next_search_index: %u should be valid and <= than %u",
                  _next_search_index, length()));
 
-  size_t start = _next_search_index;
-  size_t res = find_contiguous_from(start, num);
+  uint start = _next_search_index;
+  uint res = find_contiguous_from(start, num);
   if (res == G1_NULL_HRS_INDEX && start > 0) {
     // Try starting from the beginning. If _next_search_index was 0,
     // no point in doing this again.
     res = find_contiguous_from(0, num);
   }
   if (res != G1_NULL_HRS_INDEX) {
-    assert(res < length(),
-           err_msg("res: "SIZE_FORMAT" should be valid", res));
+    assert(res < length(), err_msg("res: %u should be valid", res));
     _next_search_index = res + num;
     assert(_next_search_index <= length(),
-           err_msg("_next_search_indeex: "SIZE_FORMAT" "
-                   "should be valid and <= than "SIZE_FORMAT,
+           err_msg("_next_search_index: %u should be valid and <= than %u",
                    _next_search_index, length()));
   }
   return res;
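
Condensed, the search strategy above is: scan from the cached hint, then wrap to the start at most once. A sketch (find_with_hint is a made-up wrapper name for illustration):

    uint find_with_hint(uint hint, uint num) {
      uint res = find_contiguous_from(hint, num); // 1) scan from the hint onward
      if (res == G1_NULL_HRS_INDEX && hint > 0) {
        res = find_contiguous_from(0, num);       // 2) wrap and scan the prefix once
      }
      return res;
    }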
@@ -183,20 +179,20 @@
 }
 
 void HeapRegionSeq::iterate_from(HeapRegion* hr, HeapRegionClosure* blk) const {
-  size_t hr_index = 0;
+  uint hr_index = 0;
   if (hr != NULL) {
-    hr_index = (size_t) hr->hrs_index();
+    hr_index = hr->hrs_index();
   }
 
-  size_t len = length();
-  for (size_t i = hr_index; i < len; i += 1) {
+  uint len = length();
+  for (uint i = hr_index; i < len; i += 1) {
     bool res = blk->doHeapRegion(at(i));
     if (res) {
       blk->incomplete();
       return;
     }
   }
-  for (size_t i = 0; i < hr_index; i += 1) {
+  for (uint i = 0; i < hr_index; i += 1) {
     bool res = blk->doHeapRegion(at(i));
     if (res) {
       blk->incomplete();
@@ -206,7 +202,7 @@
 }
 
 MemRegion HeapRegionSeq::shrink_by(size_t shrink_bytes,
-                                   size_t* num_regions_deleted) {
+                                   uint* num_regions_deleted) {
   // Reset this in case it's currently pointing into the regions that
   // we just removed.
   _next_search_index = 0;
@@ -218,7 +214,7 @@
   assert(_allocated_length > 0, "we should have at least one region committed");
 
   // around the loop, i will be the next region to be removed
-  size_t i = length() - 1;
+  uint i = length() - 1;
   assert(i > 0, "we should never remove all regions");
   // [last_start, end) is the MemRegion that covers the regions we will remove.
   HeapWord* end = at(i)->end();
@@ -249,29 +245,24 @@
 #ifndef PRODUCT
 void HeapRegionSeq::verify_optional() {
   guarantee(_length <= _allocated_length,
-            err_msg("invariant: _length: "SIZE_FORMAT" "
-                    "_allocated_length: "SIZE_FORMAT,
+            err_msg("invariant: _length: %u _allocated_length: %u",
                     _length, _allocated_length));
   guarantee(_allocated_length <= _max_length,
-            err_msg("invariant: _allocated_length: "SIZE_FORMAT" "
-                    "_max_length: "SIZE_FORMAT,
+            err_msg("invariant: _allocated_length: %u _max_length: %u",
                     _allocated_length, _max_length));
   guarantee(_next_search_index <= _length,
-            err_msg("invariant: _next_search_index: "SIZE_FORMAT" "
-                    "_length: "SIZE_FORMAT,
+            err_msg("invariant: _next_search_index: %u _length: %u",
                     _next_search_index, _length));
 
   HeapWord* prev_end = _heap_bottom;
-  for (size_t i = 0; i < _allocated_length; i += 1) {
+  for (uint i = 0; i < _allocated_length; i += 1) {
     HeapRegion* hr = _regions[i];
-    guarantee(hr != NULL, err_msg("invariant: i: "SIZE_FORMAT, i));
+    guarantee(hr != NULL, err_msg("invariant: i: %u", i));
     guarantee(hr->bottom() == prev_end,
-              err_msg("invariant i: "SIZE_FORMAT" "HR_FORMAT" "
-                      "prev_end: "PTR_FORMAT,
+              err_msg("invariant i: %u "HR_FORMAT" prev_end: "PTR_FORMAT,
                       i, HR_FORMAT_PARAMS(hr), prev_end));
     guarantee(hr->hrs_index() == i,
-              err_msg("invariant: i: "SIZE_FORMAT" hrs_index(): "SIZE_FORMAT,
-                      i, hr->hrs_index()));
+              err_msg("invariant: i: %u hrs_index(): %u", i, hr->hrs_index()));
     if (i < _length) {
       // Asserts will fire if i is >= _length
       HeapWord* addr = hr->bottom();
@@ -290,8 +281,8 @@
       prev_end = hr->end();
     }
   }
-  for (size_t i = _allocated_length; i < _max_length; i += 1) {
-    guarantee(_regions[i] == NULL, err_msg("invariant i: "SIZE_FORMAT, i));
+  for (uint i = _allocated_length; i < _max_length; i += 1) {
+    guarantee(_regions[i] == NULL, err_msg("invariant i: %u", i));
   }
 }
 #endif // PRODUCT
--- a/src/share/vm/gc_implementation/g1/heapRegionSeq.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSeq.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,8 +29,6 @@
 class HeapRegionClosure;
 class FreeRegionList;
 
-#define G1_NULL_HRS_INDEX ((size_t) -1)
-
 // This class keeps track of the region metadata (i.e., HeapRegion
 // instances). They are kept in the _regions array in address
 // order. A region's index in the array corresponds to its index in
@@ -55,7 +53,7 @@
 //
 // and maintain that: _length <= _allocated_length <= _max_length
 
-class HeapRegionSeq: public CHeapObj {
+class HeapRegionSeq: public CHeapObj<mtGC> {
   friend class VMStructs;
 
   // The array that holds the HeapRegions.
@@ -65,7 +63,7 @@
   HeapRegion** _regions_biased;
 
   // The number of regions committed in the heap.
-  size_t _length;
+  uint _length;
 
   // The address of the first reserved word in the heap.
   HeapWord* _heap_bottom;
@@ -74,32 +72,32 @@
   HeapWord* _heap_end;
 
   // The log of the region byte size.
-  size_t _region_shift;
+  uint _region_shift;
 
   // A hint for which index to start searching from for humongous
   // allocations.
-  size_t _next_search_index;
+  uint _next_search_index;
 
   // The number of regions for which we have allocated HeapRegions.
-  size_t _allocated_length;
+  uint _allocated_length;
 
   // The maximum number of regions in the heap.
-  size_t _max_length;
+  uint _max_length;
 
   // Find a contiguous set of empty regions of length num, starting
   // from the given index.
-  size_t find_contiguous_from(size_t from, size_t num);
+  uint find_contiguous_from(uint from, uint num);
 
   // Map a heap address to a biased region index. Assume that the
   // address is valid.
-  inline size_t addr_to_index_biased(HeapWord* addr) const;
+  inline uintx addr_to_index_biased(HeapWord* addr) const;
 
-  void increment_length(size_t* length) {
+  void increment_length(uint* length) {
     assert(*length < _max_length, "pre-condition");
     *length += 1;
   }
 
-  void decrement_length(size_t* length) {
+  void decrement_length(uint* length) {
     assert(*length > 0, "pre-condition");
     *length -= 1;
   }
@@ -108,11 +106,11 @@
   // Empty constructor; we'll initialize it with the initialize() method.
   HeapRegionSeq() { }
 
-  void initialize(HeapWord* bottom, HeapWord* end, size_t max_length);
+  void initialize(HeapWord* bottom, HeapWord* end, uint max_length);
 
   // Return the HeapRegion at the given index. Assume that the index
   // is valid.
-  inline HeapRegion* at(size_t index) const;
+  inline HeapRegion* at(uint index) const;
 
   // If addr is within the committed space return its corresponding
   // HeapRegion, otherwise return NULL.
@@ -123,10 +121,10 @@
   inline HeapRegion* addr_to_region_unsafe(HeapWord* addr) const;
 
   // Return the number of regions that have been committed in the heap.
-  size_t length() const { return _length; }
+  uint length() const { return _length; }
 
   // Return the maximum number of regions in the heap.
-  size_t max_length() const { return _max_length; }
+  uint max_length() const { return _max_length; }
 
   // Expand the sequence to reflect that the heap has grown from
   // old_end to new_end. Either create new HeapRegions, or re-use
@@ -139,12 +137,12 @@
 
   // Return the number of contiguous regions at the end of the sequence
   // that are available for allocation.
-  size_t free_suffix();
+  uint free_suffix();
 
   // Find a contiguous set of empty regions of length num and return
   // the index of the first region or G1_NULL_HRS_INDEX if the
   // search was unsuccessful.
-  size_t find_contiguous(size_t num);
+  uint find_contiguous(uint num);
 
   // Apply blk->doHeapRegion() on all committed regions in address order,
   // terminating the iteration early if doHeapRegion() returns true.
@@ -159,7 +157,7 @@
   // sequence. Return a MemRegion that corresponds to the address
   // range of the uncommitted regions. Assume shrink_bytes is page and
   // heap region aligned.
-  MemRegion shrink_by(size_t shrink_bytes, size_t* num_regions_deleted);
+  MemRegion shrink_by(size_t shrink_bytes, uint* num_regions_deleted);
 
   // Do some sanity checking.
   void verify_optional() PRODUCT_RETURN;
--- a/src/share/vm/gc_implementation/g1/heapRegionSeq.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSeq.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,11 +28,11 @@
 #include "gc_implementation/g1/heapRegion.hpp"
 #include "gc_implementation/g1/heapRegionSeq.hpp"
 
-inline size_t HeapRegionSeq::addr_to_index_biased(HeapWord* addr) const {
+inline uintx HeapRegionSeq::addr_to_index_biased(HeapWord* addr) const {
   assert(_heap_bottom <= addr && addr < _heap_end,
          err_msg("addr: "PTR_FORMAT" bottom: "PTR_FORMAT" end: "PTR_FORMAT,
                  addr, _heap_bottom, _heap_end));
-  size_t index = (size_t) addr >> _region_shift;
+  uintx index = (uintx) addr >> _region_shift;
   return index;
 }
 
@@ -40,7 +40,7 @@
   assert(_heap_bottom <= addr && addr < _heap_end,
          err_msg("addr: "PTR_FORMAT" bottom: "PTR_FORMAT" end: "PTR_FORMAT,
                  addr, _heap_bottom, _heap_end));
-  size_t index_biased = addr_to_index_biased(addr);
+  uintx index_biased = addr_to_index_biased(addr);
   HeapRegion* hr = _regions_biased[index_biased];
   assert(hr != NULL, "invariant");
   return hr;
@@ -55,7 +55,7 @@
   return NULL;
 }
 
-inline HeapRegion* HeapRegionSeq::at(size_t index) const {
+inline HeapRegion* HeapRegionSeq::at(uint index) const {
   assert(index < length(), "pre-condition");
   HeapRegion* hr = _regions[index];
   assert(hr != NULL, "sanity");
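
The biased-index trick above is worth spelling out: by pre-subtracting the bias once at initialization, every address-to-region lookup is a single shift plus one array load, with no per-lookup subtraction of the heap bottom. A sketch with hypothetical standalone names:

    // Precomputed once: biased = regions - ((uintx) heap_bottom >> region_shift)
    HeapRegion* region_for(HeapWord* addr, HeapRegion** biased, uint region_shift) {
      return biased[(uintx) addr >> region_shift];
    }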
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,28 +25,18 @@
 #include "precompiled.hpp"
 #include "gc_implementation/g1/heapRegionSet.inline.hpp"
 
-size_t HeapRegionSetBase::_unrealistically_long_length = 0;
+uint HeapRegionSetBase::_unrealistically_long_length = 0;
 HRSPhase HeapRegionSetBase::_phase = HRSPhaseNone;
 
 //////////////////// HeapRegionSetBase ////////////////////
 
-void HeapRegionSetBase::set_unrealistically_long_length(size_t len) {
+void HeapRegionSetBase::set_unrealistically_long_length(uint len) {
   guarantee(_unrealistically_long_length == 0, "should only be set once");
   _unrealistically_long_length = len;
 }
 
-size_t HeapRegionSetBase::calculate_region_num(HeapRegion* hr) {
-  assert(hr->startsHumongous(), "pre-condition");
-  assert(hr->capacity() % HeapRegion::GrainBytes == 0, "invariant");
-  size_t region_num = hr->capacity() >> HeapRegion::LogOfHRGrainBytes;
-  assert(region_num > 0, "sanity");
-  return region_num;
-}
-
 void HeapRegionSetBase::fill_in_ext_msg(hrs_ext_msg* msg, const char* message) {
-  msg->append("[%s] %s "
-              "ln: "SIZE_FORMAT" rn: "SIZE_FORMAT" "
-              "cy: "SIZE_FORMAT" ud: "SIZE_FORMAT,
+  msg->append("[%s] %s ln: %u rn: %u cy: "SIZE_FORMAT" ud: "SIZE_FORMAT,
               name(), message, length(), region_num(),
               total_capacity_bytes(), total_used_bytes());
   fill_in_ext_msg_extra(msg);
@@ -154,11 +144,7 @@
   guarantee(verify_region(hr, this), hrs_ext_msg(this, "region verification"));
 
   _calc_length               += 1;
-  if (!hr->isHumongous()) {
-    _calc_region_num         += 1;
-  } else {
-    _calc_region_num         += calculate_region_num(hr);
-  }
+  _calc_region_num           += hr->region_num();
   _calc_total_capacity_bytes += hr->capacity();
   _calc_total_used_bytes     += hr->used();
 }
@@ -170,13 +156,11 @@
          hrs_ext_msg(this, "verification should be in progress"));
 
   guarantee(length() == _calc_length,
-            hrs_err_msg("[%s] length: "SIZE_FORMAT" should be == "
-                        "calc length: "SIZE_FORMAT,
+            hrs_err_msg("[%s] length: %u should be == calc length: %u",
                         name(), length(), _calc_length));
 
   guarantee(region_num() == _calc_region_num,
-            hrs_err_msg("[%s] region num: "SIZE_FORMAT" should be == "
-                        "calc region num: "SIZE_FORMAT,
+            hrs_err_msg("[%s] region num: %u should be == calc region num: %u",
                         name(), region_num(), _calc_region_num));
 
   guarantee(total_capacity_bytes() == _calc_total_capacity_bytes,
@@ -211,8 +195,8 @@
   out->print_cr("    humongous         : %s", BOOL_TO_STR(regions_humongous()));
   out->print_cr("    empty             : %s", BOOL_TO_STR(regions_empty()));
   out->print_cr("  Attributes");
-  out->print_cr("    length            : "SIZE_FORMAT_W(14), length());
-  out->print_cr("    region num        : "SIZE_FORMAT_W(14), region_num());
+  out->print_cr("    length            : %14u", length());
+  out->print_cr("    region num        : %14u", region_num());
   out->print_cr("    total capacity    : "SIZE_FORMAT_W(14)" bytes",
                 total_capacity_bytes());
   out->print_cr("    total used        : "SIZE_FORMAT_W(14)" bytes",
@@ -243,14 +227,12 @@
   if (proxy_set->is_empty()) return;
 
   assert(proxy_set->length() <= _length,
-         hrs_err_msg("[%s] proxy set length: "SIZE_FORMAT" "
-                     "should be <= length: "SIZE_FORMAT,
+         hrs_err_msg("[%s] proxy set length: %u should be <= length: %u",
                      name(), proxy_set->length(), _length));
   _length -= proxy_set->length();
 
   assert(proxy_set->region_num() <= _region_num,
-         hrs_err_msg("[%s] proxy set region num: "SIZE_FORMAT" "
-                     "should be <= region num: "SIZE_FORMAT,
+         hrs_err_msg("[%s] proxy set region num: %u should be <= region num: %u",
                      name(), proxy_set->region_num(), _region_num));
   _region_num -= proxy_set->region_num();
 
@@ -298,7 +280,7 @@
     assert(length() >  0 && _tail != NULL, hrs_ext_msg(this, "invariant"));
     from_list->_tail->set_next(_head);
   } else {
-    assert(length() == 0 && _head == NULL, hrs_ext_msg(this, "invariant"));
+    assert(length() == 0 && _tail == NULL, hrs_ext_msg(this, "invariant"));
     _tail = from_list->_tail;
   }
   _head = from_list->_head;
@@ -369,17 +351,17 @@
   verify_optional();
 }
 
-void HeapRegionLinkedList::remove_all_pending(size_t target_count) {
+void HeapRegionLinkedList::remove_all_pending(uint target_count) {
   hrs_assert_mt_safety_ok(this);
   assert(target_count > 1, hrs_ext_msg(this, "pre-condition"));
   assert(!is_empty(), hrs_ext_msg(this, "pre-condition"));
 
   verify_optional();
-  DEBUG_ONLY(size_t old_length = length();)
+  DEBUG_ONLY(uint old_length = length();)
 
   HeapRegion* curr = _head;
   HeapRegion* prev = NULL;
-  size_t count = 0;
+  uint count = 0;
   while (curr != NULL) {
     hrs_assert_region_ok(this, curr, this);
     HeapRegion* next = curr->next();
@@ -387,7 +369,7 @@
     if (curr->pending_removal()) {
       assert(count < target_count,
              hrs_err_msg("[%s] should not come across more regions "
-                         "pending for removal than target_count: "SIZE_FORMAT,
+                         "pending for removal than target_count: %u",
                          name(), target_count));
 
       if (prev == NULL) {
@@ -422,12 +404,11 @@
   }
 
   assert(count == target_count,
-         hrs_err_msg("[%s] count: "SIZE_FORMAT" should be == "
-                     "target_count: "SIZE_FORMAT, name(), count, target_count));
+         hrs_err_msg("[%s] count: %u should be == target_count: %u",
+                     name(), count, target_count));
   assert(length() + target_count == old_length,
          hrs_err_msg("[%s] new length should be consistent "
-                     "new length: "SIZE_FORMAT" old length: "SIZE_FORMAT" "
-                     "target_count: "SIZE_FORMAT,
+                     "new length: %u old length: %u target_count: %u",
                      name(), length(), old_length, target_count));
 
   verify_optional();
@@ -444,16 +425,16 @@
   HeapRegion* curr  = _head;
   HeapRegion* prev1 = NULL;
   HeapRegion* prev0 = NULL;
-  size_t      count = 0;
+  uint        count = 0;
   while (curr != NULL) {
     verify_next_region(curr);
 
     count += 1;
     guarantee(count < _unrealistically_long_length,
-              hrs_err_msg("[%s] the calculated length: "SIZE_FORMAT" "
+              hrs_err_msg("[%s] the calculated length: %u "
                           "seems very long, is there maybe a cycle? "
                           "curr: "PTR_FORMAT" prev0: "PTR_FORMAT" "
-                          "prev1: "PTR_FORMAT" length: "SIZE_FORMAT,
+                          "prev1: "PTR_FORMAT" length: %u",
                           name(), count, curr, prev0, prev1, length()));
 
     prev1 = prev0;
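
The guarantee above is a bound check rather than a true cycle detector: if the list were accidentally circular, count would eventually exceed any realistic length and the guarantee fires, reporting the last few visited nodes (curr, prev0, prev1) to help locate the corruption. A condensed restatement with hypothetical names:

    uint count = 0;
    for (HeapRegion* curr = head; curr != NULL; curr = curr->next()) {
      count += 1;
      guarantee(count < unrealistically_long, "cycle?");  // fires if the list loops
    }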
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -62,20 +62,18 @@
   friend class VMStructs;
 
 protected:
-  static size_t calculate_region_num(HeapRegion* hr);
-
-  static size_t _unrealistically_long_length;
+  static uint _unrealistically_long_length;
 
   // The number of regions added to the set. If the set contains
   // only humongous regions, this reflects only 'starts humongous'
   // regions and does not include 'continues humongous' ones.
-  size_t _length;
+  uint _length;
 
   // The total number of regions represented by the set. If the set
   // does not contain humongous regions, this should be the same as
   // _length. If the set contains only humongous regions, this will
   // include the 'continues humongous' regions.
-  size_t _region_num;
+  uint _region_num;
 
   // We don't keep track of the total capacity explicitly, we instead
   // recalculate it based on _region_num and the heap region size.
@@ -86,8 +84,8 @@
   const char* _name;
 
   bool        _verify_in_progress;
-  size_t      _calc_length;
-  size_t      _calc_region_num;
+  uint        _calc_length;
+  uint        _calc_region_num;
   size_t      _calc_total_capacity_bytes;
   size_t      _calc_total_used_bytes;
 
@@ -153,18 +151,18 @@
   HeapRegionSetBase(const char* name);
 
 public:
-  static void set_unrealistically_long_length(size_t len);
+  static void set_unrealistically_long_length(uint len);
 
   const char* name() { return _name; }
 
-  size_t length() { return _length; }
+  uint length() { return _length; }
 
   bool is_empty() { return _length == 0; }
 
-  size_t region_num() { return _region_num; }
+  uint region_num() { return _region_num; }
 
   size_t total_capacity_bytes() {
-    return region_num() << HeapRegion::LogOfHRGrainBytes;
+    return (size_t) region_num() << HeapRegion::LogOfHRGrainBytes;
   }
 
   size_t total_used_bytes() { return _total_used_bytes; }
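
The (size_t) cast added to total_capacity_bytes() matters on LP64 now that region counts are 32-bit uints; an illustrative calculation:

    // With 1 MB regions (LogOfHRGrainBytes == 20) and 8192 regions:
    //   (uint)   8192 << 20  -> 2^33 wraps in a 32-bit uint to 0  (wrong)
    //   (size_t) 8192 << 20  -> 8 GB                              (correct)
    // Widening to size_t before the shift keeps the full product.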
@@ -341,7 +339,7 @@
   // of regions that are pending for removal in the list, and
   // target_count should be > 1 (currently, we never need to remove a
   // single region using this).
-  void remove_all_pending(size_t target_count);
+  void remove_all_pending(uint target_count);
 
   virtual void verify();
 
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,11 +33,7 @@
   // Assumes the caller has already verified the region.
 
   _length           += 1;
-  if (!hr->isHumongous()) {
-    _region_num     += 1;
-  } else {
-    _region_num     += calculate_region_num(hr);
-  }
+  _region_num       += hr->region_num();
   _total_used_bytes += hr->used();
 }
 
@@ -54,15 +50,10 @@
   assert(_length > 0, hrs_ext_msg(this, "pre-condition"));
   _length -= 1;
 
-  size_t region_num_diff;
-  if (!hr->isHumongous()) {
-    region_num_diff = 1;
-  } else {
-    region_num_diff = calculate_region_num(hr);
-  }
+  uint region_num_diff = hr->region_num();
   assert(region_num_diff <= _region_num,
-         hrs_err_msg("[%s] region's region num: "SIZE_FORMAT" "
-                     "should be <= region num: "SIZE_FORMAT,
+         hrs_err_msg("[%s] region's region num: %u "
+                     "should be <= region num: %u",
                      name(), region_num_diff, _region_num));
   _region_num -= region_num_diff;
 
--- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -126,7 +126,7 @@
     return res;
   } else {
     // Allocate space for the BufferNode in front of the buffer.
-    char *b =  NEW_C_HEAP_ARRAY(char, _sz + BufferNode::aligned_size());
+    char *b =  NEW_C_HEAP_ARRAY(char, _sz + BufferNode::aligned_size(), mtGC);
     return BufferNode::make_buffer_from_block(b);
   }
 }
@@ -149,7 +149,7 @@
     assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong.");
     void* b = BufferNode::make_block_from_node(_buf_free_list);
     _buf_free_list = _buf_free_list->next();
-    FREE_C_HEAP_ARRAY(char, b);
+    FREE_C_HEAP_ARRAY(char, b, mtGC);
     _buf_free_list_sz --;
     n--;
   }
--- a/src/share/vm/gc_implementation/g1/satbQueue.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -208,7 +208,7 @@
   PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold, -1);
   _shared_satb_queue.set_lock(lock);
   if (ParallelGCThreads > 0) {
-    _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads);
+    _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads, mtGC);
   }
 }
 
--- a/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -148,8 +148,8 @@
 RSHashTable::RSHashTable(size_t capacity) :
   _capacity(capacity), _capacity_mask(capacity-1),
   _occupied_entries(0), _occupied_cards(0),
-  _entries((SparsePRTEntry*)NEW_C_HEAP_ARRAY(char, SparsePRTEntry::size() * capacity)),
-  _buckets(NEW_C_HEAP_ARRAY(int, capacity)),
+  _entries((SparsePRTEntry*)NEW_C_HEAP_ARRAY(char, SparsePRTEntry::size() * capacity, mtGC)),
+  _buckets(NEW_C_HEAP_ARRAY(int, capacity, mtGC)),
   _free_list(NullEntry), _free_region(0)
 {
   clear();
@@ -157,11 +157,11 @@
 
 RSHashTable::~RSHashTable() {
   if (_entries != NULL) {
-    FREE_C_HEAP_ARRAY(SparsePRTEntry, _entries);
+    FREE_C_HEAP_ARRAY(SparsePRTEntry, _entries, mtGC);
     _entries = NULL;
   }
   if (_buckets != NULL) {
-    FREE_C_HEAP_ARRAY(int, _buckets);
+    FREE_C_HEAP_ARRAY(int, _buckets, mtGC);
     _buckets = NULL;
   }
 }
@@ -481,8 +481,7 @@
 
 bool SparsePRT::add_card(RegionIdx_t region_id, CardIdx_t card_index) {
 #if SPARSE_PRT_VERBOSE
-  gclog_or_tty->print_cr("  Adding card %d from region %d to region "
-                         SIZE_FORMAT" sparse.",
+  gclog_or_tty->print_cr("  Adding card %d from region %d to region %u sparse.",
                          card_index, region_id, _hr->hrs_index());
 #endif
   if (_next->occupied_entries() * 2 > _next->capacity()) {
@@ -534,7 +533,7 @@
   _next = new RSHashTable(last->capacity() * 2);
 
 #if SPARSE_PRT_VERBOSE
-  gclog_or_tty->print_cr("  Expanded sparse table for "SIZE_FORMAT" to %d.",
+  gclog_or_tty->print_cr("  Expanded sparse table for %u to %d.",
                          _hr->hrs_index(), _next->capacity());
 #endif
   for (size_t i = 0; i < last->capacity(); i++) {
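
The occupancy check in add_card() above doubles the sparse table once it passes 50% load; condensed (illustrative):

    if (next->occupied_entries() * 2 > next->capacity()) {
      expand();  // allocate a new RSHashTable at 2x capacity, then rehash into it
    }

Keeping occupancy at or below half the capacity keeps the bucket chains short on lookup.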
--- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -42,7 +42,7 @@
 // insertions only enqueue old versions for deletions, but do not delete
 // old versions synchronously.
 
-class SparsePRTEntry: public CHeapObj {
+class SparsePRTEntry: public CHeapObj<mtGC> {
 public:
   enum SomePublicConstants {
     NullEntry     = -1,
@@ -101,7 +101,7 @@
 };
 
 
-class RSHashTable : public CHeapObj {
+class RSHashTable : public CHeapObj<mtGC> {
 
   friend class RSHashTableIter;
 
--- a/src/share/vm/gc_implementation/g1/survRateGroup.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/survRateGroup.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -43,7 +43,7 @@
   reset();
   if (summary_surv_rates_len > 0) {
     size_t length = summary_surv_rates_len;
-    _summary_surv_rates = NEW_C_HEAP_ARRAY(NumberSeq*, length);
+    _summary_surv_rates = NEW_C_HEAP_ARRAY(NumberSeq*, length, mtGC);
     for (size_t i = 0; i < length; ++i) {
       _summary_surv_rates[i] = new NumberSeq();
     }
@@ -90,9 +90,9 @@
     double* old_accum_surv_rate_pred = _accum_surv_rate_pred;
     TruncatedSeq** old_surv_rate_pred = _surv_rate_pred;
 
-    _surv_rate = NEW_C_HEAP_ARRAY(double, _region_num);
-    _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, _region_num);
-    _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, _region_num);
+    _surv_rate = NEW_C_HEAP_ARRAY(double, _region_num, mtGC);
+    _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, _region_num, mtGC);
+    _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, _region_num, mtGC);
 
     for (size_t i = 0; i < _stats_arrays_length; ++i) {
       _surv_rate_pred[i] = old_surv_rate_pred[i];
@@ -104,13 +104,13 @@
     _stats_arrays_length = _region_num;
 
     if (old_surv_rate != NULL) {
-      FREE_C_HEAP_ARRAY(double, old_surv_rate);
+      FREE_C_HEAP_ARRAY(double, old_surv_rate, mtGC);
     }
     if (old_accum_surv_rate_pred != NULL) {
-      FREE_C_HEAP_ARRAY(double, old_accum_surv_rate_pred);
+      FREE_C_HEAP_ARRAY(double, old_accum_surv_rate_pred, mtGC);
     }
     if (old_surv_rate_pred != NULL) {
-      FREE_C_HEAP_ARRAY(TruncatedSeq*, old_surv_rate_pred);
+      FREE_C_HEAP_ARRAY(TruncatedSeq*, old_surv_rate_pred, mtGC);
     }
   }
 
--- a/src/share/vm/gc_implementation/g1/survRateGroup.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/survRateGroup.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -29,7 +29,7 @@
 
 class G1CollectorPolicy;
 
-class SurvRateGroup : public CHeapObj {
+class SurvRateGroup : public CHeapObj<mtGC> {
 private:
   G1CollectorPolicy* _g1p;
   const char* _name;
--- a/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -34,7 +34,7 @@
   static_field(HeapRegion, GrainBytes, size_t)                                \
                                                                               \
   nonstatic_field(HeapRegionSeq,   _regions, HeapRegion**)                    \
-  nonstatic_field(HeapRegionSeq,   _length,  size_t)                          \
+  nonstatic_field(HeapRegionSeq,   _length,  uint)                            \
                                                                               \
   nonstatic_field(G1CollectedHeap, _hrs,                HeapRegionSeq)        \
   nonstatic_field(G1CollectedHeap, _g1_committed,       MemRegion)            \
@@ -50,8 +50,8 @@
   nonstatic_field(G1MonitoringSupport, _old_committed,      size_t)           \
   nonstatic_field(G1MonitoringSupport, _old_used,           size_t)           \
                                                                               \
-  nonstatic_field(HeapRegionSetBase,   _length,             size_t)           \
-  nonstatic_field(HeapRegionSetBase,   _region_num,         size_t)           \
+  nonstatic_field(HeapRegionSetBase,   _length,             uint)             \
+  nonstatic_field(HeapRegionSetBase,   _region_num,         uint)             \
   nonstatic_field(HeapRegionSetBase,   _total_used_bytes,   size_t)           \
 
 
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -26,6 +26,7 @@
 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/vm_operations_g1.hpp"
 #include "gc_implementation/shared/isGCActiveMark.hpp"
 #include "gc_implementation/g1/vm_operations_g1.hpp"
@@ -41,6 +42,7 @@
 
 void VM_G1CollectForAllocation::doit() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  GCCauseSetter x(g1h, _gc_cause);
   _result = g1h->satisfy_failed_allocation(_word_size, &_pause_succeeded);
   assert(_result == NULL || _pause_succeeded,
          "if we get back a result, the pause should have succeeded");
@@ -62,7 +64,7 @@
     _should_initiate_conc_mark(should_initiate_conc_mark),
     _target_pause_time_ms(target_pause_time_ms),
     _should_retry_gc(false),
-    _full_collections_completed_before(0) {
+    _old_marking_cycles_completed_before(0) {
   guarantee(target_pause_time_ms > 0.0,
             err_msg("target_pause_time_ms = %1.6lf should be positive",
                     target_pause_time_ms));
@@ -110,11 +112,11 @@
 
   GCCauseSetter x(g1h, _gc_cause);
   if (_should_initiate_conc_mark) {
-    // It's safer to read full_collections_completed() here, given
+    // It's safer to read old_marking_cycles_completed() here, given
     // that noone else will be updating it concurrently. Since we'll
     // only need it if we're initiating a marking cycle, no point in
     // setting it earlier.
-    _full_collections_completed_before = g1h->full_collections_completed();
+    _old_marking_cycles_completed_before = g1h->old_marking_cycles_completed();
 
     // At this point we are supposed to start a concurrent cycle. We
     // will do so if one is not already in progress.
@@ -179,17 +181,17 @@
 
     G1CollectedHeap* g1h = G1CollectedHeap::heap();
 
-    // In the doit() method we saved g1h->full_collections_completed()
-    // in the _full_collections_completed_before field. We have to
-    // wait until we observe that g1h->full_collections_completed()
+    // In the doit() method we saved g1h->old_marking_cycles_completed()
+    // in the _old_marking_cycles_completed_before field. We have to
+    // wait until we observe that g1h->old_marking_cycles_completed()
     // has increased by at least one. This can happen if a) we started
     // a cycle and it completes, b) a cycle already in progress
     // completes, or c) a Full GC happens.
 
     // If the condition has already been reached, there's no point in
     // actually taking the lock and doing the wait.
-    if (g1h->full_collections_completed() <=
-                                          _full_collections_completed_before) {
+    if (g1h->old_marking_cycles_completed() <=
+                                          _old_marking_cycles_completed_before) {
       // The following is largely copied from CMS
 
       Thread* thr = Thread::current();
@@ -198,8 +200,8 @@
       ThreadToNativeFromVM native(jt);
 
       MutexLockerEx x(FullGCCount_lock, Mutex::_no_safepoint_check_flag);
-      while (g1h->full_collections_completed() <=
-                                          _full_collections_completed_before) {
+      while (g1h->old_marking_cycles_completed() <=
+                                          _old_marking_cycles_completed_before) {
         FullGCCount_lock->wait(Mutex::_no_safepoint_check_flag);
       }
     }
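
The loop above is the standard monitor idiom for "wait until a counter advances": re-check the predicate after every wakeup, under the lock, because a wait can return due to a notify meant for another waiter. In shape (following the hunk):

    MutexLockerEx x(FullGCCount_lock, Mutex::_no_safepoint_check_flag);
    while (g1h->old_marking_cycles_completed() <= _old_marking_cycles_completed_before) {
      FullGCCount_lock->wait(Mutex::_no_safepoint_check_flag);  // re-check on every wakeup
    }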
@@ -223,9 +225,9 @@
 }
 
 void VM_CGC_Operation::doit() {
-  gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
-  TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-  TraceTime t(_printGCMessage, PrintGC, true, gclog_or_tty);
+  gclog_or_tty->date_stamp(G1Log::fine() && PrintGCDateStamps);
+  TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty);
+  TraceTime t(_printGCMessage, G1Log::fine(), true, gclog_or_tty);
   SharedHeap* sh = SharedHeap::heap();
   // This could go away if CollectedHeap gave access to _gc_is_active...
   if (sh != NULL) {
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -80,7 +80,7 @@
   bool         _should_initiate_conc_mark;
   bool         _should_retry_gc;
   double       _target_pause_time_ms;
-  unsigned int _full_collections_completed_before;
+  unsigned int _old_marking_cycles_completed_before;
 public:
   VM_G1IncCollectionPause(unsigned int   gc_count_before,
                           size_t         word_size,
--- a/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -457,12 +457,12 @@
         if (_lowest_non_clean[i] != NULL) {
           assert(n_chunks != _lowest_non_clean_chunk_size[i],
                  "logical consequence");
-          FREE_C_HEAP_ARRAY(CardPtr, _lowest_non_clean[i]);
+          FREE_C_HEAP_ARRAY(CardPtr, _lowest_non_clean[i], mtGC);
           _lowest_non_clean[i] = NULL;
         }
         // Now allocate a new one if necessary.
         if (_lowest_non_clean[i] == NULL) {
-          _lowest_non_clean[i]                  = NEW_C_HEAP_ARRAY(CardPtr, n_chunks);
+          _lowest_non_clean[i]                  = NEW_C_HEAP_ARRAY(CardPtr, n_chunks, mtGC);
           _lowest_non_clean_chunk_size[i]       = n_chunks;
           _lowest_non_clean_base_chunk_index[i] = addr_to_chunk_index(covered.start());
           for (int j = 0; j < (int)n_chunks; j++)
--- a/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,344 +0,0 @@
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
-#include "memory/sharedHeap.hpp"
-#include "oops/arrayOop.hpp"
-#include "oops/oop.inline.hpp"
-
-ParGCAllocBuffer::ParGCAllocBuffer(size_t desired_plab_sz_) :
-  _word_sz(desired_plab_sz_), _bottom(NULL), _top(NULL),
-  _end(NULL), _hard_end(NULL),
-  _retained(false), _retained_filler(),
-  _allocated(0), _wasted(0)
-{
-  assert (min_size() > AlignmentReserve, "Inconsistency!");
-  // arrayOopDesc::header_size depends on command line initialization.
-  FillerHeaderSize = align_object_size(arrayOopDesc::header_size(T_INT));
-  AlignmentReserve = oopDesc::header_size() > MinObjAlignment ? FillerHeaderSize : 0;
-}
-
-size_t ParGCAllocBuffer::FillerHeaderSize;
-
-// If the minimum object size is greater than MinObjAlignment, we can
-// end up with a shard at the end of the buffer that's smaller than
-// the smallest object.  We can't allow that because the buffer must
-// look like it's full of objects when we retire it, so we make
-// sure we have enough space for a filler int array object.
-size_t ParGCAllocBuffer::AlignmentReserve;
-
-void ParGCAllocBuffer::retire(bool end_of_gc, bool retain) {
-  assert(!retain || end_of_gc, "Can only retain at GC end.");
-  if (_retained) {
-    // If the buffer had been retained shorten the previous filler object.
-    assert(_retained_filler.end() <= _top, "INVARIANT");
-    CollectedHeap::fill_with_object(_retained_filler);
-    // Wasted space book-keeping, otherwise (normally) done in invalidate()
-    _wasted += _retained_filler.word_size();
-    _retained = false;
-  }
-  assert(!end_of_gc || !_retained, "At this point, end_of_gc ==> !_retained.");
-  if (_top < _hard_end) {
-    CollectedHeap::fill_with_object(_top, _hard_end);
-    if (!retain) {
-      invalidate();
-    } else {
-      // Is there wasted space we'd like to retain for the next GC?
-      if (pointer_delta(_end, _top) > FillerHeaderSize) {
-        _retained = true;
-        _retained_filler = MemRegion(_top, FillerHeaderSize);
-        _top = _top + FillerHeaderSize;
-      } else {
-        invalidate();
-      }
-    }
-  }
-}
-
-void ParGCAllocBuffer::flush_stats(PLABStats* stats) {
-  assert(ResizePLAB, "Wasted work");
-  stats->add_allocated(_allocated);
-  stats->add_wasted(_wasted);
-  stats->add_unused(pointer_delta(_end, _top));
-}
-
-// Compute desired plab size and latch result for later
-// use. This should be called once at the end of parallel
-// scavenge; it clears the sensor accumulators.
-void PLABStats::adjust_desired_plab_sz() {
-  assert(ResizePLAB, "Not set");
-  if (_allocated == 0) {
-    assert(_unused == 0, "Inconsistency in PLAB stats");
-    _allocated = 1;
-  }
-  double wasted_frac    = (double)_unused/(double)_allocated;
-  size_t target_refills = (size_t)((wasted_frac*TargetSurvivorRatio)/
-                                   TargetPLABWastePct);
-  if (target_refills == 0) {
-    target_refills = 1;
-  }
-  _used = _allocated - _wasted - _unused;
-  size_t plab_sz = _used/(target_refills*ParallelGCThreads);
-  if (PrintPLAB) gclog_or_tty->print(" (plab_sz = %d ", plab_sz);
-  // Take historical weighted average
-  _filter.sample(plab_sz);
-  // Clip from above and below, and align to object boundary
-  plab_sz = MAX2(min_size(), (size_t)_filter.average());
-  plab_sz = MIN2(max_size(), plab_sz);
-  plab_sz = align_object_size(plab_sz);
-  // Latch the result
-  if (PrintPLAB) gclog_or_tty->print(" desired_plab_sz = %d) ", plab_sz);
-  if (ResizePLAB) {
-    _desired_plab_sz = plab_sz;
-  }
-  // Now clear the accumulators for next round:
-  // note this needs to be fixed in the case where we
-  // are retaining across scavenges. FIX ME !!! XXX
-  _allocated = 0;
-  _wasted    = 0;
-  _unused    = 0;
-}
-
-#ifndef PRODUCT
-void ParGCAllocBuffer::print() {
-  gclog_or_tty->print("parGCAllocBuffer: _bottom: %p  _top: %p  _end: %p  _hard_end: %p"
-             "_retained: %c _retained_filler: [%p,%p)\n",
-             _bottom, _top, _end, _hard_end,
-             "FT"[_retained], _retained_filler.start(), _retained_filler.end());
-}
-#endif // !PRODUCT
-
-const size_t ParGCAllocBufferWithBOT::ChunkSizeInWords =
-MIN2(CardTableModRefBS::par_chunk_heapword_alignment(),
-     ((size_t)Generation::GenGrain)/HeapWordSize);
-const size_t ParGCAllocBufferWithBOT::ChunkSizeInBytes =
-MIN2(CardTableModRefBS::par_chunk_heapword_alignment() * HeapWordSize,
-     (size_t)Generation::GenGrain);
-
-ParGCAllocBufferWithBOT::ParGCAllocBufferWithBOT(size_t word_sz,
-                                                 BlockOffsetSharedArray* bsa) :
-  ParGCAllocBuffer(word_sz),
-  _bsa(bsa),
-  _bt(bsa, MemRegion(_bottom, _hard_end)),
-  _true_end(_hard_end)
-{}
-
-// The buffer comes with its own BOT, with a shared (obviously) underlying
-// BlockOffsetSharedArray. We manipulate this BOT in the normal way
- * as we would for any contiguous space. However, on occasion we
-// need to do some buffer surgery at the extremities before we
-// start using the body of the buffer for allocations. Such surgery
-// (as explained elsewhere) is to prevent allocation on a card that
-// is in the process of being walked concurrently by another GC thread.
-// When such surgery happens at a point that is far removed (to the
-// right of the current allocation point, top), we use the "contig"
-// parameter below to directly manipulate the shared array without
-// modifying the _next_threshold state in the BOT.
-void ParGCAllocBufferWithBOT::fill_region_with_block(MemRegion mr,
-                                                     bool contig) {
-  CollectedHeap::fill_with_object(mr);
-  if (contig) {
-    _bt.alloc_block(mr.start(), mr.end());
-  } else {
-    _bt.BlockOffsetArray::alloc_block(mr.start(), mr.end());
-  }
-}
-
-HeapWord* ParGCAllocBufferWithBOT::allocate_slow(size_t word_sz) {
-  HeapWord* res = NULL;
-  if (_true_end > _hard_end) {
-    assert((HeapWord*)align_size_down(intptr_t(_hard_end),
-                                      ChunkSizeInBytes) == _hard_end,
-           "or else _true_end should be equal to _hard_end");
-    assert(_retained, "or else _true_end should be equal to _hard_end");
-    assert(_retained_filler.end() <= _top, "INVARIANT");
-    CollectedHeap::fill_with_object(_retained_filler);
-    if (_top < _hard_end) {
-      fill_region_with_block(MemRegion(_top, _hard_end), true);
-    }
-    HeapWord* next_hard_end = MIN2(_true_end, _hard_end + ChunkSizeInWords);
-    _retained_filler = MemRegion(_hard_end, FillerHeaderSize);
-    _bt.alloc_block(_retained_filler.start(), _retained_filler.word_size());
-    _top      = _retained_filler.end();
-    _hard_end = next_hard_end;
-    _end      = _hard_end - AlignmentReserve;
-    res       = ParGCAllocBuffer::allocate(word_sz);
-    if (res != NULL) {
-      _bt.alloc_block(res, word_sz);
-    }
-  }
-  return res;
-}
-
-void
-ParGCAllocBufferWithBOT::undo_allocation(HeapWord* obj, size_t word_sz) {
-  ParGCAllocBuffer::undo_allocation(obj, word_sz);
-  // This may back us up beyond the previous threshold, so reset.
-  _bt.set_region(MemRegion(_top, _hard_end));
-  _bt.initialize_threshold();
-}
-
-void ParGCAllocBufferWithBOT::retire(bool end_of_gc, bool retain) {
-  assert(!retain || end_of_gc, "Can only retain at GC end.");
-  if (_retained) {
-    // We're about to make the retained_filler into a block.
-    _bt.BlockOffsetArray::alloc_block(_retained_filler.start(),
-                                      _retained_filler.end());
-  }
-  // Reset _hard_end to _true_end (and update _end)
-  if (retain && _hard_end != NULL) {
-    assert(_hard_end <= _true_end, "Invariant.");
-    _hard_end = _true_end;
-    _end      = MAX2(_top, _hard_end - AlignmentReserve);
-    assert(_end <= _hard_end, "Invariant.");
-  }
-  _true_end = _hard_end;
-  HeapWord* pre_top = _top;
-
-  ParGCAllocBuffer::retire(end_of_gc, retain);
-  // Now any old _retained_filler is cut back to size, the free part is
-  // filled with a filler object, and top is past the header of that
-  // object.
-
-  if (retain && _top < _end) {
-    assert(end_of_gc && retain, "Or else retain should be false.");
-    // If the lab does not start on a card boundary, we don't want to
-    // allocate onto that card, since that might lead to concurrent
-    // allocation and card scanning, which we don't support.  So we fill
-    // the first card with a garbage object.
-    size_t first_card_index = _bsa->index_for(pre_top);
-    HeapWord* first_card_start = _bsa->address_for_index(first_card_index);
-    if (first_card_start < pre_top) {
-      HeapWord* second_card_start =
-        _bsa->inc_by_region_size(first_card_start);
-
-      // Ensure enough room to fill with the smallest block
-      second_card_start = MAX2(second_card_start, pre_top + AlignmentReserve);
-
-      // If the end is already in the first card, don't go beyond it!
-      // Or if the remainder is too small for a filler object, gobble it up.
-      if (_hard_end < second_card_start ||
-          pointer_delta(_hard_end, second_card_start) < AlignmentReserve) {
-        second_card_start = _hard_end;
-      }
-      if (pre_top < second_card_start) {
-        MemRegion first_card_suffix(pre_top, second_card_start);
-        fill_region_with_block(first_card_suffix, true);
-      }
-      pre_top = second_card_start;
-      _top = pre_top;
-      _end = MAX2(_top, _hard_end - AlignmentReserve);
-    }
-
-    // If the lab does not end on a card boundary, we don't want to
-    // allocate onto that card, since that might lead to concurrent
-    // allocation and card scanning, which we don't support.  So we fill
-    // the last card with a garbage object.
-    size_t last_card_index = _bsa->index_for(_hard_end);
-    HeapWord* last_card_start = _bsa->address_for_index(last_card_index);
-    if (last_card_start < _hard_end) {
-
-      // Ensure enough room to fill with the smallest block
-      last_card_start = MIN2(last_card_start, _hard_end - AlignmentReserve);
-
-      // If the top is already in the last card, don't go back beyond it!
-      // Or if the remainder is too small for a filler object, gobble it up.
-      if (_top > last_card_start ||
-          pointer_delta(last_card_start, _top) < AlignmentReserve) {
-        last_card_start = _top;
-      }
-      if (last_card_start < _hard_end) {
-        MemRegion last_card_prefix(last_card_start, _hard_end);
-        fill_region_with_block(last_card_prefix, false);
-      }
-      _hard_end = last_card_start;
-      _end      = MAX2(_top, _hard_end - AlignmentReserve);
-      _true_end = _hard_end;
-      assert(_end <= _hard_end, "Invariant.");
-    }
-
-    // At this point:
-    //   1) we had a filler object from the original top to hard_end.
-    //   2) We've filled in any partial cards at the front and back.
-    if (pre_top < _hard_end) {
-      // Now we can reset the _bt to do allocation in the given area.
-      MemRegion new_filler(pre_top, _hard_end);
-      fill_region_with_block(new_filler, false);
-      _top = pre_top + ParGCAllocBuffer::FillerHeaderSize;
-      // If there's no space left, don't retain.
-      if (_top >= _end) {
-        _retained = false;
-        invalidate();
-        return;
-      }
-      _retained_filler = MemRegion(pre_top, _top);
-      _bt.set_region(MemRegion(_top, _hard_end));
-      _bt.initialize_threshold();
-      assert(_bt.threshold() > _top, "initialize_threshold failed!");
-
-      // There may be other reasons for queries into the middle of the
-      // filler object.  When such queries are done in parallel with
-      // allocation, bad things can happen, if the query involves object
-      // iteration.  So we ensure that such queries do not involve object
-      // iteration, by putting another filler object on the boundaries of
-      // such queries.  One such is the object spanning a parallel card
-      // chunk boundary.
-
-      // "chunk_boundary" is the address of the first chunk boundary less
-      // than "hard_end".
-      HeapWord* chunk_boundary =
-        (HeapWord*)align_size_down(intptr_t(_hard_end-1), ChunkSizeInBytes);
-      assert(chunk_boundary < _hard_end, "Or else above did not work.");
-      assert(pointer_delta(_true_end, chunk_boundary) >= AlignmentReserve,
-             "Consequence of last card handling above.");
-
-      if (_top <= chunk_boundary) {
-        assert(_true_end == _hard_end, "Invariant.");
-        while (_top <= chunk_boundary) {
-          assert(pointer_delta(_hard_end, chunk_boundary) >= AlignmentReserve,
-                 "Consequence of last card handling above.");
-          _bt.BlockOffsetArray::alloc_block(chunk_boundary, _hard_end);
-          CollectedHeap::fill_with_object(chunk_boundary, _hard_end);
-          _hard_end = chunk_boundary;
-          chunk_boundary -= ChunkSizeInWords;
-        }
-        _end = _hard_end - AlignmentReserve;
-        assert(_top <= _end, "Invariant.");
-        // Now reset the initial filler chunk so it doesn't overlap with
-        // the one(s) inserted above.
-        MemRegion new_filler(pre_top, _hard_end);
-        fill_region_with_block(new_filler, false);
-      }
-    } else {
-      _retained = false;
-      invalidate();
-    }
-  } else {
-    assert(!end_of_gc ||
-           (!_retained && _true_end == _hard_end), "Checking.");
-  }
-  assert(_end <= _hard_end, "Invariant.");
-  assert(_top < _end || _top == _hard_end, "Invariant");
-}
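
The arithmetic in PLABStats::adjust_desired_plab_sz() above is dense, so here is a
standalone C++ sketch of the core computation (before the historical filtering and
min/max clipping). The function name and the sample flag values are illustrative
stand-ins for HotSpot's TargetSurvivorRatio, TargetPLABWastePct and
ParallelGCThreads, not their defaults.

#include <cstddef>
#include <cstdio>

// Core of adjust_desired_plab_sz(): turn last cycle's allocation,
// waste and unused counts into a new per-thread buffer size.
static size_t compute_desired_plab_sz(size_t allocated, size_t wasted,
                                      size_t unused,
                                      size_t target_survivor_ratio,
                                      size_t target_plab_waste_pct,
                                      size_t parallel_gc_threads) {
  if (allocated == 0) {
    allocated = 1;  // avoid division by zero, as the original does
  }
  double wasted_frac = (double)unused / (double)allocated;
  size_t target_refills =
      (size_t)((wasted_frac * target_survivor_ratio) / target_plab_waste_pct);
  if (target_refills == 0) {
    target_refills = 1;
  }
  size_t used = allocated - wasted - unused;
  // Useful words per refill, divided among all GC threads.
  return used / (target_refills * parallel_gc_threads);
}

int main() {
  // 1M words allocated, 2% wasted internally, 5% unused in the final buffers.
  size_t sz = compute_desired_plab_sz(1000000, 20000, 50000, 50, 10, 4);
  std::printf("desired PLAB size: %lu words per thread\n", (unsigned long)sz);
  return 0;
}

A large unused fraction raises target_refills and therefore shrinks the next
PLAB, pushing per-buffer waste back toward TargetPLABWastePct; the weighted
average in _filter then smooths that signal across scavenges.
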
--- a/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
-#define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
-
-#include "memory/allocation.hpp"
-#include "memory/blockOffsetTable.hpp"
-#include "memory/threadLocalAllocBuffer.hpp"
-#include "utilities/globalDefinitions.hpp"
-
-// Forward decl.
-
-class PLABStats;
-
-// A per-thread allocation buffer used during GC.
-class ParGCAllocBuffer: public CHeapObj {
-protected:
-  char head[32];        // padding, presumably against false sharing
-  size_t _word_sz;          // in HeapWord units
-  HeapWord* _bottom;
-  HeapWord* _top;
-  HeapWord* _end;       // last allocatable address + 1
-  HeapWord* _hard_end;  // _end + AlignmentReserve
-  bool      _retained;  // whether we hold a _retained_filler
-  MemRegion _retained_filler;
-  // In support of ergonomic sizing of PLAB's
-  size_t    _allocated;     // in HeapWord units
-  size_t    _wasted;        // in HeapWord units
-  char tail[32];        // padding, presumably against false sharing
-  static size_t FillerHeaderSize;
-  static size_t AlignmentReserve;
-
-public:
-  // Initializes the buffer to be empty, but with the given "word_sz".
-  // Must get initialized with "set_buf" for an allocation to succeed.
-  ParGCAllocBuffer(size_t word_sz);
-
-  static const size_t min_size() {
-    return ThreadLocalAllocBuffer::min_size();
-  }
-
-  static const size_t max_size() {
-    return ThreadLocalAllocBuffer::max_size();
-  }
-
-  // If an allocation of the given "word_sz" can be satisfied within the
-  // buffer, do the allocation, returning a pointer to the start of the
-  // allocated block.  If the allocation request cannot be satisfied,
-  // return NULL.
-  HeapWord* allocate(size_t word_sz) {
-    HeapWord* res = _top;
-    if (pointer_delta(_end, _top) >= word_sz) {
-      _top = _top + word_sz;
-      return res;
-    } else {
-      return NULL;
-    }
-  }
-
-  // Undo the last allocation in the buffer, which is required to be the
-  // allocation of "obj" with the given "word_sz".
-  void undo_allocation(HeapWord* obj, size_t word_sz) {
-    assert(pointer_delta(_top, _bottom) >= word_sz, "Bad undo");
-    assert(pointer_delta(_top, obj)     == word_sz, "Bad undo");
-    _top = obj;
-  }
-
-  // The total (word) size of the buffer, including both allocated and
-  // unallocated space.
-  size_t word_sz() { return _word_sz; }
-
-  // Should only be done if we are about to reset with a new buffer of the
-  // given size.
-  void set_word_size(size_t new_word_sz) {
-    assert(new_word_sz > AlignmentReserve, "Too small");
-    _word_sz = new_word_sz;
-  }
-
-  // The number of words of unallocated space remaining in the buffer.
-  size_t words_remaining() {
-    assert(_end >= _top, "Negative buffer");
-    return pointer_delta(_end, _top, HeapWordSize);
-  }
-
-  bool contains(void* addr) {
-    return (void*)_bottom <= addr && addr < (void*)_hard_end;
-  }
-
-  // Sets the space of the buffer to be [buf, buf + word_sz()).
-  void set_buf(HeapWord* buf) {
-    _bottom   = buf;
-    _top      = _bottom;
-    _hard_end = _bottom + word_sz();
-    _end      = _hard_end - AlignmentReserve;
-    assert(_end >= _top, "Negative buffer");
-    // In support of ergonomic sizing
-    _allocated += word_sz();
-  }
-
-  // Flush the stats supporting ergonomic sizing of PLAB's
-  void flush_stats(PLABStats* stats);
-  void flush_stats_and_retire(PLABStats* stats, bool retain) {
-    // We flush the stats first in order to get a reading of
-    // unused space in the last buffer.
-    if (ResizePLAB) {
-      flush_stats(stats);
-    }
-    // Retire the last allocation buffer.
-    retire(true, retain);
-  }
-
-  // Force future allocations to fail and queries for contains()
-  // to return false
-  void invalidate() {
-    assert(!_retained, "Shouldn't retain an invalidated buffer.");
-    _end    = _hard_end;
-    _wasted += pointer_delta(_end, _top);  // unused space
-    _top    = _end;      // force future allocations to fail
-    _bottom = _end;      // force future contains() queries to return false
-  }
-
-  // Fills in the unallocated portion of the buffer with a garbage object.
-  // If "end_of_gc" is TRUE, is after the last use in the GC.  IF "retain"
-  // is true, attempt to re-use the unused portion in the next GC.
-  void retire(bool end_of_gc, bool retain);
-
-  void print() PRODUCT_RETURN;
-};
-
-// PLAB stats book-keeping
-class PLABStats VALUE_OBJ_CLASS_SPEC {
-  size_t _allocated;      // total allocated
-  size_t _wasted;         // of which wasted (internal fragmentation)
-  size_t _unused;         // Unused in last buffer
-  size_t _used;           // derived = allocated - wasted - unused
-  size_t _desired_plab_sz;// output of filter (below), suitably trimmed and quantized
-  AdaptiveWeightedAverage
-         _filter;         // integrator with decay
-
- public:
-  PLABStats(size_t desired_plab_sz_, unsigned wt) :
-    _allocated(0),
-    _wasted(0),
-    _unused(0),
-    _used(0),
-    _desired_plab_sz(desired_plab_sz_),
-    _filter(wt)
-  {
-    size_t min_sz = min_size();
-    size_t max_sz = max_size();
-    size_t aligned_min_sz = align_object_size(min_sz);
-    size_t aligned_max_sz = align_object_size(max_sz);
-    assert(min_sz <= aligned_min_sz && max_sz >= aligned_max_sz &&
-           min_sz <= max_sz,
-           "PLAB clipping computation in adjust_desired_plab_sz()"
-           " may be incorrect");
-  }
-
-  static const size_t min_size() {
-    return ParGCAllocBuffer::min_size();
-  }
-
-  static const size_t max_size() {
-    return ParGCAllocBuffer::max_size();
-  }
-
-  size_t desired_plab_sz() {
-    return _desired_plab_sz;
-  }
-
-  void adjust_desired_plab_sz(); // filter computation, latches output to
-                                 // _desired_plab_sz, clears sensor accumulators
-
-  void add_allocated(size_t v) {
-    Atomic::add_ptr(v, &_allocated);
-  }
-
-  void add_unused(size_t v) {
-    Atomic::add_ptr(v, &_unused);
-  }
-
-  void add_wasted(size_t v) {
-    Atomic::add_ptr(v, &_wasted);
-  }
-};
-
-class ParGCAllocBufferWithBOT: public ParGCAllocBuffer {
-  BlockOffsetArrayContigSpace _bt;
-  BlockOffsetSharedArray*     _bsa;
-  HeapWord*                   _true_end;  // end of the whole ParGCAllocBuffer
-
-  static const size_t ChunkSizeInWords;
-  static const size_t ChunkSizeInBytes;
-  HeapWord* allocate_slow(size_t word_sz);
-
-  void fill_region_with_block(MemRegion mr, bool contig);
-
-public:
-  ParGCAllocBufferWithBOT(size_t word_sz, BlockOffsetSharedArray* bsa);
-
-  HeapWord* allocate(size_t word_sz) {
-    HeapWord* res = ParGCAllocBuffer::allocate(word_sz);
-    if (res != NULL) {
-      _bt.alloc_block(res, word_sz);
-    } else {
-      res = allocate_slow(word_sz);
-    }
-    return res;
-  }
-
-  void undo_allocation(HeapWord* obj, size_t word_sz);
-
-  void set_buf(HeapWord* buf_start) {
-    ParGCAllocBuffer::set_buf(buf_start);
-    _true_end = _hard_end;
-    _bt.set_region(MemRegion(buf_start, word_sz()));
-    _bt.initialize_threshold();
-  }
-
-  void retire(bool end_of_gc, bool retain);
-
-  MemRegion range() {
-    return MemRegion(_top, _true_end);
-  }
-};
-
-#endif // SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
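
The allocate()/undo_allocation() pair above is a plain bump-pointer allocator;
everything interesting (AlignmentReserve, the filler objects) lives in retire().
A minimal standalone sketch, with HeapWord modelled as char* and sizes in bytes
rather than heap words:

#include <cassert>
#include <cstddef>

typedef char* Word;

struct BumpBuffer {
  Word _top;
  Word _end;

  // Fast path: advance _top, or return NULL when the request does not fit.
  Word allocate(size_t sz) {
    if ((size_t)(_end - _top) >= sz) {
      Word res = _top;
      _top += sz;
      return res;
    }
    return NULL;  // caller refills the buffer or falls back
  }

  // Only the most recent allocation may be undone, exactly as the
  // asserts in undo_allocation() enforce.
  void undo(Word obj, size_t sz) {
    assert(_top - obj == (ptrdiff_t)sz && "bad undo");
    _top = obj;
  }
};

int main() {
  char backing[64];
  BumpBuffer b = { backing, backing + sizeof(backing) };
  Word p = b.allocate(16);
  b.undo(p, 16);  // legal: p was the most recent allocation
  return 0;
}
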
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -24,11 +24,11 @@
 
 #include "precompiled.hpp"
 #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp"
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
 #include "gc_implementation/parNew/parNewGeneration.hpp"
 #include "gc_implementation/parNew/parOopClosures.inline.hpp"
 #include "gc_implementation/shared/adaptiveSizePolicy.hpp"
 #include "gc_implementation/shared/ageTable.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "gc_implementation/shared/spaceDecorator.hpp"
 #include "memory/defNewGeneration.inline.hpp"
 #include "memory/genCollectedHeap.hpp"
@@ -59,7 +59,7 @@
                                        Generation* old_gen_,
                                        int thread_num_,
                                        ObjToScanQueueSet* work_queue_set_,
-                                       Stack<oop>* overflow_stacks_,
+                                       Stack<oop, mtGC>* overflow_stacks_,
                                        size_t desired_plab_sz_,
                                        ParallelTaskTerminator& term_) :
   _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_),
@@ -184,7 +184,7 @@
   assert(ParGCUseLocalOverflow, "Else should not call");
   assert(young_gen()->overflow_list() == NULL, "Error");
   ObjToScanQueue* queue = work_queue();
-  Stack<oop>* const of_stack = overflow_stack();
+  Stack<oop, mtGC>* const of_stack = overflow_stack();
   const size_t num_overflow_elems = of_stack->size();
   const size_t space_available = queue->max_elems() - queue->size();
   const size_t num_take_elems = MIN3(space_available / 4,
@@ -297,7 +297,7 @@
                         ParNewGeneration&       gen,
                         Generation&             old_gen,
                         ObjToScanQueueSet&      queue_set,
-                        Stack<oop>*             overflow_stacks_,
+                        Stack<oop, mtGC>*       overflow_stacks_,
                         size_t                  desired_plab_sz,
                         ParallelTaskTerminator& term);
 
@@ -331,7 +331,7 @@
 ParScanThreadStateSet::ParScanThreadStateSet(
   int num_threads, Space& to_space, ParNewGeneration& gen,
   Generation& old_gen, ObjToScanQueueSet& queue_set,
-  Stack<oop>* overflow_stacks,
+  Stack<oop, mtGC>* overflow_stacks,
   size_t desired_plab_sz, ParallelTaskTerminator& term)
   : ResourceArray(sizeof(ParScanThreadState), num_threads),
     _gen(gen), _next_gen(old_gen), _term(term)
@@ -453,7 +453,8 @@
     // retire the last buffer.
     par_scan_state.to_space_alloc_buffer()->
       flush_stats_and_retire(_gen.plab_stats(),
-                             false /* !retain */);
+                             true /* end_of_gc */,
+                             false /* retain */);
 
     // Every thread has its own age table.  We need to merge
     // them all into one.
@@ -649,9 +650,14 @@
 
   _overflow_stacks = NULL;
   if (ParGCUseLocalOverflow) {
-    _overflow_stacks = NEW_C_HEAP_ARRAY(Stack<oop>, ParallelGCThreads);
+
+    // typedef to work around the NEW_C_HEAP_ARRAY macro, which cannot
+    // deal with ','
+    typedef Stack<oop, mtGC> GCOopStack;
+
+    _overflow_stacks = NEW_C_HEAP_ARRAY(GCOopStack, ParallelGCThreads, mtGC);
     for (size_t i = 0; i < ParallelGCThreads; ++i) {
-      new (_overflow_stacks + i) Stack<oop>();
+      new (_overflow_stacks + i) Stack<oop, mtGC>();
     }
   }
 
@@ -916,7 +922,7 @@
     size_policy->minor_collection_begin();
   }
 
-  TraceTime t1("GC", PrintGC && !PrintGCDetails, true, gclog_or_tty);
+  TraceTime t1(GCCauseString("GC", gch->gc_cause()), PrintGC && !PrintGCDetails, true, gclog_or_tty);
   // Capture heap used before collection (for printing).
   size_t gch_prev_used = gch->used();
 
@@ -1401,7 +1407,7 @@
     assert(_num_par_pushes > 0, "Tautology");
 #endif
     if (from_space_obj->forwardee() == from_space_obj) {
-      oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1);
+      oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1, mtGC);
       listhead->forward_to(from_space_obj);
       from_space_obj = listhead;
     }
@@ -1553,7 +1559,7 @@
       // This can become a scaling bottleneck when there is work queue overflow coincident
       // with promotion failure.
       oopDesc* f = cur;
-      FREE_C_HEAP_ARRAY(oopDesc, f);
+      FREE_C_HEAP_ARRAY(oopDesc, f, mtGC);
     } else if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) {
       assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
       obj_to_push = cur;
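
The typedef in the hunk above works around a general preprocessor rule rather
than anything HotSpot-specific: a macro argument containing an unparenthesized
comma, as in Stack<oop, mtGC>, is split into two arguments. A self-contained
illustration with a made-up macro (DECLARE_ARRAY is not a HotSpot name):

#include <map>

#define DECLARE_ARRAY(type, name, len) type name[len]

typedef std::map<int, int> IntMap;  // the typedef hides the comma

DECLARE_ARRAY(IntMap, tables, 4);            // compiles
// DECLARE_ARRAY(std::map<int, int>, t, 4);  // error: four macro arguments

int main() { return (int)tables[0].size(); }
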
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARNEWGENERATION_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARNEWGENERATION_HPP
 
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "memory/defNewGeneration.hpp"
 #include "utilities/taskqueue.hpp"
 
@@ -41,7 +41,7 @@
 // in genOopClosures.inline.hpp.
 
 typedef Padded<OopTaskQueue> ObjToScanQueue;
-typedef GenericTaskQueueSet<ObjToScanQueue> ObjToScanQueueSet;
+typedef GenericTaskQueueSet<ObjToScanQueue, mtGC> ObjToScanQueueSet;
 
 class ParKeepAliveClosure: public DefNewGeneration::KeepAliveClosure {
  private:
@@ -59,7 +59,7 @@
   friend class ParScanThreadStateSet;
  private:
   ObjToScanQueue *_work_queue;
-  Stack<oop>* const _overflow_stack;
+  Stack<oop, mtGC>* const _overflow_stack;
 
   ParGCAllocBuffer _to_space_alloc_buffer;
 
@@ -127,7 +127,7 @@
   ParScanThreadState(Space* to_space_, ParNewGeneration* gen_,
                      Generation* old_gen_, int thread_num_,
                      ObjToScanQueueSet* work_queue_set_,
-                     Stack<oop>* overflow_stacks_,
+                     Stack<oop, mtGC>* overflow_stacks_,
                      size_t desired_plab_sz_,
                      ParallelTaskTerminator& term_);
 
@@ -151,7 +151,7 @@
   void trim_queues(int max_size);
 
   // Private overflow stack usage
-  Stack<oop>* overflow_stack() { return _overflow_stack; }
+  Stack<oop, mtGC>* overflow_stack() { return _overflow_stack; }
   bool take_from_overflow_stack();
   void push_on_overflow_stack(oop p);
 
@@ -312,7 +312,7 @@
   ObjToScanQueueSet* _task_queues;
 
   // Per-worker-thread local overflow stacks
-  Stack<oop>* _overflow_stacks;
+  Stack<oop, mtGC>* _overflow_stacks;
 
   // Desired size of survivor space plab's
   PLABStats _plab_stats;
--- a/src/share/vm/gc_implementation/parNew/parOopClosures.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parNew/parOopClosures.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -32,7 +32,7 @@
 class ParScanThreadState;
 class ParNewGeneration;
 typedef Padded<OopTaskQueue> ObjToScanQueue;
-typedef GenericTaskQueueSet<ObjToScanQueue> ObjToScanQueueSet;
+typedef GenericTaskQueueSet<ObjToScanQueue, mtGC> ObjToScanQueueSet;
 class ParallelTaskTerminator;
 
 class ParScanClosure: public OopsInGenClosure {
--- a/src/share/vm/gc_implementation/parallelScavenge/adjoiningGenerations.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/adjoiningGenerations.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -40,7 +40,7 @@
 // must be shrunk.  Adjusting the boundary between the generations
 // is called for in this class.
 
-class AdjoiningGenerations : public CHeapObj {
+class AdjoiningGenerations : public CHeapObj<mtGC> {
   friend class VMStructs;
  private:
   // The young generation and old generation, respectively
--- a/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -42,7 +42,7 @@
 
  protected:
   template <class T> void do_oop_work(T* p) {
-    oop obj = oopDesc::load_decode_heap_oop_not_null(p);
+    oop obj = oopDesc::load_decode_heap_oop(p);
     if (_young_gen->is_in_reserved(obj) &&
         !_card_table->addr_is_marked_imprecise(p)) {
       // Don't overwrite the first missing card mark
--- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -116,7 +116,7 @@
 }
 
 GCTaskQueue* GCTaskQueue::create_on_c_heap() {
-  GCTaskQueue* result = new(ResourceObj::C_HEAP) GCTaskQueue(true);
+  GCTaskQueue* result = new(ResourceObj::C_HEAP, mtGC) GCTaskQueue(true);
   if (TraceGCTaskQueue) {
     tty->print_cr("GCTaskQueue::create_on_c_heap()"
                   " returns " INTPTR_FORMAT,
@@ -403,19 +403,19 @@
   _queue = SynchronizedGCTaskQueue::create(unsynchronized_queue, lock());
   _noop_task = NoopGCTask::create_on_c_heap();
   _idle_inactive_task = WaitForBarrierGCTask::create_on_c_heap();
-  _resource_flag = NEW_C_HEAP_ARRAY(bool, workers());
+  _resource_flag = NEW_C_HEAP_ARRAY(bool, workers(), mtGC);
   {
     // Set up worker threads.
     //     Distribute the workers among the available processors,
     //     unless we were told not to, or if the os doesn't want to.
-    uint* processor_assignment = NEW_C_HEAP_ARRAY(uint, workers());
+    uint* processor_assignment = NEW_C_HEAP_ARRAY(uint, workers(), mtGC);
     if (!BindGCTaskThreadsToCPUs ||
         !os::distribute_processes(workers(), processor_assignment)) {
       for (uint a = 0; a < workers(); a += 1) {
         processor_assignment[a] = sentinel_worker();
       }
     }
-    _thread = NEW_C_HEAP_ARRAY(GCTaskThread*, workers());
+    _thread = NEW_C_HEAP_ARRAY(GCTaskThread*, workers(), mtGC);
     for (uint t = 0; t < workers(); t += 1) {
       set_thread(t, GCTaskThread::create(this, t, processor_assignment[t]));
     }
@@ -426,7 +426,7 @@
       }
       tty->cr();
     }
-    FREE_C_HEAP_ARRAY(uint, processor_assignment);
+    FREE_C_HEAP_ARRAY(uint, processor_assignment, mtGC);
   }
   reset_busy_workers();
   set_unblocked();
@@ -455,11 +455,11 @@
       GCTaskThread::destroy(thread(i));
       set_thread(i, NULL);
     }
-    FREE_C_HEAP_ARRAY(GCTaskThread*, _thread);
+    FREE_C_HEAP_ARRAY(GCTaskThread*, _thread, mtGC);
     _thread = NULL;
   }
   if (_resource_flag != NULL) {
-    FREE_C_HEAP_ARRAY(bool, _resource_flag);
+    FREE_C_HEAP_ARRAY(bool, _resource_flag, mtGC);
     _resource_flag = NULL;
   }
   if (queue() != NULL) {
@@ -817,7 +817,7 @@
 }
 
 NoopGCTask* NoopGCTask::create_on_c_heap() {
-  NoopGCTask* result = new(ResourceObj::C_HEAP) NoopGCTask(true);
+  NoopGCTask* result = new(ResourceObj::C_HEAP, mtGC) NoopGCTask(true);
   return result;
 }
 
@@ -848,7 +848,7 @@
 }
 
 IdleGCTask* IdleGCTask::create_on_c_heap() {
-  IdleGCTask* result = new(ResourceObj::C_HEAP) IdleGCTask(true);
+  IdleGCTask* result = new(ResourceObj::C_HEAP, mtGC) IdleGCTask(true);
   assert(UseDynamicNumberOfGCThreads,
     "Should only be used with dynamic GC thread");
   return result;
@@ -984,7 +984,7 @@
 
 WaitForBarrierGCTask* WaitForBarrierGCTask::create_on_c_heap() {
   WaitForBarrierGCTask* result =
-    new (ResourceObj::C_HEAP) WaitForBarrierGCTask(true);
+    new (ResourceObj::C_HEAP, mtGC) WaitForBarrierGCTask(true);
   return result;
 }
 
@@ -1114,7 +1114,7 @@
     // Lazy initialization.
     if (freelist() == NULL) {
       _freelist =
-        new(ResourceObj::C_HEAP) GrowableArray<Monitor*>(ParallelGCThreads,
+        new(ResourceObj::C_HEAP, mtGC) GrowableArray<Monitor*>(ParallelGCThreads,
                                                          true);
     }
     if (! freelist()->is_empty()) {
--- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -216,7 +216,7 @@
 
 // A GCTaskQueue that can be synchronized.
 // This "has-a" GCTaskQueue and a mutex to do the exclusion.
-class SynchronizedGCTaskQueue : public CHeapObj {
+class SynchronizedGCTaskQueue : public CHeapObj<mtGC> {
 private:
   // Instance state.
   GCTaskQueue* _unsynchronized_queue;   // Has-a unsynchronized queue.
@@ -278,7 +278,7 @@
 
 // This is an abstract base class for getting notifications
 // when a GCTaskManager is done.
-class NotifyDoneClosure : public CHeapObj {
+class NotifyDoneClosure : public CHeapObj<mtGC> {
 public:
   // The notification callback method.
   virtual void notify(GCTaskManager* manager) = 0;
@@ -355,7 +355,7 @@
 // held in the GCTaskThread** _thread array in GCTaskManager.
 
 
-class GCTaskManager : public CHeapObj {
+class GCTaskManager : public CHeapObj<mtGC> {
  friend class ParCompactionManager;
  friend class PSParallelCompact;
  friend class PSScavenge;
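
Most hunks in this changeset follow the single pattern visible above: the
untyped CHeapObj base class becomes CHeapObj<mtGC>, so every C-heap allocation
of a subclass carries a memory-type tag for native memory tracking. A toy
re-creation of that shape (MemType, record_alloc and CHeapObjSketch are
stand-ins, not HotSpot's real NMT interface):

#include <cstddef>
#include <cstdlib>

enum MemType { mtNone, mtGC, mtJavaHeap };

static void record_alloc(size_t size, MemType type) {
  (void)size; (void)type;  // NMT bookkeeping elided in this sketch
}

// The memory type is a template parameter of the base class, so the tag
// is applied in one place instead of at every allocation site.
template <MemType T>
struct CHeapObjSketch {
  void* operator new(size_t size) {
    record_alloc(size, T);
    return std::malloc(size);  // error handling elided in this sketch
  }
  void operator delete(void* p) { std::free(p); }
};

struct GCSideTable : CHeapObjSketch<mtGC> {
  int _data[16];
};

int main() {
  GCSideTable* t = new GCSideTable();  // recorded under mtGC automatically
  delete t;
  return 0;
}

Because the tag rides on the base class, plain "new" call sites need no edits;
only placement new and the NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY macros gain an
explicit mtGC argument, as the surrounding hunks show.
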
--- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -46,7 +46,7 @@
     vm_exit_out_of_memory(0, "Cannot create GC thread. Out of system resources.");
 
   if (PrintGCTaskTimeStamps) {
-    _time_stamps = NEW_C_HEAP_ARRAY(GCTaskTimeStamp, GCTaskTimeStampEntries );
+    _time_stamps = NEW_C_HEAP_ARRAY(GCTaskTimeStamp, GCTaskTimeStampEntries, mtGC);
 
     guarantee(_time_stamps != NULL, "Sanity");
   }
@@ -56,7 +56,7 @@
 
 GCTaskThread::~GCTaskThread() {
   if (_time_stamps != NULL) {
-    FREE_C_HEAP_ARRAY(GCTaskTimeStamp, _time_stamps);
+    FREE_C_HEAP_ARRAY(GCTaskTimeStamp, _time_stamps, mtGC);
   }
 }
 
--- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -90,7 +90,7 @@
   void set_is_working(bool v) { _is_working = v; }
 };
 
-class GCTaskTimeStamp : public CHeapObj
+class GCTaskTimeStamp : public CHeapObj<mtGC>
 {
  private:
   jlong  _entry_time;
--- a/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -28,6 +28,7 @@
 #include "memory/cardTableModRefBS.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
+#include "services/memTracker.hpp"
 
 void ObjectStartArray::initialize(MemRegion reserved_region) {
   // We're based on the assumption that we use the same
@@ -50,6 +51,7 @@
   if (!backing_store.is_reserved()) {
     vm_exit_during_initialization("Could not reserve space for ObjectStartArray");
   }
+  MemTracker::record_virtual_memory_type((address)backing_store.base(), mtGC);
 
   // We do not commit any memory initially
   if (!_virtual_space.initialize(backing_store, 0)) {
@@ -57,10 +59,14 @@
   }
 
   _raw_base = (jbyte*)_virtual_space.low_boundary();
+
   if (_raw_base == NULL) {
     vm_exit_during_initialization("Could not get raw_base address");
   }
 
+  MemTracker::record_virtual_memory_type((address)_raw_base, mtGC);
+
+
   _offset_base = _raw_base - (size_t(reserved_region.start()) >> block_shift);
 
   _covered_region.set_start(reserved_region.start());
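
The two MemTracker::record_virtual_memory_type() calls added above tag
reservations after the fact, because the reservation itself carries no type.
A POSIX (Linux) sketch of the same reserve-first, tag-second flow, where
record_virtual_memory_type is a local stand-in for the MemTracker call:

#include <sys/mman.h>
#include <cstddef>
#include <cstdio>

enum MemType { mtGC };

static void record_virtual_memory_type(void* base, MemType type) {
  (void)type;
  std::printf("reservation at %p tagged mtGC\n", base);
}

int main() {
  const size_t bytes = 1 << 20;
  // Reserve address space without committing it, as ReservedSpace does.
  void* base = mmap(NULL, bytes, PROT_NONE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (base == MAP_FAILED) {
    return 1;
  }
  record_virtual_memory_type(base, mtGC);  // tag before first use
  munmap(base, bytes);
  return 0;
}
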
--- a/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -35,7 +35,7 @@
 // covered region.
 //
 
-class ObjectStartArray : public CHeapObj {
+class ObjectStartArray : public CHeapObj<mtGC> {
  friend class VerifyObjectStartArrayClosure;
 
  private:
--- a/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -29,6 +29,7 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/os.hpp"
 #include "utilities/bitMap.inline.hpp"
+#include "services/memTracker.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
 #endif
@@ -61,6 +62,9 @@
   ReservedSpace rs(bytes, rs_align, rs_align > 0);
   os::trace_page_sizes("par bitmap", raw_bytes, raw_bytes, page_sz,
                        rs.base(), rs.size());
+
+  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+
   _virtual_space = new PSVirtualSpace(rs, page_sz);
   if (_virtual_space != NULL && _virtual_space->expand_by(bytes)) {
     _region_start = covered_region.start();
--- a/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -32,7 +32,7 @@
 class oopDesc;
 class ParMarkBitMapClosure;
 
-class ParMarkBitMap: public CHeapObj
+class ParMarkBitMap: public CHeapObj<mtGC>
 {
 public:
   typedef BitMap::idx_t idx_t;
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -40,6 +40,7 @@
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
 #include "runtime/vmThread.hpp"
+#include "services/memTracker.hpp"
 #include "utilities/vmError.hpp"
 
 PSYoungGen*  ParallelScavengeHeap::_young_gen = NULL;
@@ -161,6 +162,8 @@
     }
   }
 
+  MemTracker::record_virtual_memory_type((address)heap_rs.base(), mtJavaHeap);
+
   os::trace_page_sizes("ps perm", pg_min_size, pg_max_size, pg_page_sz,
                        heap_rs.base(), pg_max_size);
   os::trace_page_sizes("ps main", og_min_size + yg_min_size,
@@ -911,23 +914,23 @@
 }
 
 
-void ParallelScavengeHeap::verify(bool allow_dirty, bool silent, VerifyOption option /* ignored */) {
+void ParallelScavengeHeap::verify(bool silent, VerifyOption option /* ignored */) {
   // Why do we need the total_collections()-filter below?
   if (total_collections() > 0) {
     if (!silent) {
       gclog_or_tty->print("permanent ");
     }
-    perm_gen()->verify(allow_dirty);
+    perm_gen()->verify();
 
     if (!silent) {
       gclog_or_tty->print("tenured ");
     }
-    old_gen()->verify(allow_dirty);
+    old_gen()->verify();
 
     if (!silent) {
       gclog_or_tty->print("eden ");
     }
-    young_gen()->verify(allow_dirty);
+    young_gen()->verify();
   }
 }
 
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -257,7 +257,7 @@
   virtual void gc_threads_do(ThreadClosure* tc) const;
   virtual void print_tracing_info() const;
 
-  void verify(bool allow_dirty, bool silent, VerifyOption option /* ignored */);
+  void verify(bool silent, VerifyOption option /* ignored */);
 
   void print_heap_change(size_t prev_used);
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -81,14 +81,14 @@
   uint parallel_gc_threads = PSParallelCompact::gc_task_manager()->workers();
 
   assert(_manager_array == NULL, "Attempt to initialize twice");
-  _manager_array = NEW_C_HEAP_ARRAY(ParCompactionManager*, parallel_gc_threads+1 );
+  _manager_array = NEW_C_HEAP_ARRAY(ParCompactionManager*, parallel_gc_threads+1, mtGC);
   guarantee(_manager_array != NULL, "Could not allocate manager_array");
 
   _region_list = NEW_C_HEAP_ARRAY(RegionTaskQueue*,
-                                         parallel_gc_threads+1);
+                         parallel_gc_threads+1, mtGC);
   guarantee(_region_list != NULL, "Could not allocate region_list");
 
-  _recycled_stack_index = NEW_C_HEAP_ARRAY(uint, parallel_gc_threads);
+  _recycled_stack_index = NEW_C_HEAP_ARRAY(uint, parallel_gc_threads, mtGC);
 
   // parallel_gc_threads + 1 to be consistent with the number of
   // compaction managers.
--- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -41,7 +41,7 @@
 class ParallelCompactData;
 class ParMarkBitMap;
 
-class ParCompactionManager : public CHeapObj {
+class ParCompactionManager : public CHeapObj<mtGC> {
   friend class ParallelTaskTerminator;
   friend class ParMarkBitMap;
   friend class PSParallelCompact;
@@ -66,8 +66,8 @@
  private:
   // 32-bit:  4K * 8 = 32KiB; 64-bit:  8K * 16 = 128KiB
   #define QUEUE_SIZE (1 << NOT_LP64(12) LP64_ONLY(13))
-  typedef OverflowTaskQueue<ObjArrayTask, QUEUE_SIZE> ObjArrayTaskQueue;
-  typedef GenericTaskQueueSet<ObjArrayTaskQueue>      ObjArrayTaskQueueSet;
+  typedef OverflowTaskQueue<ObjArrayTask, mtGC, QUEUE_SIZE> ObjArrayTaskQueue;
+  typedef GenericTaskQueueSet<ObjArrayTaskQueue, mtGC>      ObjArrayTaskQueueSet;
   #undef QUEUE_SIZE
 
   static ParCompactionManager** _manager_array;
@@ -78,7 +78,7 @@
   static PSOldGen*              _old_gen;
 
 private:
-  OverflowTaskQueue<oop>        _marking_stack;
+  OverflowTaskQueue<oop, mtGC>        _marking_stack;
   ObjArrayTaskQueue             _objarray_stack;
 
   // Is there a way to reuse the _marking_stack for the
@@ -110,8 +110,8 @@
   // popped.  If -1, there has not been any entry popped.
   static int                      _recycled_bottom;
 
-  Stack<Klass*>                 _revisit_klass_stack;
-  Stack<DataLayout*>            _revisit_mdo_stack;
+  Stack<Klass*, mtGC>                 _revisit_klass_stack;
+  Stack<DataLayout*, mtGC>            _revisit_mdo_stack;
 
   static ParMarkBitMap* _mark_bitmap;
 
@@ -126,7 +126,7 @@
  protected:
   // Array of tasks.  Needed by the ParallelTaskTerminator.
   static RegionTaskQueueSet* region_array()      { return _region_array; }
-  OverflowTaskQueue<oop>*  marking_stack()       { return &_marking_stack; }
+  OverflowTaskQueue<oop, mtGC>*  marking_stack()       { return &_marking_stack; }
 
   // Pushes onto the marking stack.  If the marking stack is full,
   // pushes onto the overflow stack.
@@ -175,8 +175,8 @@
   bool should_update();
   bool should_copy();
 
-  Stack<Klass*>* revisit_klass_stack() { return &_revisit_klass_stack; }
-  Stack<DataLayout*>* revisit_mdo_stack() { return &_revisit_mdo_stack; }
+  Stack<Klass*, mtGC>* revisit_klass_stack() { return &_revisit_klass_stack; }
+  Stack<DataLayout*, mtGC>* revisit_mdo_stack() { return &_revisit_mdo_stack; }
 
   // Save for later processing.  Must not fail.
   inline void push(oop obj) { _marking_stack.push(obj); }
--- a/src/share/vm/gc_implementation/parallelScavenge/psGenerationCounters.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psGenerationCounters.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -40,7 +40,7 @@
 
     const char* cns = PerfDataManager::name_space("generation", ordinal);
 
-    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1);
+    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1, mtGC);
     strcpy(_name_space, cns);
 
     const char* cname = PerfDataManager::counter_name(_name_space, "name");
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -160,16 +160,10 @@
 
   {
     HandleMark hm;
-    const bool is_system_gc = gc_cause == GCCause::_java_lang_system_gc;
-    // This is useful for debugging but don't change the output the
-    // the customer sees.
-    const char* gc_cause_str = "Full GC";
-    if (is_system_gc && PrintGCDetails) {
-      gc_cause_str = "Full GC (System)";
-    }
+
     gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    TraceTime t1(gc_cause_str, PrintGC, !PrintGCDetails, gclog_or_tty);
+    TraceTime t1(GCCauseString("Full GC", gc_cause), PrintGC, !PrintGCDetails, gclog_or_tty);
     TraceCollectorStats tcs(counters());
     TraceMemoryManagerStats tms(true /* Full GC */,gc_cause);
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -34,7 +34,7 @@
 
 class ObjectStartArray;
 
-class PSMarkSweepDecorator: public CHeapObj {
+class PSMarkSweepDecorator: public CHeapObj<mtGC> {
  private:
   static PSMarkSweepDecorator* _destination_decorator;
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -477,8 +477,8 @@
 }
 #endif
 
-void PSOldGen::verify(bool allow_dirty) {
-  object_space()->verify(allow_dirty);
+void PSOldGen::verify() {
+  object_space()->verify();
 }
 class VerifyObjectStartArrayClosure : public ObjectClosure {
   PSOldGen* _gen;
--- a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@
 
 class PSMarkSweepDecorator;
 
-class PSOldGen : public CHeapObj {
+class PSOldGen : public CHeapObj<mtGC> {
   friend class VMStructs;
   friend class PSPromotionManager; // Uses the cas_allocate methods
   friend class ParallelScavengeHeap;
@@ -174,7 +174,7 @@
   virtual void print_on(outputStream* st) const;
   void print_used_change(size_t prev_used) const;
 
-  void verify(bool allow_dirty);
+  void verify();
   void verify_object_start_array();
 
   // These should not be used
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -53,6 +53,7 @@
 #include "runtime/vmThread.hpp"
 #include "services/management.hpp"
 #include "services/memoryService.hpp"
+#include "services/memTracker.hpp"
 #include "utilities/events.hpp"
 #include "utilities/stack.inline.hpp"
 
@@ -405,6 +406,9 @@
   ReservedSpace rs(bytes, rs_align, rs_align > 0);
   os::trace_page_sizes("par compact", raw_bytes, raw_bytes, page_sz, rs.base(),
                        rs.size());
+
+  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+
   PSVirtualSpace* vspace = new PSVirtualSpace(rs, page_sz);
   if (vspace != 0) {
     if (vspace->expand_by(bytes)) {
@@ -2047,17 +2051,9 @@
     gc_task_manager()->task_idle_workers();
     heap->set_par_threads(gc_task_manager()->active_workers());
 
-    const bool is_system_gc = gc_cause == GCCause::_java_lang_system_gc;
-
-    // This is useful for debugging but don't change the output the
-    // the customer sees.
-    const char* gc_cause_str = "Full GC";
-    if (is_system_gc && PrintGCDetails) {
-      gc_cause_str = "Full GC (System)";
-    }
     gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    TraceTime t1(gc_cause_str, PrintGC, !PrintGCDetails, gclog_or_tty);
+    TraceTime t1(GCCauseString("Full GC", gc_cause), PrintGC, !PrintGCDetails, gclog_or_tty);
     TraceCollectorStats tcs(counters());
     TraceMemoryManagerStats tms(true /* Full GC */,gc_cause);
 
@@ -2090,7 +2086,8 @@
     }
 #endif  // #ifndef PRODUCT
 
-    bool max_on_system_gc = UseMaximumCompactionOnSystemGC && is_system_gc;
+    bool max_on_system_gc = UseMaximumCompactionOnSystemGC
+      && gc_cause == GCCause::_java_lang_system_gc;
     summary_phase(vmthread_cm, maximum_heap_compaction || max_on_system_gc);
 
     COMPILER2_PRESENT(assert(DerivedPointerTable::is_active(), "Sanity"));
@@ -2739,7 +2736,7 @@
   for (uint i = 0; i < ParallelGCThreads + 1; i++) {
     ParCompactionManager* cm = ParCompactionManager::manager_array(i);
     KeepAliveClosure keep_alive_closure(cm);
-    Stack<Klass*>* const rks = cm->revisit_klass_stack();
+    Stack<Klass*, mtGC>* const rks = cm->revisit_klass_stack();
     if (PrintRevisitStats) {
       gclog_or_tty->print_cr("Revisit klass stack[%u] length = " SIZE_FORMAT,
                              i, rks->size());
@@ -2772,7 +2769,7 @@
   }
   for (uint i = 0; i < ParallelGCThreads + 1; i++) {
     ParCompactionManager* cm = ParCompactionManager::manager_array(i);
-    Stack<DataLayout*>* rms = cm->revisit_mdo_stack();
+    Stack<DataLayout*, mtGC>* rms = cm->revisit_mdo_stack();
     if (PrintRevisitStats) {
       gclog_or_tty->print_cr("Revisit MDO stack[%u] size = " SIZE_FORMAT,
                              i, rms->size());
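
The TraceTime changes in this file and in the parNew/psScavenge files replace
the hand-rolled "Full GC (System)" special case with GCCauseString, which folds
the GC cause into the phase label uniformly. Assuming the label is shaped like
"prefix (cause)" (an assumption for illustration, not HotSpot's exact
formatting), the effect is roughly:

#include <cstdio>
#include <string>

static std::string gc_cause_string(const char* prefix, const char* cause) {
  return std::string(prefix) + " (" + cause + ")";
}

int main() {
  // Every cause is now reported, not just explicit System.gc() calls.
  std::printf("%s\n", gc_cause_string("Full GC", "System.gc()").c_str());
  std::printf("%s\n", gc_cause_string("GC", "Allocation Failure").c_str());
  return 0;
}
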
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -36,7 +36,7 @@
 
 class ObjectStartArray;
 
-class PSPromotionLAB : public CHeapObj {
+class PSPromotionLAB : public CHeapObj<mtGC> {
  protected:
   static size_t filler_header_size;
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -45,7 +45,7 @@
   _young_space = heap->young_gen()->to_space();
 
   assert(_manager_array == NULL, "Attempt to initialize twice");
-  _manager_array = NEW_C_HEAP_ARRAY(PSPromotionManager*, ParallelGCThreads+1 );
+  _manager_array = NEW_C_HEAP_ARRAY(PSPromotionManager*, ParallelGCThreads+1, mtGC);
   guarantee(_manager_array != NULL, "Could not initialize promotion manager");
 
   _stack_array_depth = new OopStarTaskQueueSet(ParallelGCThreads);
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -49,7 +49,7 @@
 class PSOldGen;
 class ParCompactionManager;
 
-class PSPromotionManager : public CHeapObj {
+class PSPromotionManager : public CHeapObj<mtGC> {
   friend class PSScavenge;
   friend class PSRefProcTaskExecutor;
  private:
@@ -77,7 +77,7 @@
   bool                                _old_gen_is_full;
 
   OopStarTaskQueue                    _claimed_stack_depth;
-  OverflowTaskQueue<oop>              _claimed_stack_breadth;
+  OverflowTaskQueue<oop, mtGC>        _claimed_stack_breadth;
 
   bool                                _totally_drain;
   uint                                _target_stack_size;
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONMANAGER_INLINE_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONMANAGER_INLINE_HPP
 
+#include "gc_implementation/parallelScavenge/psOldGen.hpp"
 #include "gc_implementation/parallelScavenge/psPromotionManager.hpp"
 #include "gc_implementation/parallelScavenge/psScavenge.hpp"
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -62,8 +62,8 @@
 int                        PSScavenge::_tenuring_threshold = 0;
 HeapWord*                  PSScavenge::_young_generation_boundary = NULL;
 elapsedTimer               PSScavenge::_accumulated_time;
-Stack<markOop>             PSScavenge::_preserved_mark_stack;
-Stack<oop>                 PSScavenge::_preserved_oop_stack;
+Stack<markOop, mtGC>       PSScavenge::_preserved_mark_stack;
+Stack<oop, mtGC>           PSScavenge::_preserved_oop_stack;
 CollectorCounters*         PSScavenge::_counters = NULL;
 bool                       PSScavenge::_promotion_failed = false;
 
@@ -325,7 +325,7 @@
 
     gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    TraceTime t1("GC", PrintGC, !PrintGCDetails, gclog_or_tty);
+    TraceTime t1(GCCauseString("GC", gc_cause), PrintGC, !PrintGCDetails, gclog_or_tty);
     TraceCollectorStats tcs(counters());
     TraceMemoryManagerStats tms(false /* not full GC */,gc_cause);
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -71,8 +71,8 @@
   static HeapWord*           _young_generation_boundary; // The lowest address possible for the young_gen.
                                                          // This is used to decide if an oop should be scavenged,
                                                          // cards should be marked, etc.
-  static Stack<markOop>          _preserved_mark_stack; // List of marks to be restored after failed promotion
-  static Stack<oop>              _preserved_oop_stack;  // List of oops that need their mark restored.
+  static Stack<markOop, mtGC> _preserved_mark_stack; // List of marks to be restored after failed promotion
+  static Stack<oop, mtGC>     _preserved_oop_stack;  // List of oops that need their mark restored.
   static CollectorCounters*      _counters;         // collector performance counters
   static bool                    _promotion_failed;
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -32,7 +32,7 @@
 // VirtualSpace is data structure for committing a previously reserved address
 // range in smaller chunks.
 
-class PSVirtualSpace : public CHeapObj {
+class PSVirtualSpace : public CHeapObj<mtGC> {
   friend class VMStructs;
  protected:
   // The space is committed/uncommited in chunks of size _alignment.  The
--- a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -937,10 +937,10 @@
   }
 }
 
-void PSYoungGen::verify(bool allow_dirty) {
-  eden_space()->verify(allow_dirty);
-  from_space()->verify(allow_dirty);
-  to_space()->verify(allow_dirty);
+void PSYoungGen::verify() {
+  eden_space()->verify();
+  from_space()->verify();
+  to_space()->verify();
 }
 
 #ifndef PRODUCT
--- a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,7 +33,7 @@
 
 class PSMarkSweepDecorator;
 
-class PSYoungGen : public CHeapObj {
+class PSYoungGen : public CHeapObj<mtGC> {
   friend class VMStructs;
   friend class ParallelScavengeHeap;
   friend class AdjoiningGenerations;
@@ -181,7 +181,7 @@
   void print_used_change(size_t prev_used) const;
   virtual const char* name() const { return "PSYoungGen"; }
 
-  void verify(bool allow_dirty);
+  void verify();
 
   // Space boundary invariant checker
   void space_invariants() PRODUCT_RETURN;
--- a/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -38,7 +38,7 @@
 class elapsedTimer;
 class CollectorPolicy;
 
-class AdaptiveSizePolicy : public CHeapObj {
+class AdaptiveSizePolicy : public CHeapObj<mtGC> {
  friend class GCAdaptivePolicyCounters;
  friend class PSGCAdaptivePolicyCounters;
  friend class CMSGCAdaptivePolicyCounters;
--- a/src/share/vm/gc_implementation/shared/allocationStats.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/allocationStats.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -39,7 +39,7 @@
   // We measure the demand between the end of the previous sweep and
   // beginning of this sweep:
   //   Count(end_last_sweep) - Count(start_this_sweep)
-  //     + splitBirths(between) - splitDeaths(between)
+  //     + split_births(between) - split_deaths(between)
   // The above number divided by the time since the end of the
   // previous sweep gives us a time rate of demand for blocks
   // of this size. We compute a padded average of this rate as
@@ -51,34 +51,34 @@
   AdaptivePaddedAverage _demand_rate_estimate;
 
   ssize_t     _desired;         // Demand estimate computed as described above
-  ssize_t     _coalDesired;     // desired +/- small-percent for tuning coalescing
+  ssize_t     _coal_desired;     // desired +/- small-percent for tuning coalescing
 
   ssize_t     _surplus;         // count - (desired +/- small-percent),
                                 // used to tune splitting in best fit
-  ssize_t     _bfrSurp;         // surplus at start of current sweep
-  ssize_t     _prevSweep;       // count from end of previous sweep
-  ssize_t     _beforeSweep;     // count from before current sweep
-  ssize_t     _coalBirths;      // additional chunks from coalescing
-  ssize_t     _coalDeaths;      // loss from coalescing
-  ssize_t     _splitBirths;     // additional chunks from splitting
-  ssize_t     _splitDeaths;     // loss from splitting
-  size_t      _returnedBytes;   // number of bytes returned to list.
+  ssize_t     _bfr_surp;        // surplus at start of current sweep
+  ssize_t     _prev_sweep;      // count from end of previous sweep
+  ssize_t     _before_sweep;    // count from before current sweep
+  ssize_t     _coal_births;     // additional chunks from coalescing
+  ssize_t     _coal_deaths;     // loss from coalescing
+  ssize_t     _split_births;    // additional chunks from splitting
+  ssize_t     _split_deaths;    // loss from splitting
+  size_t      _returned_bytes;  // number of bytes returned to list.
  public:
   void initialize(bool split_birth = false) {
     AdaptivePaddedAverage* dummy =
       new (&_demand_rate_estimate) AdaptivePaddedAverage(CMS_FLSWeight,
                                                          CMS_FLSPadding);
     _desired = 0;
-    _coalDesired = 0;
+    _coal_desired = 0;
     _surplus = 0;
-    _bfrSurp = 0;
-    _prevSweep = 0;
-    _beforeSweep = 0;
-    _coalBirths = 0;
-    _coalDeaths = 0;
-    _splitBirths = (split_birth ? 1 : 0);
-    _splitDeaths = 0;
-    _returnedBytes = 0;
+    _bfr_surp = 0;
+    _prev_sweep = 0;
+    _before_sweep = 0;
+    _coal_births = 0;
+    _coal_deaths = 0;
+    _split_births = (split_birth ? 1 : 0);
+    _split_deaths = 0;
+    _returned_bytes = 0;
   }
 
   AllocationStats() {
@@ -99,12 +99,12 @@
     // vulnerable to noisy glitches. In such cases, we
     // ignore the current sample and use currently available
     // historical estimates.
-    assert(prevSweep() + splitBirths() + coalBirths()        // "Total Production Stock"
-           >= splitDeaths() + coalDeaths() + (ssize_t)count, // "Current stock + depletion"
+    assert(prev_sweep() + split_births() + coal_births()        // "Total Production Stock"
+           >= split_deaths() + coal_deaths() + (ssize_t)count, // "Current stock + depletion"
            "Conservation Principle");
     if (inter_sweep_current > _threshold) {
-      ssize_t demand = prevSweep() - (ssize_t)count + splitBirths() + coalBirths()
-                       - splitDeaths() - coalDeaths();
+      ssize_t demand = prev_sweep() - (ssize_t)count + split_births() + coal_births()
+                       - split_deaths() - coal_deaths();
       assert(demand >= 0,
              err_msg("Demand (" SSIZE_FORMAT ") should be non-negative for "
                      PTR_FORMAT " (size=" SIZE_FORMAT ")",
@@ -130,40 +130,40 @@
   ssize_t desired() const { return _desired; }
   void set_desired(ssize_t v) { _desired = v; }
 
-  ssize_t coalDesired() const { return _coalDesired; }
-  void set_coalDesired(ssize_t v) { _coalDesired = v; }
+  ssize_t coal_desired() const { return _coal_desired; }
+  void set_coal_desired(ssize_t v) { _coal_desired = v; }
 
   ssize_t surplus() const { return _surplus; }
   void set_surplus(ssize_t v) { _surplus = v; }
   void increment_surplus() { _surplus++; }
   void decrement_surplus() { _surplus--; }
 
-  ssize_t bfrSurp() const { return _bfrSurp; }
-  void set_bfrSurp(ssize_t v) { _bfrSurp = v; }
-  ssize_t prevSweep() const { return _prevSweep; }
-  void set_prevSweep(ssize_t v) { _prevSweep = v; }
-  ssize_t beforeSweep() const { return _beforeSweep; }
-  void set_beforeSweep(ssize_t v) { _beforeSweep = v; }
+  ssize_t bfr_surp() const { return _bfr_surp; }
+  void set_bfr_surp(ssize_t v) { _bfr_surp = v; }
+  ssize_t prev_sweep() const { return _prev_sweep; }
+  void set_prev_sweep(ssize_t v) { _prev_sweep = v; }
+  ssize_t before_sweep() const { return _before_sweep; }
+  void set_before_sweep(ssize_t v) { _before_sweep = v; }
 
-  ssize_t coalBirths() const { return _coalBirths; }
-  void set_coalBirths(ssize_t v) { _coalBirths = v; }
-  void increment_coalBirths() { _coalBirths++; }
+  ssize_t coal_births() const { return _coal_births; }
+  void set_coal_births(ssize_t v) { _coal_births = v; }
+  void increment_coal_births() { _coal_births++; }
 
-  ssize_t coalDeaths() const { return _coalDeaths; }
-  void set_coalDeaths(ssize_t v) { _coalDeaths = v; }
-  void increment_coalDeaths() { _coalDeaths++; }
+  ssize_t coal_deaths() const { return _coal_deaths; }
+  void set_coal_deaths(ssize_t v) { _coal_deaths = v; }
+  void increment_coal_deaths() { _coal_deaths++; }
 
-  ssize_t splitBirths() const { return _splitBirths; }
-  void set_splitBirths(ssize_t v) { _splitBirths = v; }
-  void increment_splitBirths() { _splitBirths++; }
+  ssize_t split_births() const { return _split_births; }
+  void set_split_births(ssize_t v) { _split_births = v; }
+  void increment_split_births() { _split_births++; }
 
-  ssize_t splitDeaths() const { return _splitDeaths; }
-  void set_splitDeaths(ssize_t v) { _splitDeaths = v; }
-  void increment_splitDeaths() { _splitDeaths++; }
+  ssize_t split_deaths() const { return _split_deaths; }
+  void set_split_deaths(ssize_t v) { _split_deaths = v; }
+  void increment_split_deaths() { _split_deaths++; }
 
   NOT_PRODUCT(
-    size_t returnedBytes() const { return _returnedBytes; }
-    void set_returnedBytes(size_t v) { _returnedBytes = v; }
+    size_t returned_bytes() const { return _returned_bytes; }
+    void set_returned_bytes(size_t v) { _returned_bytes = v; }
   )
 };
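
With the accessors renamed, the sweep-interval bookkeeping above reads directly: demand over an interval is what the previous sweep left, plus blocks born by splitting and coalescing, minus blocks lost the same way and the blocks still on the list. A standalone illustration with made-up numbers (plain C++, not HotSpot code):

    #include <cassert>
    #include <cstdio>

    // Sketch of the demand computation used by AllocationStats:
    //   demand = prev_sweep - count + split_births + coal_births
    //            - split_deaths - coal_deaths
    int main() {
      long prev_sweep   = 100, count       = 40;  // list had 100, now holds 40
      long split_births = 25,  coal_births = 10;
      long split_deaths = 5,   coal_deaths = 15;
      // Conservation Principle: total production must cover current
      // stock plus depletion, otherwise the sample is noise.
      assert(prev_sweep + split_births + coal_births >=
             split_deaths + coal_deaths + count);
      long demand = prev_sweep - count + split_births + coal_births
                    - split_deaths - coal_deaths;
      printf("blocks demanded this interval: %ld\n", demand);  // prints 75
      return 0;
    }
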
 
--- a/src/share/vm/gc_implementation/shared/cSpaceCounters.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/cSpaceCounters.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -37,7 +37,7 @@
     const char* cns = PerfDataManager::name_space(gc->name_space(), "space",
                                                   ordinal);
 
-    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1);
+    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1, mtGC);
     strcpy(_name_space, cns);
 
     const char* cname = PerfDataManager::counter_name(_name_space, "name");
--- a/src/share/vm/gc_implementation/shared/cSpaceCounters.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/cSpaceCounters.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -32,7 +32,7 @@
 // A CSpaceCounters is a holder class for performance counters
 // that track a space.
 
-class CSpaceCounters: public CHeapObj {
+class CSpaceCounters: public CHeapObj<mtGC> {
   friend class VMStructs;
 
  private:
@@ -52,7 +52,7 @@
                  ContiguousSpace* s, GenerationCounters* gc);
 
   ~CSpaceCounters() {
-      if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space);
+      if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space, mtGC);
   }
 
   inline void update_capacity() {
--- a/src/share/vm/gc_implementation/shared/collectorCounters.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/collectorCounters.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -34,7 +34,7 @@
 
     const char* cns = PerfDataManager::name_space("collector", ordinal);
 
-    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1);
+    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1, mtGC);
     strcpy(_name_space, cns);
 
     char* cname = PerfDataManager::counter_name(_name_space, "name");
--- a/src/share/vm/gc_implementation/shared/collectorCounters.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/collectorCounters.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -30,7 +30,7 @@
 // CollectorCounters is a holder class for performance counters
 // that track a collector
 
-class CollectorCounters: public CHeapObj {
+class CollectorCounters: public CHeapObj<mtGC> {
   friend class VMStructs;
 
   private:
@@ -50,7 +50,7 @@
     CollectorCounters(const char* name, int ordinal);
 
     ~CollectorCounters() {
-      if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space);
+      if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space, mtGC);
     }
 
     inline PerfCounter* invocation_counter() const  { return _invocations; }
--- a/src/share/vm/gc_implementation/shared/gSpaceCounters.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/gSpaceCounters.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -41,7 +41,7 @@
     const char* cns = PerfDataManager::name_space(gc->name_space(), "space",
                                                   ordinal);
 
-    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1);
+    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1, mtGC);
     strcpy(_name_space, cns);
 
     const char* cname = PerfDataManager::counter_name(_name_space, "name");
--- a/src/share/vm/gc_implementation/shared/gSpaceCounters.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/gSpaceCounters.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -34,7 +34,7 @@
 // A GSpaceCounter is a holder class for performance counters
 // that track a space.
 
-class GSpaceCounters: public CHeapObj {
+class GSpaceCounters: public CHeapObj<mtGC> {
   friend class VMStructs;
 
  private:
@@ -54,7 +54,7 @@
                  GenerationCounters* gc, bool sampled=true);
 
   ~GSpaceCounters() {
-    if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space);
+    if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space, mtGC);
   }
 
   inline void update_capacity() {
--- a/src/share/vm/gc_implementation/shared/gcPolicyCounters.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/gcPolicyCounters.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -30,7 +30,7 @@
 // GCPolicyCounters is a holder class for performance counters
 // that track a generation
 
-class GCPolicyCounters: public CHeapObj {
+class GCPolicyCounters: public CHeapObj<mtGC> {
   friend class VMStructs;
 
   private:
--- a/src/share/vm/gc_implementation/shared/gcStats.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/gcStats.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -27,7 +27,7 @@
 
 #include "gc_implementation/shared/gcUtil.hpp"
 
-class GCStats : public CHeapObj {
+class GCStats : public CHeapObj<mtGC> {
  protected:
   // Avg amount promoted; used for avoiding promotion undo
   // This class does not update deviations if the sample is zero.
--- a/src/share/vm/gc_implementation/shared/gcUtil.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/gcUtil.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -43,7 +43,7 @@
 //
 // This serves as our best estimate of a future unknown.
 //
-class AdaptiveWeightedAverage : public CHeapObj {
+class AdaptiveWeightedAverage : public CHeapObj<mtGC> {
  private:
   float            _average;        // The last computed average
   unsigned         _sample_count;   // How often we've sampled this average
@@ -146,7 +146,7 @@
   // Placement support
   void* operator new(size_t ignored, void* p) { return p; }
   // Allocator
-  void* operator new(size_t size) { return CHeapObj::operator new(size); }
+  void* operator new(size_t size) { return CHeapObj<mtGC>::operator new(size); }
 
   // Accessor
   float padded_average() const         { return _padded_avg; }
@@ -192,7 +192,7 @@
 // equation.
 //              y = intercept + slope * x
 
-class LinearLeastSquareFit : public CHeapObj {
+class LinearLeastSquareFit : public CHeapObj<mtGC> {
   double _sum_x;        // sum of all independent data points x
   double _sum_x_squared; // sum of all independent data points x**2
   double _sum_y;        // sum of all dependent data points y
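
AdaptiveWeightedAverage, the base of the padded average used above, is an exponentially weighted moving average: each sample is blended into the running value with a fixed percentage weight. A minimal sketch under that reading, with the weight a 0-100 percentage as in CMS_FLSWeight (the real class also counts samples, and the padded variant additionally tracks deviation):

    // EWMA sketch: a higher weight makes the average react faster.
    class EwmaSketch {
      float    _average;
      unsigned _weight;   // percentage, 0..100
     public:
      explicit EwmaSketch(unsigned weight) : _average(0.0f), _weight(weight) {}
      void sample(float new_sample) {
        _average = ((100.0f - _weight) * _average + _weight * new_sample) / 100.0f;
      }
      float average() const { return _average; }
    };
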
--- a/src/share/vm/gc_implementation/shared/generationCounters.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/generationCounters.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -35,7 +35,7 @@
 
     const char* cns = PerfDataManager::name_space("generation", ordinal);
 
-    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1);
+    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1, mtGC);
     strcpy(_name_space, cns);
 
     const char* cname = PerfDataManager::counter_name(_name_space, "name");
--- a/src/share/vm/gc_implementation/shared/generationCounters.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/generationCounters.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -31,7 +31,7 @@
 // A GenerationCounter is a holder class for performance counters
 // that track a generation
 
-class GenerationCounters: public CHeapObj {
+class GenerationCounters: public CHeapObj<mtGC> {
   friend class VMStructs;
 
 private:
@@ -69,7 +69,7 @@
                      VirtualSpace* v);
 
   ~GenerationCounters() {
-    if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space);
+    if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space, mtGC);
   }
 
   virtual void update_all();
--- a/src/share/vm/gc_implementation/shared/hSpaceCounters.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/hSpaceCounters.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -40,7 +40,7 @@
     const char* cns =
       PerfDataManager::name_space(gc->name_space(), "space", ordinal);
 
-    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1);
+    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1, mtGC);
     strcpy(_name_space, cns);
 
     const char* cname = PerfDataManager::counter_name(_name_space, "name");
--- a/src/share/vm/gc_implementation/shared/hSpaceCounters.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/hSpaceCounters.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -37,7 +37,7 @@
 class HeapSpaceUsedHelper;
 class G1SpaceMonitoringSupport;
 
-class HSpaceCounters: public CHeapObj {
+class HSpaceCounters: public CHeapObj<mtGC> {
   friend class VMStructs;
 
  private:
@@ -55,7 +55,7 @@
                  size_t initial_capacity, GenerationCounters* gc);
 
   ~HSpaceCounters() {
-    if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space);
+    if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space, mtGC);
   }
 
   inline void update_capacity(size_t v) {
--- a/src/share/vm/gc_implementation/shared/immutableSpace.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/immutableSpace.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -70,7 +70,7 @@
 
 #endif
 
-void ImmutableSpace::verify(bool allow_dirty) {
+void ImmutableSpace::verify() {
   HeapWord* p = bottom();
   HeapWord* t = end();
   HeapWord* prev_p = NULL;
--- a/src/share/vm/gc_implementation/shared/immutableSpace.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/immutableSpace.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,7 +33,7 @@
 // Invariant: bottom() and end() are on page_size boundaries and
 // bottom() <= end()
 
-class ImmutableSpace: public CHeapObj {
+class ImmutableSpace: public CHeapObj<mtGC> {
   friend class VMStructs;
  protected:
   HeapWord* _bottom;
@@ -65,7 +65,7 @@
   // Debugging
   virtual void print() const            PRODUCT_RETURN;
   virtual void print_short() const      PRODUCT_RETURN;
-  virtual void verify(bool allow_dirty);
+  virtual void verify();
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_IMMUTABLESPACE_HPP
--- a/src/share/vm/gc_implementation/shared/markSweep.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/markSweep.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -30,13 +30,13 @@
 #include "oops/objArrayKlass.inline.hpp"
 #include "oops/oop.inline.hpp"
 
-Stack<oop>              MarkSweep::_marking_stack;
-Stack<DataLayout*>      MarkSweep::_revisit_mdo_stack;
-Stack<Klass*>           MarkSweep::_revisit_klass_stack;
-Stack<ObjArrayTask>     MarkSweep::_objarray_stack;
+Stack<oop, mtGC>              MarkSweep::_marking_stack;
+Stack<DataLayout*, mtGC>      MarkSweep::_revisit_mdo_stack;
+Stack<Klass*, mtGC>           MarkSweep::_revisit_klass_stack;
+Stack<ObjArrayTask, mtGC>     MarkSweep::_objarray_stack;
 
-Stack<oop>              MarkSweep::_preserved_oop_stack;
-Stack<markOop>          MarkSweep::_preserved_mark_stack;
+Stack<oop, mtGC>              MarkSweep::_preserved_oop_stack;
+Stack<markOop, mtGC>          MarkSweep::_preserved_mark_stack;
 size_t                  MarkSweep::_preserved_count = 0;
 size_t                  MarkSweep::_preserved_count_max = 0;
 PreservedMark*          MarkSweep::_preserved_marks = NULL;
@@ -166,7 +166,7 @@
   }
 
   // deal with the overflow stack
-  StackIterator<oop> iter(_preserved_oop_stack);
+  StackIterator<oop, mtGC> iter(_preserved_oop_stack);
   while (!iter.is_empty()) {
     oop* p = iter.next_addr();
     adjust_pointer(p);
--- a/src/share/vm/gc_implementation/shared/markSweep.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/markSweep.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -122,16 +122,16 @@
   //
  protected:
   // Traversal stacks used during phase1
-  static Stack<oop>                      _marking_stack;
-  static Stack<ObjArrayTask>             _objarray_stack;
+  static Stack<oop, mtGC>                      _marking_stack;
+  static Stack<ObjArrayTask, mtGC>             _objarray_stack;
   // Stack for live klasses to revisit at end of marking phase
-  static Stack<Klass*>                   _revisit_klass_stack;
+  static Stack<Klass*, mtGC>                   _revisit_klass_stack;
   // Set (stack) of MDO's to revisit at end of marking phase
-  static Stack<DataLayout*>              _revisit_mdo_stack;
+  static Stack<DataLayout*, mtGC>              _revisit_mdo_stack;
 
   // Space for storing/restoring mark word
-  static Stack<markOop>                  _preserved_mark_stack;
-  static Stack<oop>                      _preserved_oop_stack;
+  static Stack<markOop, mtGC>                  _preserved_mark_stack;
+  static Stack<oop, mtGC>                      _preserved_oop_stack;
   static size_t                          _preserved_count;
   static size_t                          _preserved_count_max;
   static PreservedMark*                  _preserved_marks;
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -43,7 +43,7 @@
 
 
 MutableNUMASpace::MutableNUMASpace(size_t alignment) : MutableSpace(alignment) {
-  _lgrp_spaces = new (ResourceObj::C_HEAP) GrowableArray<LGRPSpace*>(0, true);
+  _lgrp_spaces = new (ResourceObj::C_HEAP, mtGC) GrowableArray<LGRPSpace*>(0, true);
   _page_size = os::vm_page_size();
   _adaptation_cycles = 0;
   _samples_count = 0;
@@ -231,7 +231,7 @@
   if (force || changed) {
     // Compute lgrp intersection. Add/remove spaces.
     int lgrp_limit = (int)os::numa_get_groups_num();
-    int *lgrp_ids = NEW_C_HEAP_ARRAY(int, lgrp_limit);
+    int *lgrp_ids = NEW_C_HEAP_ARRAY(int, lgrp_limit, mtGC);
     int lgrp_num = (int)os::numa_get_leaf_groups(lgrp_ids, lgrp_limit);
     assert(lgrp_num > 0, "There should be at least one locality group");
     // Add new spaces for the new nodes
@@ -265,7 +265,7 @@
       }
     }
 
-    FREE_C_HEAP_ARRAY(int, lgrp_ids);
+    FREE_C_HEAP_ARRAY(int, lgrp_ids, mtGC);
 
     if (changed) {
       for (JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
@@ -891,12 +891,12 @@
   }
 }
 
-void MutableNUMASpace::verify(bool allow_dirty) {
+void MutableNUMASpace::verify() {
   // This can be called after setting an arbitrary value to the space's top,
   // so an object can cross the chunk boundary. We ensure the parsability
   // of the space and just walk the objects in linear fashion.
   ensure_parsability();
-  MutableSpace::verify(allow_dirty);
+  MutableSpace::verify();
 }
 
 // Scan pages and gather stats about page placement and size.
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -63,7 +63,7 @@
 class MutableNUMASpace : public MutableSpace {
   friend class VMStructs;
 
-  class LGRPSpace : public CHeapObj {
+  class LGRPSpace : public CHeapObj<mtGC> {
     int _lgrp_id;
     MutableSpace* _space;
     MemRegion _invalid_region;
@@ -225,7 +225,7 @@
   // Debugging
   virtual void print_on(outputStream* st) const;
   virtual void print_short_on(outputStream* st) const;
-  virtual void verify(bool allow_dirty);
+  virtual void verify();
 
   virtual void set_top(HeapWord* value);
 };
--- a/src/share/vm/gc_implementation/shared/mutableSpace.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/mutableSpace.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -246,7 +246,7 @@
                  bottom(), top(), end());
 }
 
-void MutableSpace::verify(bool allow_dirty) {
+void MutableSpace::verify() {
   HeapWord* p = bottom();
   HeapWord* t = top();
   HeapWord* prev_p = NULL;
--- a/src/share/vm/gc_implementation/shared/mutableSpace.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/mutableSpace.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -141,7 +141,7 @@
   virtual void print_on(outputStream* st) const;
   virtual void print_short() const;
   virtual void print_short_on(outputStream* st) const;
-  virtual void verify(bool allow_dirty);
+  virtual void verify();
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_MUTABLESPACE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
+#include "memory/sharedHeap.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/oop.inline.hpp"
+
+ParGCAllocBuffer::ParGCAllocBuffer(size_t desired_plab_sz_) :
+  _word_sz(desired_plab_sz_), _bottom(NULL), _top(NULL),
+  _end(NULL), _hard_end(NULL),
+  _retained(false), _retained_filler(),
+  _allocated(0), _wasted(0)
+{
+  assert(min_size() > AlignmentReserve, "Inconsistency!");
+  // arrayOopDesc::header_size depends on command line initialization.
+  FillerHeaderSize = align_object_size(arrayOopDesc::header_size(T_INT));
+  AlignmentReserve = oopDesc::header_size() > MinObjAlignment ? FillerHeaderSize : 0;
+}
+
+size_t ParGCAllocBuffer::FillerHeaderSize;
+
+// If the minimum object size is greater than MinObjAlignment, we can
+// end up with a shard at the end of the buffer that's smaller than
+// the smallest object.  We can't allow that because the buffer must
+// look like it's full of objects when we retire it, so we make
+// sure we have enough space for a filler int array object.
+size_t ParGCAllocBuffer::AlignmentReserve;
+
+void ParGCAllocBuffer::retire(bool end_of_gc, bool retain) {
+  assert(!retain || end_of_gc, "Can only retain at GC end.");
+  if (_retained) {
+    // If the buffer had been retained shorten the previous filler object.
+    assert(_retained_filler.end() <= _top, "INVARIANT");
+    CollectedHeap::fill_with_object(_retained_filler);
+    // Wasted space book-keeping, otherwise (normally) done in invalidate()
+    _wasted += _retained_filler.word_size();
+    _retained = false;
+  }
+  assert(!end_of_gc || !_retained, "At this point, end_of_gc ==> !_retained.");
+  if (_top < _hard_end) {
+    CollectedHeap::fill_with_object(_top, _hard_end);
+    if (!retain) {
+      invalidate();
+    } else {
+      // Is there wasted space we'd like to retain for the next GC?
+      if (pointer_delta(_end, _top) > FillerHeaderSize) {
+        _retained = true;
+        _retained_filler = MemRegion(_top, FillerHeaderSize);
+        _top = _top + FillerHeaderSize;
+      } else {
+        invalidate();
+      }
+    }
+  }
+}
+
+void ParGCAllocBuffer::flush_stats(PLABStats* stats) {
+  assert(ResizePLAB, "Wasted work");
+  stats->add_allocated(_allocated);
+  stats->add_wasted(_wasted);
+  stats->add_unused(pointer_delta(_end, _top));
+}
+
+// Compute desired plab size and latch result for later
+// use. This should be called once at the end of parallel
+// scavenge; it clears the sensor accumulators.
+void PLABStats::adjust_desired_plab_sz() {
+  assert(ResizePLAB, "Not set");
+  if (_allocated == 0) {
+    assert(_unused == 0, "Inconsistency in PLAB stats");
+    _allocated = 1;
+  }
+  double wasted_frac    = (double)_unused/(double)_allocated;
+  size_t target_refills = (size_t)((wasted_frac*TargetSurvivorRatio)/
+                                   TargetPLABWastePct);
+  if (target_refills == 0) {
+    target_refills = 1;
+  }
+  _used = _allocated - _wasted - _unused;
+  size_t plab_sz = _used/(target_refills*ParallelGCThreads);
+  if (PrintPLAB) gclog_or_tty->print(" (plab_sz = %d ", plab_sz);
+  // Take historical weighted average
+  _filter.sample(plab_sz);
+  // Clip from above and below, and align to object boundary
+  plab_sz = MAX2(min_size(), (size_t)_filter.average());
+  plab_sz = MIN2(max_size(), plab_sz);
+  plab_sz = align_object_size(plab_sz);
+  // Latch the result
+  if (PrintPLAB) gclog_or_tty->print(" desired_plab_sz = %d) ", plab_sz);
+  _desired_plab_sz = plab_sz;
+  // Now clear the accumulators for next round:
+  // note this needs to be fixed in the case where we
+  // are retaining across scavenges. FIX ME !!! XXX
+  _allocated = 0;
+  _wasted    = 0;
+  _unused    = 0;
+}
+
+#ifndef PRODUCT
+void ParGCAllocBuffer::print() {
+  gclog_or_tty->print("parGCAllocBuffer: _bottom: %p  _top: %p  _end: %p  _hard_end: %p"
+             "_retained: %c _retained_filler: [%p,%p)\n",
+             _bottom, _top, _end, _hard_end,
+             "FT"[_retained], _retained_filler.start(), _retained_filler.end());
+}
+#endif // !PRODUCT
+
+const size_t ParGCAllocBufferWithBOT::ChunkSizeInWords =
+MIN2(CardTableModRefBS::par_chunk_heapword_alignment(),
+     ((size_t)Generation::GenGrain)/HeapWordSize);
+const size_t ParGCAllocBufferWithBOT::ChunkSizeInBytes =
+MIN2(CardTableModRefBS::par_chunk_heapword_alignment() * HeapWordSize,
+     (size_t)Generation::GenGrain);
+
+ParGCAllocBufferWithBOT::ParGCAllocBufferWithBOT(size_t word_sz,
+                                                 BlockOffsetSharedArray* bsa) :
+  ParGCAllocBuffer(word_sz),
+  _bsa(bsa),
+  _bt(bsa, MemRegion(_bottom, _hard_end)),
+  _true_end(_hard_end)
+{}
+
+// The buffer comes with its own BOT, with a shared (obviously) underlying
+// BlockOffsetSharedArray. We manipulate this BOT in the normal way
+// as we would for any contiguous space. However, on occasion we
+// need to do some buffer surgery at the extremities before we
+// start using the body of the buffer for allocations. Such surgery
+// (as explained elsewhere) is to prevent allocation on a card that
+// is in the process of being walked concurrently by another GC thread.
+// When such surgery happens at a point that is far removed (to the
+// right of the current allocation point, top), we use the "contig"
+// parameter below to directly manipulate the shared array without
+// modifying the _next_threshold state in the BOT.
+void ParGCAllocBufferWithBOT::fill_region_with_block(MemRegion mr,
+                                                     bool contig) {
+  CollectedHeap::fill_with_object(mr);
+  if (contig) {
+    _bt.alloc_block(mr.start(), mr.end());
+  } else {
+    _bt.BlockOffsetArray::alloc_block(mr.start(), mr.end());
+  }
+}
+
+HeapWord* ParGCAllocBufferWithBOT::allocate_slow(size_t word_sz) {
+  HeapWord* res = NULL;
+  if (_true_end > _hard_end) {
+    assert((HeapWord*)align_size_down(intptr_t(_hard_end),
+                                      ChunkSizeInBytes) == _hard_end,
+           "or else _true_end should be equal to _hard_end");
+    assert(_retained, "or else _true_end should be equal to _hard_end");
+    assert(_retained_filler.end() <= _top, "INVARIANT");
+    CollectedHeap::fill_with_object(_retained_filler);
+    if (_top < _hard_end) {
+      fill_region_with_block(MemRegion(_top, _hard_end), true);
+    }
+    HeapWord* next_hard_end = MIN2(_true_end, _hard_end + ChunkSizeInWords);
+    _retained_filler = MemRegion(_hard_end, FillerHeaderSize);
+    _bt.alloc_block(_retained_filler.start(), _retained_filler.word_size());
+    _top      = _retained_filler.end();
+    _hard_end = next_hard_end;
+    _end      = _hard_end - AlignmentReserve;
+    res       = ParGCAllocBuffer::allocate(word_sz);
+    if (res != NULL) {
+      _bt.alloc_block(res, word_sz);
+    }
+  }
+  return res;
+}
+
+void
+ParGCAllocBufferWithBOT::undo_allocation(HeapWord* obj, size_t word_sz) {
+  ParGCAllocBuffer::undo_allocation(obj, word_sz);
+  // This may back us up beyond the previous threshold, so reset.
+  _bt.set_region(MemRegion(_top, _hard_end));
+  _bt.initialize_threshold();
+}
+
+void ParGCAllocBufferWithBOT::retire(bool end_of_gc, bool retain) {
+  assert(!retain || end_of_gc, "Can only retain at GC end.");
+  if (_retained) {
+    // We're about to make the retained_filler into a block.
+    _bt.BlockOffsetArray::alloc_block(_retained_filler.start(),
+                                      _retained_filler.end());
+  }
+  // Reset _hard_end to _true_end (and update _end)
+  if (retain && _hard_end != NULL) {
+    assert(_hard_end <= _true_end, "Invariant.");
+    _hard_end = _true_end;
+    _end      = MAX2(_top, _hard_end - AlignmentReserve);
+    assert(_end <= _hard_end, "Invariant.");
+  }
+  _true_end = _hard_end;
+  HeapWord* pre_top = _top;
+
+  ParGCAllocBuffer::retire(end_of_gc, retain);
+  // Now any old _retained_filler is cut back to size, the free part is
+  // filled with a filler object, and top is past the header of that
+  // object.
+
+  if (retain && _top < _end) {
+    assert(end_of_gc && retain, "Or else retain should be false.");
+    // If the lab does not start on a card boundary, we don't want to
+    // allocate onto that card, since that might lead to concurrent
+    // allocation and card scanning, which we don't support.  So we fill
+    // the first card with a garbage object.
+    size_t first_card_index = _bsa->index_for(pre_top);
+    HeapWord* first_card_start = _bsa->address_for_index(first_card_index);
+    if (first_card_start < pre_top) {
+      HeapWord* second_card_start =
+        _bsa->inc_by_region_size(first_card_start);
+
+      // Ensure enough room to fill with the smallest block
+      second_card_start = MAX2(second_card_start, pre_top + AlignmentReserve);
+
+      // If the end is already in the first card, don't go beyond it!
+      // Or if the remainder is too small for a filler object, gobble it up.
+      if (_hard_end < second_card_start ||
+          pointer_delta(_hard_end, second_card_start) < AlignmentReserve) {
+        second_card_start = _hard_end;
+      }
+      if (pre_top < second_card_start) {
+        MemRegion first_card_suffix(pre_top, second_card_start);
+        fill_region_with_block(first_card_suffix, true);
+      }
+      pre_top = second_card_start;
+      _top = pre_top;
+      _end = MAX2(_top, _hard_end - AlignmentReserve);
+    }
+
+    // If the lab does not end on a card boundary, we don't want to
+    // allocate onto that card, since that might lead to concurrent
+    // allocation and card scanning, which we don't support.  So we fill
+    // the last card with a garbage object.
+    size_t last_card_index = _bsa->index_for(_hard_end);
+    HeapWord* last_card_start = _bsa->address_for_index(last_card_index);
+    if (last_card_start < _hard_end) {
+
+      // Ensure enough room to fill with the smallest block
+      last_card_start = MIN2(last_card_start, _hard_end - AlignmentReserve);
+
+      // If the top is already in the last card, don't go back beyond it!
+      // Or if the remainder is too small for a filler object, gobble it up.
+      if (_top > last_card_start ||
+          pointer_delta(last_card_start, _top) < AlignmentReserve) {
+        last_card_start = _top;
+      }
+      if (last_card_start < _hard_end) {
+        MemRegion last_card_prefix(last_card_start, _hard_end);
+        fill_region_with_block(last_card_prefix, false);
+      }
+      _hard_end = last_card_start;
+      _end      = MAX2(_top, _hard_end - AlignmentReserve);
+      _true_end = _hard_end;
+      assert(_end <= _hard_end, "Invariant.");
+    }
+
+    // At this point:
+    //   1) we had a filler object from the original top to hard_end.
+    //   2) We've filled in any partial cards at the front and back.
+    if (pre_top < _hard_end) {
+      // Now we can reset the _bt to do allocation in the given area.
+      MemRegion new_filler(pre_top, _hard_end);
+      fill_region_with_block(new_filler, false);
+      _top = pre_top + ParGCAllocBuffer::FillerHeaderSize;
+      // If there's no space left, don't retain.
+      if (_top >= _end) {
+        _retained = false;
+        invalidate();
+        return;
+      }
+      _retained_filler = MemRegion(pre_top, _top);
+      _bt.set_region(MemRegion(_top, _hard_end));
+      _bt.initialize_threshold();
+      assert(_bt.threshold() > _top, "initialize_threshold failed!");
+
+      // There may be other reasons for queries into the middle of the
+      // filler object.  When such queries are done in parallel with
+      // allocation, bad things can happen, if the query involves object
+      // iteration.  So we ensure that such queries do not involve object
+      // iteration, by putting another filler object on the boundaries of
+      // such queries.  One such is the object spanning a parallel card
+      // chunk boundary.
+
+      // "chunk_boundary" is the address of the first chunk boundary less
+      // than "hard_end".
+      HeapWord* chunk_boundary =
+        (HeapWord*)align_size_down(intptr_t(_hard_end-1), ChunkSizeInBytes);
+      assert(chunk_boundary < _hard_end, "Or else above did not work.");
+      assert(pointer_delta(_true_end, chunk_boundary) >= AlignmentReserve,
+             "Consequence of last card handling above.");
+
+      if (_top <= chunk_boundary) {
+        assert(_true_end == _hard_end, "Invariant.");
+        while (_top <= chunk_boundary) {
+          assert(pointer_delta(_hard_end, chunk_boundary) >= AlignmentReserve,
+                 "Consequence of last card handling above.");
+          _bt.BlockOffsetArray::alloc_block(chunk_boundary, _hard_end);
+          CollectedHeap::fill_with_object(chunk_boundary, _hard_end);
+          _hard_end = chunk_boundary;
+          chunk_boundary -= ChunkSizeInWords;
+        }
+        _end = _hard_end - AlignmentReserve;
+        assert(_top <= _end, "Invariant.");
+        // Now reset the initial filler chunk so it doesn't overlap with
+        // the one(s) inserted above.
+        MemRegion new_filler(pre_top, _hard_end);
+        fill_region_with_block(new_filler, false);
+      }
+    } else {
+      _retained = false;
+      invalidate();
+    }
+  } else {
+    assert(!end_of_gc ||
+           (!_retained && _true_end == _hard_end), "Checking.");
+  }
+  assert(_end <= _hard_end, "Invariant.");
+  assert(_top < _end || _top == _hard_end, "Invariant");
+}
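
adjust_desired_plab_sz() above turns last cycle's waste into a refill budget and a new buffer size: the wasted fraction picks how many refills per thread would have hit the waste target, and the surviving words are spread over those refills. The same arithmetic as a standalone sketch, with literals standing in for TargetSurvivorRatio, TargetPLABWastePct and ParallelGCThreads (the clipping and object-alignment steps are omitted):

    #include <algorithm>
    #include <cstdio>

    int main() {
      double allocated = 8192.0, wasted = 256.0, unused = 512.0;
      double target_survivor_ratio = 50.0;  // stand-in for TargetSurvivorRatio
      double target_plab_waste_pct = 10.0;  // stand-in for TargetPLABWastePct
      double gc_threads = 4.0;              // stand-in for ParallelGCThreads

      double wasted_frac    = unused / allocated;
      double target_refills = std::max(1.0,
          (wasted_frac * target_survivor_ratio) / target_plab_waste_pct);
      double used    = allocated - wasted - unused;
      double plab_sz = used / (target_refills * gc_threads);
      printf("next desired PLAB size: %.0f words\n", plab_sz);  // prints 1856
      return 0;
    }
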
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
+
+#include "memory/allocation.hpp"
+#include "memory/blockOffsetTable.hpp"
+#include "memory/threadLocalAllocBuffer.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+// Forward decl.
+
+class PLABStats;
+
+// A per-thread allocation buffer used during GC.
+class ParGCAllocBuffer: public CHeapObj<mtGC> {
+protected:
+  char head[32];
+  size_t _word_sz;          // in HeapWord units
+  HeapWord* _bottom;
+  HeapWord* _top;
+  HeapWord* _end;       // last allocatable address + 1
+  HeapWord* _hard_end;  // _end + AlignmentReserve
+  bool      _retained;  // whether we hold a _retained_filler
+  MemRegion _retained_filler;
+  // In support of ergonomic sizing of PLAB's
+  size_t    _allocated;     // in HeapWord units
+  size_t    _wasted;        // in HeapWord units
+  char tail[32];
+  static size_t FillerHeaderSize;
+  static size_t AlignmentReserve;
+
+public:
+  // Initializes the buffer to be empty, but with the given "word_sz".
+  // Must get initialized with "set_buf" for an allocation to succeed.
+  ParGCAllocBuffer(size_t word_sz);
+
+  static const size_t min_size() {
+    return ThreadLocalAllocBuffer::min_size();
+  }
+
+  static const size_t max_size() {
+    return ThreadLocalAllocBuffer::max_size();
+  }
+
+  // If an allocation of the given "word_sz" can be satisfied within the
+  // buffer, do the allocation, returning a pointer to the start of the
+  // allocated block.  If the allocation request cannot be satisfied,
+  // return NULL.
+  HeapWord* allocate(size_t word_sz) {
+    HeapWord* res = _top;
+    if (pointer_delta(_end, _top) >= word_sz) {
+      _top = _top + word_sz;
+      return res;
+    } else {
+      return NULL;
+    }
+  }
+
+  // Undo the last allocation in the buffer, which is required to be of the
+  // "obj" of the given "word_sz".
+  void undo_allocation(HeapWord* obj, size_t word_sz) {
+    assert(pointer_delta(_top, _bottom) >= word_sz, "Bad undo");
+    assert(pointer_delta(_top, obj)     == word_sz, "Bad undo");
+    _top = obj;
+  }
+
+  // The total (word) size of the buffer, including both allocated and
+  // unallocated space.
+  size_t word_sz() { return _word_sz; }
+
+  // Should only be done if we are about to reset with a new buffer of the
+  // given size.
+  void set_word_size(size_t new_word_sz) {
+    assert(new_word_sz > AlignmentReserve, "Too small");
+    _word_sz = new_word_sz;
+  }
+
+  // The number of words of unallocated space remaining in the buffer.
+  size_t words_remaining() {
+    assert(_end >= _top, "Negative buffer");
+    return pointer_delta(_end, _top, HeapWordSize);
+  }
+
+  bool contains(void* addr) {
+    return (void*)_bottom <= addr && addr < (void*)_hard_end;
+  }
+
+  // Sets the space of the buffer to be [buf, buf + word_sz()).
+  void set_buf(HeapWord* buf) {
+    _bottom   = buf;
+    _top      = _bottom;
+    _hard_end = _bottom + word_sz();
+    _end      = _hard_end - AlignmentReserve;
+    assert(_end >= _top, "Negative buffer");
+    // In support of ergonomic sizing
+    _allocated += word_sz();
+  }
+
+  // Flush the stats supporting ergonomic sizing of PLAB's
+  void flush_stats(PLABStats* stats);
+  void flush_stats_and_retire(PLABStats* stats, bool end_of_gc, bool retain) {
+    // We flush the stats first in order to get a reading of
+    // unused space in the last buffer.
+    if (ResizePLAB) {
+      flush_stats(stats);
+    }
+    // Retire the last allocation buffer.
+    retire(end_of_gc, retain);
+  }
+
+  // Force future allocations to fail and queries for contains()
+  // to return false
+  void invalidate() {
+    assert(!_retained, "Shouldn't retain an invalidated buffer.");
+    _end    = _hard_end;
+    _wasted += pointer_delta(_end, _top);  // unused  space
+    _top    = _end;      // force future allocations to fail
+    _bottom = _end;      // force future contains() queries to return false
+  }
+
+  // Fills in the unallocated portion of the buffer with a garbage object.
+  // If "end_of_gc" is TRUE, is after the last use in the GC.  IF "retain"
+  // is true, attempt to re-use the unused portion in the next GC.
+  void retire(bool end_of_gc, bool retain);
+
+  void print() PRODUCT_RETURN;
+};
+
+// PLAB stats book-keeping
+class PLABStats VALUE_OBJ_CLASS_SPEC {
+  size_t _allocated;      // total allocated
+  size_t _wasted;         // of which wasted (internal fragmentation)
+  size_t _unused;         // Unused in last buffer
+  size_t _used;           // derived = allocated - wasted - unused
+  size_t _desired_plab_sz;// output of filter (below), suitably trimmed and quantized
+  AdaptiveWeightedAverage
+         _filter;         // integrator with decay
+
+ public:
+  PLABStats(size_t desired_plab_sz_, unsigned wt) :
+    _allocated(0),
+    _wasted(0),
+    _unused(0),
+    _used(0),
+    _desired_plab_sz(desired_plab_sz_),
+    _filter(wt)
+  {
+    size_t min_sz = min_size();
+    size_t max_sz = max_size();
+    size_t aligned_min_sz = align_object_size(min_sz);
+    size_t aligned_max_sz = align_object_size(max_sz);
+    assert(min_sz <= aligned_min_sz && max_sz >= aligned_max_sz &&
+           min_sz <= max_sz,
+           "PLAB clipping computation in adjust_desired_plab_sz()"
+           " may be incorrect");
+  }
+
+  static const size_t min_size() {
+    return ParGCAllocBuffer::min_size();
+  }
+
+  static const size_t max_size() {
+    return ParGCAllocBuffer::max_size();
+  }
+
+  size_t desired_plab_sz() {
+    return _desired_plab_sz;
+  }
+
+  void adjust_desired_plab_sz(); // filter computation, latches output to
+                                 // _desired_plab_sz, clears sensor accumulators
+
+  void add_allocated(size_t v) {
+    Atomic::add_ptr(v, &_allocated);
+  }
+
+  void add_unused(size_t v) {
+    Atomic::add_ptr(v, &_unused);
+  }
+
+  void add_wasted(size_t v) {
+    Atomic::add_ptr(v, &_wasted);
+  }
+};
+
+class ParGCAllocBufferWithBOT: public ParGCAllocBuffer {
+  BlockOffsetArrayContigSpace _bt;
+  BlockOffsetSharedArray*     _bsa;
+  HeapWord*                   _true_end;  // end of the whole ParGCAllocBuffer
+
+  static const size_t ChunkSizeInWords;
+  static const size_t ChunkSizeInBytes;
+  HeapWord* allocate_slow(size_t word_sz);
+
+  void fill_region_with_block(MemRegion mr, bool contig);
+
+public:
+  ParGCAllocBufferWithBOT(size_t word_sz, BlockOffsetSharedArray* bsa);
+
+  HeapWord* allocate(size_t word_sz) {
+    HeapWord* res = ParGCAllocBuffer::allocate(word_sz);
+    if (res != NULL) {
+      _bt.alloc_block(res, word_sz);
+    } else {
+      res = allocate_slow(word_sz);
+    }
+    return res;
+  }
+
+  void undo_allocation(HeapWord* obj, size_t word_sz);
+
+  void set_buf(HeapWord* buf_start) {
+    ParGCAllocBuffer::set_buf(buf_start);
+    _true_end = _hard_end;
+    _bt.set_region(MemRegion(buf_start, word_sz()));
+    _bt.initialize_threshold();
+  }
+
+  void retire(bool end_of_gc, bool retain);
+
+  MemRegion range() {
+    return MemRegion(_top, _true_end);
+  }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
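
allocate() and undo_allocation() above are bump-pointer allocation with a last-allocation undo, which is what lets a GC worker hand the words back when it loses the race to install a forwarded copy. A self-contained sketch of that discipline, modeling HeapWord* as uintptr_t* (illustrative types, not HotSpot's):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    class BumpBufferSketch {
      uintptr_t* _top;
      uintptr_t* _end;
     public:
      BumpBufferSketch(uintptr_t* buf, size_t word_sz)
        : _top(buf), _end(buf + word_sz) {}
      // Fast path: advance _top if the request fits, else fail.
      uintptr_t* allocate(size_t word_sz) {
        if (static_cast<size_t>(_end - _top) >= word_sz) {
          uintptr_t* res = _top;
          _top += word_sz;
          return res;
        }
        return nullptr;
      }
      // Only the most recent allocation can be handed back.
      void undo_allocation(uintptr_t* obj, size_t word_sz) {
        assert(_top - word_sz == obj && "bad undo");
        _top = obj;
      }
    };
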
--- a/src/share/vm/gc_implementation/shared/spaceCounters.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/spaceCounters.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -39,7 +39,7 @@
     const char* cns = PerfDataManager::name_space(gc->name_space(), "space",
                                                   ordinal);
 
-    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1);
+    _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1, mtGC);
     strcpy(_name_space, cns);
 
     const char* cname = PerfDataManager::counter_name(_name_space, "name");
--- a/src/share/vm/gc_implementation/shared/spaceCounters.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/spaceCounters.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -35,7 +35,7 @@
 // A SpaceCounter is a holder class for performance counters
 // that track a space.
 
-class SpaceCounters: public CHeapObj {
+class SpaceCounters: public CHeapObj<mtGC> {
   friend class VMStructs;
 
  private:
@@ -55,7 +55,7 @@
                 MutableSpace* m, GenerationCounters* gc);
 
   ~SpaceCounters() {
-    if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space);
+    if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space, mtGC);
   }
 
   inline void update_capacity() {
--- a/src/share/vm/gc_implementation/shared/spaceDecorator.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_implementation/shared/spaceDecorator.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -70,7 +70,7 @@
 // These subclasses abstract the differences in the types of spaces used
 // by each heap.
 
-class SpaceMangler: public CHeapObj {
+class SpaceMangler: public CHeapObj<mtGC> {
   friend class VMStructs;
 
   // High water mark for allocations.  Typically, the space above
--- a/src/share/vm/gc_interface/collectedHeap.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -333,7 +333,7 @@
 
   // Set the length first for concurrent GC.
   ((arrayOop)start)->set_length((int)len);
-  post_allocation_setup_common(Universe::intArrayKlassObj(), start, words);
+  post_allocation_setup_common(Universe::intArrayKlassObj(), start);
   DEBUG_ONLY(zap_filler_array(start, words, zap);)
 }
 
@@ -346,8 +346,7 @@
     fill_with_array(start, words, zap);
   } else if (words > 0) {
     assert(words == min_fill_size(), "unaligned size");
-    post_allocation_setup_common(SystemDictionary::Object_klass(), start,
-                                 words);
+    post_allocation_setup_common(SystemDictionary::Object_klass(), start);
   }
 }
 
@@ -477,7 +476,7 @@
     assert(ScavengeRootsInCode > 0, "must be");
     obj = common_mem_allocate_init(size, CHECK_NULL);
   }
-  post_allocation_setup_common(klass, obj, size);
+  post_allocation_setup_common(klass, obj);
   assert(Universe::is_bootstrapping() ||
          !((oop)obj)->blueprint()->oop_is_array(), "must not be an array");
   NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value(obj, size));
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -74,7 +74,7 @@
 //     G1CollectedHeap
 //   ParallelScavengeHeap
 //
-class CollectedHeap : public CHeapObj {
+class CollectedHeap : public CHeapObj<mtInternal> {
   friend class VMStructs;
   friend class IsGCActiveMark; // Block structured external access to _is_gc_active
   friend class constantPoolCacheKlass; // allocate() method inserts is_conc_safe
@@ -149,18 +149,14 @@
   inline static HeapWord* common_permanent_mem_allocate_init(size_t size, TRAPS);
 
   // Helper functions for (VM) allocation.
-  inline static void post_allocation_setup_common(KlassHandle klass,
-                                                  HeapWord* obj, size_t size);
+  inline static void post_allocation_setup_common(KlassHandle klass, HeapWord* obj);
   inline static void post_allocation_setup_no_klass_install(KlassHandle klass,
-                                                            HeapWord* objPtr,
-                                                            size_t size);
+                                                            HeapWord* objPtr);
 
-  inline static void post_allocation_setup_obj(KlassHandle klass,
-                                               HeapWord* obj, size_t size);
+  inline static void post_allocation_setup_obj(KlassHandle klass, HeapWord* obj);
 
   inline static void post_allocation_setup_array(KlassHandle klass,
-                                                 HeapWord* obj, size_t size,
-                                                 int length);
+                                                 HeapWord* obj, int length);
 
   // Clears an allocated object.
   inline static void init_obj(HeapWord* obj, size_t size);
@@ -368,9 +364,7 @@
   inline static oop permanent_obj_allocate_no_klass_install(KlassHandle klass,
                                                             int size,
                                                             TRAPS);
-  inline static void post_allocation_install_obj_klass(KlassHandle klass,
-                                                       oop obj,
-                                                       int size);
+  inline static void post_allocation_install_obj_klass(KlassHandle klass, oop obj);
   inline static oop permanent_array_allocate(KlassHandle klass, int size, int length, TRAPS);
 
   // Raw memory allocation facilities
@@ -664,11 +658,8 @@
     }
   }
 
-  // Allocate GCHeapLog during VM startup
-  static void initialize_heap_log();
-
   // Heap verification
-  virtual void verify(bool allow_dirty, bool silent, VerifyOption option) = 0;
+  virtual void verify(bool silent, VerifyOption option) = 0;
 
   // Non product verification and debugging.
 #ifndef PRODUCT
--- a/src/share/vm/gc_interface/collectedHeap.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,15 +50,13 @@
 // Inline allocation implementations.
 
 void CollectedHeap::post_allocation_setup_common(KlassHandle klass,
-                                                 HeapWord* obj,
-                                                 size_t size) {
-  post_allocation_setup_no_klass_install(klass, obj, size);
-  post_allocation_install_obj_klass(klass, oop(obj), (int) size);
+                                                 HeapWord* obj) {
+  post_allocation_setup_no_klass_install(klass, obj);
+  post_allocation_install_obj_klass(klass, oop(obj));
 }
 
 void CollectedHeap::post_allocation_setup_no_klass_install(KlassHandle klass,
-                                                           HeapWord* objPtr,
-                                                           size_t size) {
+                                                           HeapWord* objPtr) {
   oop obj = (oop)objPtr;
 
   assert(obj != NULL, "NULL object pointer");
@@ -71,8 +69,7 @@
 }
 
 void CollectedHeap::post_allocation_install_obj_klass(KlassHandle klass,
-                                                   oop obj,
-                                                   int size) {
+                                                   oop obj) {
   // These asserts are kind of complicated because of klassKlass
   // and the beginning of the world.
   assert(klass() != NULL || !Universe::is_fully_initialized(), "NULL klass");
@@ -101,9 +98,8 @@
 }
 
 void CollectedHeap::post_allocation_setup_obj(KlassHandle klass,
-                                              HeapWord* obj,
-                                              size_t size) {
-  post_allocation_setup_common(klass, obj, size);
+                                              HeapWord* obj) {
+  post_allocation_setup_common(klass, obj);
   assert(Universe::is_bootstrapping() ||
          !((oop)obj)->blueprint()->oop_is_array(), "must not be an array");
   // notify jvmti and dtrace
@@ -112,14 +108,13 @@
 
 void CollectedHeap::post_allocation_setup_array(KlassHandle klass,
                                                 HeapWord* obj,
-                                                size_t size,
                                                 int length) {
   // Set array length before setting the _klass field
   // in post_allocation_setup_common() because the klass field
   // indicates that the object is parsable by concurrent GC.
   assert(length >= 0, "length should be non-negative");
   ((arrayOop)obj)->set_length(length);
-  post_allocation_setup_common(klass, obj, size);
+  post_allocation_setup_common(klass, obj);
   assert(((oop)obj)->blueprint()->oop_is_array(), "must be an array");
   // notify jvmti and dtrace (must be after length is set for dtrace)
   post_allocation_notify(klass, (oop)obj);
@@ -256,7 +251,7 @@
   assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed");
   assert(size >= 0, "int won't convert to size_t");
   HeapWord* obj = common_mem_allocate_init(size, CHECK_NULL);
-  post_allocation_setup_obj(klass, obj, size);
+  post_allocation_setup_obj(klass, obj);
   NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value(obj, size));
   return (oop)obj;
 }
@@ -269,7 +264,7 @@
   assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed");
   assert(size >= 0, "int won't convert to size_t");
   HeapWord* obj = common_mem_allocate_init(size, CHECK_NULL);
-  post_allocation_setup_array(klass, obj, size, length);
+  post_allocation_setup_array(klass, obj, length);
   NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value(obj, size));
   return (oop)obj;
 }
@@ -283,7 +278,7 @@
   assert(size >= 0, "int won't convert to size_t");
   HeapWord* obj = common_mem_allocate_noinit(size, CHECK_NULL);
   ((oop)obj)->set_klass_gap(0);
-  post_allocation_setup_array(klass, obj, size, length);
+  post_allocation_setup_array(klass, obj, length);
 #ifndef PRODUCT
   const size_t hs = oopDesc::header_size()+1;
   Universe::heap()->check_for_non_bad_heap_word_value(obj+hs, size-hs);
@@ -293,7 +288,7 @@
 
 oop CollectedHeap::permanent_obj_allocate(KlassHandle klass, int size, TRAPS) {
   oop obj = permanent_obj_allocate_no_klass_install(klass, size, CHECK_NULL);
-  post_allocation_install_obj_klass(klass, obj, size);
+  post_allocation_install_obj_klass(klass, obj);
   NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value((HeapWord*) obj,
                                                               size));
   return obj;
@@ -306,7 +301,7 @@
   assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed");
   assert(size >= 0, "int won't convert to size_t");
   HeapWord* obj = common_permanent_mem_allocate_init(size, CHECK_NULL);
-  post_allocation_setup_no_klass_install(klass, obj, size);
+  post_allocation_setup_no_klass_install(klass, obj);
 #ifndef PRODUCT
   const size_t hs = oopDesc::header_size();
   Universe::heap()->check_for_bad_heap_word_value(obj+hs, size-hs);
@@ -322,7 +317,7 @@
   assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed");
   assert(size >= 0, "int won't convert to size_t");
   HeapWord* obj = common_permanent_mem_allocate_init(size, CHECK_NULL);
-  post_allocation_setup_array(klass, obj, size, length);
+  post_allocation_setup_array(klass, obj, length);
   NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value(obj, size));
   return (oop)obj;
 }
--- a/src/share/vm/gc_interface/gcCause.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/gc_interface/gcCause.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -88,4 +88,36 @@
   static const char* to_string(GCCause::Cause cause);
 };
 
+// Helper class for logging that includes the GC cause
+// as a string.
+class GCCauseString : StackObj {
+ private:
+   static const int _length = 128;
+   char _buffer[_length];
+   int _position;
+
+ public:
+   GCCauseString(const char* prefix, GCCause::Cause cause) {
+     if (PrintGCCause) {
+      _position = jio_snprintf(_buffer, _length, "%s (%s)", prefix, GCCause::to_string(cause));
+     } else {
+      _position = jio_snprintf(_buffer, _length, "%s", prefix);
+     }
+     assert(_position >= 0 && _position <= _length,
+       err_msg("Need to increase the buffer size in GCCauseString? %d", _position));
+   }
+
+   GCCauseString& append(const char* str) {
+     int res = jio_snprintf(_buffer + _position, _length - _position, "%s", str);
+     _position += res;
+     assert(res >= 0 && _position <= _length,
+       err_msg("Need to increase the buffer size in GCCauseString? %d", res));
+     return *this;
+   }
+
+   operator const char*() {
+     return _buffer;
+   }
+};
+
 #endif // SHARE_VM_GC_INTERFACE_GCCAUSE_HPP
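
A note on the GCCauseString helper added above: it builds the message once in a
fixed-size stack buffer and converts implicitly to const char*, so call sites can pass
it straight to printf-style log routines and chain append() for suffixes. Below is a
minimal standalone sketch of the same pattern, using std::snprintf in place of
jio_snprintf and a plain bool in place of the PrintGCCause flag; CauseString and its
parameters are illustrative, not HotSpot API.

    #include <cassert>
    #include <cstdio>

    class CauseString {                                // fixed-size, stack-allocated builder
     private:
      static const int _length = 128;
      char _buffer[_length];
      int  _position;

     public:
      CauseString(const char* prefix, const char* cause, bool print_cause) {
        _position = print_cause
            ? std::snprintf(_buffer, _length, "%s (%s)", prefix, cause)
            : std::snprintf(_buffer, _length, "%s", prefix);
        assert(_position >= 0 && _position < _length);  // would-be length >= _length means truncation
      }

      CauseString& append(const char* str) {            // chainable, like GCCauseString::append
        int res = std::snprintf(_buffer + _position, _length - _position, "%s", str);
        assert(res >= 0 && _position + res < _length);
        _position += res;
        return *this;
      }

      operator const char*() const { return _buffer; }  // implicit conversion for log sinks
    };

    // usage: printf("[%s]\n", (const char*) CauseString("Full GC", "System.gc()", true));
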
--- a/src/share/vm/interpreter/abstractInterpreter.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/abstractInterpreter.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -99,7 +99,10 @@
     empty,                                                      // empty method (code: _return)
     accessor,                                                   // accessor method (code: _aload_0, _getfield, _(a|i)return)
    abstract,                                                   // abstract method (throws an AbstractMethodError)
-    method_handle,                                              // java.lang.invoke.MethodHandles::invoke
+    method_handle_invoke_FIRST,                                 // java.lang.invoke.MethodHandle::invokeExact, etc.
+    method_handle_invoke_LAST                                   = (method_handle_invoke_FIRST
+                                                                   + (vmIntrinsics::LAST_MH_SIG_POLY
+                                                                      - vmIntrinsics::FIRST_MH_SIG_POLY)),
     java_lang_math_sin,                                         // implementation of java.lang.Math.sin   (x)
     java_lang_math_cos,                                         // implementation of java.lang.Math.cos   (x)
     java_lang_math_tan,                                         // implementation of java.lang.Math.tan   (x)
@@ -107,11 +110,21 @@
     java_lang_math_sqrt,                                        // implementation of java.lang.Math.sqrt  (x)
     java_lang_math_log,                                         // implementation of java.lang.Math.log   (x)
     java_lang_math_log10,                                       // implementation of java.lang.Math.log10 (x)
+    java_lang_math_pow,                                         // implementation of java.lang.Math.pow   (x,y)
+    java_lang_math_exp,                                         // implementation of java.lang.Math.exp   (x)
     java_lang_ref_reference_get,                                // implementation of java.lang.ref.Reference.get()
     number_of_method_entries,
     invalid = -1
   };
 
+  // Conversion from the method_handle_invoke_* range of the above enum to vmIntrinsics::_invokeExact, etc.
+  static vmIntrinsics::ID method_handle_intrinsic(MethodKind kind) {
+    if (kind >= method_handle_invoke_FIRST && kind <= method_handle_invoke_LAST)
+      return (vmIntrinsics::ID)( vmIntrinsics::FIRST_MH_SIG_POLY + (kind - method_handle_invoke_FIRST) );
+    else
+      return vmIntrinsics::_none;
+  }
+
   enum SomeConstants {
     number_of_result_handlers = 10                              // number of result handlers for native calls
   };
@@ -146,6 +159,9 @@
   static address    entry_for_kind(MethodKind k)                { assert(0 <= k && k < number_of_method_entries, "illegal kind"); return _entry_table[k]; }
   static address    entry_for_method(methodHandle m)            { return entry_for_kind(method_kind(m)); }
 
+  // used for bootstrapping method handles:
+  static void       set_entry_for_kind(MethodKind k, address e);
+
   static void       print_method_kind(MethodKind kind)          PRODUCT_RETURN;
 
   static bool       can_be_compiled(methodHandle m);
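
The FIRST/LAST pair introduced above keeps a contiguous run of MethodKind values in
lock-step with vmIntrinsics' signature-polymorphic range, so converting between the two
is plain offset arithmetic (method_handle_intrinsic above goes one direction; the
method_kind logic in interpreter.cpp below goes the other). A self-contained sketch of
the parallel-range idiom, with made-up enum values:

    #include <cassert>

    // stand-in for the vmIntrinsics signature-polymorphic run
    enum Intrinsic {
      intrinsic_none    = 0,
      FIRST_MH_SIG_POLY = 10,
      LAST_MH_SIG_POLY  = 13
    };

    // stand-in for MethodKind: reserves a parallel run of the same width
    enum MethodKind {
      kind_abstract,
      mh_invoke_FIRST,
      mh_invoke_LAST = mh_invoke_FIRST + (LAST_MH_SIG_POLY - FIRST_MH_SIG_POLY),
      number_of_method_entries
    };

    static Intrinsic kind_to_intrinsic(MethodKind k) {
      if (k >= mh_invoke_FIRST && k <= mh_invoke_LAST)
        return (Intrinsic)(FIRST_MH_SIG_POLY + ((int)k - mh_invoke_FIRST));
      return intrinsic_none;              // out-of-range kinds are not MH invokers
    }

    static MethodKind intrinsic_to_kind(Intrinsic id) {
      assert(FIRST_MH_SIG_POLY <= id && id <= LAST_MH_SIG_POLY);
      return (MethodKind)(mh_invoke_FIRST + ((int)id - FIRST_MH_SIG_POLY));
    }
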
--- a/src/share/vm/interpreter/bytecode.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/bytecode.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -120,19 +120,22 @@
 
 void Bytecode_invoke::verify() const {
   assert(is_valid(), "check invoke");
-  assert(method()->constants()->cache() != NULL, "do not call this from verifier or rewriter");
+  assert(cpcache() != NULL, "do not call this from verifier or rewriter");
+}
+
+
+Symbol* Bytecode_member_ref::klass() const {
+  return constants()->klass_ref_at_noresolve(index());
+}
+
+
+Symbol* Bytecode_member_ref::name() const {
+  return constants()->name_ref_at(index());
 }
 
 
 Symbol* Bytecode_member_ref::signature() const {
-  constantPoolOop constants = method()->constants();
-  return constants->signature_ref_at(index());
-}
-
-
-Symbol* Bytecode_member_ref::name() const {
-  constantPoolOop constants = method()->constants();
-  return constants->name_ref_at(index());
+  return constants()->signature_ref_at(index());
 }
 
 
@@ -146,18 +149,19 @@
 methodHandle Bytecode_invoke::static_target(TRAPS) {
   methodHandle m;
   KlassHandle resolved_klass;
-  constantPoolHandle constants(THREAD, _method->constants());
+  constantPoolHandle constants(THREAD, this->constants());
 
-  if (java_code() == Bytecodes::_invokedynamic) {
-    LinkResolver::resolve_dynamic_method(m, resolved_klass, constants, index(), CHECK_(methodHandle()));
-  } else if (java_code() != Bytecodes::_invokeinterface) {
-    LinkResolver::resolve_method(m, resolved_klass, constants, index(), CHECK_(methodHandle()));
-  } else {
-    LinkResolver::resolve_interface_method(m, resolved_klass, constants, index(), CHECK_(methodHandle()));
-  }
+  Bytecodes::Code bc = invoke_code();
+  LinkResolver::resolve_method_statically(m, resolved_klass, bc, constants, index(), CHECK_(methodHandle()));
   return m;
 }
 
+Handle Bytecode_invoke::appendix(TRAPS) {
+  ConstantPoolCacheEntry* cpce = cpcache_entry();
+  if (cpce->has_appendix())
+    return Handle(THREAD, cpce->f1_appendix());
+  return Handle();  // usual case
+}
 
 int Bytecode_member_ref::index() const {
   // Note:  Rewriter::rewrite changes the Java_u2 of an invokedynamic to a native_u4,
@@ -170,12 +174,16 @@
 }
 
 int Bytecode_member_ref::pool_index() const {
+  return cpcache_entry()->constant_pool_index();
+}
+
+ConstantPoolCacheEntry* Bytecode_member_ref::cpcache_entry() const {
   int index = this->index();
   DEBUG_ONLY({
       if (!has_index_u4(code()))
-        index -= constantPoolOopDesc::CPCACHE_INDEX_TAG;
+        index = constantPoolOopDesc::get_cpcache_index(index);
     });
-  return _method->constants()->cache()->entry_at(index)->constant_pool_index();
+  return cpcache()->entry_at(index);
 }
 
 // Implementation of Bytecode_field
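
Bytecode_invoke::appendix() above returns an empty Handle when the cache entry carries
no appendix, so callers test Handle::is_null() instead of consulting a separate flag. A
tiny sketch of that empty-handle-as-absent convention; this Handle is a simplified
stand-in, not HotSpot's class:

    // simplified stand-in for HotSpot's Handle
    struct Handle {
      void* _obj;
      Handle() : _obj(0) {}                  // empty handle: "no appendix" (the usual case)
      explicit Handle(void* o) : _obj(o) {}
      bool is_null() const { return _obj == 0; }
    };

    static Handle appendix_sketch(bool has_appendix, void* cached_oop) {
      return has_appendix ? Handle(cached_oop)   // wrap the cached appendix oop
                          : Handle();            // empty handle signals absence
    }

    // caller side: if (!appendix.is_null()) push it as the trailing argument
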
--- a/src/share/vm/interpreter/bytecode.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/bytecode.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -80,6 +80,7 @@
 
   Bytecodes::Code code() const                   { return _code; }
   Bytecodes::Code java_code() const              { return Bytecodes::java_code(code()); }
+  Bytecodes::Code invoke_code() const            { return (code() == Bytecodes::_invokehandle) ? code() : java_code(); }
 
   // Static functions for parsing bytecodes in place.
   int get_index_u1(Bytecodes::Code bc) const {
@@ -195,10 +196,14 @@
   Bytecode_member_ref(methodHandle method, int bci)  : Bytecode(method(), method()->bcp_from(bci)), _method(method) {}
 
   methodHandle method() const                    { return _method; }
+  constantPoolOop constants() const              { return _method->constants(); }
+  constantPoolCacheOop cpcache() const           { return _method->constants()->cache(); }
+  ConstantPoolCacheEntry* cpcache_entry() const;
 
  public:
   int          index() const;                    // cache index (loaded from instruction)
   int          pool_index() const;               // constant pool index
+  Symbol*      klass() const;                    // returns the klass name of the method or field reference
   Symbol*      name() const;                     // returns the name of the method or field
   Symbol*      signature() const;                // returns the signature of the method or field
 
@@ -218,13 +223,15 @@
 
   // Attributes
   methodHandle static_target(TRAPS);             // "specified" method   (from constant pool)
+  Handle       appendix(TRAPS);                  // if CPCE::has_appendix (from constant pool)
 
   // Testers
-  bool is_invokeinterface() const                { return java_code() == Bytecodes::_invokeinterface; }
-  bool is_invokevirtual() const                  { return java_code() == Bytecodes::_invokevirtual; }
-  bool is_invokestatic() const                   { return java_code() == Bytecodes::_invokestatic; }
-  bool is_invokespecial() const                  { return java_code() == Bytecodes::_invokespecial; }
-  bool is_invokedynamic() const                  { return java_code() == Bytecodes::_invokedynamic; }
+  bool is_invokeinterface() const                { return invoke_code() == Bytecodes::_invokeinterface; }
+  bool is_invokevirtual() const                  { return invoke_code() == Bytecodes::_invokevirtual; }
+  bool is_invokestatic() const                   { return invoke_code() == Bytecodes::_invokestatic; }
+  bool is_invokespecial() const                  { return invoke_code() == Bytecodes::_invokespecial; }
+  bool is_invokedynamic() const                  { return invoke_code() == Bytecodes::_invokedynamic; }
+  bool is_invokehandle() const                   { return invoke_code() == Bytecodes::_invokehandle; }
 
   bool has_receiver() const                      { return !is_invokestatic() && !is_invokedynamic(); }
 
@@ -232,15 +239,12 @@
                                                           is_invokevirtual()   ||
                                                           is_invokestatic()    ||
                                                           is_invokespecial()   ||
-                                                          is_invokedynamic(); }
+                                                          is_invokedynamic()   ||
+                                                          is_invokehandle(); }
 
-  bool is_method_handle_invoke() const {
-    return (is_invokedynamic() ||
-            (is_invokevirtual() &&
-             method()->constants()->klass_ref_at_noresolve(index()) == vmSymbols::java_lang_invoke_MethodHandle() &&
-             methodOopDesc::is_method_handle_invoke_name(name())));
-  }
+  bool has_appendix()                            { return cpcache_entry()->has_appendix(); }
 
+ private:
  // Helper to skip verification.  Use is_valid() to check whether the result is really an invoke.
   inline friend Bytecode_invoke Bytecode_invoke_check(methodHandle method, int bci);
 };
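
invoke_code() above exists because _invokehandle is a VM-internal rewrite rather than a
Java bytecode: Bytecodes::java_code() folds it back to _invokevirtual (see its def() in
bytecodes.cpp below), which would leave is_invokehandle() unreachable if the testers
kept routing through java_code(). A reduced sketch of the distinction, using an
abbreviated stand-in enum:

    // abbreviated stand-in for the Bytecodes enum, not the real numbering
    enum Code { code_invokevirtual, code_invokedynamic, code_invokehandle };

    // java_code() folds the VM-private rewrite back to the bytecode javac emitted
    static Code java_code(Code c) {
      return (c == code_invokehandle) ? code_invokevirtual : c;
    }

    // invoke_code() keeps _invokehandle visible so call-site logic can tell a
    // rewritten MH call apart from a plain invokevirtual
    static Code invoke_code(Code c) {
      return (c == code_invokehandle) ? c : java_code(c);
    }
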
--- a/src/share/vm/interpreter/bytecodeInterpreter.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/bytecodeInterpreter.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1774,7 +1774,7 @@
 
           oop obj;
           if ((Bytecodes::Code)opcode == Bytecodes::_getstatic) {
-            obj = (oop) cache->f1();
+            obj = (oop) cache->f1_as_instance();
             MORE_STACK(1);  // Assume single slot push
           } else {
             obj = (oop) STACK_OBJECT(-1);
@@ -1785,7 +1785,7 @@
           // Now store the result on the stack
           //
           TosState tos_type = cache->flag_state();
-          int field_offset = cache->f2();
+          int field_offset = cache->f2_as_index();
           if (cache->is_volatile()) {
             if (tos_type == atos) {
               VERIFY_OOP(obj->obj_field_acquire(field_offset));
@@ -1885,7 +1885,7 @@
             --count;
           }
           if ((Bytecodes::Code)opcode == Bytecodes::_putstatic) {
-            obj = (oop) cache->f1();
+            obj = (oop) cache->f1_as_instance();
           } else {
             --count;
             obj = (oop) STACK_OBJECT(count);
@@ -1895,7 +1895,7 @@
           //
           // Now store the result
           //
-          int field_offset = cache->f2();
+          int field_offset = cache->f2_as_index();
           if (cache->is_volatile()) {
             if (tos_type == itos) {
               obj->release_int_field_put(field_offset, STACK_INT(-1));
@@ -2177,13 +2177,15 @@
         // This kind of CP cache entry does not need to match the flags byte, because
         // there is a 1-1 relation between bytecode type and CP entry type.
         ConstantPoolCacheEntry* cache = cp->entry_at(index);
-        if (cache->is_f1_null()) {
+        oop result = cache->f1_as_instance();
+        if (result == NULL) {
           CALL_VM(InterpreterRuntime::resolve_ldc(THREAD, (Bytecodes::Code) opcode),
                   handle_exception);
+          result = cache->f1_as_instance();
         }
 
-        VERIFY_OOP(cache->f1());
-        SET_STACK_OBJECT(cache->f1(), 0);
+        VERIFY_OOP(result);
+        SET_STACK_OBJECT(result, 0);
         UPDATE_PC_AND_TOS_AND_CONTINUE(incr, 1);
       }
 
@@ -2204,13 +2206,15 @@
         // there is a 1-1 relation between bytecode type and CP entry type.
         assert(constantPoolCacheOopDesc::is_secondary_index(index), "incorrect format");
         ConstantPoolCacheEntry* cache = cp->secondary_entry_at(index);
-        if (cache->is_f1_null()) {
+        oop result = cache->f1_as_instance();
+        if (result == NULL) {
           CALL_VM(InterpreterRuntime::resolve_invokedynamic(THREAD),
                   handle_exception);
+          result = cache->f1_as_instance();
         }
 
-        VERIFY_OOP(cache->f1());
-        oop method_handle = java_lang_invoke_CallSite::target(cache->f1());
+        VERIFY_OOP(result);
+        oop method_handle = java_lang_invoke_CallSite::target(result);
         CHECK_NULL(method_handle);
 
         istate->set_msg(call_method_handle);
@@ -2239,11 +2243,11 @@
         // java.lang.Object.  See cpCacheOop.cpp for details.
         // This code isn't produced by javac, but could be produced by
         // another compliant java compiler.
-        if (cache->is_methodInterface()) {
+        if (cache->is_forced_virtual()) {
           methodOop callee;
           CHECK_NULL(STACK_OBJECT(-(cache->parameter_size())));
           if (cache->is_vfinal()) {
-            callee = (methodOop) cache->f2();
+            callee = cache->f2_as_vfinal_method();
           } else {
             // get receiver
             int parms = cache->parameter_size();
@@ -2251,7 +2255,7 @@
             VERIFY_OOP(STACK_OBJECT(-parms));
             instanceKlass* rcvrKlass = (instanceKlass*)
                                  STACK_OBJECT(-parms)->klass()->klass_part();
-            callee = (methodOop) rcvrKlass->start_of_vtable()[ cache->f2()];
+            callee = (methodOop) rcvrKlass->start_of_vtable()[ cache->f2_as_index()];
           }
           istate->set_callee(callee);
           istate->set_callee_entry_point(callee->from_interpreted_entry());
@@ -2266,7 +2270,7 @@
 
         // this could definitely be cleaned up QQQ
         methodOop callee;
-        klassOop iclass = (klassOop)cache->f1();
+        klassOop iclass = cache->f1_as_klass();
         // instanceKlass* interface = (instanceKlass*) iclass->klass_part();
         // get receiver
         int parms = cache->parameter_size();
@@ -2284,7 +2288,7 @@
         if (i == int2->itable_length()) {
           VM_JAVA_ERROR(vmSymbols::java_lang_IncompatibleClassChangeError(), "");
         }
-        int mindex = cache->f2();
+        int mindex = cache->f2_as_index();
         itableMethodEntry* im = ki->first_method_entry(rcvr->klass());
         callee = im[mindex].method();
         if (callee == NULL) {
@@ -2322,12 +2326,12 @@
           methodOop callee;
           if ((Bytecodes::Code)opcode == Bytecodes::_invokevirtual) {
             CHECK_NULL(STACK_OBJECT(-(cache->parameter_size())));
-            if (cache->is_vfinal()) callee = (methodOop) cache->f2();
+            if (cache->is_vfinal()) callee = cache->f2_as_vfinal_method();
             else {
               // get receiver
               int parms = cache->parameter_size();
               // this works but needs a resourcemark and seems to create a vtable on every call:
-              // methodOop callee = rcvr->klass()->klass_part()->vtable()->method_at(cache->f2());
+              // methodOop callee = rcvr->klass()->klass_part()->vtable()->method_at(cache->f2_as_index());
               //
               // this fails with an assert
               // instanceKlass* rcvrKlass = instanceKlass::cast(STACK_OBJECT(-parms)->klass());
@@ -2350,13 +2354,13 @@
                   However it seems to have a vtable in the right location. Huh?
 
               */
-              callee = (methodOop) rcvrKlass->start_of_vtable()[ cache->f2()];
+              callee = (methodOop) rcvrKlass->start_of_vtable()[ cache->f2_as_index()];
             }
           } else {
             if ((Bytecodes::Code)opcode == Bytecodes::_invokespecial) {
               CHECK_NULL(STACK_OBJECT(-(cache->parameter_size())));
             }
-            callee = (methodOop) cache->f1();
+            callee = cache->f1_as_method();
           }
 
           istate->set_callee(callee);
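
The mechanical change running through this file is that raw f1()/f2() reads become
typed accessors (f1_as_instance, f1_as_klass, f2_as_index, f2_as_vfinal_method, ...).
f1/f2 are overloaded slots whose meaning depends on how the entry was resolved, so each
call site now states, and can assert, the interpretation it expects. A simplified
sketch of the idea; this struct is an illustration, not ConstantPoolCacheEntry's real
layout:

    #include <cassert>
    #include <stdint.h>

    struct CacheEntrySketch {
      void*    _f1;         // instance, klass, or method, depending on entry kind
      intptr_t _f2;         // vtable/itable index, field offset, or vfinal method
      bool     _is_vfinal;

      void* f1_as_instance() const { return _f1; }   // resolved constant or mirror
      void* f1_as_klass()    const { return _f1; }   // e.g. the interface klass

      int f2_as_index() const {                      // index/offset interpretation
        assert(!_is_vfinal);
        return (int)_f2;
      }
      void* f2_as_vfinal_method() const {            // direct-method interpretation
        assert(_is_vfinal);
        return (void*)_f2;
      }
    };
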
--- a/src/share/vm/interpreter/bytecodes.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/bytecodes.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -534,6 +534,8 @@
 
   def(_return_register_finalizer , "return_register_finalizer" , "b"    , NULL    , T_VOID   ,  0, true, _return);
 
+  def(_invokehandle        , "invokehandle"        , "bJJ"  , NULL    , T_ILLEGAL, -1, true, _invokevirtual   );
+
   def(_fast_aldc           , "fast_aldc"           , "bj"   , NULL    , T_OBJECT,   1, true,  _ldc   );
   def(_fast_aldc_w         , "fast_aldc_w"         , "bJJ"  , NULL    , T_OBJECT,   1, true,  _ldc_w );
 
--- a/src/share/vm/interpreter/bytecodes.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/bytecodes.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -282,6 +282,9 @@
 
     _return_register_finalizer    ,
 
+    // special handling of signature-polymorphic methods:
+    _invokehandle         ,
+
     _shouldnotreachhere,      // For debugging
 
     // Platform specific JVM bytecodes
@@ -356,8 +359,8 @@
 
  public:
   // Conversion
-  static void        check          (Code code)    { assert(is_defined(code), "illegal code"); }
-  static void        wide_check     (Code code)    { assert(wide_is_defined(code), "illegal code"); }
+  static void        check          (Code code)    { assert(is_defined(code),      err_msg("illegal code: %d", (int)code)); }
+  static void        wide_check     (Code code)    { assert(wide_is_defined(code), err_msg("illegal code: %d", (int)code)); }
   static Code        cast           (int  code)    { return (Code)code; }
 
 
@@ -421,6 +424,8 @@
                                                            || code == _fconst_0 || code == _dconst_0); }
   static bool        is_invoke      (Code code)    { return (_invokevirtual <= code && code <= _invokedynamic); }
 
+  static bool        has_optional_appendix(Code code) { return code == _invokedynamic || code == _invokehandle; }
+
   static int         compute_flags  (const char* format, int more_flags = 0);  // compute the flags
   static int         flags          (int code, bool is_wide) {
     assert(code == (u_char)code, "must be a byte");
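
Back in bytecodes.cpp above, the new def() row registers _invokehandle the same
table-driven way as the other rewritten bytecodes: mnemonic, operand layout ("bJJ",
like _fast_aldc_w), result type, stack effect (-1, which appears to mark an effect that
depends on the call signature), trap flag, and the Java-visible bytecode it stands for
(_invokevirtual). A reduced sketch of such a registry row; the field meanings are
inferred from the surrounding def() calls, not HotSpot's full signature:

    struct BytecodeDef {
      const char* name;        // mnemonic, e.g. "invokehandle"
      const char* format;      // operand layout string, e.g. "bJJ"
      int         depth;       // net stack effect; -1 marks a call-dependent effect
      bool        can_trap;    // may raise an exception
      int         java_code;   // Java-visible bytecode this rewrite stands for
    };

    static BytecodeDef table[256];

    static void def(int code, const char* name, const char* format,
                    int depth, bool can_trap, int java_code) {
      BytecodeDef d = { name, format, depth, can_trap, java_code };
      table[code] = d;
    }
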
--- a/src/share/vm/interpreter/interpreter.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/interpreter.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -37,6 +37,7 @@
 #include "oops/oop.inline.hpp"
 #include "prims/forte.hpp"
 #include "prims/jvmtiExport.hpp"
+#include "prims/methodHandles.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
@@ -180,14 +181,21 @@
   // Abstract method?
   if (m->is_abstract()) return abstract;
 
-  // Invoker for method handles?
-  if (m->is_method_handle_invoke())  return method_handle;
+  // Method handle primitive?
+  if (m->is_method_handle_intrinsic()) {
+    vmIntrinsics::ID id = m->intrinsic_id();
+    assert(MethodHandles::is_signature_polymorphic(id), "must match an intrinsic");
+    MethodKind kind = (MethodKind)( method_handle_invoke_FIRST +
+                                    ((int)id - vmIntrinsics::FIRST_MH_SIG_POLY) );
+    assert(kind <= method_handle_invoke_LAST, "parallel enum ranges");
+    return kind;
+  }
 
   // Native method?
   // Note: This test must come _before_ the test for intrinsic
   //       methods. See also comments below.
   if (m->is_native()) {
-    assert(!m->is_method_handle_invoke(), "overlapping bits here, watch out");
+    assert(!m->is_method_handle_intrinsic(), "overlapping bits here, watch out");
     return m->is_synchronized() ? native_synchronized : native;
   }
 
@@ -221,6 +229,8 @@
     case vmIntrinsics::_dsqrt : return java_lang_math_sqrt ;
     case vmIntrinsics::_dlog  : return java_lang_math_log  ;
     case vmIntrinsics::_dlog10: return java_lang_math_log10;
+    case vmIntrinsics::_dpow  : return java_lang_math_pow  ;
+    case vmIntrinsics::_dexp  : return java_lang_math_exp  ;
 
     case vmIntrinsics::_Reference_get:
                                 return java_lang_ref_reference_get;
@@ -237,6 +247,14 @@
 }
 
 
+void AbstractInterpreter::set_entry_for_kind(AbstractInterpreter::MethodKind kind, address entry) {
+  assert(kind >= method_handle_invoke_FIRST &&
+         kind <= method_handle_invoke_LAST, "late initialization only for MH entry points");
+  assert(_entry_table[kind] == _entry_table[abstract], "previous value must be AME entry");
+  _entry_table[kind] = entry;
+}
+
+
 // Return true if the interpreter can prove that the given bytecode has
 // not yet been executed (in Java semantics, not in actual operation).
 bool AbstractInterpreter::is_not_reached(methodHandle method, int bci) {
@@ -268,7 +286,6 @@
     case empty                  : tty->print("empty"                  ); break;
     case accessor               : tty->print("accessor"               ); break;
     case abstract               : tty->print("abstract"               ); break;
-    case method_handle          : tty->print("method_handle"          ); break;
     case java_lang_math_sin     : tty->print("java_lang_math_sin"     ); break;
     case java_lang_math_cos     : tty->print("java_lang_math_cos"     ); break;
     case java_lang_math_tan     : tty->print("java_lang_math_tan"     ); break;
@@ -276,7 +293,16 @@
     case java_lang_math_sqrt    : tty->print("java_lang_math_sqrt"    ); break;
     case java_lang_math_log     : tty->print("java_lang_math_log"     ); break;
     case java_lang_math_log10   : tty->print("java_lang_math_log10"   ); break;
-    default                     : ShouldNotReachHere();
+    default:
+      if (kind >= method_handle_invoke_FIRST &&
+          kind <= method_handle_invoke_LAST) {
+        const char* kind_name = vmIntrinsics::name_at(method_handle_intrinsic(kind));
+        if (kind_name[0] == '_')  kind_name = &kind_name[1];  // '_invokeExact' => 'invokeExact'
+        tty->print("method_handle_%s", kind_name);
+        break;
+      }
+      ShouldNotReachHere();
+      break;
   }
 }
 #endif // PRODUCT
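
set_entry_for_kind() above patches the MH entry points late: the table slots start out
aliased to the abstract-method entry and are filled in only once method handles finish
booting, and the assert rejects any slot that no longer holds that default. A
standalone sketch of the late-initialization guard; the table size and entry bodies are
placeholders:

    #include <cassert>

    typedef void (*entry_t)();

    static void abstract_entry() { /* would raise AbstractMethodError in the VM */ }

    static const int table_size = 8;      // stands in for number_of_method_entries
    static entry_t entry_table[table_size];

    static void init_table() {
      for (int i = 0; i < table_size; i++)
        entry_table[i] = abstract_entry;  // safe default until the real stub exists
    }

    // only legal while the slot still holds the default; this catches
    // double-patching and out-of-order bootstrapping
    static void set_entry_for_kind(int kind, entry_t e) {
      assert(entry_table[kind] == abstract_entry);
      entry_table[kind] = e;
    }
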
--- a/src/share/vm/interpreter/interpreterRuntime.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/interpreterRuntime.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -145,7 +145,7 @@
     // The bytecode wrappers aren't GC-safe so construct a new one
     Bytecode_loadconstant ldc2(m, bci(thread));
     ConstantPoolCacheEntry* cpce = m->constants()->cache()->entry_at(ldc2.cache_index());
-    assert(result == cpce->f1(), "expected result for assembly code");
+    assert(result == cpce->f1_as_instance(), "expected result for assembly code");
   }
 #endif
 }
@@ -375,7 +375,6 @@
   Handle             h_exception(thread, exception);
   methodHandle       h_method   (thread, method(thread));
   constantPoolHandle h_constants(thread, h_method->constants());
-  typeArrayHandle    h_extable  (thread, h_method->exception_table());
   bool               should_repeat;
   int                handler_bci;
   int                current_bci = bci(thread);
@@ -547,23 +546,6 @@
     }
   }
 
-  if (is_put && !is_static && klass->is_subclass_of(SystemDictionary::CallSite_klass()) && (info.name() == vmSymbols::target_name())) {
-    const jint direction = frame::interpreter_frame_expression_stack_direction();
-    Handle call_site    (THREAD, *((oop*) thread->last_frame().interpreter_frame_tos_at(-1 * direction)));
-    Handle method_handle(THREAD, *((oop*) thread->last_frame().interpreter_frame_tos_at( 0 * direction)));
-    assert(call_site    ->is_a(SystemDictionary::CallSite_klass()),     "must be");
-    assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "must be");
-
-    {
-      // Walk all nmethods depending on this call site.
-      MutexLocker mu(Compile_lock, thread);
-      Universe::flush_dependents_on(call_site, method_handle);
-    }
-
-    // Don't allow fast path for setting CallSite.target and sub-classes.
-    put_code = (Bytecodes::Code) 0;
-  }
-
   cache_entry(thread)->set_field(
     get_code,
     put_code,
@@ -674,7 +656,7 @@
   JvmtiExport::post_raw_breakpoint(thread, method, bcp);
 IRT_END
 
-IRT_ENTRY(void, InterpreterRuntime::resolve_invoke(JavaThread* thread, Bytecodes::Code bytecode))
+IRT_ENTRY(void, InterpreterRuntime::resolve_invoke(JavaThread* thread, Bytecodes::Code bytecode)) {
   // extract receiver from the outgoing argument list if necessary
   Handle receiver(thread, NULL);
   if (bytecode == Bytecodes::_invokevirtual || bytecode == Bytecodes::_invokeinterface) {
@@ -742,86 +724,54 @@
       info.resolved_method(),
       info.vtable_index());
   }
+}
+IRT_END
+
+
+// First time execution:  Resolve symbols, create a permanent MethodType object.
+IRT_ENTRY(void, InterpreterRuntime::resolve_invokehandle(JavaThread* thread)) {
+  assert(EnableInvokeDynamic, "");
+  const Bytecodes::Code bytecode = Bytecodes::_invokehandle;
+
+  // resolve method
+  CallInfo info;
+  constantPoolHandle pool(thread, method(thread)->constants());
+
+  {
+    JvmtiHideSingleStepping jhss(thread);
+    LinkResolver::resolve_invoke(info, Handle(), pool,
+                                 get_index_u2_cpcache(thread, bytecode), bytecode, CHECK);
+  } // end JvmtiHideSingleStepping
+
+  cache_entry(thread)->set_method_handle(
+      info.resolved_method(),
+      info.resolved_appendix());
+}
 IRT_END
 
 
 // First time execution:  Resolve symbols, create a permanent CallSite object.
 IRT_ENTRY(void, InterpreterRuntime::resolve_invokedynamic(JavaThread* thread)) {
-  ResourceMark rm(thread);
-
   assert(EnableInvokeDynamic, "");
-
   const Bytecodes::Code bytecode = Bytecodes::_invokedynamic;
 
-  methodHandle caller_method(thread, method(thread));
-
-  constantPoolHandle pool(thread, caller_method->constants());
-  pool->set_invokedynamic();    // mark header to flag active call sites
+  // TODO: consider passing BCI to Java.
+  //  int caller_bci = method(thread)->bci_from(bcp(thread));
 
-  int caller_bci = 0;
-  int site_index = 0;
-  { address caller_bcp = bcp(thread);
-    caller_bci = caller_method->bci_from(caller_bcp);
-    site_index = Bytes::get_native_u4(caller_bcp+1);
-  }
-  assert(site_index == InterpreterRuntime::bytecode(thread).get_index_u4(bytecode), "");
-  assert(constantPoolCacheOopDesc::is_secondary_index(site_index), "proper format");
-  // there is a second CPC entries that is of interest; it caches signature info:
-  int main_index = pool->cache()->secondary_entry_at(site_index)->main_entry_index();
-  int pool_index = pool->cache()->entry_at(main_index)->constant_pool_index();
-
-  // first resolve the signature to a MH.invoke methodOop
-  if (!pool->cache()->entry_at(main_index)->is_resolved(bytecode)) {
-    JvmtiHideSingleStepping jhss(thread);
-    CallInfo callinfo;
-    LinkResolver::resolve_invoke(callinfo, Handle(), pool,
-                                 site_index, bytecode, CHECK);
-    // The main entry corresponds to a JVM_CONSTANT_InvokeDynamic, and serves
-    // as a common reference point for all invokedynamic call sites with
-    // that exact call descriptor.  We will link it in the CP cache exactly
-    // as if it were an invokevirtual of MethodHandle.invoke.
-    pool->cache()->entry_at(main_index)->set_method(
-      bytecode,
-      callinfo.resolved_method(),
-      callinfo.vtable_index());
-  }
+  // resolve method
+  CallInfo info;
+  constantPoolHandle pool(thread, method(thread)->constants());
+  int index = get_index_u4(thread, bytecode);
 
-  // The method (f2 entry) of the main entry is the MH.invoke for the
-  // invokedynamic target call signature.
-  oop f1_value = pool->cache()->entry_at(main_index)->f1();
-  methodHandle signature_invoker(THREAD, (methodOop) f1_value);
-  assert(signature_invoker.not_null() && signature_invoker->is_method() && signature_invoker->is_method_handle_invoke(),
-         "correct result from LinkResolver::resolve_invokedynamic");
-
-  Handle info;  // optional argument(s) in JVM_CONSTANT_InvokeDynamic
-  Handle bootm = SystemDictionary::find_bootstrap_method(caller_method, caller_bci,
-                                                         main_index, info, CHECK);
-  if (!java_lang_invoke_MethodHandle::is_instance(bootm())) {
-    THROW_MSG(vmSymbols::java_lang_IllegalStateException(),
-              "no bootstrap method found for invokedynamic");
-  }
+  {
+    JvmtiHideSingleStepping jhss(thread);
+    LinkResolver::resolve_invoke(info, Handle(), pool,
+                                 index, bytecode, CHECK);
+  } // end JvmtiHideSingleStepping
 
-  // Short circuit if CallSite has been bound already:
-  if (!pool->cache()->secondary_entry_at(site_index)->is_f1_null())
-    return;
-
-  Symbol*  call_site_name = pool->name_ref_at(site_index);
-
-  Handle call_site
-    = SystemDictionary::make_dynamic_call_site(bootm,
-                                               // Callee information:
-                                               call_site_name,
-                                               signature_invoker,
-                                               info,
-                                               // Caller information:
-                                               caller_method,
-                                               caller_bci,
-                                               CHECK);
-
-  // In the secondary entry, the f1 field is the call site, and the f2 (index)
-  // field is some data about the invoke site.  Currently, it is just the BCI.
-  // Later, it might be changed to help manage inlining dependencies.
-  pool->cache()->secondary_entry_at(site_index)->set_dynamic_call(call_site, signature_invoker);
+  pool->cache()->secondary_entry_at(index)->set_dynamic_call(
+      info.resolved_method(),
+      info.resolved_appendix());
 }
 IRT_END
 
@@ -993,7 +943,7 @@
 
   // check the access_flags for the field in the klass
 
-  instanceKlass* ik = instanceKlass::cast(java_lang_Class::as_klassOop(cp_entry->f1()));
+  instanceKlass* ik = instanceKlass::cast(java_lang_Class::as_klassOop(cp_entry->f1_as_klass_mirror()));
   int index = cp_entry->field_index();
   if ((ik->field_access_flags(index) & JVM_ACC_FIELD_ACCESS_WATCHED) == 0) return;
 
@@ -1016,15 +966,15 @@
     // non-static field accessors have an object, but we need a handle
     h_obj = Handle(thread, obj);
   }
-  instanceKlassHandle h_cp_entry_f1(thread, java_lang_Class::as_klassOop(cp_entry->f1()));
-  jfieldID fid = jfieldIDWorkaround::to_jfieldID(h_cp_entry_f1, cp_entry->f2(), is_static);
+  instanceKlassHandle h_cp_entry_f1(thread, java_lang_Class::as_klassOop(cp_entry->f1_as_klass_mirror()));
+  jfieldID fid = jfieldIDWorkaround::to_jfieldID(h_cp_entry_f1, cp_entry->f2_as_index(), is_static);
   JvmtiExport::post_field_access(thread, method(thread), bcp(thread), h_cp_entry_f1, h_obj, fid);
 IRT_END
 
 IRT_ENTRY(void, InterpreterRuntime::post_field_modification(JavaThread *thread,
   oopDesc* obj, ConstantPoolCacheEntry *cp_entry, jvalue *value))
 
-  klassOop k = java_lang_Class::as_klassOop(cp_entry->f1());
+  klassOop k = java_lang_Class::as_klassOop(cp_entry->f1_as_klass_mirror());
 
   // check the access_flags for the field in the klass
   instanceKlass* ik = instanceKlass::cast(k);
@@ -1049,7 +999,7 @@
 
   HandleMark hm(thread);
   instanceKlassHandle h_klass(thread, k);
-  jfieldID fid = jfieldIDWorkaround::to_jfieldID(h_klass, cp_entry->f2(), is_static);
+  jfieldID fid = jfieldIDWorkaround::to_jfieldID(h_klass, cp_entry->f2_as_index(), is_static);
   jvalue fvalue;
 #ifdef _LP64
   fvalue = *value;
@@ -1118,8 +1068,8 @@
                                       SignatureHandlerLibrary::buffer_size);
   _buffer = bb->code_begin();
 
-  _fingerprints = new(ResourceObj::C_HEAP)GrowableArray<uint64_t>(32, true);
-  _handlers     = new(ResourceObj::C_HEAP)GrowableArray<address>(32, true);
+  _fingerprints = new(ResourceObj::C_HEAP, mtCode)GrowableArray<uint64_t>(32, true);
+  _handlers     = new(ResourceObj::C_HEAP, mtCode)GrowableArray<address>(32, true);
 }
 
 address SignatureHandlerLibrary::set_handler(CodeBuffer* buffer) {
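
Both new resolvers above follow the same shape: the interpreter traps into the VM,
resolution runs with single-stepping hidden from JVMTI, and the (method, appendix) pair
is parked in the constant-pool cache entry so the re-executed bytecode finds it without
trapping again. A stripped-down sketch of that resolve-once-and-cache contract; the
types and resolver body are placeholders, not HotSpot's:

    struct ResolvedCall {
      const void* method;      // resolved target; non-null marks the entry resolved
      const void* appendix;    // optional trailing argument (may stay null)
    };

    static const void* run_resolver() {   // stands in for LinkResolver; may fail
      static int target;
      return &target;
    }

    static void resolve_call_once(ResolvedCall& entry) {
      if (entry.method != 0) return;      // another thread won the race: use its result
      const void* m = run_resolver();     // errors propagate before anything is cached
      entry.appendix = 0;                 // fill secondary data first...
      entry.method   = m;                 // ...then publish; the retried bytecode hits this
    }
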
--- a/src/share/vm/interpreter/interpreterRuntime.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/interpreterRuntime.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -71,6 +71,8 @@
                                                         { return bytecode(thread).get_index_u2(bc); }
   static int       get_index_u2_cpcache(JavaThread *thread, Bytecodes::Code bc)
                                                         { return bytecode(thread).get_index_u2_cpcache(bc); }
+  static int       get_index_u4(JavaThread *thread, Bytecodes::Code bc)
+                                                        { return bytecode(thread).get_index_u4(bc); }
   static int       number_of_dimensions(JavaThread *thread)  { return bcp(thread)[3]; }
 
   static ConstantPoolCacheEntry* cache_entry_at(JavaThread *thread, int i)  { return method(thread)->constants()->cache()->entry_at(i); }
@@ -118,6 +120,7 @@
 
   // Calls
   static void    resolve_invoke       (JavaThread* thread, Bytecodes::Code bytecode);
+  static void    resolve_invokehandle (JavaThread* thread);
   static void    resolve_invokedynamic(JavaThread* thread);
 
   // Breakpoints
--- a/src/share/vm/interpreter/linkResolver.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/linkResolver.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -96,15 +96,21 @@
 void CallInfo::set_virtual(KlassHandle resolved_klass, KlassHandle selected_klass, methodHandle resolved_method, methodHandle selected_method, int vtable_index, TRAPS) {
   assert(vtable_index >= 0 || vtable_index == methodOopDesc::nonvirtual_vtable_index, "valid index");
   set_common(resolved_klass, selected_klass, resolved_method, selected_method, vtable_index, CHECK);
+  assert(!resolved_method->is_compiled_lambda_form(), "these must be handled via an invokehandle call");
 }
 
-void CallInfo::set_dynamic(methodHandle resolved_method, TRAPS) {
-  assert(resolved_method->is_method_handle_invoke(), "");
+void CallInfo::set_handle(methodHandle resolved_method, Handle resolved_appendix, TRAPS) {
+  if (resolved_method.is_null()) {
+    THROW_MSG(vmSymbols::java_lang_InternalError(), "resolved method is null");
+  }
   KlassHandle resolved_klass = SystemDictionaryHandles::MethodHandle_klass();
-  assert(resolved_klass == resolved_method->method_holder(), "");
+  assert(resolved_method->intrinsic_id() == vmIntrinsics::_invokeBasic ||
+         resolved_method->is_compiled_lambda_form(),
+         "linkMethod must return one of these");
   int vtable_index = methodOopDesc::nonvirtual_vtable_index;
   assert(resolved_method->vtable_index() == vtable_index, "");
-  set_common(resolved_klass, KlassHandle(), resolved_method, resolved_method, vtable_index, CHECK);
+  set_common(resolved_klass, resolved_klass, resolved_method, resolved_method, vtable_index, CHECK);
+  _resolved_appendix = resolved_appendix;
 }
 
 void CallInfo::set_common(KlassHandle resolved_klass, KlassHandle selected_klass, methodHandle resolved_method, methodHandle selected_method, int vtable_index, TRAPS) {
@@ -114,6 +120,7 @@
   _resolved_method = resolved_method;
   _selected_method = selected_method;
   _vtable_index    = vtable_index;
+  _resolved_appendix = Handle();
   if (CompilationPolicy::must_be_compiled(selected_method)) {
     // This path is unusual, mostly used by the '-Xcomp' stress test mode.
 
@@ -180,11 +187,9 @@
 void LinkResolver::lookup_method_in_klasses(methodHandle& result, KlassHandle klass, Symbol* name, Symbol* signature, TRAPS) {
   methodOop result_oop = klass->uncached_lookup_method(name, signature);
   if (EnableInvokeDynamic && result_oop != NULL) {
-    switch (result_oop->intrinsic_id()) {
-    case vmIntrinsics::_invokeExact:
-    case vmIntrinsics::_invokeGeneric:
-    case vmIntrinsics::_invokeDynamic:
-      // Do not link directly to these.  The VM must produce a synthetic one using lookup_implicit_method.
+    vmIntrinsics::ID iid = result_oop->intrinsic_id();
+    if (MethodHandles::is_signature_polymorphic(iid)) {
+      // Do not link directly to these.  The VM must produce a synthetic one using lookup_polymorphic_method.
       return;
     }
   }
@@ -213,31 +218,97 @@
   result = methodHandle(THREAD, ik->lookup_method_in_all_interfaces(name, signature));
 }
 
-void LinkResolver::lookup_implicit_method(methodHandle& result,
-                                          KlassHandle klass, Symbol* name, Symbol* signature,
-                                          KlassHandle current_klass,
-                                          TRAPS) {
+void LinkResolver::lookup_polymorphic_method(methodHandle& result,
+                                             KlassHandle klass, Symbol* name, Symbol* full_signature,
+                                             KlassHandle current_klass,
+                                             Handle* appendix_result_or_null,
+                                             TRAPS) {
+  vmIntrinsics::ID iid = MethodHandles::signature_polymorphic_name_id(name);
+  if (TraceMethodHandles) {
+    tty->print_cr("lookup_polymorphic_method iid=%s %s.%s%s",
+                  vmIntrinsics::name_at(iid), klass->external_name(),
+                  name->as_C_string(), full_signature->as_C_string());
+  }
   if (EnableInvokeDynamic &&
       klass() == SystemDictionary::MethodHandle_klass() &&
-      methodOopDesc::is_method_handle_invoke_name(name)) {
-    if (!THREAD->is_Compiler_thread() && !MethodHandles::enabled()) {
-      // Make sure the Java part of the runtime has been booted up.
-      klassOop natives = SystemDictionary::MethodHandleNatives_klass();
-      if (natives == NULL || instanceKlass::cast(natives)->is_not_initialized()) {
-        SystemDictionary::resolve_or_fail(vmSymbols::java_lang_invoke_MethodHandleNatives(),
-                                          Handle(),
-                                          Handle(),
-                                          true,
-                                          CHECK);
+      iid != vmIntrinsics::_none) {
+    if (MethodHandles::is_signature_polymorphic_intrinsic(iid)) {
+      // Most of these do not need an up-call to Java to resolve, so this can be done anywhere.
+      // Do not erase last argument type (MemberName) if it is a static linkTo method.
+      bool keep_last_arg = MethodHandles::is_signature_polymorphic_static(iid);
+      TempNewSymbol basic_signature =
+        MethodHandles::lookup_basic_type_signature(full_signature, keep_last_arg, CHECK);
+      if (TraceMethodHandles) {
+        tty->print_cr("lookup_polymorphic_method %s %s => basic %s",
+                      name->as_C_string(),
+                      full_signature->as_C_string(),
+                      basic_signature->as_C_string());
+      }
+      result = SystemDictionary::find_method_handle_intrinsic(iid,
+                                                              basic_signature,
+                                                              CHECK);
+      if (result.not_null()) {
+        assert(result->is_method_handle_intrinsic(), "MH.invokeBasic or MH.linkTo* intrinsic");
+        assert(result->intrinsic_id() != vmIntrinsics::_invokeGeneric, "wrong place to find this");
+        assert(basic_signature == result->signature(), "predict the result signature");
+        if (TraceMethodHandles) {
+          tty->print("lookup_polymorphic_method => intrinsic ");
+          result->print_on(tty);
+        }
+        return;
       }
-    }
-    methodOop result_oop = SystemDictionary::find_method_handle_invoke(name,
-                                                                       signature,
-                                                                       current_klass,
-                                                                       CHECK);
-    if (result_oop != NULL) {
-      assert(result_oop->is_method_handle_invoke() && result_oop->signature() == signature, "consistent");
-      result = methodHandle(THREAD, result_oop);
+    } else if (iid == vmIntrinsics::_invokeGeneric
+               && !THREAD->is_Compiler_thread()
+               && appendix_result_or_null != NULL) {
+      // This is a method with type-checking semantics.
+      // We will ask Java code to spin an adapter method for it.
+      if (!MethodHandles::enabled()) {
+        // Make sure the Java part of the runtime has been booted up.
+        klassOop natives = SystemDictionary::MethodHandleNatives_klass();
+        if (natives == NULL || instanceKlass::cast(natives)->is_not_initialized()) {
+          SystemDictionary::resolve_or_fail(vmSymbols::java_lang_invoke_MethodHandleNatives(),
+                                            Handle(),
+                                            Handle(),
+                                            true,
+                                            CHECK);
+        }
+      }
+
+      Handle appendix;
+      result = SystemDictionary::find_method_handle_invoker(name,
+                                                            full_signature,
+                                                            current_klass,
+                                                            &appendix,
+                                                            CHECK);
+      if (TraceMethodHandles) {
+        tty->print("lookup_polymorphic_method => (via Java) ");
+        result->print_on(tty);
+        tty->print("  lookup_polymorphic_method => appendix = ");
+        if (appendix.is_null())  tty->print_cr("(none)");
+        else                     appendix->print_on(tty);
+      }
+      if (result.not_null()) {
+#ifdef ASSERT
+        TempNewSymbol basic_signature =
+          MethodHandles::lookup_basic_type_signature(full_signature, CHECK);
+        int actual_size_of_params = result->size_of_parameters();
+        int expected_size_of_params = ArgumentSizeComputer(basic_signature).size();
+        // +1 for MethodHandle.this, +1 for trailing MethodType
+        if (!MethodHandles::is_signature_polymorphic_static(iid))  expected_size_of_params += 1;
+        if (appendix.not_null())                                   expected_size_of_params += 1;
+        if (actual_size_of_params != expected_size_of_params) {
+          tty->print_cr("*** basic_signature=%s", basic_signature->as_C_string());
+          tty->print_cr("*** result for %s: ", vmIntrinsics::name_at(iid));
+          result->print();
+        }
+        assert(actual_size_of_params == expected_size_of_params,
+               err_msg("%d != %d", actual_size_of_params, expected_size_of_params));
+#endif //ASSERT
+
+        assert(appendix_result_or_null != NULL, "");
+        (*appendix_result_or_null) = appendix;
+        return;
+      }
     }
   }
 }
@@ -267,6 +338,7 @@
     new_flags = new_flags | JVM_ACC_PUBLIC;
     flags.set_flags(new_flags);
   }
+//  assert(extra_arg_result_or_null != NULL, "must be able to return extra argument");
 
   if (!Reflection::verify_field_access(ref_klass->as_klassOop(),
                                        resolved_klass->as_klassOop(),
@@ -287,10 +359,19 @@
   }
 }
 
-void LinkResolver::resolve_method(methodHandle& resolved_method, KlassHandle& resolved_klass,
-                                  constantPoolHandle pool, int index, TRAPS) {
+void LinkResolver::resolve_method_statically(methodHandle& resolved_method, KlassHandle& resolved_klass,
+                                             Bytecodes::Code code, constantPoolHandle pool, int index, TRAPS) {
 
   // resolve klass
+  if (code == Bytecodes::_invokedynamic) {
+    resolved_klass = SystemDictionaryHandles::MethodHandle_klass();
+    Symbol* method_name = vmSymbols::invoke_name();
+    Symbol* method_signature = pool->signature_ref_at(index);
+    KlassHandle  current_klass(THREAD, pool->pool_holder());
+    resolve_method(resolved_method, resolved_klass, method_name, method_signature, current_klass, true, CHECK);
+    return;
+  }
+
   resolve_klass(resolved_klass, pool, index, CHECK);
 
   Symbol*  method_name       = pool->name_ref_at(index);
@@ -299,7 +380,7 @@
 
   if (pool->has_preresolution()
       || (resolved_klass() == SystemDictionary::MethodHandle_klass() &&
-          methodOopDesc::is_method_handle_invoke_name(method_name))) {
+          MethodHandles::is_signature_polymorphic_name(resolved_klass(), method_name))) {
     methodOop result_oop = constantPoolOopDesc::method_at_if_loaded(pool, index);
     if (result_oop != NULL) {
       resolved_method = methodHandle(THREAD, result_oop);
@@ -307,33 +388,13 @@
     }
   }
 
-  resolve_method(resolved_method, resolved_klass, method_name, method_signature, current_klass, true, CHECK);
+  if (code == Bytecodes::_invokeinterface) {
+    resolve_interface_method(resolved_method, resolved_klass, method_name, method_signature, current_klass, true, CHECK);
+  } else {
+    resolve_method(resolved_method, resolved_klass, method_name, method_signature, current_klass, true, CHECK);
+  }
 }
 
-void LinkResolver::resolve_dynamic_method(methodHandle& resolved_method, KlassHandle& resolved_klass, constantPoolHandle pool, int index, TRAPS) {
-  // The class is java.lang.invoke.MethodHandle
-  resolved_klass = SystemDictionaryHandles::MethodHandle_klass();
-
-  Symbol* method_name = vmSymbols::invokeExact_name();
-
-  Symbol*  method_signature = pool->signature_ref_at(index);
-  KlassHandle  current_klass   (THREAD, pool->pool_holder());
-
-  resolve_method(resolved_method, resolved_klass, method_name, method_signature, current_klass, true, CHECK);
-}
-
-void LinkResolver::resolve_interface_method(methodHandle& resolved_method, KlassHandle& resolved_klass, constantPoolHandle pool, int index, TRAPS) {
-
-  // resolve klass
-  resolve_klass(resolved_klass, pool, index, CHECK);
-  Symbol*  method_name       = pool->name_ref_at(index);
-  Symbol*  method_signature  = pool->signature_ref_at(index);
-  KlassHandle  current_klass(THREAD, pool->pool_holder());
-
-  resolve_interface_method(resolved_method, resolved_klass, method_name, method_signature, current_klass, true, CHECK);
-}
-
-
 void LinkResolver::resolve_method(methodHandle& resolved_method, KlassHandle resolved_klass,
                                   Symbol* method_name, Symbol* method_signature,
                                   KlassHandle current_klass, bool check_access, TRAPS) {
@@ -346,6 +407,8 @@
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
   }
 
+  Handle nested_exception;
+
   // 2. lookup method in resolved klass and its super klasses
   lookup_method_in_klasses(resolved_method, resolved_klass, method_name, method_signature, CHECK);
 
@@ -354,17 +417,23 @@
     lookup_method_in_interfaces(resolved_method, resolved_klass, method_name, method_signature, CHECK);
 
     if (resolved_method.is_null()) {
-      // JSR 292:  see if this is an implicitly generated method MethodHandle.invoke(*...)
-      lookup_implicit_method(resolved_method, resolved_klass, method_name, method_signature, current_klass, CHECK);
+      // JSR 292:  see if this is an implicitly generated method MethodHandle.linkToVirtual(*...), etc
+      lookup_polymorphic_method(resolved_method, resolved_klass, method_name, method_signature,
+                                current_klass, (Handle*)NULL, THREAD);
+      if (HAS_PENDING_EXCEPTION) {
+        nested_exception = Handle(THREAD, PENDING_EXCEPTION);
+        CLEAR_PENDING_EXCEPTION;
+      }
     }
 
     if (resolved_method.is_null()) {
       // 4. method lookup failed
       ResourceMark rm(THREAD);
-      THROW_MSG(vmSymbols::java_lang_NoSuchMethodError(),
-                methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
-                                                        method_name,
-                                                        method_signature));
+      THROW_MSG_CAUSE(vmSymbols::java_lang_NoSuchMethodError(),
+                      methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
+                                                              method_name,
+                                                              method_signature),
+                      nested_exception);
     }
   }
 
@@ -1053,6 +1122,7 @@
     case Bytecodes::_invokestatic   : resolve_invokestatic   (result,       pool, index, CHECK); break;
     case Bytecodes::_invokespecial  : resolve_invokespecial  (result,       pool, index, CHECK); break;
     case Bytecodes::_invokevirtual  : resolve_invokevirtual  (result, recv, pool, index, CHECK); break;
+    case Bytecodes::_invokehandle   : resolve_invokehandle   (result,       pool, index, CHECK); break;
     case Bytecodes::_invokedynamic  : resolve_invokedynamic  (result,       pool, index, CHECK); break;
     case Bytecodes::_invokeinterface: resolve_invokeinterface(result, recv, pool, index, CHECK); break;
   }
@@ -1116,22 +1186,91 @@
 }
 
 
-void LinkResolver::resolve_invokedynamic(CallInfo& result, constantPoolHandle pool, int raw_index, TRAPS) {
+void LinkResolver::resolve_invokehandle(CallInfo& result, constantPoolHandle pool, int index, TRAPS) {
   assert(EnableInvokeDynamic, "");
+  // This guy is reached from InterpreterRuntime::resolve_invokehandle.
+  KlassHandle  resolved_klass;
+  Symbol* method_name = NULL;
+  Symbol* method_signature = NULL;
+  KlassHandle  current_klass;
+  resolve_pool(resolved_klass, method_name,  method_signature, current_klass, pool, index, CHECK);
+  if (TraceMethodHandles)
+    tty->print_cr("resolve_invokehandle %s %s", method_name->as_C_string(), method_signature->as_C_string());
+  resolve_handle_call(result, resolved_klass, method_name, method_signature, current_klass, CHECK);
+}
 
-  // This guy is reached from InterpreterRuntime::resolve_invokedynamic.
+void LinkResolver::resolve_handle_call(CallInfo& result, KlassHandle resolved_klass,
+                                       Symbol* method_name, Symbol* method_signature,
+                                       KlassHandle current_klass,
+                                       TRAPS) {
+  // JSR 292:  this must be an implicitly generated method MethodHandle.invokeExact(*...) or similar
+  assert(resolved_klass() == SystemDictionary::MethodHandle_klass(), "");
+  assert(MethodHandles::is_signature_polymorphic_name(method_name), "");
+  methodHandle resolved_method;
+  Handle resolved_appendix;
+  lookup_polymorphic_method(resolved_method, resolved_klass,
+                            method_name, method_signature,
+                            current_klass, &resolved_appendix, CHECK);
+  result.set_handle(resolved_method, resolved_appendix, CHECK);
+}
+
+
+void LinkResolver::resolve_invokedynamic(CallInfo& result, constantPoolHandle pool, int index, TRAPS) {
+  assert(EnableInvokeDynamic, "");
+  pool->set_invokedynamic();    // mark header to flag active call sites
+
+  //resolve_pool(<resolved_klass>, method_name,  method_signature, current_klass, pool, index, CHECK);
+  Symbol* method_name       = pool->name_ref_at(index);
+  Symbol* method_signature  = pool->signature_ref_at(index);
+  KlassHandle current_klass = KlassHandle(THREAD, pool->pool_holder());
 
-  // At this point, we only need the signature, and can ignore the name.
-  Symbol*  method_signature = pool->signature_ref_at(raw_index);  // raw_index works directly
-  Symbol* method_name = vmSymbols::invokeExact_name();
-  KlassHandle resolved_klass = SystemDictionaryHandles::MethodHandle_klass();
+  // Resolve the bootstrap specifier (BSM + optional arguments).
+  Handle bootstrap_specifier;
+  // Check if CallSite has been bound already:
+  ConstantPoolCacheEntry* cpce = pool->cache()->secondary_entry_at(index);
+  if (cpce->is_f1_null()) {
+    int pool_index = pool->cache()->main_entry_at(index)->constant_pool_index();
+    oop bsm_info = pool->resolve_bootstrap_specifier_at(pool_index, CHECK);
+    assert(bsm_info != NULL, "");
+    // FIXME: Cache this once per BootstrapMethods entry, not once per CONSTANT_InvokeDynamic.
+    bootstrap_specifier = Handle(THREAD, bsm_info);
+  }
+  if (!cpce->is_f1_null()) {
+    methodHandle method(THREAD, cpce->f2_as_vfinal_method());
+    Handle appendix(THREAD, cpce->has_appendix() ? cpce->f1_appendix() : (oop)NULL);
+    result.set_handle(method, appendix, CHECK);
+    return;
+  }
 
-  // JSR 292:  this must be an implicitly generated method MethodHandle.invokeExact(*...)
-  // The extra MH receiver will be inserted into the stack on every call.
-  methodHandle resolved_method;
-  KlassHandle current_klass(THREAD, pool->pool_holder());
-  lookup_implicit_method(resolved_method, resolved_klass, method_name, method_signature, current_klass, THREAD);
+  if (TraceMethodHandles) {
+    tty->print_cr("resolve_invokedynamic #%d %s %s",
+                  constantPoolCacheOopDesc::decode_secondary_index(index),
+                  method_name->as_C_string(), method_signature->as_C_string());
+    tty->print("  BSM info: "); bootstrap_specifier->print();
+  }
+
+  resolve_dynamic_call(result, bootstrap_specifier, method_name, method_signature, current_klass, CHECK);
+}
+
+void LinkResolver::resolve_dynamic_call(CallInfo& result,
+                                        Handle bootstrap_specifier,
+                                        Symbol* method_name, Symbol* method_signature,
+                                        KlassHandle current_klass,
+                                        TRAPS) {
+  // JSR 292:  this must resolve to an implicitly generated method MH.linkToCallSite(*...)
+  // The appendix argument is likely to be a freshly-created CallSite.
+  Handle       resolved_appendix;
+  methodHandle resolved_method =
+    SystemDictionary::find_dynamic_call_site_invoker(current_klass,
+                                                     bootstrap_specifier,
+                                                     method_name, method_signature,
+                                                     &resolved_appendix,
+                                                     THREAD);
   if (HAS_PENDING_EXCEPTION) {
+    if (TraceMethodHandles) {
+      tty->print_cr("invokedynamic throws BSME for "INTPTR_FORMAT, PENDING_EXCEPTION);
+      PENDING_EXCEPTION->print();
+    }
     if (PENDING_EXCEPTION->is_a(SystemDictionary::BootstrapMethodError_klass())) {
      // rethrow these exceptions, since they are already wrapped
       return;
@@ -1141,17 +1280,11 @@
       return;
     }
     // See the "Linking Exceptions" section for the invokedynamic instruction in the JVMS.
-    Handle ex(THREAD, PENDING_EXCEPTION);
+    Handle nested_exception(THREAD, PENDING_EXCEPTION);
     CLEAR_PENDING_EXCEPTION;
-    oop bsme = Klass::cast(SystemDictionary::BootstrapMethodError_klass())->java_mirror();
-    MethodHandles::raise_exception(Bytecodes::_athrow, ex(), bsme, CHECK);
-    // java code should not return, but if it does throw out anyway
-    THROW(vmSymbols::java_lang_InternalError());
+    THROW_CAUSE(vmSymbols::java_lang_BootstrapMethodError(), nested_exception)
   }
-  if (resolved_method.is_null()) {
-    THROW(vmSymbols::java_lang_InternalError());
-  }
-  result.set_dynamic(resolved_method, CHECK);
+  result.set_handle(resolved_method, resolved_appendix, CHECK);
 }
 
 //------------------------------------------------------------------------------------------------------------------------
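
A minimal caller-side sketch of the flow above (illustrative only; it assumes a valid
constantPoolHandle 'pool', a cache index 'index', and the surrounding CHECK/THREAD context):

    CallInfo info;
    LinkResolver::resolve_invokedynamic(info, pool, index, CHECK);
    methodHandle target  = info.resolved_method();    // e.g. an MH.linkToCallSite(*...) adapter
    Handle      appendix = info.resolved_appendix();  // freshly-created CallSite; may be null
    // When 'target' is invoked, the appendix (if non-null) rides along as a trailing argument.
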
--- a/src/share/vm/interpreter/linkResolver.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/linkResolver.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -75,11 +75,12 @@
   methodHandle _resolved_method;        // static target method
   methodHandle _selected_method;        // dynamic (actual) target method
   int          _vtable_index;           // vtable index of selected method
+  Handle       _resolved_appendix;      // extra argument in constant pool (if CPCE::has_appendix)
 
   void         set_static(   KlassHandle resolved_klass,                             methodHandle resolved_method                                                , TRAPS);
   void         set_interface(KlassHandle resolved_klass, KlassHandle selected_klass, methodHandle resolved_method, methodHandle selected_method                  , TRAPS);
   void         set_virtual(  KlassHandle resolved_klass, KlassHandle selected_klass, methodHandle resolved_method, methodHandle selected_method, int vtable_index, TRAPS);
-  void         set_dynamic(                                                          methodHandle resolved_method,                                                 TRAPS);
+  void         set_handle(                                                           methodHandle resolved_method,   Handle resolved_appendix,                     TRAPS);
   void         set_common(   KlassHandle resolved_klass, KlassHandle selected_klass, methodHandle resolved_method, methodHandle selected_method, int vtable_index, TRAPS);
 
   friend class LinkResolver;
@@ -89,6 +90,7 @@
   KlassHandle  selected_klass() const            { return _selected_klass; }
   methodHandle resolved_method() const           { return _resolved_method; }
   methodHandle selected_method() const           { return _selected_method; }
+  Handle       resolved_appendix() const         { return _resolved_appendix; }
 
   BasicType    result_type() const               { return selected_method()->result_type(); }
   bool         has_vtable_index() const          { return _vtable_index >= 0; }
@@ -110,8 +112,8 @@
   static void lookup_method_in_klasses          (methodHandle& result, KlassHandle klass, Symbol* name, Symbol* signature, TRAPS);
   static void lookup_instance_method_in_klasses (methodHandle& result, KlassHandle klass, Symbol* name, Symbol* signature, TRAPS);
   static void lookup_method_in_interfaces       (methodHandle& result, KlassHandle klass, Symbol* name, Symbol* signature, TRAPS);
-  static void lookup_implicit_method            (methodHandle& result, KlassHandle klass, Symbol* name, Symbol* signature,
-                                                 KlassHandle current_klass, TRAPS);
+  static void lookup_polymorphic_method         (methodHandle& result, KlassHandle klass, Symbol* name, Symbol* signature,
+                                                 KlassHandle current_klass, Handle* appendix_result_or_null, TRAPS);
 
   static int vtable_index_of_miranda_method(KlassHandle klass, Symbol* name, Symbol* signature, TRAPS);
 
@@ -139,10 +141,9 @@
   // constant pool resolving
   static void check_klass_accessability(KlassHandle ref_klass, KlassHandle sel_klass, TRAPS);
 
-  // static resolving for all calls except interface calls
-  static void resolve_method          (methodHandle& method_result, KlassHandle& klass_result, constantPoolHandle pool, int index, TRAPS);
-  static void resolve_dynamic_method  (methodHandle& resolved_method, KlassHandle& resolved_klass, constantPoolHandle pool, int index, TRAPS);
-  static void resolve_interface_method(methodHandle& method_result, KlassHandle& klass_result, constantPoolHandle pool, int index, TRAPS);
+  // static resolving calls (will not run any Java code); used only from Bytecode_invoke::static_target
+  static void resolve_method_statically(methodHandle& method_result, KlassHandle& klass_result,
+                                        Bytecodes::Code code, constantPoolHandle pool, int index, TRAPS);
 
   // runtime/static resolving for fields
   static void resolve_field(FieldAccessInfo& result, constantPoolHandle pool, int index, Bytecodes::Code byte, bool check_only, TRAPS);
@@ -156,6 +157,8 @@
   static void resolve_special_call  (CallInfo& result,              KlassHandle resolved_klass, Symbol* method_name, Symbol* method_signature, KlassHandle current_klass, bool check_access, TRAPS);
   static void resolve_virtual_call  (CallInfo& result, Handle recv, KlassHandle recv_klass, KlassHandle resolved_klass, Symbol* method_name, Symbol* method_signature, KlassHandle current_klass, bool check_access, bool check_null_and_abstract, TRAPS);
   static void resolve_interface_call(CallInfo& result, Handle recv, KlassHandle recv_klass, KlassHandle resolved_klass, Symbol* method_name, Symbol* method_signature, KlassHandle current_klass, bool check_access, bool check_null_and_abstract, TRAPS);
+  static void resolve_handle_call   (CallInfo& result,                                      KlassHandle resolved_klass, Symbol* method_name, Symbol* method_signature, KlassHandle current_klass, TRAPS);
+  static void resolve_dynamic_call  (CallInfo& result,                                      Handle bootstrap_specifier, Symbol* method_name, Symbol* method_signature, KlassHandle current_klass, TRAPS);
 
   // same as above for compile-time resolution; but returns null handle instead of throwing an exception on error
   // also, does not initialize klass (i.e., no side effects)
@@ -177,6 +180,7 @@
   static void resolve_invokevirtual  (CallInfo& result, Handle recv, constantPoolHandle pool, int index, TRAPS);
   static void resolve_invokeinterface(CallInfo& result, Handle recv, constantPoolHandle pool, int index, TRAPS);
   static void resolve_invokedynamic  (CallInfo& result,              constantPoolHandle pool, int index, TRAPS);
+  static void resolve_invokehandle   (CallInfo& result,              constantPoolHandle pool, int index, TRAPS);
 
   static void resolve_invoke         (CallInfo& result, Handle recv, constantPoolHandle pool, int index, Bytecodes::Code byte, TRAPS);
 };
--- a/src/share/vm/interpreter/oopMapCache.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/oopMapCache.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -348,7 +348,7 @@
   if (mask_size() > small_mask_limit) {
     assert(_bit_mask[0] == 0, "bit mask should be new or just flushed");
     _bit_mask[0] = (intptr_t)
-      NEW_C_HEAP_ARRAY(uintptr_t, mask_word_size());
+      NEW_C_HEAP_ARRAY(uintptr_t, mask_word_size(), mtClass);
   }
 }
 
@@ -356,7 +356,7 @@
   if (mask_size() > small_mask_limit && _bit_mask[0] != 0) {
     assert(!Thread::current()->resource_area()->contains((void*)_bit_mask[0]),
       "This bit mask should not be in the resource area");
-    FREE_C_HEAP_ARRAY(uintptr_t, _bit_mask[0]);
+    FREE_C_HEAP_ARRAY(uintptr_t, _bit_mask[0], mtClass);
     debug_only(_bit_mask[0] = 0;)
   }
 }
@@ -506,7 +506,7 @@
 OopMapCache::OopMapCache() :
   _mut(Mutex::leaf, "An OopMapCache lock", true)
 {
-  _array  = NEW_C_HEAP_ARRAY(OopMapCacheEntry, _size);
+  _array  = NEW_C_HEAP_ARRAY(OopMapCacheEntry, _size, mtClass);
   // Cannot call flush for initialization, since flush
   // will check if memory should be deallocated
   for(int i = 0; i < _size; i++) _array[i].initialize();
@@ -520,7 +520,7 @@
   flush();
   // Deallocate array
   NOT_PRODUCT(_total_memory_usage -= sizeof(OopMapCache) + (sizeof(OopMapCacheEntry) * _size);)
-  FREE_C_HEAP_ARRAY(OopMapCacheEntry, _array);
+  FREE_C_HEAP_ARRAY(OopMapCacheEntry, _array, mtClass);
 }
 
 OopMapCacheEntry* OopMapCache::entry_at(int i) const {
@@ -639,9 +639,9 @@
 
 void OopMapCache::compute_one_oop_map(methodHandle method, int bci, InterpreterOopMap* entry) {
   // Due to the invariants above it's tricky to allocate a temporary OopMapCacheEntry on the stack
-  OopMapCacheEntry* tmp = NEW_C_HEAP_ARRAY(OopMapCacheEntry, 1);
+  OopMapCacheEntry* tmp = NEW_C_HEAP_ARRAY(OopMapCacheEntry, 1, mtClass);
   tmp->initialize();
   tmp->fill(method, bci);
   entry->resource_copy(tmp);
-  FREE_C_HEAP_ARRAY(OopMapCacheEntry, tmp);
+  FREE_C_HEAP_ARRAY(OopMapCacheEntry, tmp, mtInternal);
 }
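
Throughout this change the C-heap convenience macros gain an explicit MEMFLAGS argument so
NMT can attribute the memory, as in the mtClass/mtInternal tags above. A minimal sketch of
the new calling convention (hypothetical buffer; the category is chosen for illustration):

    int* buf = NEW_C_HEAP_ARRAY(int, 16, mtInternal);          // AllocateHeap tagged mtInternal
    buf      = REALLOC_C_HEAP_ARRAY(int, buf, 32, mtInternal); // grow under the same category
    FREE_C_HEAP_ARRAY(int, buf, mtInternal);                   // free under the same category
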
--- a/src/share/vm/interpreter/oopMapCache.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/oopMapCache.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -156,7 +156,7 @@
 #endif
 };
 
-class OopMapCache : public CHeapObj {
+class OopMapCache : public CHeapObj<mtClass> {
  private:
   enum { _size        = 32,     // Use fixed size for now
          _probe_depth = 3       // probe depth in case of collisions
--- a/src/share/vm/interpreter/rewriter.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/rewriter.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -33,6 +33,7 @@
 #include "oops/objArrayOop.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/methodComparator.hpp"
+#include "prims/methodHandles.hpp"
 
 // Computes a CPC map (new_index -> original_index) for constant pool entries
 // that are referred to by the interpreter at runtime via the constant pool cache.
@@ -41,10 +42,9 @@
 void Rewriter::compute_index_maps() {
   const int length  = _pool->length();
   init_cp_map(length);
-  jint tag_mask = 0;
+  bool saw_mh_symbol = false;
   for (int i = 0; i < length; i++) {
     int tag = _pool->tag_at(i).value();
-    tag_mask |= (1 << tag);
     switch (tag) {
       case JVM_CONSTANT_InterfaceMethodref:
       case JVM_CONSTANT_Fieldref          : // fall through
@@ -54,13 +54,18 @@
       case JVM_CONSTANT_InvokeDynamic     : // fall through
         add_cp_cache_entry(i);
         break;
+      case JVM_CONSTANT_Utf8:
+        if (_pool->symbol_at(i) == vmSymbols::java_lang_invoke_MethodHandle())
+          saw_mh_symbol = true;
+        break;
     }
   }
 
   guarantee((int)_cp_cache_map.length()-1 <= (int)((u2)-1),
             "all cp cache indexes fit in a u2");
 
-  _have_invoke_dynamic = ((tag_mask & (1 << JVM_CONSTANT_InvokeDynamic)) != 0);
+  if (saw_mh_symbol)
+    _method_handle_invokers.initialize(length, (int)0);
 }
 
 // Unrewrite the bytecodes if an error occurs.
@@ -80,22 +85,6 @@
       oopFactory::new_constantPoolCache(length, CHECK);
   No_Safepoint_Verifier nsv;
   cache->initialize(_cp_cache_map);
-
-  // Don't bother with the next pass if there is no JVM_CONSTANT_InvokeDynamic.
-  if (_have_invoke_dynamic) {
-    for (int i = 0; i < length; i++) {
-      int pool_index = cp_cache_entry_pool_index(i);
-      if (pool_index >= 0 &&
-          _pool->tag_at(pool_index).is_invoke_dynamic()) {
-        int bsm_index = _pool->invoke_dynamic_bootstrap_method_ref_index_at(pool_index);
-        assert(_pool->tag_at(bsm_index).is_method_handle(), "must be a MH constant");
-        // There is a CP cache entry holding the BSM for these calls.
-        int bsm_cache_index = cp_entry_to_cp_cache(bsm_index);
-        cache->entry_at(i)->initialize_bootstrap_method_index_in_cache(bsm_cache_index);
-      }
-    }
-  }
-
   _pool->set_cache(cache);
   cache->set_constant_pool(_pool());
 }
@@ -148,10 +137,53 @@
     int  cp_index    = Bytes::get_Java_u2(p);
     int  cache_index = cp_entry_to_cp_cache(cp_index);
     Bytes::put_native_u2(p, cache_index);
+    if (!_method_handle_invokers.is_empty())
+      maybe_rewrite_invokehandle(p - 1, cp_index, reverse);
   } else {
     int cache_index = Bytes::get_native_u2(p);
     int pool_index = cp_cache_entry_pool_index(cache_index);
     Bytes::put_Java_u2(p, pool_index);
+    if (!_method_handle_invokers.is_empty())
+      maybe_rewrite_invokehandle(p - 1, pool_index, reverse);
+  }
+}
+
+
+// Adjust the invocation bytecode for a signature-polymorphic method (MethodHandle.invoke, etc.)
+void Rewriter::maybe_rewrite_invokehandle(address opc, int cp_index, bool reverse) {
+  if (!reverse) {
+    if ((*opc) == (u1)Bytecodes::_invokevirtual ||
+        // allow invokespecial as an alias, although it would be very odd:
+        (*opc) == (u1)Bytecodes::_invokespecial) {
+      assert(_pool->tag_at(cp_index).is_method(), "wrong index");
+      // Determine whether this is a signature-polymorphic method.
+      if (cp_index >= _method_handle_invokers.length())  return;
+      int status = _method_handle_invokers[cp_index];
+      assert(status >= -1 && status <= 1, "oob tri-state");
+      if (status == 0) {
+        if (_pool->klass_ref_at_noresolve(cp_index) == vmSymbols::java_lang_invoke_MethodHandle() &&
+            MethodHandles::is_signature_polymorphic_name(SystemDictionary::MethodHandle_klass(),
+                                                         _pool->name_ref_at(cp_index)))
+          status = +1;
+        else
+          status = -1;
+        _method_handle_invokers[cp_index] = status;
+      }
+      // We use a special internal bytecode for such methods (if non-static).
+      // The basic reason for this is that such methods need an extra "appendix" argument
+      // to transmit the call site's intended call type.
+      if (status > 0) {
+        (*opc) = (u1)Bytecodes::_invokehandle;
+      }
+    }
+  } else {
+    // Do not need to look at cp_index.
+    if ((*opc) == (u1)Bytecodes::_invokehandle) {
+      (*opc) = (u1)Bytecodes::_invokevirtual;
+      // Ignore corner case of original _invokespecial instruction.
+      // This is safe because (a) the signature polymorphic method was final, and
+      // (b) the implementation of MethodHandle will not call invokespecial on it.
+    }
   }
 }
 
@@ -297,17 +329,18 @@
         case Bytecodes::_invokespecial  : // fall through
         case Bytecodes::_invokestatic   :
         case Bytecodes::_invokeinterface:
+        case Bytecodes::_invokehandle   : // if reverse=true
           rewrite_member_reference(bcp, prefix_length+1, reverse);
           break;
         case Bytecodes::_invokedynamic:
           rewrite_invokedynamic(bcp, prefix_length+1, reverse);
           break;
         case Bytecodes::_ldc:
-        case Bytecodes::_fast_aldc:
+        case Bytecodes::_fast_aldc:  // if reverse=true
           maybe_rewrite_ldc(bcp, prefix_length+1, false, reverse);
           break;
         case Bytecodes::_ldc_w:
-        case Bytecodes::_fast_aldc_w:
+        case Bytecodes::_fast_aldc_w:  // if reverse=true
           maybe_rewrite_ldc(bcp, prefix_length+1, true, reverse);
           break;
         case Bytecodes::_jsr            : // fall through
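
The memoization in maybe_rewrite_invokehandle above boils down to a tri-state cache per
constant-pool index. A standalone sketch of the idea (is_sig_poly is a hypothetical stand-in
for the MethodHandle name/klass check; 0 = undecided, -1 = no, +1 = yes):

    #include <vector>
    static bool is_sig_poly(int cp_index) { return cp_index == 12; }  // hypothetical stand-in
    static std::vector<int> invokers(64, 0);  // mirrors _method_handle_invokers, all undecided
    bool should_rewrite(int cp_index) {
      int status = invokers[cp_index];
      if (status == 0) {                          // not yet decided for this CP index
        status = is_sig_poly(cp_index) ? +1 : -1;
        invokers[cp_index] = status;              // memoize: decide at most once per index
      }
      return status > 0;                          // +1 => patch the opcode to _invokehandle
    }
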
--- a/src/share/vm/interpreter/rewriter.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/rewriter.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -39,7 +39,7 @@
   objArrayHandle      _methods;
   intArray            _cp_map;
   intStack            _cp_cache_map;
-  bool                _have_invoke_dynamic;
+  intArray            _method_handle_invokers;
 
   void init_cp_map(int length) {
     _cp_map.initialize(length, -1);
@@ -88,6 +88,7 @@
   void scan_method(methodOop m, bool reverse = false);
   void rewrite_Object_init(methodHandle m, TRAPS);
   void rewrite_member_reference(address bcp, int offset, bool reverse = false);
+  void maybe_rewrite_invokehandle(address opc, int cp_index, bool reverse = false);
   void rewrite_invokedynamic(address bcp, int offset, bool reverse = false);
   void maybe_rewrite_ldc(address bcp, int offset, bool is_wide, bool reverse = false);
   // Revert bytecodes in case of an exception.
--- a/src/share/vm/interpreter/templateInterpreter.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/templateInterpreter.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -362,7 +362,6 @@
   method_entry(empty)
   method_entry(accessor)
   method_entry(abstract)
-  method_entry(method_handle)
   method_entry(java_lang_math_sin  )
   method_entry(java_lang_math_cos  )
   method_entry(java_lang_math_tan  )
@@ -370,8 +369,16 @@
   method_entry(java_lang_math_sqrt )
   method_entry(java_lang_math_log  )
   method_entry(java_lang_math_log10)
+  method_entry(java_lang_math_exp  )
+  method_entry(java_lang_math_pow  )
   method_entry(java_lang_ref_reference_get)
 
+  // method handle entry kinds are generated later in MethodHandlesAdapterGenerator::generate:
+  for (int i = Interpreter::method_handle_invoke_FIRST; i <= Interpreter::method_handle_invoke_LAST; i++) {
+    Interpreter::MethodKind kind = (Interpreter::MethodKind) i;
+    Interpreter::_entry_table[kind] = Interpreter::_entry_table[Interpreter::abstract];
+  }
+
   // all native method kinds (must be one contiguous block)
   Interpreter::_native_entry_begin = Interpreter::code()->code_end();
   method_entry(native)
--- a/src/share/vm/interpreter/templateTable.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/templateTable.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -444,7 +444,7 @@
   def(Bytecodes::_invokespecial       , ubcp|disp|clvm|____, vtos, vtos, invokespecial       , f1_byte      );
   def(Bytecodes::_invokestatic        , ubcp|disp|clvm|____, vtos, vtos, invokestatic        , f1_byte      );
   def(Bytecodes::_invokeinterface     , ubcp|disp|clvm|____, vtos, vtos, invokeinterface     , f1_byte      );
-  def(Bytecodes::_invokedynamic       , ubcp|disp|clvm|____, vtos, vtos, invokedynamic       , f1_oop       );
+  def(Bytecodes::_invokedynamic       , ubcp|disp|clvm|____, vtos, vtos, invokedynamic       , f12_oop      );
   def(Bytecodes::_new                 , ubcp|____|clvm|____, vtos, atos, _new                ,  _           );
   def(Bytecodes::_newarray            , ubcp|____|clvm|____, itos, atos, newarray            ,  _           );
   def(Bytecodes::_anewarray           , ubcp|____|clvm|____, itos, atos, anewarray           ,  _           );
@@ -514,6 +514,8 @@
 
   def(Bytecodes::_return_register_finalizer , ____|disp|clvm|____, vtos, vtos, _return       ,  vtos        );
 
+  def(Bytecodes::_invokehandle        , ubcp|disp|clvm|____, vtos, vtos, invokehandle        , f12_oop      );
+
   def(Bytecodes::_shouldnotreachhere   , ____|____|____|____, vtos, vtos, shouldnotreachhere ,  _           );
   // platform specific bytecodes
   pd_initialize();
--- a/src/share/vm/interpreter/templateTable.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/interpreter/templateTable.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -98,7 +98,7 @@
  public:
   enum Operation { add, sub, mul, div, rem, _and, _or, _xor, shl, shr, ushr };
   enum Condition { equal, not_equal, less, less_equal, greater, greater_equal };
-  enum CacheByte { f1_byte = 1, f2_byte = 2, f1_oop = 0x11 };  // byte_no codes
+  enum CacheByte { f1_byte = 1, f2_byte = 2, f12_oop = 0x12 };  // byte_no codes
 
  private:
   static bool            _is_initialized;        // true if TemplateTable has been initialized
@@ -294,6 +294,7 @@
   static void invokestatic(int byte_no);
   static void invokeinterface(int byte_no);
   static void invokedynamic(int byte_no);
+  static void invokehandle(int byte_no);
   static void fast_invokevfinal(int byte_no);
 
   static void getfield_or_static(int byte_no, bool is_static);
--- a/src/share/vm/libadt/set.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/libadt/set.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -71,7 +71,7 @@
   set.Sort();                   // Sort elements for in-order retrieval
 
   uint len = 128;               // Total string space
-  char *buf = NEW_C_HEAP_ARRAY(char,len);// Some initial string space
+  char *buf = NEW_C_HEAP_ARRAY(char,len, mtCompiler);// Some initial string space
 
   register char *s = buf;       // Current working string pointer
   *s++ = '{';
@@ -86,7 +86,7 @@
       if( buf+len-s < 25 ) {      // Generous trailing space for upcoming numbers
         int offset = (int)(s-buf);// Not enuf space; compute offset into buffer
         len <<= 1;                // Double string size
-        buf = REALLOC_C_HEAP_ARRAY(char,buf,len); // Reallocate doubled size
+        buf = REALLOC_C_HEAP_ARRAY(char,buf,len, mtCompiler); // Reallocate doubled size
         s = buf+offset;         // Get working pointer into new bigger buffer
       }
       if( lo != (uint)-2 ) {    // Startup?  No!  Then print previous range.
@@ -101,7 +101,7 @@
     if( buf+len-s < 25 ) {      // Generous trailing space for upcoming numbers
       int offset = (int)(s-buf);// Not enuf space; compute offset into buffer
       len <<= 1;                // Double string size
-      buf = (char*)ReallocateHeap(buf,len); // Reallocate doubled size
+      buf = (char*)ReallocateHeap(buf,len, mtCompiler); // Reallocate doubled size
       s = buf+offset;           // Get working pointer into new bigger buffer
     }
     if( lo != hi ) sprintf(s,"%d-%d}",lo,hi);
--- a/src/share/vm/libadt/vectset.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/libadt/vectset.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -362,7 +362,7 @@
 };
 
 SetI_ *VectorSet::iterate(uint &elem) const {
-  return new(ResourceObj::C_HEAP) VSetI_(this, elem);
+  return new(ResourceObj::C_HEAP, mtInternal) VSetI_(this, elem);
 }
 
 //=============================================================================
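
The same flag threading reaches ResourceObj: its C_HEAP placement form now carries a
MEMFLAGS tag, as the call above shows. A minimal sketch (hypothetical subclass, assuming
memory/allocation.hpp is in scope):

    class IterSketch : public ResourceObj { };  // hypothetical ResourceObj subclass
    IterSketch* it = new (ResourceObj::C_HEAP, mtInternal) IterSketch();  // tagged C-heap allocation
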
--- a/src/share/vm/memory/allocation.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/allocation.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -26,10 +26,13 @@
 #include "memory/allocation.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/resourceArea.hpp"
+#include "runtime/atomic.hpp"
 #include "runtime/os.hpp"
 #include "runtime/task.hpp"
 #include "runtime/threadCritical.hpp"
+#include "services/memTracker.hpp"
 #include "utilities/ostream.hpp"
+
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
 #endif
@@ -43,32 +46,16 @@
 # include "os_bsd.inline.hpp"
 #endif
 
-void* CHeapObj::operator new(size_t size){
-  return (void *) AllocateHeap(size, "CHeapObj-new");
-}
-
-void* CHeapObj::operator new (size_t size, const std::nothrow_t&  nothrow_constant) {
-  char* p = (char*) os::malloc(size);
-#ifdef ASSERT
-  if (PrintMallocFree) trace_heap_malloc(size, "CHeapObj-new", p);
-#endif
-  return p;
-}
-
-void CHeapObj::operator delete(void* p){
- FreeHeap(p);
-}
-
 void* StackObj::operator new(size_t size)  { ShouldNotCallThis(); return 0; };
 void  StackObj::operator delete(void* p)   { ShouldNotCallThis(); };
 void* _ValueObj::operator new(size_t size)  { ShouldNotCallThis(); return 0; };
 void  _ValueObj::operator delete(void* p)   { ShouldNotCallThis(); };
 
-void* ResourceObj::operator new(size_t size, allocation_type type) {
+void* ResourceObj::operator new(size_t size, allocation_type type, MEMFLAGS flags) {
   address res;
   switch (type) {
    case C_HEAP:
-    res = (address)AllocateHeap(size, "C_Heap: ResourceOBJ");
+    res = (address)AllocateHeap(size, flags, CALLER_PC);
     DEBUG_ONLY(set_allocation_type(res, C_HEAP);)
     break;
    case RESOURCE_AREA:
@@ -184,7 +171,7 @@
 
 // MT-safe pool of chunks to reduce malloc/free thrashing
 // NB: not using Mutex because pools are used before Threads are initialized
-class ChunkPool {
+class ChunkPool: public CHeapObj<mtInternal> {
   Chunk*       _first;        // first cached Chunk; its first word points to next chunk
   size_t       _num_chunks;   // number of unused chunks in pool
   size_t       _num_used;     // number of chunks currently checked out
@@ -210,14 +197,16 @@
    ChunkPool(size_t size) : _size(size) { _first = NULL; _num_chunks = _num_used = 0; }
 
   // Allocate a new chunk from the pool (might expand the pool)
-  void* allocate(size_t bytes) {
+  _NOINLINE_ void* allocate(size_t bytes) {
     assert(bytes == _size, "bad size");
     void* p = NULL;
+    // No VM lock can be taken inside the ThreadCritical lock, so the
+    // os::malloc call must be done outside the ThreadCritical lock because of NMT
     { ThreadCritical tc;
       _num_used++;
       p = get_first();
-      if (p == NULL) p = os::malloc(bytes);
     }
+    if (p == NULL) p = os::malloc(bytes, mtChunk, CURRENT_PC);
     if (p == NULL)
       vm_exit_out_of_memory(bytes, "ChunkPool::allocate");
 
@@ -238,28 +227,34 @@
 
   // Prune the pool
   void free_all_but(size_t n) {
+    Chunk* cur = NULL;
+    Chunk* next;
+    {
     // if we have more than n chunks, free all of them
     ThreadCritical tc;
     if (_num_chunks > n) {
       // free chunks at end of queue, for better locality
-      Chunk* cur = _first;
+        cur = _first;
       for (size_t i = 0; i < (n - 1) && cur != NULL; i++) cur = cur->next();
 
       if (cur != NULL) {
-        Chunk* next = cur->next();
+          next = cur->next();
         cur->set_next(NULL);
         cur = next;
 
-        // Free all remaining chunks
+          _num_chunks = n;
+        }
+      }
+    }
+
+    // Free all remaining chunks, outside of ThreadCritical
+    // to avoid deadlock with NMT
         while(cur != NULL) {
           next = cur->next();
-          os::free(cur);
-          _num_chunks--;
+      os::free(cur, mtChunk);
           cur = next;
         }
       }
-    }
-  }
 
   // Accessors to preallocated pool's
   static ChunkPool* large_pool()  { assert(_large_pool  != NULL, "must be initialized"); return _large_pool;  }
@@ -323,7 +318,7 @@
    case Chunk::medium_size: return ChunkPool::medium_pool()->allocate(bytes);
    case Chunk::init_size:   return ChunkPool::small_pool()->allocate(bytes);
    default: {
-     void *p =  os::malloc(bytes);
+     void *p =  os::malloc(bytes, mtChunk, CALLER_PC);
      if (p == NULL)
        vm_exit_out_of_memory(bytes, "Chunk::new");
      return p;
@@ -337,7 +332,7 @@
    case Chunk::size:        ChunkPool::large_pool()->free(c); break;
    case Chunk::medium_size: ChunkPool::medium_pool()->free(c); break;
    case Chunk::init_size:   ChunkPool::small_pool()->free(c); break;
-   default:                 os::free(c);
+   default:                 os::free(c, mtChunk);
   }
 }
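
The reshuffling in ChunkPool::allocate and free_all_but follows one rule: hold ThreadCritical
only for list surgery and call os::malloc/os::free outside it, since NMT may take locks of its
own. A standalone sketch of the pattern (hypothetical pool; std::mutex stands in for ThreadCritical):

    #include <cstdlib>
    #include <mutex>
    static std::mutex pool_lock;           // stands in for ThreadCritical
    static void*      pool_head = NULL;    // intrusive free list: first word is 'next'
    void* pool_allocate(std::size_t bytes) {
      void* p = NULL;
      {                                    // lock held only long enough to unlink a cached chunk
        std::lock_guard<std::mutex> g(pool_lock);
        if (pool_head != NULL) { p = pool_head; pool_head = *(void**)p; }
      }
      if (p == NULL) p = std::malloc(bytes);  // fall back to malloc outside the lock
      return p;
    }
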
 
@@ -374,6 +369,7 @@
 }
 
 //------------------------------Arena------------------------------------------
+NOT_PRODUCT(volatile jint Arena::_instance_count = 0;)
 
 Arena::Arena(size_t init_size) {
   size_t round_size = (sizeof (char *)) - 1;
@@ -382,6 +378,7 @@
   _hwm = _chunk->bottom();      // Save the cached hwm, max
   _max = _chunk->top();
   set_size_in_bytes(init_size);
+  NOT_PRODUCT(Atomic::inc(&_instance_count);)
 }
 
 Arena::Arena() {
@@ -389,12 +386,15 @@
   _hwm = _chunk->bottom();      // Save the cached hwm, max
   _max = _chunk->top();
   set_size_in_bytes(Chunk::init_size);
+  NOT_PRODUCT(Atomic::inc(&_instance_count);)
 }
 
 Arena::Arena(Arena *a) : _chunk(a->_chunk), _hwm(a->_hwm), _max(a->_max), _first(a->_first) {
   set_size_in_bytes(a->size_in_bytes());
+  NOT_PRODUCT(Atomic::inc(&_instance_count);)
 }
 
+
 Arena *Arena::move_contents(Arena *copy) {
   copy->destruct_contents();
   copy->_chunk = _chunk;
@@ -409,6 +409,42 @@
 
 Arena::~Arena() {
   destruct_contents();
+  NOT_PRODUCT(Atomic::dec(&_instance_count);)
+}
+
+void* Arena::operator new(size_t size) {
+  assert(false, "Use dynamic memory type binding");
+  return NULL;
+}
+
+void* Arena::operator new (size_t size, const std::nothrow_t&  nothrow_constant) {
+  assert(false, "Use dynamic memory type binding");
+  return NULL;
+}
+
+  // dynamic memory type binding
+void* Arena::operator new(size_t size, MEMFLAGS flags) {
+#ifdef ASSERT
+  void* p = (void*)AllocateHeap(size, flags|otArena, CALLER_PC);
+  if (PrintMallocFree) trace_heap_malloc(size, "Arena-new", p);
+  return p;
+#else
+  return (void *) AllocateHeap(size, flags|otArena, CALLER_PC);
+#endif
+}
+
+void* Arena::operator new(size_t size, const std::nothrow_t& nothrow_constant, MEMFLAGS flags) {
+#ifdef ASSERT
+  void* p = os::malloc(size, flags|otArena, CALLER_PC);
+  if (PrintMallocFree) trace_heap_malloc(size, "Arena-new", p);
+  return p;
+#else
+  return os::malloc(size, flags|otArena, CALLER_PC);
+#endif
+}
+
+void Arena::operator delete(void* p) {
+  FreeHeap(p);
 }
 
 // Destroy this arena's contents and reset to empty
@@ -421,6 +457,14 @@
   reset();
 }
 
+// This is a high-traffic method, but many calls actually don't
+// change the size.
+void Arena::set_size_in_bytes(size_t size) {
+  if (_size_in_bytes != size) {
+    _size_in_bytes = size;
+    MemTracker::record_arena_size((address)this, size);
+  }
+}
 
 // Total of all Chunks in arena
 size_t Arena::used() const {
@@ -448,7 +492,6 @@
   if (_chunk == NULL) {
     signal_out_of_memory(len * Chunk::aligned_overhead_size(), "Arena::grow");
   }
-
   if (k) k->set_next(_chunk);   // Append new chunk to end of linked list
   else _first = _chunk;
   _hwm  = _chunk->bottom();     // Save the cached hwm, max
@@ -538,7 +581,7 @@
   assert(UseMallocOnly, "shouldn't call");
   // use malloc, but save pointer in res. area for later freeing
   char** save = (char**)internal_malloc_4(sizeof(char*));
-  return (*save = (char*)os::malloc(size));
+  return (*save = (char*)os::malloc(size, mtChunk));
 }
 
 // for debugging with UseMallocOnly
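
Since Arena's plain operator new now asserts, arenas are created through the MEMFLAGS
placement form added above. A minimal sketch (the category is chosen for illustration):

    Arena* a = new (mtCompiler) Arena();  // operator new(size_t, MEMFLAGS); tagged mtCompiler|otArena
    void*  p = a->Amalloc(64);            // fast pointer-bump allocation from the current chunk
    delete a;                             // ~Arena() runs destruct_contents(), then FreeHeap
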
--- a/src/share/vm/memory/allocation.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/allocation.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,6 +40,18 @@
 #define ARENA_ALIGN_MASK (~((size_t)ARENA_ALIGN_M1))
 #define ARENA_ALIGN(x) ((((size_t)(x)) + ARENA_ALIGN_M1) & ARENA_ALIGN_MASK)
 
+
+// noinline attribute
+#ifdef _WINDOWS
+  #define _NOINLINE_  __declspec(noinline)
+#else
+  #if __GNUC__ < 3    // gcc 2.x does not support noinline attribute
+    #define _NOINLINE_
+  #else
+    #define _NOINLINE_ __attribute__ ((noinline))
+  #endif
+#endif
+
 // All classes in the virtual machine must be subclassed
 // by one of the following allocation classes:
 //
@@ -98,12 +110,72 @@
 };
 #endif
 
-class CHeapObj ALLOCATION_SUPER_CLASS_SPEC {
+
+/*
+ * MemoryType bitmap layout:
+ * | 16 15 14 13 12 11 10 09 | 08 07 06 05 | 04 03 02 01 |
+ * |      memory type        |   object    | reserved    |
+ * |                         |     type    |             |
+ */
+enum MemoryType {
+  // Memory type by sub systems. It occupies lower byte.
+  mtNone              = 0x0000,  // undefined
+  mtClass             = 0x0100,  // memory class for Java classes
+  mtThread            = 0x0200,  // memory for thread objects
+  mtThreadStack       = 0x0300,
+  mtCode              = 0x0400,  // memory for generated code
+  mtGC                = 0x0500,  // memory for GC
+  mtCompiler          = 0x0600,  // memory for compiler
+  mtInternal          = 0x0700,  // memory used by VM, but does not belong to
+                                 // any of above categories, and not used for
+                                 // native memory tracking
+  mtOther             = 0x0800,  // memory not used by VM
+  mtSymbol            = 0x0900,  // symbol
+  mtNMT               = 0x0A00,  // memory used by native memory tracking
+  mtChunk             = 0x0B00,  // chunk that holds content of arenas
+  mtJavaHeap          = 0x0C00,  // Java heap
+  mtDontTrack         = 0x0D00,  // memory we do not or cannot track
+  mt_number_of_types  = 0x000C,  // number of memory types
+  mt_masks            = 0x7F00,
+
+  // object type mask
+  otArena             = 0x0010, // an arena object
+  otNMTRecorder       = 0x0020, // memory recorder object
+  ot_masks            = 0x00F0
+};
+
+#define IS_MEMORY_TYPE(flags, type) ((flags & mt_masks) == type)
+#define HAS_VALID_MEMORY_TYPE(flags)((flags & mt_masks) != mtNone)
+#define FLAGS_TO_MEMORY_TYPE(flags) (flags & mt_masks)
+
+#define IS_ARENA_OBJ(flags)         ((flags & ot_masks) == otArena)
+#define IS_NMT_RECORDER(flags)      ((flags & ot_masks) == otNMTRecorder)
+#define NMT_CAN_TRACK(flags)        (!IS_NMT_RECORDER(flags) && !(IS_MEMORY_TYPE(flags, mtDontTrack)))
+
+typedef unsigned short MEMFLAGS;
+
+extern bool NMT_track_callsite;
+
+// debug build does not inline
+#if defined(_DEBUG_)
+  #define CURRENT_PC       (NMT_track_callsite ? os::get_caller_pc(1) : 0)
+  #define CALLER_PC        (NMT_track_callsite ? os::get_caller_pc(2) : 0)
+  #define CALLER_CALLER_PC (NMT_track_callsite ? os::get_caller_pc(3) : 0)
+#else
+  #define CURRENT_PC      (NMT_track_callsite? os::get_caller_pc(0) : 0)
+  #define CALLER_PC       (NMT_track_callsite ? os::get_caller_pc(1) : 0)
+  #define CALLER_CALLER_PC (NMT_track_callsite ? os::get_caller_pc(2) : 0)
+#endif
+
+
+
+template <MEMFLAGS F> class CHeapObj ALLOCATION_SUPER_CLASS_SPEC {
  public:
-  void* operator new(size_t size);
-  void* operator new (size_t size, const std::nothrow_t&  nothrow_constant);
+  _NOINLINE_ void* operator new(size_t size, address caller_pc = 0);
+  _NOINLINE_ void* operator new (size_t size, const std::nothrow_t&  nothrow_constant,
+                               address caller_pc = 0);
+
   void  operator delete(void* p);
-  void* new_array(size_t size);
 };
 
 // Base class for objects allocated on the stack only.
@@ -150,7 +222,7 @@
 
 //------------------------------Chunk------------------------------------------
 // Linked list of raw memory chunks
-class Chunk: public CHeapObj {
+class Chunk: CHeapObj<mtChunk> {
   friend class VMStructs;
 
  protected:
@@ -197,7 +269,7 @@
 
 //------------------------------Arena------------------------------------------
 // Fast allocation of memory
-class Arena: public CHeapObj {
+class Arena : public CHeapObj<mtNone|otArena> {
 protected:
   friend class ResourceMark;
   friend class HandleMark;
@@ -208,7 +280,8 @@
   Chunk *_chunk;                // current chunk
   char *_hwm, *_max;            // High water mark and max in current chunk
   void* grow(size_t x);         // Get a new Chunk of at least size x
-  NOT_PRODUCT(size_t _size_in_bytes;) // Size of arena (used for memory usage tracing)
+  size_t _size_in_bytes;        // Size of arena (used for native memory tracking)
+
   NOT_PRODUCT(static julong _bytes_allocated;) // total #bytes allocated since start
   friend class AllocStats;
   debug_only(void* malloc(size_t size);)
@@ -231,6 +304,15 @@
   void  destruct_contents();
   char* hwm() const             { return _hwm; }
 
+  // new operators
+  void* operator new (size_t size);
+  void* operator new (size_t size, const std::nothrow_t& nothrow_constant);
+
+  // dynamic memory type tagging
+  void* operator new(size_t size, MEMFLAGS flags);
+  void* operator new(size_t size, const std::nothrow_t& nothrow_constant, MEMFLAGS flags);
+  void  operator delete(void* p);
+
   // Fast allocate in the arena.  Common case is: pointer test + increment.
   void* Amalloc(size_t x) {
     assert(is_power_of_2(ARENA_AMALLOC_ALIGNMENT) , "should be a power of 2");
@@ -306,16 +388,20 @@
   size_t used() const;
 
   // Total # of bytes used
-  size_t size_in_bytes() const         NOT_PRODUCT({  return _size_in_bytes; }) PRODUCT_RETURN0;
-  void set_size_in_bytes(size_t size)  NOT_PRODUCT({ _size_in_bytes = size;  }) PRODUCT_RETURN;
+  size_t size_in_bytes() const         {  return _size_in_bytes; };
+  void set_size_in_bytes(size_t size);
+
   static void free_malloced_objects(Chunk* chunk, char* hwm, char* max, char* hwm2)  PRODUCT_RETURN;
   static void free_all(char** start, char** end)                                     PRODUCT_RETURN;
 
+  // how many arena instances
+  NOT_PRODUCT(static volatile jint _instance_count;)
 private:
   // Reset this Arena to empty, access will trigger grow if necessary
   void   reset(void) {
     _first = _chunk = NULL;
     _hwm = _max = NULL;
+    set_size_in_bytes(0);
   }
 };
 
@@ -373,7 +459,7 @@
 #endif // ASSERT
 
  public:
-  void* operator new(size_t size, allocation_type type);
+  void* operator new(size_t size, allocation_type type, MEMFLAGS flags);
   void* operator new(size_t size, Arena *arena) {
       address res = (address)arena->Amalloc(size);
       DEBUG_ONLY(set_allocation_type(res, ARENA);)
@@ -409,17 +495,28 @@
 #define NEW_RESOURCE_OBJ(type)\
   NEW_RESOURCE_ARRAY(type, 1)
 
-#define NEW_C_HEAP_ARRAY(type, size)\
-  (type*) (AllocateHeap((size) * sizeof(type), XSTR(type) " in " __FILE__))
+#define NEW_C_HEAP_ARRAY(type, size, memflags)\
+  (type*) (AllocateHeap((size) * sizeof(type), memflags))
 
-#define REALLOC_C_HEAP_ARRAY(type, old, size)\
-  (type*) (ReallocateHeap((char*)old, (size) * sizeof(type), XSTR(type) " in " __FILE__))
+#define REALLOC_C_HEAP_ARRAY(type, old, size, memflags)\
+  (type*) (ReallocateHeap((char*)old, (size) * sizeof(type), memflags))
+
+#define FREE_C_HEAP_ARRAY(type,old,memflags) \
+  FreeHeap((char*)(old), memflags)
 
-#define FREE_C_HEAP_ARRAY(type,old) \
-  FreeHeap((char*)(old))
+#define NEW_C_HEAP_OBJ(type, memflags)\
+  NEW_C_HEAP_ARRAY(type, 1, memflags)
+
+
+#define NEW_C_HEAP_ARRAY2(type, size, memflags, pc)\
+  (type*) (AllocateHeap((size) * sizeof(type), memflags, pc))
 
-#define NEW_C_HEAP_OBJ(type)\
-  NEW_C_HEAP_ARRAY(type, 1)
+#define REALLOC_C_HEAP_ARRAY2(type, old, size, memflags, pc)\
+  (type*) (ReallocateHeap((char*)old, (size) * sizeof(type), memflags, pc))
+
+#define NEW_C_HEAP_OBJ2(type, memflags, pc)\
+  NEW_C_HEAP_ARRAY2(type, 1, memflags, pc)
+
 
 extern bool warn_new_operator;
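
A small sketch of how the helper macros above decompose a combined flag word (constants are
from the enum above; the variable itself is hypothetical):

    MEMFLAGS f = (MEMFLAGS)(mtGC | otArena);   // a GC-owned arena, analogous to Arena's base class
    bool is_gc    = IS_MEMORY_TYPE(f, mtGC);   // true: memory-type byte matches
    bool is_arena = IS_ARENA_OBJ(f);           // true: object-type nibble is otArena
    bool tracked  = NMT_CAN_TRACK(f);          // true: neither a recorder nor mtDontTrack
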
 
--- a/src/share/vm/memory/allocation.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/allocation.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -48,33 +48,60 @@
 #endif
 
 // allocate using malloc; will fail if no memory available
-inline char* AllocateHeap(size_t size, const char* name = NULL) {
-  char* p = (char*) os::malloc(size);
+inline char* AllocateHeap(size_t size, MEMFLAGS flags, address pc = 0) {
+  if (pc == 0) {
+    pc = CURRENT_PC;
+  }
+  char* p = (char*) os::malloc(size, flags, pc);
   #ifdef ASSERT
-  if (PrintMallocFree) trace_heap_malloc(size, name, p);
-  #else
-  Unused_Variable(name);
+  if (PrintMallocFree) trace_heap_malloc(size, "AllocateHeap", p);
   #endif
-  if (p == NULL) vm_exit_out_of_memory(size, name);
+  if (p == NULL) vm_exit_out_of_memory(size, "AllocateHeap");
+  return p;
+}
+
+inline char* ReallocateHeap(char *old, size_t size, MEMFLAGS flags) {
+  char* p = (char*) os::realloc(old, size, flags, CURRENT_PC);
+  #ifdef ASSERT
+  if (PrintMallocFree) trace_heap_malloc(size, "ReallocateHeap", p);
+  #endif
+  if (p == NULL) vm_exit_out_of_memory(size, "ReallocateHeap");
   return p;
 }
 
-inline char* ReallocateHeap(char *old, size_t size, const char* name = NULL) {
-  char* p = (char*) os::realloc(old,size);
-  #ifdef ASSERT
-  if (PrintMallocFree) trace_heap_malloc(size, name, p);
-  #else
-  Unused_Variable(name);
-  #endif
-  if (p == NULL) vm_exit_out_of_memory(size, name);
-  return p;
-}
-
-inline void FreeHeap(void* p) {
+inline void FreeHeap(void* p, MEMFLAGS memflags = mtInternal) {
   #ifdef ASSERT
   if (PrintMallocFree) trace_heap_free(p);
   #endif
-  os::free(p);
+  os::free(p, memflags);
 }
 
+
+template <MEMFLAGS F> void* CHeapObj<F>::operator new(size_t size,
+      address caller_pc){
+#ifdef ASSERT
+    void* p = (void*)AllocateHeap(size, F, (caller_pc != 0 ? caller_pc : CALLER_PC));
+    if (PrintMallocFree) trace_heap_malloc(size, "CHeapObj-new", p);
+    return p;
+#else
+    return (void *) AllocateHeap(size, F, (caller_pc != 0 ? caller_pc : CALLER_PC));
+#endif
+  }
+
+template <MEMFLAGS F> void* CHeapObj<F>::operator new (size_t size,
+  const std::nothrow_t&  nothrow_constant, address caller_pc) {
+#ifdef ASSERT
+    void* p = os::malloc(size, F, (caller_pc != 0 ? caller_pc : CALLER_PC));
+    if (PrintMallocFree) trace_heap_malloc(size, "CHeapObj-new", p);
+    return p;
+#else
+    return os::malloc(size, F, (caller_pc != 0 ? caller_pc : CALLER_PC));
+#endif
+}
+
+template <MEMFLAGS F> void CHeapObj<F>::operator delete(void* p){
+   FreeHeap(p, F);
+}
+
+
 #endif // SHARE_VM_MEMORY_ALLOCATION_INLINE_HPP
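
With CHeapObj templatized over MEMFLAGS, a subclass states its NMT category once and both
allocation operators are tagged automatically. A minimal sketch (hypothetical class):

    class CounterSketch : public CHeapObj<mtInternal> {  // hypothetical subclass
     public:
      jint _value;
    };
    CounterSketch* c = new CounterSketch();  // operator new -> AllocateHeap(size, mtInternal, pc)
    delete c;                                // operator delete -> FreeHeap(p, mtInternal)
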
--- a/src/share/vm/memory/barrierSet.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/barrierSet.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -31,7 +31,7 @@
 // This class provides the interface between a barrier implementation and
 // the rest of the system.
 
-class BarrierSet: public CHeapObj {
+class BarrierSet: public CHeapObj<mtGC> {
   friend class VMStructs;
 public:
   enum Name {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/binaryTreeDictionary.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,1346 @@
+/*
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/shared/allocationStats.hpp"
+#include "memory/binaryTreeDictionary.hpp"
+#include "runtime/globals.hpp"
+#include "utilities/ostream.hpp"
+#ifndef SERIALGC
+#include "gc_implementation/shared/spaceDecorator.hpp"
+#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
+#endif // SERIALGC
+
+////////////////////////////////////////////////////////////////////////////////
+// A binary tree based search structure for free blocks.
+// This is currently used in the Concurrent Mark&Sweep implementation.
+////////////////////////////////////////////////////////////////////////////////
+
+template <class Chunk>
+TreeChunk<Chunk>* TreeChunk<Chunk>::as_TreeChunk(Chunk* fc) {
+  // Do some assertion checking here.
+  return (TreeChunk<Chunk>*) fc;
+}
+
+template <class Chunk>
+void TreeChunk<Chunk>::verify_tree_chunk_list() const {
+  TreeChunk<Chunk>* nextTC = (TreeChunk<Chunk>*)next();
+  if (prev() != NULL) { // interior list node shouldn't have tree fields
+    guarantee(embedded_list()->parent() == NULL && embedded_list()->left() == NULL &&
+              embedded_list()->right()  == NULL, "should be clear");
+  }
+  if (nextTC != NULL) {
+    guarantee(as_TreeChunk(nextTC->prev()) == this, "broken chain");
+    guarantee(nextTC->size() == size(), "wrong size");
+    nextTC->verify_tree_chunk_list();
+  }
+}
+
+
+template <class Chunk>
+TreeList<Chunk>* TreeList<Chunk>::as_TreeList(TreeChunk<Chunk>* tc) {
+  // This first free chunk in the list will be the tree list.
+  assert(tc->size() >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "Chunk is too small for a TreeChunk");
+  TreeList<Chunk>* tl = tc->embedded_list();
+  tc->set_list(tl);
+#ifdef ASSERT
+  tl->set_protecting_lock(NULL);
+#endif
+  tl->set_hint(0);
+  tl->set_size(tc->size());
+  tl->link_head(tc);
+  tl->link_tail(tc);
+  tl->set_count(1);
+  tl->init_statistics(true /* split_birth */);
+  tl->set_parent(NULL);
+  tl->set_left(NULL);
+  tl->set_right(NULL);
+  return tl;
+}
+
+template <class Chunk>
+TreeList<Chunk>* TreeList<Chunk>::as_TreeList(HeapWord* addr, size_t size) {
+  TreeChunk<Chunk>* tc = (TreeChunk<Chunk>*) addr;
+  assert(size >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "Chunk is too small for a TreeChunk");
+  // The space in the heap will have been mangled initially but
+  // is not remangled when a free chunk is returned to the free list
+  // (since it is used to maintain the chunk on the free list).
+  assert((ZapUnusedHeapArea &&
+          SpaceMangler::is_mangled((HeapWord*) tc->size_addr()) &&
+          SpaceMangler::is_mangled((HeapWord*) tc->prev_addr()) &&
+          SpaceMangler::is_mangled((HeapWord*) tc->next_addr())) ||
+          (tc->size() == 0 && tc->prev() == NULL && tc->next() == NULL),
+    "Space should be clear or mangled");
+  tc->set_size(size);
+  tc->link_prev(NULL);
+  tc->link_next(NULL);
+  TreeList<Chunk>* tl = TreeList<Chunk>::as_TreeList(tc);
+  return tl;
+}
+
+template <class Chunk>
+TreeList<Chunk>* TreeList<Chunk>::remove_chunk_replace_if_needed(TreeChunk<Chunk>* tc) {
+
+  TreeList<Chunk>* retTL = this;
+  Chunk* list = head();
+  assert(!list || list != list->next(), "Chunk on list twice");
+  assert(tc != NULL, "Chunk being removed is NULL");
+  assert(parent() == NULL || this == parent()->left() ||
+    this == parent()->right(), "list is inconsistent");
+  assert(tc->is_free(), "Header is not marked correctly");
+  assert(head() == NULL || head()->prev() == NULL, "list invariant");
+  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
+
+  Chunk* prevFC = tc->prev();
+  TreeChunk<Chunk>* nextTC = TreeChunk<Chunk>::as_TreeChunk(tc->next());
+  assert(list != NULL, "should have at least the target chunk");
+
+  // Is this the first item on the list?
+  if (tc == list) {
+    // The "getChunk..." functions for a TreeList<Chunk> will not return the
+    // first chunk in the list unless it is the last chunk in the list
+    // because the first chunk is also acting as the tree node.
+    // list can be the start of a free range.  Free ranges are removed
+    // list can be the start of a free range.  Free ranges are removed
+    // from the free lists so that they are not available to be
+    // allocated when the sweeper yields (giving up the free list lock)
+    // to allow mutator activity.  If this chunk is the first in the
+    // list and is not the last in the list, do the work to copy the
+    // TreeList<Chunk> from the first chunk to the next chunk and update all
+    // the TreeList<Chunk> pointers in the chunks in the list.
+    if (nextTC == NULL) {
+      assert(prevFC == NULL, "Not last chunk in the list");
+      set_tail(NULL);
+      set_head(NULL);
+    } else {
+      // copy embedded list.
+      nextTC->set_embedded_list(tc->embedded_list());
+      retTL = nextTC->embedded_list();
+      // Fix the pointer to the list in each chunk in the list.
+      // This can be slow for a long list.  Consider having
+      // an option that does not allow the first chunk on the
+      // list to be coalesced.
+      for (TreeChunk<Chunk>* curTC = nextTC; curTC != NULL;
+          curTC = TreeChunk<Chunk>::as_TreeChunk(curTC->next())) {
+        curTC->set_list(retTL);
+      }
+      // Fix the parent to point to the new TreeList<Chunk>.
+      if (retTL->parent() != NULL) {
+        if (this == retTL->parent()->left()) {
+          retTL->parent()->set_left(retTL);
+        } else {
+          assert(this == retTL->parent()->right(), "Parent is incorrect");
+          retTL->parent()->set_right(retTL);
+        }
+      }
+      // Fix the children's parent pointers to point to the
+      // new list.
+      assert(right() == retTL->right(), "Should have been copied");
+      if (retTL->right() != NULL) {
+        retTL->right()->set_parent(retTL);
+      }
+      assert(left() == retTL->left(), "Should have been copied");
+      if (retTL->left() != NULL) {
+        retTL->left()->set_parent(retTL);
+      }
+      retTL->link_head(nextTC);
+      assert(nextTC->is_free(), "Should be a free chunk");
+    }
+  } else {
+    if (nextTC == NULL) {
+      // Removing chunk at tail of list
+      link_tail(prevFC);
+    }
+    // Chunk is interior to the list
+    prevFC->link_after(nextTC);
+  }
+
+  // Below this point the embedded TreeList<Chunk> being used for the
+  // tree node may have changed. Don't use "this"
+  // TreeList<Chunk>*.
+  // chunk should still be a free chunk (bit set in _prev)
+  assert(!retTL->head() || retTL->size() == retTL->head()->size(),
+    "Wrong sized chunk in list");
+  debug_only(
+    tc->link_prev(NULL);
+    tc->link_next(NULL);
+    tc->set_list(NULL);
+    bool prev_found = false;
+    bool next_found = false;
+    for (Chunk* curFC = retTL->head();
+         curFC != NULL; curFC = curFC->next()) {
+      assert(curFC != tc, "Chunk is still in list");
+      if (curFC == prevFC) {
+        prev_found = true;
+      }
+      if (curFC == nextTC) {
+        next_found = true;
+      }
+    }
+    assert(prevFC == NULL || prev_found, "Chunk was lost from list");
+    assert(nextTC == NULL || next_found, "Chunk was lost from list");
+    assert(retTL->parent() == NULL ||
+           retTL == retTL->parent()->left() ||
+           retTL == retTL->parent()->right(),
+           "list is inconsistent");
+  )
+  retTL->decrement_count();
+
+  assert(tc->is_free(), "Should still be a free chunk");
+  assert(retTL->head() == NULL || retTL->head()->prev() == NULL,
+    "list invariant");
+  assert(retTL->tail() == NULL || retTL->tail()->next() == NULL,
+    "list invariant");
+  return retTL;
+}
+
+template <class Chunk>
+void TreeList<Chunk>::return_chunk_at_tail(TreeChunk<Chunk>* chunk) {
+  assert(chunk != NULL, "returning NULL chunk");
+  assert(chunk->list() == this, "list should be set for chunk");
+  assert(tail() != NULL, "The tree list is embedded in the first chunk");
+  // which means that the list can never be empty.
+  assert(!verify_chunk_in_free_list(chunk), "Double entry");
+  assert(head() == NULL || head()->prev() == NULL, "list invariant");
+  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
+
+  Chunk* fc = tail();
+  fc->link_after(chunk);
+  link_tail(chunk);
+
+  assert(!tail() || size() == tail()->size(), "Wrong sized chunk in list");
+  increment_count();
+  debug_only(increment_returned_bytes_by(chunk->size()*sizeof(HeapWord));)
+  assert(head() == NULL || head()->prev() == NULL, "list invariant");
+  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
+}
+
+// Add this chunk at the head of the list.  "At the head of the list"
+// is defined to be after the chunk pointed to by head().  This is
+// because the TreeList<Chunk> is embedded in the first TreeChunk<Chunk> in the
+// list.  See the definition of TreeChunk<Chunk>.
+template <class Chunk>
+void TreeList<Chunk>::return_chunk_at_head(TreeChunk<Chunk>* chunk) {
+  assert(chunk->list() == this, "list should be set for chunk");
+  assert(head() != NULL, "The tree list is embedded in the first chunk");
+  assert(chunk != NULL, "returning NULL chunk");
+  assert(!verify_chunk_in_free_list(chunk), "Double entry");
+  assert(head() == NULL || head()->prev() == NULL, "list invariant");
+  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
+
+  Chunk* fc = head()->next();
+  if (fc != NULL) {
+    chunk->link_after(fc);
+  } else {
+    assert(tail() == NULL, "List is inconsistent");
+    link_tail(chunk);
+  }
+  head()->link_after(chunk);
+  assert(!head() || size() == head()->size(), "Wrong sized chunk in list");
+  increment_count();
+  debug_only(increment_returned_bytes_by(chunk->size()*sizeof(HeapWord));)
+  assert(head() == NULL || head()->prev() == NULL, "list invariant");
+  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
+}
+
+template <class Chunk>
+TreeChunk<Chunk>* TreeList<Chunk>::head_as_TreeChunk() {
+  assert(head() == NULL || TreeChunk<Chunk>::as_TreeChunk(head())->list() == this,
+    "Wrong type of chunk?");
+  return TreeChunk<Chunk>::as_TreeChunk(head());
+}
+
+template <class Chunk>
+TreeChunk<Chunk>* TreeList<Chunk>::first_available() {
+  assert(head() != NULL, "The head of the list cannot be NULL");
+  Chunk* fc = head()->next();
+  TreeChunk<Chunk>* retTC;
+  if (fc == NULL) {
+    retTC = head_as_TreeChunk();
+  } else {
+    retTC = TreeChunk<Chunk>::as_TreeChunk(fc);
+  }
+  assert(retTC->list() == this, "Wrong type of chunk.");
+  return retTC;
+}
+
+// Returns the block with the largest heap address amongst
+// those in the list for this size; potentially slow and expensive,
+// use with caution!
+template <class Chunk>
+TreeChunk<Chunk>* TreeList<Chunk>::largest_address() {
+  assert(head() != NULL, "The head of the list cannot be NULL");
+  Chunk* fc = head()->next();
+  TreeChunk<Chunk>* retTC;
+  if (fc == NULL) {
+    retTC = head_as_TreeChunk();
+  } else {
+    // walk down the list and return the one with the highest
+    // heap address among chunks of this size.
+    Chunk* last = fc;
+    while (fc->next() != NULL) {
+      if ((HeapWord*)last < (HeapWord*)fc) {
+        last = fc;
+      }
+      fc = fc->next();
+    }
+    retTC = TreeChunk<Chunk>::as_TreeChunk(last);
+  }
+  assert(retTC->list() == this, "Wrong type of chunk.");
+  return retTC;
+}
+
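+// Usage sketch (illustrative only; 'mr' and 'sz' are hypothetical, and FreeChunk is the
+// CMS instantiation pulled in by the includes above):
+//   BinaryTreeDictionary<FreeChunk> dict(mr, /*adaptive_freelists=*/true, /*splay=*/false);
+//   TreeChunk<FreeChunk>* tc =
+//       dict.get_chunk_from_tree(sz, FreeBlockDictionary<FreeChunk>::exactly, false);
+//   if (tc != NULL) { /* an exact-size chunk, removed from the tree; caller now owns it */ }
+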
+template <class Chunk>
+BinaryTreeDictionary<Chunk>::BinaryTreeDictionary(bool adaptive_freelists, bool splay) :
+  _splay(splay), _adaptive_freelists(adaptive_freelists),
+  _total_size(0), _total_free_blocks(0), _root(0) {}
+
+template <class Chunk>
+BinaryTreeDictionary<Chunk>::BinaryTreeDictionary(MemRegion mr,
+                                           bool adaptive_freelists,
+                                           bool splay):
+  _adaptive_freelists(adaptive_freelists), _splay(splay)
+{
+  assert(mr.word_size() >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "minimum chunk size");
+
+  reset(mr);
+  assert(root()->left() == NULL, "reset check failed");
+  assert(root()->right() == NULL, "reset check failed");
+  assert(root()->head()->next() == NULL, "reset check failed");
+  assert(root()->head()->prev() == NULL, "reset check failed");
+  assert(total_size() == root()->size(), "reset check failed");
+  assert(total_free_blocks() == 1, "reset check failed");
+}
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::inc_total_size(size_t inc) {
+  _total_size = _total_size + inc;
+}
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::dec_total_size(size_t dec) {
+  _total_size = _total_size - dec;
+}
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::reset(MemRegion mr) {
+  assert(mr.word_size() >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "minimum chunk size");
+  set_root(TreeList<Chunk>::as_TreeList(mr.start(), mr.word_size()));
+  set_total_size(mr.word_size());
+  set_total_free_blocks(1);
+}
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::reset(HeapWord* addr, size_t byte_size) {
+  MemRegion mr(addr, heap_word_size(byte_size));
+  reset(mr);
+}
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::reset() {
+  set_root(NULL);
+  set_total_size(0);
+  set_total_free_blocks(0);
+}
+
+// Get a free block of size at least "size" from the tree, or NULL.
+// If a splay step is requested, the removal algorithm (only) incorporates
+// a splay step as follows:
+// . the search proceeds down the tree looking for a possible
+//   match. At the (closest) matching location, an appropriate splay step is
+//   applied (zig, zig-zig or zig-zag). A chunk of the appropriate size is
+//   then returned if available, and if it's the last chunk, the node is
+//   deleted. A deleted node is replaced in place by its tree successor.
+template <class Chunk>
+TreeChunk<Chunk>*
+BinaryTreeDictionary<Chunk>::get_chunk_from_tree(size_t size, enum FreeBlockDictionary<Chunk>::Dither dither, bool splay)
+{
+  TreeList<Chunk> *curTL, *prevTL;
+  TreeChunk<Chunk>* retTC = NULL;
+  assert(size >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "minimum chunk size");
+  if (FLSVerifyDictionary) {
+    verify_tree();
+  }
+  // starting at the root, work downwards trying to find match.
+  // Remember the last node of size too great or too small.
+  for (prevTL = curTL = root(); curTL != NULL;) {
+    if (curTL->size() == size) {        // exact match
+      break;
+    }
+    prevTL = curTL;
+    if (curTL->size() < size) {        // proceed to right sub-tree
+      curTL = curTL->right();
+    } else {                           // proceed to left sub-tree
+      assert(curTL->size() > size, "size inconsistency");
+      curTL = curTL->left();
+    }
+  }
+  if (curTL == NULL) { // couldn't find exact match
+
+    if (dither == FreeBlockDictionary<Chunk>::exactly) return NULL;
+
+    // try and find the next larger size by walking back up the search path
+    for (curTL = prevTL; curTL != NULL;) {
+      if (curTL->size() >= size) break;
+      else curTL = curTL->parent();
+    }
+    assert(curTL == NULL || curTL->count() > 0,
+      "An empty list should not be in the tree");
+  }
+  if (curTL != NULL) {
+    assert(curTL->size() >= size, "size inconsistency");
+    if (adaptive_freelists()) {
+
+      // A candidate chunk has been found.  If it is already under
+      // populated, get a chunk associated with the hint for this
+      // chunk.
+      if (curTL->surplus() <= 0) {
+        /* Use the hint to find a size with a surplus, and reset the hint. */
+        TreeList<Chunk>* hintTL = curTL;
+        while (hintTL->hint() != 0) {
+          assert(hintTL->hint() == 0 || hintTL->hint() > hintTL->size(),
+            "hint points in the wrong direction");
+          hintTL = find_list(hintTL->hint());
+          assert(curTL != hintTL, "Infinite loop");
+          if (hintTL == NULL ||
+              hintTL == curTL /* Should not happen but protect against it */ ) {
+            // No useful hint.  Set the hint to 0 and go on.
+            curTL->set_hint(0);
+            break;
+          }
+          assert(hintTL->size() > size, "hint is inconsistent");
+          if (hintTL->surplus() > 0) {
+            // The hint led to a list that has a surplus.  Use it.
+            // Set the hint for the candidate to an overpopulated
+            // size.
+            curTL->set_hint(hintTL->size());
+            // Change the candidate.
+            curTL = hintTL;
+            break;
+          }
+          // The evm code reset the hint of the candidate
+          // at this interim point.  Why?  That seems to leave
+          // the hint pointing to a list that didn't work.
+          // curTL->set_hint(hintTL->size());
+        }
+      }
+    }
+    // don't waste time splaying if the chunk is a singleton
+    if (splay && curTL->head()->next() != NULL) {
+      semi_splay_step(curTL);
+    }
+    retTC = curTL->first_available();
+    assert((retTC != NULL) && (curTL->count() > 0),
+      "A list in the binary tree should not be NULL");
+    assert(retTC->size() >= size,
+      "A chunk of the wrong size was found");
+    remove_chunk_from_tree(retTC);
+    assert(retTC->is_free(), "Header is not marked correctly");
+  }
+
+  if (FLSVerifyDictionary) {
+    verify();
+  }
+  return retTC;
+}
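+
+// A typical retrieval from the dictionary might look like the sketch below
+// (illustrative only; "dict" and the choice of the atLeast dither are
+// assumptions, not part of this change):
+//
+//   BinaryTreeDictionary<FreeChunk>* dict = ...;
+//   TreeChunk<FreeChunk>* tc =
+//     dict->get_chunk_from_tree(64 /* words */,
+//                               FreeBlockDictionary<FreeChunk>::atLeast,
+//                               false /* no splay */);
+//   if (tc != NULL) {
+//     // tc->size() >= 64 and tc has been unlinked from the tree
+//   }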
+
+template <class Chunk>
+TreeList<Chunk>* BinaryTreeDictionary<Chunk>::find_list(size_t size) const {
+  TreeList<Chunk>* curTL;
+  for (curTL = root(); curTL != NULL;) {
+    if (curTL->size() == size) {        // exact match
+      break;
+    }
+
+    if (curTL->size() < size) {        // proceed to right sub-tree
+      curTL = curTL->right();
+    } else {                           // proceed to left sub-tree
+      assert(curTL->size() > size, "size inconsistency");
+      curTL = curTL->left();
+    }
+  }
+  return curTL;
+}
+
+
+template <class Chunk>
+bool BinaryTreeDictionary<Chunk>::verify_chunk_in_free_list(Chunk* tc) const {
+  size_t size = tc->size();
+  TreeList<Chunk>* tl = find_list(size);
+  if (tl == NULL) {
+    return false;
+  } else {
+    return tl->verify_chunk_in_free_list(tc);
+  }
+}
+
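+// Returns the largest free chunk in the dictionary: walk the right spine of
+// the tree to the list with the largest size, then pick the chunk with the
+// highest heap address on that list.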
+template <class Chunk>
+Chunk* BinaryTreeDictionary<Chunk>::find_largest_dict() const {
+  TreeList<Chunk> *curTL = root();
+  if (curTL != NULL) {
+    while(curTL->right() != NULL) curTL = curTL->right();
+    return curTL->largest_address();
+  } else {
+    return NULL;
+  }
+}
+
+// Remove the current chunk from the tree.  If it is not the last
+// chunk in a list on a tree node, just unlink it.
+// If it is the last chunk in the list (the next link is NULL),
+// remove the node and repair the tree.
+template <class Chunk>
+TreeChunk<Chunk>*
+BinaryTreeDictionary<Chunk>::remove_chunk_from_tree(TreeChunk<Chunk>* tc) {
+  assert(tc != NULL, "Should not call with a NULL chunk");
+  assert(tc->is_free(), "Header is not marked correctly");
+
+  TreeList<Chunk> *newTL, *parentTL;
+  TreeChunk<Chunk>* retTC;
+  TreeList<Chunk>* tl = tc->list();
+  debug_only(
+    bool removing_only_chunk = false;
+    if (tl == _root) {
+      if ((_root->left() == NULL) && (_root->right() == NULL)) {
+        if (_root->count() == 1) {
+          assert(_root->head() == tc, "Should only be this one chunk");
+          removing_only_chunk = true;
+        }
+      }
+    }
+  )
+  assert(tl != NULL, "List should be set");
+  assert(tl->parent() == NULL || tl == tl->parent()->left() ||
+         tl == tl->parent()->right(), "list is inconsistent");
+
+  bool complicated_splice = false;
+
+  retTC = tc;
+  // Removing this chunk can have the side effect of changing the node
+  // (TreeList<Chunk>*) in the tree.  If the node is the root, update it.
+  TreeList<Chunk>* replacementTL = tl->remove_chunk_replace_if_needed(tc);
+  assert(tc->is_free(), "Chunk should still be free");
+  assert(replacementTL->parent() == NULL ||
+         replacementTL == replacementTL->parent()->left() ||
+         replacementTL == replacementTL->parent()->right(),
+         "list is inconsistent");
+  if (tl == root()) {
+    assert(replacementTL->parent() == NULL, "Incorrectly replacing root");
+    set_root(replacementTL);
+  }
+  debug_only(
+    if (tl != replacementTL) {
+      assert(replacementTL->head() != NULL,
+        "If the tree list was replaced, it should not be a NULL list");
+      TreeList<Chunk>* rhl = replacementTL->head_as_TreeChunk()->list();
+      TreeList<Chunk>* rtl = TreeChunk<Chunk>::as_TreeChunk(replacementTL->tail())->list();
+      assert(rhl == replacementTL, "Broken head");
+      assert(rtl == replacementTL, "Broken tail");
+      assert(replacementTL->size() == tc->size(),  "Broken size");
+    }
+  )
+
+  // Does the tree need to be repaired?
+  if (replacementTL->count() == 0) {
+    assert(replacementTL->head() == NULL &&
+           replacementTL->tail() == NULL, "list count is incorrect");
+    // Find the replacement node for the (soon to be empty) node being removed.
+    // if we have a single (or no) child, splice child in our stead
+    if (replacementTL->left() == NULL) {
+      // left is NULL so pick right.  right may also be NULL.
+      newTL = replacementTL->right();
+      debug_only(replacementTL->clear_right();)
+    } else if (replacementTL->right() == NULL) {
+      // right is NULL
+      newTL = replacementTL->left();
+      debug_only(replacementTL->clear_left();)
+    } else {  // we have both children, so, by patriarchal convention,
+              // my replacement is the least node in the right sub-tree
+      complicated_splice = true;
+      newTL = remove_tree_minimum(replacementTL->right());
+      assert(newTL != NULL && newTL->left() == NULL &&
+             newTL->right() == NULL, "sub-tree minimum exists");
+    }
+    // newTL is the replacement for the (soon to be empty) node.
+    // newTL may be NULL.
+    // should verify; we just cleanly excised our replacement
+    if (FLSVerifyDictionary) {
+      verify_tree();
+    }
+    // first make newTL my parent's child
+    if ((parentTL = replacementTL->parent()) == NULL) {
+      // newTL should be root
+      assert(tl == root(), "Incorrectly replacing root");
+      set_root(newTL);
+      if (newTL != NULL) {
+        newTL->clear_parent();
+      }
+    } else if (parentTL->right() == replacementTL) {
+      // replacementTL is a right child
+      parentTL->set_right(newTL);
+    } else {                                // replacementTL is a left child
+      assert(parentTL->left() == replacementTL, "should be left child");
+      parentTL->set_left(newTL);
+    }
+    debug_only(replacementTL->clear_parent();)
+    if (complicated_splice) {  // we need newTL to get replacementTL's
+                              // two children
+      assert(newTL != NULL &&
+             newTL->left() == NULL && newTL->right() == NULL,
+            "newTL should not have encumbrances from the past");
+      // we'd like to assert as below:
+      // assert(replacementTL->left() != NULL && replacementTL->right() != NULL,
+      //       "else !complicated_splice");
+      // ... however, the above assertion is too strong because we aren't
+      // guaranteed that replacementTL->right() is still NULL.
+      // Recall that we removed
+      // the right sub-tree minimum from replacementTL.
+      // That may well have been its right
+      // child! So we'll just assert half of the above:
+      assert(replacementTL->left() != NULL, "else !complicated_splice");
+      newTL->set_left(replacementTL->left());
+      newTL->set_right(replacementTL->right());
+      debug_only(
+        replacementTL->clear_right();
+        replacementTL->clear_left();
+      )
+    }
+    assert(replacementTL->right() == NULL &&
+           replacementTL->left() == NULL &&
+           replacementTL->parent() == NULL,
+        "delete without encumbrances");
+  }
+
+  assert(total_size() >= retTC->size(), "Incorrect total size");
+  dec_total_size(retTC->size());     // size book-keeping
+  assert(total_free_blocks() > 0, "Incorrect total count");
+  set_total_free_blocks(total_free_blocks() - 1);
+
+  assert(retTC != NULL, "null chunk?");
+  assert(retTC->prev() == NULL && retTC->next() == NULL,
+         "should return without encumbrances");
+  if (FLSVerifyDictionary) {
+    verify_tree();
+  }
+  assert(!removing_only_chunk || _root == NULL, "root should be NULL");
+  return TreeChunk<Chunk>::as_TreeChunk(retTC);
+}
+
+// Remove the leftmost node (lm) in the tree and return it.
+// If lm has a right child, link that child into lm's place
+// under lm's parent.
+template <class Chunk>
+TreeList<Chunk>* BinaryTreeDictionary<Chunk>::remove_tree_minimum(TreeList<Chunk>* tl) {
+  assert(tl != NULL && tl->parent() != NULL, "really need a proper sub-tree");
+  // locate the subtree minimum by walking down left branches
+  TreeList<Chunk>* curTL = tl;
+  for (; curTL->left() != NULL; curTL = curTL->left());
+  // obviously curTL now has at most one child, a right child
+  if (curTL != root()) {  // Should this test just be removed?
+    TreeList<Chunk>* parentTL = curTL->parent();
+    if (parentTL->left() == curTL) { // curTL is a left child
+      parentTL->set_left(curTL->right());
+    } else {
+      // If the list tl has no left child, then curTL may be
+      // the right child of parentTL.
+      assert(parentTL->right() == curTL, "should be a right child");
+      parentTL->set_right(curTL->right());
+    }
+  } else {
+    // The only use of this method would not pass the root of the
+    // tree (as indicated by the assertion above that the tree list
+    // has a parent) but the specification does not explicitly exclude the
+    // passing of the root, so accommodate it.
+    set_root(NULL);
+  }
+  debug_only(
+    curTL->clear_parent();  // Test if this needs to be cleared
+    curTL->clear_right();    // recall, above, left child is already null
+  )
+  // we just excised a (non-root) node; we should still verify all tree invariants
+  if (FLSVerifyDictionary) {
+    verify_tree();
+  }
+  return curTL;
+}
+
+// Based on a simplification of the algorithm by Sleator and Tarjan (JACM 1985).
+// The simplifications are the following:
+// . we splay only when we delete (not when we insert)
+// . we apply a single splay step per deletion/access
+// By doing such partial splaying, we reduce the amount of restructuring,
+// while getting a reasonably efficient search tree (we think).
+// [Measurements will be needed to (in)validate this expectation.]
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::semi_splay_step(TreeList<Chunk>* tc) {
+  // apply a semi-splay step at the given node:
+  // . if root, nothing needs to be done
+  // . if child of root, splay once
+  // . else zig-zig or zig-zag depending on path from grandparent
+  if (root() == tc) return;
+  warning("*** Splaying not yet implemented; "
+          "tree operations may be inefficient ***");
+}
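+
+// A minimal sketch of what a single "zig" step (a rotation at a child of
+// the root) could look like, kept disabled for symmetry with the stub
+// above; this is an untested illustration, not part of this change:
+#if 0
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::zig_step(TreeList<Chunk>* tc) {
+  TreeList<Chunk>* p = tc->parent();
+  assert(p == root(), "zig applies only at a child of the root");
+  if (p->left() == tc) {
+    p->set_left(tc->right());   // tc's right subtree becomes p's left
+    tc->set_right(p);           // p rotates down to become tc's right child
+  } else {
+    p->set_right(tc->left());   // mirror image for a right child
+    tc->set_left(p);
+  }
+  tc->clear_parent();
+  set_root(tc);
+}
+#endif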
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::insert_chunk_in_tree(Chunk* fc) {
+  TreeList<Chunk> *curTL, *prevTL;
+  size_t size = fc->size();
+
+  assert(size >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "too small to be a TreeList<Chunk>");
+  if (FLSVerifyDictionary) {
+    verify_tree();
+  }
+
+  fc->clear_next();
+  fc->link_prev(NULL);
+
+  // work down from the _root, looking for insertion point
+  for (prevTL = curTL = root(); curTL != NULL;) {
+    if (curTL->size() == size)  // exact match
+      break;
+    prevTL = curTL;
+    if (curTL->size() > size) { // follow left branch
+      curTL = curTL->left();
+    } else {                    // follow right branch
+      assert(curTL->size() < size, "size inconsistency");
+      curTL = curTL->right();
+    }
+  }
+  TreeChunk<Chunk>* tc = TreeChunk<Chunk>::as_TreeChunk(fc);
+  // This chunk is being returned to the binary tree.  Its embedded
+  // TreeList<Chunk> should be unused at this point.
+  tc->initialize();
+  if (curTL != NULL) {          // exact match
+    tc->set_list(curTL);
+    curTL->return_chunk_at_tail(tc);
+  } else {                     // need a new node in tree
+    tc->clear_next();
+    tc->link_prev(NULL);
+    TreeList<Chunk>* newTL = TreeList<Chunk>::as_TreeList(tc);
+    assert(((TreeChunk<Chunk>*)tc)->list() == newTL,
+      "List was not initialized correctly");
+    if (prevTL == NULL) {      // we are the only tree node
+      assert(root() == NULL, "control point invariant");
+      set_root(newTL);
+    } else {                   // insert under prevTL ...
+      if (prevTL->size() < size) {   // am right child
+        assert(prevTL->right() == NULL, "control point invariant");
+        prevTL->set_right(newTL);
+      } else {                       // am left child
+        assert(prevTL->size() > size && prevTL->left() == NULL, "cpt pt inv");
+        prevTL->set_left(newTL);
+      }
+    }
+  }
+  assert(tc->list() != NULL, "Tree list should be set");
+
+  inc_total_size(size);
+  // Method 'total_size_in_tree' walks through every block in the
+  // tree, so it can cause a significant performance loss if there are
+  // many blocks in the tree.
+  assert(!FLSVerifyDictionary || total_size_in_tree(root()) == total_size(), "_total_size inconsistency");
+  set_total_free_blocks(total_free_blocks() + 1);
+  if (FLSVerifyDictionary) {
+    verify_tree();
+  }
+}
+
+template <class Chunk>
+size_t BinaryTreeDictionary<Chunk>::max_chunk_size() const {
+  FreeBlockDictionary<Chunk>::verify_par_locked();
+  TreeList<Chunk>* tc = root();
+  if (tc == NULL) return 0;
+  for (; tc->right() != NULL; tc = tc->right());
+  return tc->size();
+}
+
+template <class Chunk>
+size_t BinaryTreeDictionary<Chunk>::total_list_length(TreeList<Chunk>* tl) const {
+  size_t res;
+  res = tl->count();
+#ifdef ASSERT
+  size_t cnt;
+  Chunk* tc = tl->head();
+  for (cnt = 0; tc != NULL; tc = tc->next(), cnt++);
+  assert(res == cnt, "The count is not being maintained correctly");
+#endif
+  return res;
+}
+
+template <class Chunk>
+size_t BinaryTreeDictionary<Chunk>::total_size_in_tree(TreeList<Chunk>* tl) const {
+  if (tl == NULL)
+    return 0;
+  return (tl->size() * total_list_length(tl)) +
+         total_size_in_tree(tl->left())    +
+         total_size_in_tree(tl->right());
+}
+
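+// Returns the sum of the squared sizes of all free blocks in the sub-tree
+// rooted at "tl", i.e. the sum over all lists of size^2 * count; together
+// with total_size() this presumably feeds free-space fragmentation
+// statistics.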
+template <class Chunk>
+double BinaryTreeDictionary<Chunk>::sum_of_squared_block_sizes(TreeList<Chunk>* const tl) const {
+  if (tl == NULL) {
+    return 0.0;
+  }
+  double size = (double)(tl->size());
+  double curr = size * size * total_list_length(tl);
+  curr += sum_of_squared_block_sizes(tl->left());
+  curr += sum_of_squared_block_sizes(tl->right());
+  return curr;
+}
+
+template <class Chunk>
+size_t BinaryTreeDictionary<Chunk>::total_free_blocks_in_tree(TreeList<Chunk>* tl) const {
+  if (tl == NULL)
+    return 0;
+  return total_list_length(tl) +
+         total_free_blocks_in_tree(tl->left()) +
+         total_free_blocks_in_tree(tl->right());
+}
+
+template <class Chunk>
+size_t BinaryTreeDictionary<Chunk>::num_free_blocks() const {
+  assert(total_free_blocks_in_tree(root()) == total_free_blocks(),
+         "_total_free_blocks inconsistency");
+  return total_free_blocks();
+}
+
+template <class Chunk>
+size_t BinaryTreeDictionary<Chunk>::tree_height_helper(TreeList<Chunk>* tl) const {
+  if (tl == NULL)
+    return 0;
+  return 1 + MAX2(tree_height_helper(tl->left()),
+                  tree_height_helper(tl->right()));
+}
+
+template <class Chunk>
+size_t BinaryTreeDictionary<Chunk>::treeHeight() const {
+  return tree_height_helper(root());
+}
+
+template <class Chunk>
+size_t BinaryTreeDictionary<Chunk>::total_nodes_helper(TreeList<Chunk>* tl) const {
+  if (tl == NULL) {
+    return 0;
+  }
+  return 1 + total_nodes_helper(tl->left()) +
+    total_nodes_helper(tl->right());
+}
+
+template <class Chunk>
+size_t BinaryTreeDictionary<Chunk>::total_nodes_in_tree(TreeList<Chunk>* tl) const {
+  return total_nodes_helper(tl);
+}
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::dict_census_udpate(size_t size, bool split, bool birth){
+  TreeList<Chunk>* nd = find_list(size);
+  if (nd) {
+    if (split) {
+      if (birth) {
+        nd->increment_split_births();
+        nd->increment_surplus();
+      }  else {
+        nd->increment_split_deaths();
+        nd->decrement_surplus();
+      }
+    } else {
+      if (birth) {
+        nd->increment_coal_births();
+        nd->increment_surplus();
+      } else {
+        nd->increment_coal_deaths();
+        nd->decrement_surplus();
+      }
+    }
+  }
+  // A list for this size may not be found (nd == 0) if
+  //   . this is a death where the appropriate list is now
+  //     empty and has been removed from the tree, or
+  //   . this is a birth associated with a LinAB.  The chunk
+  //     for the LinAB is not in the dictionary.
+}
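+
+// For example, a sweep that coalesces two chunks into one might record the
+// resulting birth roughly as follows (a sketch; "dict" and "fc" are
+// hypothetical names for the dictionary and the freshly coalesced chunk):
+//
+//   dict->dict_census_udpate(fc->size(), false /* split */, true /* birth */);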
+
+template <class Chunk>
+bool BinaryTreeDictionary<Chunk>::coal_dict_over_populated(size_t size) {
+  if (FLSAlwaysCoalesceLarge) return true;
+
+  TreeList<Chunk>* list_of_size = find_list(size);
+  // No list of the requested size implies overpopulation.
+  return list_of_size == NULL || list_of_size->coal_desired() <= 0 ||
+         list_of_size->count() > list_of_size->coal_desired();
+}
+
+// Closures for walking the binary tree.
+//   do_list() walks the free list in a node applying the closure
+//     to each free chunk in the list
+//   do_tree() walks the nodes in the binary tree applying do_list()
+//     to each list at each node.
+
+template <class Chunk>
+class TreeCensusClosure : public StackObj {
+ protected:
+  virtual void do_list(FreeList<Chunk>* fl) = 0;
+ public:
+  virtual void do_tree(TreeList<Chunk>* tl) = 0;
+};
+
+template <class Chunk>
+class AscendTreeCensusClosure : public TreeCensusClosure<Chunk> {
+  using TreeCensusClosure<Chunk>::do_list;
+ public:
+  void do_tree(TreeList<Chunk>* tl) {
+    if (tl != NULL) {
+      do_tree(tl->left());
+      do_list(tl);
+      do_tree(tl->right());
+    }
+  }
+};
+
+template <class Chunk>
+class DescendTreeCensusClosure : public TreeCensusClosure<Chunk> {
+  using TreeCensusClosure<Chunk>::do_list;
+ public:
+  void do_tree(TreeList<Chunk>* tl) {
+    if (tl != NULL) {
+      do_tree(tl->right());
+      do_list(tl);
+      do_tree(tl->left());
+    }
+  }
+};
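+
+// As an illustration of the closure pattern (a sketch, not part of this
+// change), a closure counting the free chunks of one particular size could
+// be written as follows, kept disabled like the symmetric case below:
+#if 0
+template <class Chunk>
+class CountSizeClosure : public AscendTreeCensusClosure<Chunk> {
+  size_t _size;   // the block size of interest, in words
+  size_t _count;  // running total of matching chunks
+ public:
+  CountSizeClosure(size_t size) : _size(size), _count(0) {}
+  void do_list(FreeList<Chunk>* fl) {
+    if (fl->size() == _size) {
+      _count += fl->count();
+    }
+  }
+  size_t count() const { return _count; }
+};
+#endif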
+
+// For each list in the tree, calculate the desired, desired
+// coalesce, count before sweep, and surplus before sweep.
+template <class Chunk>
+class BeginSweepClosure : public AscendTreeCensusClosure<Chunk> {
+  double _percentage;
+  float _inter_sweep_current;
+  float _inter_sweep_estimate;
+  float _intra_sweep_estimate;
+
+ public:
+  BeginSweepClosure(double p, float inter_sweep_current,
+                              float inter_sweep_estimate,
+                              float intra_sweep_estimate) :
+   _percentage(p),
+   _inter_sweep_current(inter_sweep_current),
+   _inter_sweep_estimate(inter_sweep_estimate),
+   _intra_sweep_estimate(intra_sweep_estimate) { }
+
+  void do_list(FreeList<Chunk>* fl) {
+    double coalSurplusPercent = _percentage;
+    fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate, _intra_sweep_estimate);
+    fl->set_coal_desired((ssize_t)((double)fl->desired() * coalSurplusPercent));
+    fl->set_before_sweep(fl->count());
+    fl->set_bfr_surp(fl->surplus());
+  }
+};
+
+// Used to search the tree until a condition is met.
+// Similar to TreeCensusClosure but searches the
+// tree and returns promptly when found.
+
+template <class Chunk>
+class TreeSearchClosure : public StackObj {
+ protected:
+  virtual bool do_list(FreeList<Chunk>* fl) = 0;
+ public:
+  virtual bool do_tree(TreeList<Chunk>* tl) = 0;
+};
+
+#if 0 //  Don't need this yet but here for symmetry.
+template <class Chunk>
+class AscendTreeSearchClosure : public TreeSearchClosure<Chunk> {
+  using TreeSearchClosure<Chunk>::do_list;
+ public:
+  bool do_tree(TreeList<Chunk>* tl) {
+    if (tl != NULL) {
+      if (do_tree(tl->left())) return true;
+      if (do_list(tl)) return true;
+      if (do_tree(tl->right())) return true;
+    }
+    return false;
+  }
+};
+#endif
+
+template <class Chunk>
+class DescendTreeSearchClosure : public TreeSearchClosure<Chunk> {
+  using TreeSearchClosure<Chunk>::do_list;
+ public:
+  bool do_tree(TreeList<Chunk>* tl) {
+    if (tl != NULL) {
+      if (do_tree(tl->right())) return true;
+      if (do_list(tl)) return true;
+      if (do_tree(tl->left())) return true;
+    }
+    return false;
+  }
+};
+
+// Searches the tree for a chunk that ends at the
+// specified address.
+template <class Chunk>
+class EndTreeSearchClosure : public DescendTreeSearchClosure<Chunk> {
+  HeapWord* _target;
+  Chunk* _found;
+
+ public:
+  EndTreeSearchClosure(HeapWord* target) : _target(target), _found(NULL) {}
+  bool do_list(FreeList<Chunk>* fl) {
+    Chunk* item = fl->head();
+    while (item != NULL) {
+      if (item->end() == _target) {
+        _found = item;
+        return true;
+      }
+      item = item->next();
+    }
+    return false;
+  }
+  Chunk* found() { return _found; }
+};
+
+template <class Chunk>
+Chunk* BinaryTreeDictionary<Chunk>::find_chunk_ends_at(HeapWord* target) const {
+  EndTreeSearchClosure<Chunk> etsc(target);
+  bool found_target = etsc.do_tree(root());
+  assert(found_target || etsc.found() == NULL, "Consistency check");
+  assert(!found_target || etsc.found() != NULL, "Consistency check");
+  return etsc.found();
+}
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::begin_sweep_dict_census(double coalSurplusPercent,
+  float inter_sweep_current, float inter_sweep_estimate, float intra_sweep_estimate) {
+  BeginSweepClosure<Chunk> bsc(coalSurplusPercent, inter_sweep_current,
+                                            inter_sweep_estimate,
+                                            intra_sweep_estimate);
+  bsc.do_tree(root());
+}
+
+// Closures and methods for calculating total bytes returned to the
+// free lists in the tree.
+#ifndef PRODUCT
+template <class Chunk>
+class InitializeDictReturnedBytesClosure : public AscendTreeCensusClosure<Chunk> {
+   public:
+  void do_list(FreeList<Chunk>* fl) {
+    fl->set_returned_bytes(0);
+  }
+};
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::initialize_dict_returned_bytes() {
+  InitializeDictReturnedBytesClosure<Chunk> idrb;
+  idrb.do_tree(root());
+}
+
+template <class Chunk>
+class ReturnedBytesClosure : public AscendTreeCensusClosure<Chunk> {
+  size_t _dict_returned_bytes;
+ public:
+  ReturnedBytesClosure() { _dict_returned_bytes = 0; }
+  void do_list(FreeList<Chunk>* fl) {
+    _dict_returned_bytes += fl->returned_bytes();
+  }
+  size_t dict_returned_bytes() { return _dict_returned_bytes; }
+};
+
+template <class Chunk>
+size_t BinaryTreeDictionary<Chunk>::sum_dict_returned_bytes() {
+  ReturnedBytesClosure<Chunk> rbc;
+  rbc.do_tree(root());
+
+  return rbc.dict_returned_bytes();
+}
+
+// Count the number of entries in the tree.
+template <class Chunk>
+class treeCountClosure : public DescendTreeCensusClosure<Chunk> {
+ public:
+  uint count;
+  treeCountClosure(uint c) { count = c; }
+  void do_list(FreeList<Chunk>* fl) {
+    count++;
+  }
+};
+
+template <class Chunk>
+size_t BinaryTreeDictionary<Chunk>::total_count() {
+  treeCountClosure<Chunk> ctc(0);
+  ctc.do_tree(root());
+  return ctc.count;
+}
+#endif // PRODUCT
+
+// Calculate surpluses for the lists in the tree.
+template <class Chunk>
+class setTreeSurplusClosure : public AscendTreeCensusClosure<Chunk> {
+  double percentage;
+ public:
+  setTreeSurplusClosure(double v) { percentage = v; }
+  void do_list(FreeList<Chunk>* fl) {
+    double splitSurplusPercent = percentage;
+    fl->set_surplus(fl->count() -
+                   (ssize_t)((double)fl->desired() * splitSurplusPercent));
+  }
+};
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::set_tree_surplus(double splitSurplusPercent) {
+  setTreeSurplusClosure<Chunk> sts(splitSurplusPercent);
+  sts.do_tree(root());
+}
+
+// Set hints for the lists in the tree.
+template <class Chunk>
+class setTreeHintsClosure : public DescendTreeCensusClosure<Chunk> {
+  size_t hint;
+ public:
+  setTreeHintsClosure(size_t v) { hint = v; }
+  void do_list(FreeList<Chunk>* fl) {
+    fl->set_hint(hint);
+    assert(fl->hint() == 0 || fl->hint() > fl->size(),
+      "Current hint is inconsistent");
+    if (fl->surplus() > 0) {
+      hint = fl->size();
+    }
+  }
+};
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::set_tree_hints(void) {
+  setTreeHintsClosure<Chunk> sth(0);
+  sth.do_tree(root());
+}
+
+// Save count before previous sweep and splits and coalesces.
+template <class Chunk>
+class clearTreeCensusClosure : public AscendTreeCensusClosure<Chunk> {
+  void do_list(FreeList<Chunk>* fl) {
+    fl->set_prev_sweep(fl->count());
+    fl->set_coal_births(0);
+    fl->set_coal_deaths(0);
+    fl->set_split_births(0);
+    fl->set_split_deaths(0);
+  }
+};
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::clear_tree_census(void) {
+  clearTreeCensusClosure<Chunk> ctc;
+  ctc.do_tree(root());
+}
+
+// Do reporting and post sweep clean up.
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::end_sweep_dict_census(double splitSurplusPercent) {
+  // Does walking the tree 3 times hurt?
+  set_tree_surplus(splitSurplusPercent);
+  set_tree_hints();
+  if (PrintGC && Verbose) {
+    report_statistics();
+  }
+  clear_tree_census();
+}
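+
+// Taken together, the census methods are presumably driven in this order
+// over one sweep cycle (a sketch of the expected call sequence):
+//   begin_sweep_dict_census(...);   // before the sweep: desired/surplus setup
+//   dict_census_udpate(...);        // during the sweep: births and deaths
+//   end_sweep_dict_census(...);     // after the sweep: surpluses, hints, reset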
+
+// Print summary statistics
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::report_statistics() const {
+  FreeBlockDictionary<Chunk>::verify_par_locked();
+  gclog_or_tty->print("Statistics for BinaryTreeDictionary:\n"
+         "------------------------------------\n");
+  size_t total_size = total_chunk_size(debug_only(NULL));
+  size_t    free_blocks = num_free_blocks();
+  gclog_or_tty->print("Total Free Space: %d\n", total_size);
+  gclog_or_tty->print("Max   Chunk Size: %d\n", max_chunk_size());
+  gclog_or_tty->print("Number of Blocks: %d\n", free_blocks);
+  if (free_blocks > 0) {
+    gclog_or_tty->print("Av.  Block  Size: %d\n", total_size/free_blocks);
+  }
+  gclog_or_tty->print("Tree      Height: %d\n", treeHeight());
+}
+
+// Print census information - counts, births, deaths, etc.
+// for each list in the tree.  Also print some summary
+// information.
+template <class Chunk>
+class PrintTreeCensusClosure : public AscendTreeCensusClosure<Chunk> {
+  int _print_line;
+  size_t _total_free;
+  FreeList<Chunk> _total;
+
+ public:
+  PrintTreeCensusClosure() {
+    _print_line = 0;
+    _total_free = 0;
+  }
+  FreeList<Chunk>* total() { return &_total; }
+  size_t total_free() { return _total_free; }
+  void do_list(FreeList<Chunk>* fl) {
+    if (++_print_line >= 40) {
+      FreeList<Chunk>::print_labels_on(gclog_or_tty, "size");
+      _print_line = 0;
+    }
+    fl->print_on(gclog_or_tty);
+    _total_free +=            fl->count()            * fl->size()        ;
+    total()->set_count(      total()->count()       + fl->count()      );
+    total()->set_bfr_surp(    total()->bfr_surp()     + fl->bfr_surp()    );
+    total()->set_surplus(    total()->surplus()      + fl->surplus()    );
+    total()->set_desired(    total()->desired()     + fl->desired()    );
+    total()->set_prev_sweep(  total()->prev_sweep()   + fl->prev_sweep()  );
+    total()->set_before_sweep(total()->before_sweep() + fl->before_sweep());
+    total()->set_coal_births( total()->coal_births()  + fl->coal_births() );
+    total()->set_coal_deaths( total()->coal_deaths()  + fl->coal_deaths() );
+    total()->set_split_births(total()->split_births() + fl->split_births());
+    total()->set_split_deaths(total()->split_deaths() + fl->split_deaths());
+  }
+};
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::print_dict_census(void) const {
+
+  gclog_or_tty->print("\nBinaryTree\n");
+  FreeList<Chunk>::print_labels_on(gclog_or_tty, "size");
+  PrintTreeCensusClosure<Chunk> ptc;
+  ptc.do_tree(root());
+
+  FreeList<Chunk>* total = ptc.total();
+  FreeList<Chunk>::print_labels_on(gclog_or_tty, " ");
+  total->print_on(gclog_or_tty, "TOTAL\t");
+  gclog_or_tty->print(
+              "total_free(words): " SIZE_FORMAT_W(16)
+              " growth: %8.5f  deficit: %8.5f\n",
+              ptc.total_free(),
+              (double)(total->split_births() + total->coal_births()
+                     - total->split_deaths() - total->coal_deaths())
+              /(total->prev_sweep() != 0 ? (double)total->prev_sweep() : 1.0),
+             (double)(total->desired() - total->count())
+             /(total->desired() != 0 ? (double)total->desired() : 1.0));
+}
+
+template <class Chunk>
+class PrintFreeListsClosure : public AscendTreeCensusClosure<Chunk> {
+  outputStream* _st;
+  int _print_line;
+
+ public:
+  PrintFreeListsClosure(outputStream* st) {
+    _st = st;
+    _print_line = 0;
+  }
+  void do_list(FreeList<Chunk>* fl) {
+    if (++_print_line >= 40) {
+      FreeList<Chunk>::print_labels_on(_st, "size");
+      _print_line = 0;
+    }
+    fl->print_on(_st);
+    size_t sz = fl->size();
+    for (Chunk* fc = fl->head(); fc != NULL;
+         fc = fc->next()) {
+      _st->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ")  %s",
+                    fc, (HeapWord*)fc + sz,
+                    fc->cantCoalesce() ? "\t CC" : "");
+    }
+  }
+};
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::print_free_lists(outputStream* st) const {
+
+  FreeList<Chunk>::print_labels_on(st, "size");
+  PrintFreeListsClosure<Chunk> pflc(st);
+  pflc.do_tree(root());
+}
+
+// Verify the following tree invariants:
+// . _root has no parent
+// . parent and child point to each other
+// . each node's key correctly related to that of its child(ren)
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::verify_tree() const {
+  guarantee(root() == NULL || total_free_blocks() == 0 ||
+    total_size() != 0, "_total_size shouldn't be 0");
+  guarantee(root() == NULL || root()->parent() == NULL, "_root shouldn't have parent");
+  verify_tree_helper(root());
+}
+
+template <class Chunk>
+size_t BinaryTreeDictionary<Chunk>::verify_prev_free_ptrs(TreeList<Chunk>* tl) {
+  size_t ct = 0;
+  for (Chunk* curFC = tl->head(); curFC != NULL; curFC = curFC->next()) {
+    ct++;
+    assert(curFC->prev() == NULL || curFC->prev()->is_free(),
+      "Chunk should be free");
+  }
+  return ct;
+}
+
+// Note: this helper is recursive rather than iterative, so use it with
+// caution on very deep trees and watch out for stack overflow errors.
+// In general, it is to be used only for debugging.
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::verify_tree_helper(TreeList<Chunk>* tl) const {
+  if (tl == NULL)
+    return;
+  guarantee(tl->size() != 0, "A list must have a size");
+  guarantee(tl->left()  == NULL || tl->left()->parent()  == tl,
+         "parent<-/->left");
+  guarantee(tl->right() == NULL || tl->right()->parent() == tl,
+         "parent<-/->right");;
+  guarantee(tl->left() == NULL  || tl->left()->size()    <  tl->size(),
+         "parent !> left");
+  guarantee(tl->right() == NULL || tl->right()->size()   >  tl->size(),
+         "parent !< left");
+  guarantee(tl->head() == NULL || tl->head()->is_free(), "!Free");
+  guarantee(tl->head() == NULL || tl->head_as_TreeChunk()->list() == tl,
+    "list inconsistency");
+  guarantee(tl->count() > 0 || (tl->head() == NULL && tl->tail() == NULL),
+    "list count is inconsistent");
+  guarantee(tl->count() > 1 || tl->head() == tl->tail(),
+    "list is incorrectly constructed");
+  size_t count = verify_prev_free_ptrs(tl);
+  guarantee(count == (size_t)tl->count(), "Node count is incorrect");
+  if (tl->head() != NULL) {
+    tl->head_as_TreeChunk()->verify_tree_chunk_list();
+  }
+  verify_tree_helper(tl->left());
+  verify_tree_helper(tl->right());
+}
+
+template <class Chunk>
+void BinaryTreeDictionary<Chunk>::verify() const {
+  verify_tree();
+  guarantee(total_size() == total_size_in_tree(root()), "Total Size inconsistency");
+}
+
+#ifndef SERIALGC
+// Explicitly instantiate these types for FreeChunk.
+template class BinaryTreeDictionary<FreeChunk>;
+template class TreeChunk<FreeChunk>;
+template class TreeList<FreeChunk>;
+#endif // SERIALGC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/binaryTreeDictionary.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_MEMORY_BINARYTREEDICTIONARY_HPP
+#define SHARE_VM_MEMORY_BINARYTREEDICTIONARY_HPP
+
+#include "memory/freeBlockDictionary.hpp"
+#include "memory/freeList.hpp"
+
+/*
+ * A binary tree based search structure for free blocks.
+ * This is currently used in the Concurrent Mark&Sweep implementation, and
+ * will also be used for free block management for metadata.
+ */
+
+// A TreeList is a FreeList which can be used to maintain a
+// binary tree of free lists.
+
+template <class Chunk> class TreeChunk;
+template <class Chunk> class BinaryTreeDictionary;
+template <class Chunk> class AscendTreeCensusClosure;
+template <class Chunk> class DescendTreeCensusClosure;
+template <class Chunk> class DescendTreeSearchClosure;
+
+template <class Chunk>
+class TreeList: public FreeList<Chunk> {
+  friend class TreeChunk<Chunk>;
+  friend class BinaryTreeDictionary<Chunk>;
+  friend class AscendTreeCensusClosure<Chunk>;
+  friend class DescendTreeCensusClosure<Chunk>;
+  friend class DescendTreeSearchClosure<Chunk>;
+
+  TreeList<Chunk>* _parent;
+  TreeList<Chunk>* _left;
+  TreeList<Chunk>* _right;
+
+ protected:
+  TreeList<Chunk>* parent() const { return _parent; }
+  TreeList<Chunk>* left()   const { return _left;   }
+  TreeList<Chunk>* right()  const { return _right;  }
+
+  // Explicitly import these names into our namespace to fix name lookup with templates
+  using FreeList<Chunk>::head;
+  using FreeList<Chunk>::set_head;
+
+  using FreeList<Chunk>::tail;
+  using FreeList<Chunk>::set_tail;
+  using FreeList<Chunk>::link_tail;
+
+  using FreeList<Chunk>::increment_count;
+  NOT_PRODUCT(using FreeList<Chunk>::increment_returned_bytes_by;)
+  using FreeList<Chunk>::verify_chunk_in_free_list;
+  using FreeList<Chunk>::size;
+
+  // Accessors for links in tree.
+
+  void set_left(TreeList<Chunk>* tl) {
+    _left   = tl;
+    if (tl != NULL)
+      tl->set_parent(this);
+  }
+  void set_right(TreeList<Chunk>* tl) {
+    _right  = tl;
+    if (tl != NULL)
+      tl->set_parent(this);
+  }
+  void set_parent(TreeList<Chunk>* tl)  { _parent = tl;   }
+
+  void clear_left()               { _left = NULL;   }
+  void clear_right()              { _right = NULL;  }
+  void clear_parent()             { _parent = NULL; }
+  void initialize()               { clear_left(); clear_right(); clear_parent(); }
+
+  // For constructing a TreeList from a Tree chunk or
+  // address and size.
+  static TreeList<Chunk>* as_TreeList(TreeChunk<Chunk>* tc);
+  static TreeList<Chunk>* as_TreeList(HeapWord* addr, size_t size);
+
+  // Returns the head of the free list as a pointer to a TreeChunk.
+  TreeChunk<Chunk>* head_as_TreeChunk();
+
+  // Returns the first available chunk in the free list as a pointer
+  // to a TreeChunk.
+  TreeChunk<Chunk>* first_available();
+
+  // Returns the block with the largest heap address amongst
+  // those in the list for this size; potentially slow and expensive,
+  // use with caution!
+  TreeChunk<Chunk>* largest_address();
+
+  // remove_chunk_replace_if_needed() removes the given "tc" from the TreeList.
+  // If "tc" is the first chunk in the list, it is also the
+  // TreeList that is the node in the tree.  remove_chunk_replace_if_needed()
+  // returns the possibly replaced TreeList* for the node in
+  // the tree.  It also updates the parent of the original
+  // node to point to the new node.
+  TreeList<Chunk>* remove_chunk_replace_if_needed(TreeChunk<Chunk>* tc);
+  // See FreeList.
+  void return_chunk_at_head(TreeChunk<Chunk>* tc);
+  void return_chunk_at_tail(TreeChunk<Chunk>* tc);
+};
+
+// A TreeChunk is a subclass of a Chunk that additionally
+// maintains a pointer to the free list on which it is currently
+// linked.
+// A TreeChunk is also used as a node in the binary tree.  This
+// allows the binary tree to be maintained without any additional
+// storage (the free chunks are used).  In a binary tree the first
+// chunk in the free list is also the tree node.  Note that the
+// TreeChunk has an embedded TreeList for this purpose.  Because
+// the first chunk in the list is distinguished in this fashion
+// (also is the node in the tree), it is the last chunk to be found
+// on the free list for a node in the tree and is only removed if
+// it is the last chunk on the free list.
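+//
+// Roughly, a TreeChunk is therefore laid out as follows (a sketch, assuming
+// the usual size/next/prev fields of the underlying Chunk type):
+//
+//   +-----------------+
+//   |  Chunk fields   |  size, next, prev, ...
+//   +-----------------+
+//   |  _list          |  the TreeList this chunk is currently on
+//   +-----------------+
+//   |  _embedded_list |  becomes the tree node when this chunk
+//   +-----------------+  heads the free list for its size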
+
+template <class Chunk>
+class TreeChunk : public Chunk {
+  friend class TreeList<Chunk>;
+  TreeList<Chunk>* _list;
+  TreeList<Chunk> _embedded_list;  // if _list is non-null, this chunk is on that list
+ protected:
+  TreeList<Chunk>* embedded_list() const { return (TreeList<Chunk>*) &_embedded_list; }
+  void set_embedded_list(TreeList<Chunk>* v) { _embedded_list = *v; }
+ public:
+  TreeList<Chunk>* list() { return _list; }
+  void set_list(TreeList<Chunk>* v) { _list = v; }
+  static TreeChunk<Chunk>* as_TreeChunk(Chunk* fc);
+  // Initialize fields in a TreeChunk that should be
+  // initialized when the TreeChunk is being added to
+  // a free list in the tree.
+  void initialize() { embedded_list()->initialize(); }
+
+  Chunk* next() const { return Chunk::next(); }
+  Chunk* prev() const { return Chunk::prev(); }
+  size_t size() const volatile { return Chunk::size(); }
+
+  // debugging
+  void verify_tree_chunk_list() const;
+};
+
+
+template <class Chunk>
+class BinaryTreeDictionary: public FreeBlockDictionary<Chunk> {
+  friend class VMStructs;
+  bool       _splay;
+  size_t     _total_size;
+  size_t     _total_free_blocks;
+  TreeList<Chunk>* _root;
+  bool       _adaptive_freelists;
+
+  // private accessors
+  bool splay() const { return _splay; }
+  void set_splay(bool v) { _splay = v; }
+  void set_total_size(size_t v) { _total_size = v; }
+  virtual void inc_total_size(size_t v);
+  virtual void dec_total_size(size_t v);
+  size_t total_free_blocks() const { return _total_free_blocks; }
+  void set_total_free_blocks(size_t v) { _total_free_blocks = v; }
+  TreeList<Chunk>* root() const { return _root; }
+  void set_root(TreeList<Chunk>* v) { _root = v; }
+  bool adaptive_freelists() { return _adaptive_freelists; }
+
+  // This field is added and can be set to point to
+  // the Mutex used to synchronize access to the
+  // dictionary so that assertion checking can be done.
+  // For example it is set to point to _parDictionaryAllocLock.
+  NOT_PRODUCT(Mutex* _lock;)
+
+  // Remove a chunk of size "size" or larger from the tree and
+  // return it.  If the chunk
+  // is the last chunk of that size, remove the node for that size
+  // from the tree.
+  TreeChunk<Chunk>* get_chunk_from_tree(size_t size, enum FreeBlockDictionary<Chunk>::Dither dither, bool splay);
+  // Return a list of the specified size or NULL from the tree.
+  // The list is not removed from the tree.
+  TreeList<Chunk>* find_list (size_t size) const;
+  // Remove this chunk from the tree.  If the removal results
+  // in an empty list in the tree, remove the empty list.
+  TreeChunk<Chunk>* remove_chunk_from_tree(TreeChunk<Chunk>* tc);
+  // Remove the node in the tree starting at tl that has the
+  // minimum value and return it.  Repair the tree as needed.
+  TreeList<Chunk>* remove_tree_minimum(TreeList<Chunk>* tl);
+  void       semi_splay_step(TreeList<Chunk>* tl);
+  // Add this free chunk to the tree.
+  void       insert_chunk_in_tree(Chunk* freeChunk);
+ public:
+
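+  // The smallest chunk that can carry an embedded TreeList, in heap words;
+  // anything smaller cannot serve as a node in the tree.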
+  static const size_t min_tree_chunk_size  = sizeof(TreeChunk<Chunk>)/HeapWordSize;
+
+  void       verify_tree() const;
+  // verify that the given chunk is in the tree.
+  bool       verify_chunk_in_free_list(Chunk* tc) const;
+ private:
+  void          verify_tree_helper(TreeList<Chunk>* tl) const;
+  static size_t verify_prev_free_ptrs(TreeList<Chunk>* tl);
+
+  // Returns the total number of chunks in the list.
+  size_t     total_list_length(TreeList<Chunk>* tl) const;
+  // Returns the total number of words in the chunks in the tree
+  // starting at "tl".
+  size_t     total_size_in_tree(TreeList<Chunk>* tl) const;
+  // Returns the sum of the square of the size of each block
+  // in the tree starting at "tl".
+  double     sum_of_squared_block_sizes(TreeList<Chunk>* const tl) const;
+  // Returns the total number of free blocks in the tree starting
+  // at "tl".
+  size_t     total_free_blocks_in_tree(TreeList<Chunk>* tl) const;
+  size_t     num_free_blocks() const;
+  size_t     treeHeight() const;
+  size_t     tree_height_helper(TreeList<Chunk>* tl) const;
+  size_t     total_nodes_in_tree(TreeList<Chunk>* tl) const;
+  size_t     total_nodes_helper(TreeList<Chunk>* tl) const;
+
+ public:
+  // Constructor
+  BinaryTreeDictionary(bool adaptive_freelists, bool splay = false);
+  BinaryTreeDictionary(MemRegion mr, bool adaptive_freelists, bool splay = false);
+
+  // Public accessors
+  size_t total_size() const { return _total_size; }
+
+  // Reset the dictionary to the initial conditions with
+  // a single free chunk.
+  void       reset(MemRegion mr);
+  void       reset(HeapWord* addr, size_t size);
+  // Reset the dictionary to be empty.
+  void       reset();
+
+  // Return a chunk of size "size" or greater from
+  // the tree.
+  // A better dynamic splay strategy is wanted for the future.
+  Chunk* get_chunk(size_t size, enum FreeBlockDictionary<Chunk>::Dither dither) {
+    FreeBlockDictionary<Chunk>::verify_par_locked();
+    Chunk* res = get_chunk_from_tree(size, dither, splay());
+    assert(res == NULL || res->is_free(),
+           "Should be returning a free chunk");
+    return res;
+  }
+
+  void return_chunk(Chunk* chunk) {
+    FreeBlockDictionary<Chunk>::verify_par_locked();
+    insert_chunk_in_tree(chunk);
+  }
+
+  void remove_chunk(Chunk* chunk) {
+    FreeBlockDictionary<Chunk>::verify_par_locked();
+    remove_chunk_from_tree((TreeChunk<Chunk>*)chunk);
+    assert(chunk->is_free(), "Should still be a free chunk");
+  }
+
+  size_t     max_chunk_size() const;
+  size_t     total_chunk_size(debug_only(const Mutex* lock)) const {
+    debug_only(
+      if (lock != NULL && lock->owned_by_self()) {
+        assert(total_size_in_tree(root()) == total_size(),
+               "_total_size inconsistency");
+      }
+    )
+    return total_size();
+  }
+
+  size_t     min_size() const {
+    return min_tree_chunk_size;
+  }
+
+  double     sum_of_squared_block_sizes() const {
+    return sum_of_squared_block_sizes(root());
+  }
+
+  Chunk* find_chunk_ends_at(HeapWord* target) const;
+
+  // Find the list with size "size" in the binary tree and update
+  // the statistics in the list according to "split" (chunk was
+  // split or coalesced) and "birth" (chunk was added or removed).
+  void       dict_census_udpate(size_t size, bool split, bool birth);
+  // Return true if the dictionary is overpopulated (more chunks of
+  // this size than desired) for size "size".
+  bool       coal_dict_over_populated(size_t size);
+  // Methods called at the beginning of a sweep to prepare the
+  // statistics for the sweep.
+  void       begin_sweep_dict_census(double coalSurplusPercent,
+                                  float inter_sweep_current,
+                                  float inter_sweep_estimate,
+                                  float intra_sweep_estimate);
+  // Methods called after the end of a sweep to modify the
+  // statistics for the sweep.
+  void       end_sweep_dict_census(double splitSurplusPercent);
+  // Return the largest free chunk in the tree.
+  Chunk* find_largest_dict() const;
+  // Accessors for statistics
+  void       set_tree_surplus(double splitSurplusPercent);
+  void       set_tree_hints(void);
+  // Reset statistics for all the lists in the tree.
+  void       clear_tree_census(void);
+  // Print the statistics for all the lists in the tree.  Also may
+  // print out summaries.
+  void       print_dict_census(void) const;
+  void       print_free_lists(outputStream* st) const;
+
+  // For debugging.  Returns the sum of the _returned_bytes for
+  // all lists in the tree.
+  size_t     sum_dict_returned_bytes()     PRODUCT_RETURN0;
+  // Sets the _returned_bytes for all the lists in the tree to zero.
+  void       initialize_dict_returned_bytes()      PRODUCT_RETURN;
+  // For debugging.  Return the total number of chunks in the dictionary.
+  size_t     total_count()       PRODUCT_RETURN0;
+
+  void       report_statistics() const;
+
+  void       verify() const;
+};
+
+#endif // SHARE_VM_MEMORY_BINARYTREEDICTIONARY_HPP
--- a/src/share/vm/memory/blockOffsetTable.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/blockOffsetTable.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -30,6 +30,7 @@
 #include "memory/universe.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
+#include "services/memTracker.hpp"
 
 //////////////////////////////////////////////////////////////////////
 // BlockOffsetSharedArray
@@ -44,6 +45,9 @@
   if (!rs.is_reserved()) {
     vm_exit_during_initialization("Could not reserve enough space for heap offset array");
   }
+
+  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+
   if (!_vs.initialize(rs, 0)) {
     vm_exit_during_initialization("Could not reserve enough space for heap offset array");
   }
--- a/src/share/vm/memory/blockOffsetTable.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/blockOffsetTable.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -100,7 +100,7 @@
 //////////////////////////////////////////////////////////////////////////
 // BlockOffsetSharedArray
 //////////////////////////////////////////////////////////////////////////
-class BlockOffsetSharedArray: public CHeapObj {
+class BlockOffsetSharedArray: public CHeapObj<mtGC> {
   friend class BlockOffsetArray;
   friend class BlockOffsetArrayNonContigSpace;
   friend class BlockOffsetArrayContigSpace;
@@ -289,7 +289,7 @@
   };
 
   static size_t power_to_cards_back(uint i) {
-    return (size_t)(1 << (LogBase * i));
+    return (size_t)1 << (LogBase * i);
   }
   static size_t power_to_words_back(uint i) {
     return power_to_cards_back(i) * N_words;
--- a/src/share/vm/memory/cardTableModRefBS.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/cardTableModRefBS.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -33,6 +33,7 @@
 #include "runtime/java.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/virtualspace.hpp"
+#include "services/memTracker.hpp"
 #ifdef COMPILER1
 #include "c1/c1_LIR.hpp"
 #include "c1/c1_LIRGenerator.hpp"
@@ -90,6 +91,9 @@
   const size_t rs_align = _page_size == (size_t) os::vm_page_size() ? 0 :
     MAX2(_page_size, (size_t) os::vm_allocation_granularity());
   ReservedSpace heap_rs(_byte_map_size, rs_align, false);
+
+  MemTracker::record_virtual_memory_type((address)heap_rs.base(), mtGC);
+
   os::trace_page_sizes("card table", _guard_index + 1, _guard_index + 1,
                        _page_size, heap_rs.base(), heap_rs.size());
   if (!heap_rs.is_reserved()) {
@@ -113,16 +117,17 @@
     // Do better than this for Merlin
     vm_exit_out_of_memory(_page_size, "card table last card");
   }
+
   *guard_card = last_card;
 
    _lowest_non_clean =
-    NEW_C_HEAP_ARRAY(CardArr, max_covered_regions);
+    NEW_C_HEAP_ARRAY(CardArr, max_covered_regions, mtGC);
   _lowest_non_clean_chunk_size =
-    NEW_C_HEAP_ARRAY(size_t, max_covered_regions);
+    NEW_C_HEAP_ARRAY(size_t, max_covered_regions, mtGC);
   _lowest_non_clean_base_chunk_index =
-    NEW_C_HEAP_ARRAY(uintptr_t, max_covered_regions);
+    NEW_C_HEAP_ARRAY(uintptr_t, max_covered_regions, mtGC);
   _last_LNC_resizing_collection =
-    NEW_C_HEAP_ARRAY(int, max_covered_regions);
+    NEW_C_HEAP_ARRAY(int, max_covered_regions, mtGC);
   if (_lowest_non_clean == NULL
       || _lowest_non_clean_chunk_size == NULL
       || _lowest_non_clean_base_chunk_index == NULL
--- a/src/share/vm/memory/cardTableModRefBS.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/cardTableModRefBS.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -72,6 +72,9 @@
     CT_MR_BS_last_reserved      = 16
   };
 
+  // a word's worth (row) of clean card values
+  static const intptr_t clean_card_row = (intptr_t)(-1);
+
   // dirty and precleaned are equivalent wrt younger_refs_iter.
   static bool card_is_dirty_wrt_gen_iter(jbyte cv) {
     return cv == dirty_card || cv == precleaned_card;
--- a/src/share/vm/memory/cardTableRS.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/cardTableRS.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -173,6 +173,10 @@
             SharedHeap::heap()->workers()->active_workers()), "Mismatch");
 }
 
+bool ClearNoncleanCardWrapper::is_word_aligned(jbyte* entry) {
+  return (((intptr_t)entry) & (BytesPerWord-1)) == 0;
+}
+
 void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) {
   assert(mr.word_size() > 0, "Error");
   assert(_ct->is_aligned(mr.start()), "mr.start() should be card aligned");
@@ -194,6 +198,17 @@
         const MemRegion mrd(start_of_non_clean, end_of_non_clean);
         _dirty_card_closure->do_MemRegion(mrd);
       }
+
+      // fast forward through a potentially contiguous whole-word range of clean cards beginning at a word boundary
+      if (is_word_aligned(cur_entry)) {
+        jbyte* cur_row = cur_entry - BytesPerWord;
+        while (cur_row >= limit && *((intptr_t*)cur_row) ==  CardTableRS::clean_card_row()) {
+          cur_row -= BytesPerWord;
+        }
+        cur_entry = cur_row + BytesPerWord;
+        cur_hw = _ct->addr_for(cur_entry);
+      }
+
       // Reset the dirty window, while continuing to look
       // for the next dirty card that will start a
       // new dirty window.
--- a/src/share/vm/memory/cardTableRS.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/cardTableRS.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -45,6 +45,10 @@
     return CardTableModRefBS::clean_card;
   }
 
+  static intptr_t clean_card_row() {
+    return CardTableModRefBS::clean_card_row;
+  }
+
   static bool
   card_is_dirty_wrt_gen_iter(jbyte cv) {
     return CardTableModRefBS::card_is_dirty_wrt_gen_iter(cv);
@@ -176,6 +180,8 @@
   // Work methods called by the clear_card()
   inline bool clear_card_serial(jbyte* entry);
   inline bool clear_card_parallel(jbyte* entry);
+  // check alignment of pointer
+  bool is_word_aligned(jbyte* entry);
 
 public:
   ClearNoncleanCardWrapper(DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct);
--- a/src/share/vm/memory/collectorPolicy.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/collectorPolicy.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -56,7 +56,7 @@
 class PermanentGenerationSpec;
 class MarkSweepPolicy;
 
-class CollectorPolicy : public CHeapObj {
+class CollectorPolicy : public CHeapObj<mtGC> {
  protected:
   PermanentGenerationSpec *_permanent_generation;
   GCPolicyCounters* _gc_policy_counters;
--- a/src/share/vm/memory/compactingPermGenGen.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/compactingPermGenGen.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -444,11 +444,11 @@
 }
 
 
-void CompactingPermGenGen::verify(bool allow_dirty) {
-  the_space()->verify(allow_dirty);
+void CompactingPermGenGen::verify() {
+  the_space()->verify();
   if (!SharedSkipVerify && spec()->enable_shared_spaces()) {
-    ro_space()->verify(allow_dirty);
-    rw_space()->verify(allow_dirty);
+    ro_space()->verify();
+    rw_space()->verify();
   }
 }
 
--- a/src/share/vm/memory/compactingPermGenGen.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/compactingPermGenGen.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -230,7 +230,7 @@
                                       void* new_vtable_start,
                                       void* obj);
 
-  void verify(bool allow_dirty);
+  void verify();
 
   // Serialization
   static void initialize_oops() KERNEL_RETURN;
--- a/src/share/vm/memory/defNewGeneration.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/defNewGeneration.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -548,7 +548,7 @@
 
   init_assuming_no_promotion_failure();
 
-  TraceTime t1("GC", PrintGC && !PrintGCDetails, true, gclog_or_tty);
+  TraceTime t1(GCCauseString("GC", gch->gc_cause()), PrintGC && !PrintGCDetails, true, gclog_or_tty);
   // Capture heap used before collection (for printing).
   size_t gch_prev_used = gch->used();
 
@@ -939,10 +939,10 @@
   }
 }
 
-void DefNewGeneration::verify(bool allow_dirty) {
-  eden()->verify(allow_dirty);
-  from()->verify(allow_dirty);
-    to()->verify(allow_dirty);
+void DefNewGeneration::verify() {
+  eden()->verify();
+  from()->verify();
+    to()->verify();
 }
 
 void DefNewGeneration::print_on(outputStream* st) const {
--- a/src/share/vm/memory/defNewGeneration.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/defNewGeneration.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -89,8 +89,8 @@
 
   // Together, these keep <object with a preserved mark, mark value> pairs.
   // They should always contain the same number of elements.
-  Stack<oop>     _objs_with_preserved_marks;
-  Stack<markOop> _preserved_marks_of_objs;
+  Stack<oop, mtGC>     _objs_with_preserved_marks;
+  Stack<markOop, mtGC> _preserved_marks_of_objs;
 
   // Promotion failure handling
   OopClosure *_promo_failure_scan_stack_closure;
@@ -98,7 +98,7 @@
     _promo_failure_scan_stack_closure = scan_stack_closure;
   }
 
-  Stack<oop> _promo_failure_scan_stack;
+  Stack<oop, mtGC> _promo_failure_scan_stack;
   void drain_promo_failure_scan_stack(void);
   bool _promo_failure_drain_in_progress;
 
@@ -340,7 +340,7 @@
   // PrintHeapAtGC support.
   void print_on(outputStream* st) const;
 
-  void verify(bool allow_dirty);
+  void verify();
 
   bool promo_failure_scan_is_complete() const {
     return _promo_failure_scan_stack.is_empty();
--- a/src/share/vm/memory/dump.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/dump.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -231,8 +231,6 @@
     if (obj->is_constMethod()) {
       mark_object(obj);
       mark_object(constMethodOop(obj)->stackmap_data());
-      // Exception tables are needed by ci code during compilation.
-      mark_object(constMethodOop(obj)->exception_table());
     }
 
     // Mark objects referenced by klass objects which are read-only.
@@ -297,16 +295,14 @@
 
       if (obj->blueprint()->oop_is_instanceKlass()) {
         instanceKlass* ik = instanceKlass::cast((klassOop)obj);
-        typeArrayOop inner_classes = ik->inner_classes();
-        if (inner_classes != NULL) {
-          constantPoolOop constants = ik->constants();
-          int n = inner_classes->length();
-          for (int i = 0; i < n; i += instanceKlass::inner_class_next_offset) {
-            int ioff = i + instanceKlass::inner_class_inner_name_offset;
-            int index = inner_classes->ushort_at(ioff);
-            if (index != 0) {
-              _closure->do_symbol(constants->symbol_at_addr(index));
-            }
+        instanceKlassHandle ik_h((klassOop)obj);
+        InnerClassesIterator iter(ik_h);
+        constantPoolOop constants = ik->constants();
+        for (; !iter.done(); iter.next()) {
+          int index = iter.inner_name_index();
+
+          if (index != 0) {
+            _closure->do_symbol(constants->symbol_at_addr(index));
           }
         }
       }
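
The hunk above replaces hand-rolled stride arithmetic over the inner-classes array with InnerClassesIterator. A generic sketch of that refactoring pattern; the stride and the record offset used for the inner-name index are assumptions, not instanceKlass's actual layout:

// Iterate fixed-stride records in a flat unsigned-short array.
class FlatRecordIterator {
  const unsigned short* _base;
  int _len, _pos, _stride;
 public:
  FlatRecordIterator(const unsigned short* base, int len, int stride)
    : _base(base), _len(len), _pos(0), _stride(stride) {}
  bool done() const             { return _pos >= _len; }
  void next()                   { _pos += _stride; }
  int  inner_name_index() const { return _base[_pos + 2]; }  // offset 2: illustrative
};

// for (FlatRecordIterator it(arr, n, 4); !it.done(); it.next()) { ... }

Hiding the stride behind done()/next() removes the index arithmetic the old i += inner_class_next_offset loop had to get right at every call site.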
@@ -515,7 +511,6 @@
       for(i = 0; i < methods->length(); i++) {
         methodOop m = methodOop(methods->obj_at(i));
         mark_and_move_for_policy(OP_favor_startup, m->constMethod(), _move_ro);
-        mark_and_move_for_policy(OP_favor_runtime, m->constMethod()->exception_table(), _move_ro);
         mark_and_move_for_policy(OP_favor_runtime, m->constMethod()->stackmap_data(), _move_ro);
       }
 
@@ -1490,12 +1485,11 @@
 
     // sun.io.Converters
     static const char obj_array_sig[] = "[[Ljava/lang/Object;";
-    SymbolTable::lookup(obj_array_sig, (int)strlen(obj_array_sig), THREAD);
+    (void)SymbolTable::new_permanent_symbol(obj_array_sig, THREAD);
 
     // java.util.HashMap
     static const char map_entry_array_sig[] = "[Ljava/util/Map$Entry;";
-    SymbolTable::lookup(map_entry_array_sig, (int)strlen(map_entry_array_sig),
-                        THREAD);
+    (void)SymbolTable::new_permanent_symbol(map_entry_array_sig, THREAD);
 
     tty->print("Loading classes to share ... ");
     while ((fgets(class_name, sizeof class_name, file)) != NULL) {
@@ -1514,7 +1508,7 @@
       computed_jsum = jsum(computed_jsum, class_name, (const int)name_len - 1);
 
       // Got a class name - load it.
-      TempNewSymbol class_name_symbol = SymbolTable::new_symbol(class_name, THREAD);
+      Symbol* class_name_symbol = SymbolTable::new_permanent_symbol(class_name, THREAD);
       guarantee(!HAS_PENDING_EXCEPTION, "Exception creating a symbol.");
       klassOop klass = SystemDictionary::resolve_or_null(class_name_symbol,
                                                          THREAD);
--- a/src/share/vm/memory/filemap.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/filemap.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -44,7 +44,7 @@
 
 
 
-class FileMapInfo : public CHeapObj {
+class FileMapInfo : public CHeapObj<mtInternal> {
 private:
   enum {
     _invalid_version = -1,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/freeBlockDictionary.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#ifndef SERIALGC
+#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
+#endif // SERIALGC
+#include "memory/freeBlockDictionary.hpp"
+#ifdef TARGET_OS_FAMILY_linux
+# include "thread_linux.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_solaris
+# include "thread_solaris.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_windows
+# include "thread_windows.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_bsd
+# include "thread_bsd.inline.hpp"
+#endif
+
+#ifndef PRODUCT
+template <class Chunk> Mutex* FreeBlockDictionary<Chunk>::par_lock() const {
+  return _lock;
+}
+
+template <class Chunk> void FreeBlockDictionary<Chunk>::set_par_lock(Mutex* lock) {
+  _lock = lock;
+}
+
+template <class Chunk> void FreeBlockDictionary<Chunk>::verify_par_locked() const {
+#ifdef ASSERT
+  if (ParallelGCThreads > 0) {
+    Thread* my_thread = Thread::current();
+    if (my_thread->is_GC_task_thread()) {
+      assert(par_lock() != NULL, "Should be using locking?");
+      assert_lock_strong(par_lock());
+    }
+  }
+#endif // ASSERT
+}
+#endif
+
+#ifndef SERIALGC
+// Explicitly instantiate for FreeChunk
+template class FreeBlockDictionary<FreeChunk>;
+#endif // SERIALGC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/freeBlockDictionary.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_MEMORY_FREEBLOCKDICTIONARY_HPP
+#define SHARE_VM_MEMORY_FREEBLOCKDICTIONARY_HPP
+
+#include "memory/allocation.hpp"
+#include "runtime/mutex.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/ostream.hpp"
+
+// A FreeBlockDictionary is an abstract superclass that will allow
+// a number of alternative implementations in the future.
+template <class Chunk>
+class FreeBlockDictionary: public CHeapObj<mtGC> {
+ public:
+  enum Dither {
+    atLeast,
+    exactly,
+    roughly
+  };
+  enum DictionaryChoice {
+    dictionaryBinaryTree = 0,
+    dictionarySplayTree  = 1,
+    dictionarySkipList   = 2
+  };
+
+ private:
+  NOT_PRODUCT(Mutex* _lock;)
+
+ public:
+  virtual void       remove_chunk(Chunk* fc) = 0;
+  virtual Chunk*     get_chunk(size_t size, Dither dither = atLeast) = 0;
+  virtual void       return_chunk(Chunk* chunk) = 0;
+  virtual size_t     total_chunk_size(debug_only(const Mutex* lock)) const = 0;
+  virtual size_t     max_chunk_size()   const = 0;
+  virtual size_t     min_size()        const = 0;
+  // Reset the dictionary to the initial conditions for a single
+  // block.
+  virtual void       reset(HeapWord* addr, size_t size) = 0;
+  virtual void       reset() = 0;
+
+  virtual void       dict_census_udpate(size_t size, bool split, bool birth) = 0;
+  virtual bool       coal_dict_over_populated(size_t size) = 0;
+  virtual void       begin_sweep_dict_census(double coalSurplusPercent,
+                       float inter_sweep_current, float inter_sweep_estimate,
+                       float intra_sweep_current) = 0;
+  virtual void       end_sweep_dict_census(double splitSurplusPercent) = 0;
+  virtual Chunk*     find_largest_dict() const = 0;
+  // Verify that the given chunk is in the dictionary.
+  virtual bool verify_chunk_in_free_list(Chunk* tc) const = 0;
+
+  // Sigma_{all_free_blocks} (block_size^2)
+  virtual double sum_of_squared_block_sizes() const = 0;
+
+  virtual Chunk* find_chunk_ends_at(HeapWord* target) const = 0;
+  virtual void inc_total_size(size_t v) = 0;
+  virtual void dec_total_size(size_t v) = 0;
+
+  NOT_PRODUCT (
+    virtual size_t   sum_dict_returned_bytes() = 0;
+    virtual void     initialize_dict_returned_bytes() = 0;
+    virtual size_t   total_count() = 0;
+  )
+
+  virtual void       report_statistics() const {
+    gclog_or_tty->print("No statistics available");
+  }
+
+  virtual void       print_dict_census() const = 0;
+  virtual void       print_free_lists(outputStream* st) const = 0;
+
+  virtual void       verify()         const = 0;
+
+  Mutex* par_lock()                const PRODUCT_RETURN0;
+  void   set_par_lock(Mutex* lock)       PRODUCT_RETURN;
+  void   verify_par_locked()       const PRODUCT_RETURN;
+};
+
+#endif // SHARE_VM_MEMORY_FREEBLOCKDICTIONARY_HPP
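
For readers of the new interface: Dither states how strictly get_chunk must honor the requested size. A hedged illustration of how an implementor might act on it; find_exact and find_at_least are assumed helpers, and BinaryTreeDictionary elsewhere in this changeset is the real implementation:

#include <cstddef>

enum DemoDither { demoAtLeast, demoExactly, demoRoughly };

template <class Chunk>
Chunk* pick_chunk(size_t size, DemoDither dither,
                  Chunk* (*find_exact)(size_t),
                  Chunk* (*find_at_least)(size_t)) {
  switch (dither) {
    case demoExactly: return find_exact(size);      // size must match exactly
    case demoAtLeast: return find_at_least(size);   // any chunk >= size will do
    case demoRoughly: {                             // prefer exact, accept larger
      Chunk* c = find_exact(size);
      return c != NULL ? c : find_at_least(size);
    }
  }
  return NULL;
}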
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/freeList.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,370 @@
+/*
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "memory/freeBlockDictionary.hpp"
+#include "memory/freeList.hpp"
+#include "memory/sharedHeap.hpp"
+#include "runtime/globals.hpp"
+#include "runtime/mutex.hpp"
+#include "runtime/vmThread.hpp"
+
+#ifndef SERIALGC
+#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
+#endif // SERIALGC
+
+// Free list.  A FreeList is used to access a linked list of chunks
+// of space in the heap.  The head and tail are maintained so that
+// items can be (as in the current implementation) added at the
+// tail of the list and removed from the head of the list to
+// maintain a FIFO queue.
+
+template <class Chunk>
+FreeList<Chunk>::FreeList() :
+  _head(NULL), _tail(NULL)
+#ifdef ASSERT
+  , _protecting_lock(NULL)
+#endif
+{
+  _size         = 0;
+  _count        = 0;
+  _hint         = 0;
+  init_statistics();
+}
+
+template <class Chunk>
+FreeList<Chunk>::FreeList(Chunk* fc) :
+  _head(fc), _tail(fc)
+#ifdef ASSERT
+  , _protecting_lock(NULL)
+#endif
+{
+  _size         = fc->size();
+  _count        = 1;
+  _hint         = 0;
+  init_statistics();
+#ifndef PRODUCT
+  _allocation_stats.set_returned_bytes(size() * HeapWordSize);
+#endif
+}
+
+template <class Chunk>
+void FreeList<Chunk>::reset(size_t hint) {
+  set_count(0);
+  set_head(NULL);
+  set_tail(NULL);
+  set_hint(hint);
+}
+
+template <class Chunk>
+void FreeList<Chunk>::init_statistics(bool split_birth) {
+  _allocation_stats.initialize(split_birth);
+}
+
+template <class Chunk>
+Chunk* FreeList<Chunk>::get_chunk_at_head() {
+  assert_proper_lock_protection();
+  assert(head() == NULL || head()->prev() == NULL, "list invariant");
+  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
+  Chunk* fc = head();
+  if (fc != NULL) {
+    Chunk* nextFC = fc->next();
+    if (nextFC != NULL) {
+      // The chunk fc being removed has a "next".  Set the "next" to the
+      // "prev" of fc.
+      nextFC->link_prev(NULL);
+    } else { // removed tail of list
+      link_tail(NULL);
+    }
+    link_head(nextFC);
+    decrement_count();
+  }
+  assert(head() == NULL || head()->prev() == NULL, "list invariant");
+  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
+  return fc;
+}
+
+
+template <class Chunk>
+void FreeList<Chunk>::getFirstNChunksFromList(size_t n, FreeList<Chunk>* fl) {
+  assert_proper_lock_protection();
+  assert(fl->count() == 0, "Precondition");
+  if (count() > 0) {
+    int k = 1;
+    fl->set_head(head()); n--;
+    Chunk* tl = head();
+    while (tl->next() != NULL && n > 0) {
+      tl = tl->next(); n--; k++;
+    }
+    assert(tl != NULL, "Loop Inv.");
+
+    // First, fix up the list we took from.
+    Chunk* new_head = tl->next();
+    set_head(new_head);
+    set_count(count() - k);
+    if (new_head == NULL) {
+      set_tail(NULL);
+    } else {
+      new_head->link_prev(NULL);
+    }
+    // Now we can fix up the tail.
+    tl->link_next(NULL);
+    // And return the result.
+    fl->set_tail(tl);
+    fl->set_count(k);
+  }
+}
+
+// Remove this chunk from the list
+template <class Chunk>
+void FreeList<Chunk>::remove_chunk(Chunk*fc) {
+   assert_proper_lock_protection();
+   assert(head() != NULL, "Remove from empty list");
+   assert(fc != NULL, "Remove a NULL chunk");
+   assert(size() == fc->size(), "Wrong list");
+   assert(head() == NULL || head()->prev() == NULL, "list invariant");
+   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
+
+   Chunk* prevFC = fc->prev();
+   Chunk* nextFC = fc->next();
+   if (nextFC != NULL) {
+     // The chunk fc being removed has a "next".  Set the "next" to the
+     // "prev" of fc.
+     nextFC->link_prev(prevFC);
+   } else { // removed tail of list
+     link_tail(prevFC);
+   }
+   if (prevFC == NULL) { // removed head of list
+     link_head(nextFC);
+     assert(nextFC == NULL || nextFC->prev() == NULL,
+       "Prev of head should be NULL");
+   } else {
+     prevFC->link_next(nextFC);
+     assert(tail() != prevFC || prevFC->next() == NULL,
+       "Next of tail should be NULL");
+   }
+   decrement_count();
+   assert(((head() == NULL) + (tail() == NULL) + (count() == 0)) % 3 == 0,
+          "H/T/C Inconsistency");
+   // clear next and prev fields of fc, debug only
+   NOT_PRODUCT(
+     fc->link_prev(NULL);
+     fc->link_next(NULL);
+   )
+   assert(fc->is_free(), "Should still be a free chunk");
+   assert(head() == NULL || head()->prev() == NULL, "list invariant");
+   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
+   assert(head() == NULL || head()->size() == size(), "wrong item on list");
+   assert(tail() == NULL || tail()->size() == size(), "wrong item on list");
+}
+
+// Add this chunk at the head of the list.
+template <class Chunk>
+void FreeList<Chunk>::return_chunk_at_head(Chunk* chunk, bool record_return) {
+  assert_proper_lock_protection();
+  assert(chunk != NULL, "insert a NULL chunk");
+  assert(size() == chunk->size(), "Wrong size");
+  assert(head() == NULL || head()->prev() == NULL, "list invariant");
+  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
+
+  Chunk* oldHead = head();
+  assert(chunk != oldHead, "double insertion");
+  chunk->link_after(oldHead);
+  link_head(chunk);
+  if (oldHead == NULL) { // only chunk in list
+    assert(tail() == NULL, "inconsistent FreeList");
+    link_tail(chunk);
+  }
+  increment_count(); // of # of chunks in list
+  DEBUG_ONLY(
+    if (record_return) {
+      increment_returned_bytes_by(size()*HeapWordSize);
+    }
+  )
+  assert(head() == NULL || head()->prev() == NULL, "list invariant");
+  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
+  assert(head() == NULL || head()->size() == size(), "wrong item on list");
+  assert(tail() == NULL || tail()->size() == size(), "wrong item on list");
+}
+
+template <class Chunk>
+void FreeList<Chunk>::return_chunk_at_head(Chunk* chunk) {
+  assert_proper_lock_protection();
+  return_chunk_at_head(chunk, true);
+}
+
+// Add this chunk at the tail of the list.
+template <class Chunk>
+void FreeList<Chunk>::return_chunk_at_tail(Chunk* chunk, bool record_return) {
+  assert_proper_lock_protection();
+  assert(head() == NULL || head()->prev() == NULL, "list invariant");
+  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
+  assert(chunk != NULL, "insert a NULL chunk");
+  assert(size() == chunk->size(), "wrong size");
+
+  Chunk* oldTail = tail();
+  assert(chunk != oldTail, "double insertion");
+  if (oldTail != NULL) {
+    oldTail->link_after(chunk);
+  } else { // only chunk in list
+    assert(head() == NULL, "inconsistent FreeList");
+    link_head(chunk);
+  }
+  link_tail(chunk);
+  increment_count();  // of # of chunks in list
+  DEBUG_ONLY(
+    if (record_return) {
+      increment_returned_bytes_by(size()*HeapWordSize);
+    }
+  )
+  assert(head() == NULL || head()->prev() == NULL, "list invariant");
+  assert(tail() == NULL || tail()->next() == NULL, "list invariant");
+  assert(head() == NULL || head()->size() == size(), "wrong item on list");
+  assert(tail() == NULL || tail()->size() == size(), "wrong item on list");
+}
+
+template <class Chunk>
+void FreeList<Chunk>::return_chunk_at_tail(Chunk* chunk) {
+  return_chunk_at_tail(chunk, true);
+}
+
+template <class Chunk>
+void FreeList<Chunk>::prepend(FreeList<Chunk>* fl) {
+  assert_proper_lock_protection();
+  if (fl->count() > 0) {
+    if (count() == 0) {
+      set_head(fl->head());
+      set_tail(fl->tail());
+      set_count(fl->count());
+    } else {
+      // Both are non-empty.
+      Chunk* fl_tail = fl->tail();
+      Chunk* this_head = head();
+      assert(fl_tail->next() == NULL, "Well-formedness of fl");
+      fl_tail->link_next(this_head);
+      this_head->link_prev(fl_tail);
+      set_head(fl->head());
+      set_count(count() + fl->count());
+    }
+    fl->set_head(NULL);
+    fl->set_tail(NULL);
+    fl->set_count(0);
+  }
+}
+
+// verify_chunk_in_free_list() is used to verify that an item is in this free list.
+// It is used as a debugging aid.
+template <class Chunk>
+bool FreeList<Chunk>::verify_chunk_in_free_list(Chunk* fc) const {
+  // This is an internal consistency check, not part of the check that the
+  // chunk is in the free lists.
+  guarantee(fc->size() == size(), "Wrong list is being searched");
+  Chunk* curFC = head();
+  while (curFC) {
+    // This is an internal consistency check.
+    guarantee(size() == curFC->size(), "Chunk is in wrong list.");
+    if (fc == curFC) {
+      return true;
+    }
+    curFC = curFC->next();
+  }
+  return false;
+}
+
+#ifndef PRODUCT
+template <class Chunk>
+void FreeList<Chunk>::verify_stats() const {
+  // The +1 of the left-hand comparand is to allow some "looseness" in
+  // checking: we usually call this interface when adding a block
+  // and we'll subsequently update the stats; we cannot update the
+  // stats beforehand because in the case of the large-block BT
+  // dictionary for example, this might be the first block and
+  // in that case there would be no place that we could record
+  // the stats (which are kept in the block itself).
+  assert((_allocation_stats.prev_sweep() + _allocation_stats.split_births()
+          + _allocation_stats.coal_births() + 1)   // Total Production Stock + 1
+         >= (_allocation_stats.split_deaths() + _allocation_stats.coal_deaths()
+             + (ssize_t)count()),                // Total Current Stock + depletion
+         err_msg("FreeList " PTR_FORMAT " of size " SIZE_FORMAT
+                 " violates Conservation Principle: "
+                 "prev_sweep(" SIZE_FORMAT ")"
+                 " + split_births(" SIZE_FORMAT ")"
+                 " + coal_births(" SIZE_FORMAT ") + 1 >= "
+                 " split_deaths(" SIZE_FORMAT ")"
+                 " coal_deaths(" SIZE_FORMAT ")"
+                 " + count(" SSIZE_FORMAT ")",
+                 this, _size, _allocation_stats.prev_sweep(), _allocation_stats.split_births(),
+                 _allocation_stats.coal_births(), _allocation_stats.split_deaths(),
+                 _allocation_stats.coal_deaths(), count()));
+}
+
+template <class Chunk>
+void FreeList<Chunk>::assert_proper_lock_protection_work() const {
+  assert(_protecting_lock != NULL, "Don't call this directly");
+  assert(ParallelGCThreads > 0, "Don't call this directly");
+  Thread* thr = Thread::current();
+  if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
+    // assert that we are holding the freelist lock
+  } else if (thr->is_GC_task_thread()) {
+    assert(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED");
+  } else if (thr->is_Java_thread()) {
+    assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing");
+  } else {
+    ShouldNotReachHere();  // unaccounted thread type?
+  }
+}
+#endif
+
+// Print the "label line" for free list stats.
+template <class Chunk>
+void FreeList<Chunk>::print_labels_on(outputStream* st, const char* c) {
+  st->print("%16s\t", c);
+  st->print("%14s\t"    "%14s\t"    "%14s\t"    "%14s\t"    "%14s\t"
+            "%14s\t"    "%14s\t"    "%14s\t"    "%14s\t"    "%14s\t"    "\n",
+            "bfrsurp", "surplus", "desired", "prvSwep", "bfrSwep",
+            "count",   "cBirths", "cDeaths", "sBirths", "sDeaths");
+}
+
+// Print the AllocationStats for the given free list. If the second argument
+// to the call is a non-null string, it is printed in the first column;
+// otherwise, if the argument is null (the default), then the size of the
+// (free list) block is printed in the first column.
+template <class Chunk>
+void FreeList<Chunk>::print_on(outputStream* st, const char* c) const {
+  if (c != NULL) {
+    st->print("%16s", c);
+  } else {
+    st->print(SIZE_FORMAT_W(16), size());
+  }
+  st->print("\t"
+           SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t"
+           SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\n",
+           bfr_surp(),             surplus(),             desired(),             prev_sweep(),           before_sweep(),
+           count(),               coal_births(),          coal_deaths(),          split_births(),         split_deaths());
+}
+
+#ifndef SERIALGC
+// Needs to be after the definitions have been seen.
+template class FreeList<FreeChunk>;
+#endif // SERIALGC
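
A worked instance of the conservation check in verify_stats above, with made-up numbers:

  prev_sweep(5) + split_births(3) + coal_births(2) + 1 = 11
  split_deaths(4) + coal_deaths(2) + count(4)           = 10

11 >= 10, so the list passes; the +1 absorbs the block that is being added before its birth has been recorded, as the comment in verify_stats explains.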
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/freeList.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_MEMORY_FREELIST_HPP
+#define SHARE_VM_MEMORY_FREELIST_HPP
+
+#include "gc_implementation/shared/allocationStats.hpp"
+
+class CompactibleFreeListSpace;
+
+// A class for maintaining a free list of Chunk's.  The FreeList
+// maintains the structure of the list (head, tail, etc.) plus
+// statistics for allocations from the list.  The links between items
+// are not part of FreeList.  The statistics are
+// used to make decisions about coalescing Chunk's when they
+// are swept during collection.
+//
+// See the corresponding .cpp file for a description of the specifics
+// for that implementation.
+
+class Mutex;
+template <class Chunk> class TreeList;
+template <class Chunk> class PrintTreeCensusClosure;
+
+template <class Chunk>
+class FreeList VALUE_OBJ_CLASS_SPEC {
+  friend class CompactibleFreeListSpace;
+  friend class VMStructs;
+  friend class PrintTreeCensusClosure<Chunk>;
+
+ private:
+  Chunk*        _head;          // Head of list of free chunks
+  Chunk*        _tail;          // Tail of list of free chunks
+  size_t        _size;          // Size in Heap words of each chunk
+  ssize_t       _count;         // Number of entries in list
+  size_t        _hint;          // next larger size list with a positive surplus
+
+  AllocationStats _allocation_stats; // allocation-related statistics
+
+#ifdef ASSERT
+  Mutex*        _protecting_lock;
+#endif
+
+  // Asserts false if the protecting lock (if any) is not held.
+  void assert_proper_lock_protection_work() const PRODUCT_RETURN;
+  void assert_proper_lock_protection() const {
+#ifdef ASSERT
+    if (_protecting_lock != NULL)
+      assert_proper_lock_protection_work();
+#endif
+  }
+
+  // Initialize the allocation statistics.
+ protected:
+  void init_statistics(bool split_birth = false);
+  void set_count(ssize_t v) { _count = v;}
+  void increment_count()    {
+    _count++;
+  }
+
+  void decrement_count() {
+    _count--;
+    assert(_count >= 0, "Count should not be negative");
+  }
+
+ public:
+  // Constructor
+  // Construct a list without any entries.
+  FreeList();
+  // Construct a list with "fc" as the first (and lone) entry in the list.
+  FreeList(Chunk* fc);
+
+  // Reset the head, tail, hint, and count of a free list.
+  void reset(size_t hint);
+
+  // Declare the current free list to be protected by the given lock.
+#ifdef ASSERT
+  void set_protecting_lock(Mutex* protecting_lock) {
+    _protecting_lock = protecting_lock;
+  }
+#endif
+
+  // Accessors.
+  Chunk* head() const {
+    assert_proper_lock_protection();
+    return _head;
+  }
+  void set_head(Chunk* v) {
+    assert_proper_lock_protection();
+    _head = v;
+    assert(!_head || _head->size() == _size, "bad chunk size");
+  }
+  // Set the head of the list and set the prev field of non-null
+  // values to NULL.
+  void link_head(Chunk* v) {
+    assert_proper_lock_protection();
+    set_head(v);
+    // If this method is not used (just set the head instead),
+    // this check can be avoided.
+    if (v != NULL) {
+      v->link_prev(NULL);
+    }
+  }
+
+  Chunk* tail() const {
+    assert_proper_lock_protection();
+    return _tail;
+  }
+  void set_tail(Chunk* v) {
+    assert_proper_lock_protection();
+    _tail = v;
+    assert(!_tail || _tail->size() == _size, "bad chunk size");
+  }
+  // Set the tail of the list and set the next field of non-null
+  // values to NULL.
+  void link_tail(Chunk* v) {
+    assert_proper_lock_protection();
+    set_tail(v);
+    if (v != NULL) {
+      v->clear_next();
+    }
+  }
+
+  // No locking checks in read-accessors: lock-free reads (only) are benign.
+  // Readers are expected to have the lock if they are doing work that
+  // requires atomicity guarantees in sections of code.
+  size_t size() const {
+    return _size;
+  }
+  void set_size(size_t v) {
+    assert_proper_lock_protection();
+    _size = v;
+  }
+  ssize_t count() const {
+    return _count;
+  }
+  size_t hint() const {
+    return _hint;
+  }
+  void set_hint(size_t v) {
+    assert_proper_lock_protection();
+    assert(v == 0 || _size < v, "Bad hint"); _hint = v;
+  }
+
+  // Accessors for statistics
+  AllocationStats* allocation_stats() {
+    assert_proper_lock_protection();
+    return &_allocation_stats;
+  }
+
+  ssize_t desired() const {
+    return _allocation_stats.desired();
+  }
+  void set_desired(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_desired(v);
+  }
+  void compute_desired(float inter_sweep_current,
+                       float inter_sweep_estimate,
+                       float intra_sweep_estimate) {
+    assert_proper_lock_protection();
+    _allocation_stats.compute_desired(_count,
+                                      inter_sweep_current,
+                                      inter_sweep_estimate,
+                                      intra_sweep_estimate);
+  }
+  ssize_t coal_desired() const {
+    return _allocation_stats.coal_desired();
+  }
+  void set_coal_desired(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_coal_desired(v);
+  }
+
+  ssize_t surplus() const {
+    return _allocation_stats.surplus();
+  }
+  void set_surplus(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_surplus(v);
+  }
+  void increment_surplus() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_surplus();
+  }
+  void decrement_surplus() {
+    assert_proper_lock_protection();
+    _allocation_stats.decrement_surplus();
+  }
+
+  ssize_t bfr_surp() const {
+    return _allocation_stats.bfr_surp();
+  }
+  void set_bfr_surp(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_bfr_surp(v);
+  }
+  ssize_t prev_sweep() const {
+    return _allocation_stats.prev_sweep();
+  }
+  void set_prev_sweep(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_prev_sweep(v);
+  }
+  ssize_t before_sweep() const {
+    return _allocation_stats.before_sweep();
+  }
+  void set_before_sweep(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_before_sweep(v);
+  }
+
+  ssize_t coal_births() const {
+    return _allocation_stats.coal_births();
+  }
+  void set_coal_births(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_coal_births(v);
+  }
+  void increment_coal_births() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_coal_births();
+  }
+
+  ssize_t coal_deaths() const {
+    return _allocation_stats.coal_deaths();
+  }
+  void set_coal_deaths(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_coal_deaths(v);
+  }
+  void increment_coal_deaths() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_coal_deaths();
+  }
+
+  ssize_t split_births() const {
+    return _allocation_stats.split_births();
+  }
+  void set_split_births(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_split_births(v);
+  }
+  void increment_split_births() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_split_births();
+  }
+
+  ssize_t split_deaths() const {
+    return _allocation_stats.split_deaths();
+  }
+  void set_split_deaths(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_split_deaths(v);
+  }
+  void increment_split_deaths() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_split_deaths();
+  }
+
+  NOT_PRODUCT(
+    // For debugging.  The "_returned_bytes" in all the lists are summed
+    // and compared with the total number of bytes swept during a
+    // collection.
+    size_t returned_bytes() const { return _allocation_stats.returned_bytes(); }
+    void set_returned_bytes(size_t v) { _allocation_stats.set_returned_bytes(v); }
+    void increment_returned_bytes_by(size_t v) {
+      _allocation_stats.set_returned_bytes(_allocation_stats.returned_bytes() + v);
+    }
+  )
+
+  // Unlink head of list and return it.  Returns NULL if
+  // the list is empty.
+  Chunk* get_chunk_at_head();
+
+  // Remove the first "n" or "count", whichever is smaller, chunks from the
+  // list, setting "fl", which is required to be empty, to point to them.
+  void getFirstNChunksFromList(size_t n, FreeList<Chunk>* fl);
+
+  // Unlink this chunk from its free list.
+  void remove_chunk(Chunk* fc);
+
+  // Add this chunk to this free list.
+  void return_chunk_at_head(Chunk* fc);
+  void return_chunk_at_tail(Chunk* fc);
+
+  // Similar to return_chunk_at_* above, but also records some diagnostic
+  // information.
+  void return_chunk_at_head(Chunk* fc, bool record_return);
+  void return_chunk_at_tail(Chunk* fc, bool record_return);
+
+  // Prepend "fl" (whose size is required to be the same as that of "this")
+  // to the front of "this" list.
+  void prepend(FreeList<Chunk>* fl);
+
+  // Verify that the chunk "fc" is in the list; returns false if it is
+  // not found.
+  bool verify_chunk_in_free_list(Chunk* fc) const;
+
+  // Stats verification
+  void verify_stats() const PRODUCT_RETURN;
+
+  // Printing support
+  static void print_labels_on(outputStream* st, const char* c);
+  void print_on(outputStream* st, const char* c = NULL) const;
+};
+
+#endif // SHARE_VM_MEMORY_FREELIST_HPP
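
Assuming a minimal chunk type that provides the link methods this header expects, a usage sketch (DemoChunk is illustrative, not HotSpot's FreeChunk; the calls are shown in comments because the template bodies live in freeList.cpp):

#include <cstddef>

struct DemoChunk {
  DemoChunk* _prev; DemoChunk* _next; size_t _size;
  DemoChunk(size_t s) : _prev(NULL), _next(NULL), _size(s) {}
  DemoChunk* prev() const       { return _prev; }
  DemoChunk* next() const       { return _next; }
  size_t     size() const       { return _size; }
  bool       is_free() const    { return true; }
  void link_prev(DemoChunk* p)  { _prev = p; }
  void link_next(DemoChunk* n)  { _next = n; }
  void link_after(DemoChunk* n) { _next = n; if (n != NULL) n->_prev = this; }
  void clear_next()             { _next = NULL; }
};

// FIFO discipline: add at the tail, remove at the head.
//   FreeList<DemoChunk> fl;
//   fl.set_size(64);
//   DemoChunk a(64), b(64);
//   fl.return_chunk_at_tail(&a);
//   fl.return_chunk_at_tail(&b);
//   DemoChunk* first = fl.get_chunk_at_head();  // yields &a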
--- a/src/share/vm/memory/genCollectedHeap.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -480,26 +480,15 @@
   const size_t perm_prev_used = perm_gen()->used();
 
   print_heap_before_gc();
-  if (Verbose) {
-    gclog_or_tty->print_cr("GC Cause: %s", GCCause::to_string(gc_cause()));
-  }
 
   {
     FlagSetting fl(_is_gc_active, true);
 
     bool complete = full && (max_level == (n_gens()-1));
-    const char* gc_cause_str = "GC ";
-    if (complete) {
-      GCCause::Cause cause = gc_cause();
-      if (cause == GCCause::_java_lang_system_gc) {
-        gc_cause_str = "Full GC (System) ";
-      } else {
-        gc_cause_str = "Full GC ";
-      }
-    }
+    const char* gc_cause_prefix = complete ? "Full GC" : "GC";
     gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    TraceTime t(gc_cause_str, PrintGCDetails, false, gclog_or_tty);
+    TraceTime t(GCCauseString(gc_cause_prefix, gc_cause()), PrintGCDetails, false, gclog_or_tty);
 
     gc_prologue(complete);
     increment_total_collections(complete);
@@ -688,11 +677,6 @@
 #ifdef TRACESPINNING
   ParallelTaskTerminator::print_termination_counts();
 #endif
-
-  if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) {
-    tty->print_cr("Stopping after GC #%d", ExitAfterGCNum);
-    vm_exit(-1);
-  }
 }
 
 HeapWord* GenCollectedHeap::satisfy_failed_allocation(size_t size, bool is_tlab) {
@@ -1247,18 +1231,18 @@
   return _gens[level]->gc_stats();
 }
 
-void GenCollectedHeap::verify(bool allow_dirty, bool silent, VerifyOption option /* ignored */) {
+void GenCollectedHeap::verify(bool silent, VerifyOption option /* ignored */) {
   if (!silent) {
     gclog_or_tty->print("permgen ");
   }
-  perm_gen()->verify(allow_dirty);
+  perm_gen()->verify();
   for (int i = _n_gens-1; i >= 0; i--) {
     Generation* g = _gens[i];
     if (!silent) {
       gclog_or_tty->print(g->name());
       gclog_or_tty->print(" ");
     }
-    g->verify(allow_dirty);
+    g->verify();
   }
   if (!silent) {
     gclog_or_tty->print("remset ");
--- a/src/share/vm/memory/genCollectedHeap.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -357,7 +357,7 @@
   void prepare_for_verify();
 
   // Override.
-  void verify(bool allow_dirty, bool silent, VerifyOption option);
+  void verify(bool silent, VerifyOption option);
 
   // Override.
   virtual void print_on(outputStream* st) const;
--- a/src/share/vm/memory/genMarkSweep.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/genMarkSweep.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -76,7 +76,7 @@
   _ref_processor = rp;
   rp->setup_policy(clear_all_softrefs);
 
-  TraceTime t1("Full GC", PrintGC && !PrintGCDetails, true, gclog_or_tty);
+  TraceTime t1(GCCauseString("Full GC", gch->gc_cause()), PrintGC && !PrintGCDetails, true, gclog_or_tty);
 
   // When collecting the permanent generation methodOops may be moving,
   // so we either have to flush all bcp data or convert it into bci.
@@ -203,21 +203,21 @@
 
 #ifdef VALIDATE_MARK_SWEEP
   if (ValidateMarkSweep) {
-    _root_refs_stack    = new (ResourceObj::C_HEAP) GrowableArray<void*>(100, true);
-    _other_refs_stack   = new (ResourceObj::C_HEAP) GrowableArray<void*>(100, true);
-    _adjusted_pointers  = new (ResourceObj::C_HEAP) GrowableArray<void*>(100, true);
-    _live_oops          = new (ResourceObj::C_HEAP) GrowableArray<oop>(100, true);
-    _live_oops_moved_to = new (ResourceObj::C_HEAP) GrowableArray<oop>(100, true);
-    _live_oops_size     = new (ResourceObj::C_HEAP) GrowableArray<size_t>(100, true);
+    _root_refs_stack    = new (ResourceObj::C_HEAP, mtGC) GrowableArray<void*>(100, true);
+    _other_refs_stack   = new (ResourceObj::C_HEAP, mtGC) GrowableArray<void*>(100, true);
+    _adjusted_pointers  = new (ResourceObj::C_HEAP, mtGC) GrowableArray<void*>(100, true);
+    _live_oops          = new (ResourceObj::C_HEAP, mtGC) GrowableArray<oop>(100, true);
+    _live_oops_moved_to = new (ResourceObj::C_HEAP, mtGC) GrowableArray<oop>(100, true);
+    _live_oops_size     = new (ResourceObj::C_HEAP, mtGC) GrowableArray<size_t>(100, true);
   }
   if (RecordMarkSweepCompaction) {
     if (_cur_gc_live_oops == NULL) {
-      _cur_gc_live_oops           = new(ResourceObj::C_HEAP) GrowableArray<HeapWord*>(100, true);
-      _cur_gc_live_oops_moved_to  = new(ResourceObj::C_HEAP) GrowableArray<HeapWord*>(100, true);
-      _cur_gc_live_oops_size      = new(ResourceObj::C_HEAP) GrowableArray<size_t>(100, true);
-      _last_gc_live_oops          = new(ResourceObj::C_HEAP) GrowableArray<HeapWord*>(100, true);
-      _last_gc_live_oops_moved_to = new(ResourceObj::C_HEAP) GrowableArray<HeapWord*>(100, true);
-      _last_gc_live_oops_size     = new(ResourceObj::C_HEAP) GrowableArray<size_t>(100, true);
+      _cur_gc_live_oops           = new(ResourceObj::C_HEAP, mtGC) GrowableArray<HeapWord*>(100, true);
+      _cur_gc_live_oops_moved_to  = new(ResourceObj::C_HEAP, mtGC) GrowableArray<HeapWord*>(100, true);
+      _cur_gc_live_oops_size      = new(ResourceObj::C_HEAP, mtGC) GrowableArray<size_t>(100, true);
+      _last_gc_live_oops          = new(ResourceObj::C_HEAP, mtGC) GrowableArray<HeapWord*>(100, true);
+      _last_gc_live_oops_moved_to = new(ResourceObj::C_HEAP, mtGC) GrowableArray<HeapWord*>(100, true);
+      _last_gc_live_oops_size     = new(ResourceObj::C_HEAP, mtGC) GrowableArray<size_t>(100, true);
     } else {
       _cur_gc_live_oops->clear();
       _cur_gc_live_oops_moved_to->clear();
--- a/src/share/vm/memory/genOopClosures.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/genOopClosures.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -34,10 +34,10 @@
 class CardTableModRefBS;
 class DefNewGeneration;
 
-template<class E, unsigned int N> class GenericTaskQueue;
-typedef GenericTaskQueue<oop, TASKQUEUE_SIZE> OopTaskQueue;
-template<class T> class GenericTaskQueueSet;
-typedef GenericTaskQueueSet<OopTaskQueue> OopTaskQueueSet;
+template<class E, MEMFLAGS F, unsigned int N> class GenericTaskQueue;
+typedef GenericTaskQueue<oop, mtGC, TASKQUEUE_SIZE> OopTaskQueue;
+template<class T, MEMFLAGS F> class GenericTaskQueueSet;
+typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet;
 
 // Closure for iterating roots from a particular generation
 // Note: all classes deriving from this MUST call this do_barrier
--- a/src/share/vm/memory/genRemSet.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/genRemSet.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -35,7 +35,7 @@
 class OopsInGenClosure;
 class CardTableRS;
 
-class GenRemSet: public CHeapObj {
+class GenRemSet: public CHeapObj<mtGC> {
   friend class Generation;
 
   BarrierSet* _bs;
--- a/src/share/vm/memory/generation.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/generation.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -696,8 +696,8 @@
   the_space()->set_top_for_allocations();
 }
 
-void OneContigSpaceCardGeneration::verify(bool allow_dirty) {
-  the_space()->verify(allow_dirty);
+void OneContigSpaceCardGeneration::verify() {
+  the_space()->verify();
 }
 
 void OneContigSpaceCardGeneration::print_on(outputStream* st)  const {
--- a/src/share/vm/memory/generation.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/generation.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -86,7 +86,7 @@
 };
 
 
-class Generation: public CHeapObj {
+class Generation: public CHeapObj<mtGC> {
   friend class VMStructs;
  private:
   jlong _time_of_last_gc; // time when last gc on this generation happened (ms)
@@ -599,7 +599,7 @@
   virtual void print() const;
   virtual void print_on(outputStream* st) const;
 
-  virtual void verify(bool allow_dirty) = 0;
+  virtual void verify() = 0;
 
   struct StatRecord {
     int invocations;
@@ -753,7 +753,7 @@
 
   virtual void record_spaces_top();
 
-  virtual void verify(bool allow_dirty);
+  virtual void verify();
   virtual void print_on(outputStream* st) const;
 };
 
--- a/src/share/vm/memory/generationSpec.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/generationSpec.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -68,7 +68,7 @@
       ConcurrentMarkSweepGeneration* g = NULL;
       g = new ConcurrentMarkSweepGeneration(rs,
                  init_size(), level, ctrs, UseCMSAdaptiveFreeLists,
-                 (FreeBlockDictionary::DictionaryChoice)CMSDictionaryChoice);
+                 (FreeBlockDictionary<FreeChunk>::DictionaryChoice)CMSDictionaryChoice);
 
       g->initialize_performance_counters();
 
@@ -88,7 +88,7 @@
       ASConcurrentMarkSweepGeneration* g = NULL;
       g = new ASConcurrentMarkSweepGeneration(rs,
                  init_size(), level, ctrs, UseCMSAdaptiveFreeLists,
-                 (FreeBlockDictionary::DictionaryChoice)CMSDictionaryChoice);
+                 (FreeBlockDictionary<FreeChunk>::DictionaryChoice)CMSDictionaryChoice);
 
       g->initialize_performance_counters();
 
@@ -175,7 +175,7 @@
       }
       // XXXPERM
       return new CMSPermGen(perm_rs, init_size, ctrs,
-                   (FreeBlockDictionary::DictionaryChoice)CMSDictionaryChoice);
+                   (FreeBlockDictionary<FreeChunk>::DictionaryChoice)CMSDictionaryChoice);
     }
 #endif // SERIALGC
     default:
--- a/src/share/vm/memory/generationSpec.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/generationSpec.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -32,7 +32,7 @@
 // some generation-specific behavior.  This is done here rather than as a
 // virtual function of Generation because these methods are needed in
 // initialization of the Generations.
-class GenerationSpec : public CHeapObj {
+class GenerationSpec : public CHeapObj<mtGC> {
   friend class VMStructs;
 private:
   Generation::Name _name;
@@ -71,7 +71,7 @@
 // The specification of a permanent generation. This class is very
 // similar to GenerationSpec in use. Due to PermGen's not being a
 // true Generation, we cannot combine the spec classes either.
-class PermanentGenerationSpec : public CHeapObj {
+class PermanentGenerationSpec : public CHeapObj<mtGC> {
   friend class VMStructs;
 private:
   PermGen::Name    _name;
--- a/src/share/vm/memory/heap.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/heap.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -26,7 +26,7 @@
 #include "memory/heap.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/os.hpp"
-
+#include "services/memTracker.hpp"
 
 size_t CodeHeap::header_size() {
   return sizeof(HeapBlock);
@@ -130,6 +130,9 @@
   if (!_segmap.initialize(align_to_page_size(_number_of_reserved_segments), align_to_page_size(_number_of_committed_segments))) {
     return false;
   }
+
+  MemTracker::record_virtual_memory_type((address)_segmap.low_boundary(), mtCode);
+
   assert(_segmap.committed_size() >= (size_t) _number_of_committed_segments, "could not commit  enough space for segment map");
   assert(_segmap.reserved_size()  >= (size_t) _number_of_reserved_segments , "could not reserve enough space for segment map");
   assert(_segmap.reserved_size()  >= _segmap.committed_size()     , "just checking");
--- a/src/share/vm/memory/heap.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/heap.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -77,7 +77,7 @@
   void set_link(FreeBlock* link)             { _link = link; }
 };
 
-class CodeHeap : public CHeapObj {
+class CodeHeap : public CHeapObj<mtCode> {
   friend class VMStructs;
  private:
   VirtualSpace _memory;                          // the memory holding the blocks
--- a/src/share/vm/memory/heapInspection.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/heapInspection.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -116,7 +116,7 @@
 KlassInfoTable::KlassInfoTable(int size, HeapWord* ref) {
   _size = 0;
   _ref = ref;
-  _buckets = NEW_C_HEAP_ARRAY(KlassInfoBucket, size);
+  _buckets = NEW_C_HEAP_ARRAY(KlassInfoBucket, size, mtInternal);
   if (_buckets != NULL) {
     _size = size;
     for (int index = 0; index < _size; index++) {
@@ -130,7 +130,7 @@
     for (int index = 0; index < _size; index++) {
       _buckets[index].empty();
     }
-    FREE_C_HEAP_ARRAY(KlassInfoBucket, _buckets);
+    FREE_C_HEAP_ARRAY(KlassInfoBucket, _buckets, mtInternal);
     _size = 0;
   }
 }
@@ -179,7 +179,7 @@
 
 KlassInfoHisto::KlassInfoHisto(const char* title, int estimatedCount) :
   _title(title) {
-  _elements = new (ResourceObj::C_HEAP) GrowableArray<KlassInfoEntry*>(estimatedCount,true);
+  _elements = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<KlassInfoEntry*>(estimatedCount,true);
 }
 
 KlassInfoHisto::~KlassInfoHisto() {
--- a/src/share/vm/memory/heapInspection.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/heapInspection.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -44,7 +44,7 @@
 // to KlassInfoEntry's and is used to sort
 // the entries.
 
-class KlassInfoEntry: public CHeapObj {
+class KlassInfoEntry: public CHeapObj<mtInternal> {
  private:
   KlassInfoEntry* _next;
   klassOop        _klass;
@@ -72,7 +72,7 @@
   virtual void do_cinfo(KlassInfoEntry* cie) = 0;
 };
 
-class KlassInfoBucket: public CHeapObj {
+class KlassInfoBucket: public CHeapObj<mtInternal> {
  private:
   KlassInfoEntry* _list;
   KlassInfoEntry* list()           { return _list; }
--- a/src/share/vm/memory/memRegion.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/memRegion.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -99,8 +99,8 @@
 
 class MemRegionClosureRO: public MemRegionClosure {
 public:
-  void* operator new(size_t size, ResourceObj::allocation_type type) {
-        return ResourceObj::operator new(size, type);
+  void* operator new(size_t size, ResourceObj::allocation_type type, MEMFLAGS flags) {
+        return ResourceObj::operator new(size, type, flags);
   }
   void* operator new(size_t size, Arena *arena) {
         return ResourceObj::operator new(size, arena);
--- a/src/share/vm/memory/oopFactory.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/oopFactory.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -127,23 +127,28 @@
 klassOop oopFactory::new_instanceKlass(Symbol* name, int vtable_len, int itable_len,
                                        int static_field_size,
                                        unsigned int nonstatic_oop_map_count,
-                                       ReferenceType rt, TRAPS) {
+                                       AccessFlags access_flags,
+                                       ReferenceType rt,
+                                       KlassHandle host_klass, TRAPS) {
   instanceKlassKlass* ikk = instanceKlassKlass::cast(Universe::instanceKlassKlassObj());
-  return ikk->allocate_instance_klass(name, vtable_len, itable_len, static_field_size, nonstatic_oop_map_count, rt, CHECK_NULL);
+  return ikk->allocate_instance_klass(name, vtable_len, itable_len,
+                                      static_field_size, nonstatic_oop_map_count,
+                                      access_flags, rt, host_klass, CHECK_NULL);
 }
 
 
 constMethodOop oopFactory::new_constMethod(int byte_code_size,
                                            int compressed_line_number_size,
                                            int localvariable_table_length,
+                                           int exception_table_length,
                                            int checked_exceptions_length,
                                            bool is_conc_safe,
                                            TRAPS) {
   klassOop cmkObj = Universe::constMethodKlassObj();
   constMethodKlass* cmk = constMethodKlass::cast(cmkObj);
   return cmk->allocate(byte_code_size, compressed_line_number_size,
-                       localvariable_table_length, checked_exceptions_length,
-                       is_conc_safe,
+                       localvariable_table_length, exception_table_length,
+                       checked_exceptions_length, is_conc_safe,
                        CHECK_NULL);
 }
 
@@ -151,6 +156,7 @@
 methodOop oopFactory::new_method(int byte_code_size, AccessFlags access_flags,
                                  int compressed_line_number_size,
                                  int localvariable_table_length,
+                                 int exception_table_length,
                                  int checked_exceptions_length,
                                  bool is_conc_safe,
                                  TRAPS) {
@@ -160,6 +166,7 @@
   constMethodOop cm = new_constMethod(byte_code_size,
                                       compressed_line_number_size,
                                       localvariable_table_length,
+                                      exception_table_length,
                                       checked_exceptions_length,
                                       is_conc_safe, CHECK_NULL);
   constMethodHandle rw(THREAD, cm);
--- a/src/share/vm/memory/oopFactory.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/oopFactory.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -77,13 +77,16 @@
                                            int vtable_len, int itable_len,
                                            int static_field_size,
                                            unsigned int nonstatic_oop_map_count,
-                                           ReferenceType rt, TRAPS);
+                                           AccessFlags access_flags,
+                                           ReferenceType rt,
+                                           KlassHandle host_klass, TRAPS);
 
   // Methods
 private:
   static constMethodOop  new_constMethod(int byte_code_size,
                                          int compressed_line_number_size,
                                          int localvariable_table_length,
+                                         int exception_table_length,
                                          int checked_exceptions_length,
                                          bool is_conc_safe,
                                          TRAPS);
@@ -95,6 +98,7 @@
                                     AccessFlags access_flags,
                                     int compressed_line_number_size,
                                     int localvariable_table_length,
+                                    int exception_table_length,
                                     int checked_exceptions_length,
                                     bool is_conc_safe,
                                     TRAPS);
--- a/src/share/vm/memory/permGen.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/permGen.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -42,7 +42,7 @@
 
 // PermGen models the part of the heap used to allocate class meta-data.
 
-class PermGen : public CHeapObj {
+class PermGen : public CHeapObj<mtGC> {
   friend class VMStructs;
  protected:
   size_t _capacity_expansion_limit;  // maximum expansion allowed without a
--- a/src/share/vm/memory/referencePolicy.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/referencePolicy.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -29,7 +29,7 @@
 // should be cleared.
 
 
-class ReferencePolicy : public CHeapObj {
+class ReferencePolicy : public CHeapObj<mtGC> {
  public:
   virtual bool should_clear_reference(oop p, jlong timestamp_clock) {
     ShouldNotReachHere();
--- a/src/share/vm/memory/referenceProcessor.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/referenceProcessor.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -108,7 +108,8 @@
   _num_q               = MAX2(1U, mt_processing_degree);
   _max_num_q           = MAX2(_num_q, mt_discovery_degree);
   _discovered_refs     = NEW_C_HEAP_ARRAY(DiscoveredList,
-                                          _max_num_q * number_of_subclasses_of_ref());
+                                          _max_num_q * number_of_subclasses_of_ref(), mtGC);
+
   if (_discovered_refs == NULL) {
     vm_exit_during_initialization("Could not allocate RefProc Array");
   }
--- a/src/share/vm/memory/referenceProcessor.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/referenceProcessor.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -203,7 +203,7 @@
   }
 };
 
-class ReferenceProcessor : public CHeapObj {
+class ReferenceProcessor : public CHeapObj<mtGC> {
  protected:
   // Compatibility with pre-4965777 JDKs
   static bool _pending_list_uses_discovered_field;
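
Note on the recurring change in these memory hunks: classes that previously derived from the untyped CHeapObj now name a memory-type tag (mtGC, mtThread, mtSymbol, ...) as a template argument, and NEW_C_HEAP_ARRAY / NEW_C_HEAP_OBJ take a matching tag, so each C-heap allocation can be attributed to a VM subsystem for native memory tracking. A minimal standalone sketch of the shape follows; the per-tag counters and helper names are invented for illustration, and only the tagging pattern itself is taken from the hunks.

#include <cstddef>
#include <cstdio>
#include <cstdlib>

// Simplified stand-in for HotSpot's memory-type tags (mtGC, mtThread, ...).
enum MemoryType { mtGC, mtThread, mtSymbol, mtClass, mtInternal, mtNumTypes };

static size_t g_tagged_bytes[mtNumTypes];   // invented per-tag byte counters

// Sketch of CHeapObj<F>: operator new records the tag before allocating.
template <MemoryType F>
class CHeapObjSketch {
 public:
  void* operator new(size_t size) {
    g_tagged_bytes[F] += size;              // attribute the allocation to tag F
    return std::malloc(size);
  }
  void operator delete(void* p) { std::free(p); }
};

// Sketch of NEW_C_HEAP_ARRAY(type, count, tag): a typed, tagged array malloc.
template <typename T>
T* new_c_heap_array_sketch(size_t count, MemoryType tag) {
  g_tagged_bytes[tag] += count * sizeof(T);
  return static_cast<T*>(std::malloc(count * sizeof(T)));
}

// Mirrors "class ReferenceProcessor : public CHeapObj<mtGC>" above.
struct ReferenceProcessorLike : public CHeapObjSketch<mtGC> {
  int* _discovered_refs;
  explicit ReferenceProcessorLike(size_t queues)
    : _discovered_refs(new_c_heap_array_sketch<int>(queues, mtGC)) {}
  ~ReferenceProcessorLike() { std::free(_discovered_refs); }
};

int main() {
  ReferenceProcessorLike* rp = new ReferenceProcessorLike(8);
  std::printf("bytes tagged mtGC: %zu\n", g_tagged_bytes[mtGC]);
  delete rp;
  return 0;
}

The tag is fixed per class at compile time, which is why the same one-line edit repeats mechanically across so many files in this changeset.
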
--- a/src/share/vm/memory/resourceArea.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/resourceArea.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -75,7 +75,7 @@
     if (UseMallocOnly) {
       // use malloc, but save pointer in res. area for later freeing
       char** save = (char**)internal_malloc_4(sizeof(char*));
-      return (*save = (char*)os::malloc(size));
+      return (*save = (char*)os::malloc(size, mtThread));
     }
 #endif
     return (char*)Amalloc(size);
@@ -93,18 +93,17 @@
   ResourceArea *_area;          // Resource area to stack allocate
   Chunk *_chunk;                // saved arena chunk
   char *_hwm, *_max;
-  NOT_PRODUCT(size_t _size_in_bytes;)
+  size_t _size_in_bytes;
 
   void initialize(Thread *thread) {
     _area = thread->resource_area();
     _chunk = _area->_chunk;
     _hwm = _area->_hwm;
     _max= _area->_max;
-    NOT_PRODUCT(_size_in_bytes = _area->size_in_bytes();)
+    _size_in_bytes = _area->size_in_bytes();
     debug_only(_area->_nesting++;)
     assert( _area->_nesting > 0, "must stack allocate RMs" );
   }
-
  public:
 
 #ifndef ASSERT
@@ -120,7 +119,7 @@
 
   ResourceMark( ResourceArea *r ) :
     _area(r), _chunk(r->_chunk), _hwm(r->_hwm), _max(r->_max) {
-    NOT_PRODUCT(_size_in_bytes = _area->size_in_bytes();)
+    _size_in_bytes = r->_size_in_bytes;
     debug_only(_area->_nesting++;)
     assert( _area->_nesting > 0, "must stack allocate RMs" );
   }
@@ -148,7 +147,7 @@
 
  private:
   void free_malloced_objects()                                         PRODUCT_RETURN;
-  size_t size_in_bytes()       NOT_PRODUCT({ return _size_in_bytes; }) PRODUCT_RETURN0;
+  size_t size_in_bytes() { return _size_in_bytes; }
 };
 
 //------------------------------DeoptResourceMark-----------------------------------
@@ -180,19 +179,19 @@
 // and they would be stack allocated. This leaves open the possibility of accidental
 // misuse so we simply duplicate the ResourceMark functionality here.
 
-class DeoptResourceMark: public CHeapObj {
+class DeoptResourceMark: public CHeapObj<mtInternal> {
 protected:
   ResourceArea *_area;          // Resource area to stack allocate
   Chunk *_chunk;                // saved arena chunk
   char *_hwm, *_max;
-  NOT_PRODUCT(size_t _size_in_bytes;)
+  size_t _size_in_bytes;
 
   void initialize(Thread *thread) {
     _area = thread->resource_area();
     _chunk = _area->_chunk;
     _hwm = _area->_hwm;
     _max= _area->_max;
-    NOT_PRODUCT(_size_in_bytes = _area->size_in_bytes();)
+    _size_in_bytes = _area->size_in_bytes();
     debug_only(_area->_nesting++;)
     assert( _area->_nesting > 0, "must stack allocate RMs" );
   }
@@ -212,7 +211,7 @@
 
   DeoptResourceMark( ResourceArea *r ) :
     _area(r), _chunk(r->_chunk), _hwm(r->_hwm), _max(r->_max) {
-    NOT_PRODUCT(_size_in_bytes = _area->size_in_bytes();)
+    _size_in_bytes = _area->size_in_bytes();
     debug_only(_area->_nesting++;)
     assert( _area->_nesting > 0, "must stack allocate RMs" );
   }
@@ -240,7 +239,7 @@
 
  private:
   void free_malloced_objects()                                         PRODUCT_RETURN;
-  size_t size_in_bytes()       NOT_PRODUCT({ return _size_in_bytes; }) PRODUCT_RETURN0;
+  size_t size_in_bytes() { return _size_in_bytes; }
 };
 
 #endif // SHARE_VM_MEMORY_RESOURCEAREA_HPP
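
Note: a ResourceMark is an RAII watermark over the current thread's resource arena: the constructor snapshots the arena's chunk, high-water mark and, after the change above, its byte size in product builds as well (the NOT_PRODUCT wrapper is gone), and the destructor rolls the arena back, releasing everything allocated under the mark in one step. A toy sketch of that save/rollback discipline, with an invented bump allocator standing in for the real ResourceArea:

#include <cstddef>
#include <cstdio>

// Toy bump-pointer arena standing in for a thread's ResourceArea.
class ToyArena {
  char   _buf[4096];
  size_t _hwm = 0;                 // high-water mark (bytes used)
 public:
  void*  alloc(size_t n) { void* p = _buf + _hwm; _hwm += n; return p; }
  size_t size_in_bytes() const { return _hwm; }
  void   rollback_to(size_t hwm) { _hwm = hwm; }
};

// RAII mark: snapshot on construction, roll back on destruction,
// mirroring ResourceMark's initialize()/~ResourceMark() pair.
class ToyResourceMark {
  ToyArena& _area;
  size_t    _size_in_bytes;        // now kept in all builds, per the diff
 public:
  explicit ToyResourceMark(ToyArena& a)
    : _area(a), _size_in_bytes(a.size_in_bytes()) {}
  ~ToyResourceMark() { _area.rollback_to(_size_in_bytes); }
  size_t size_in_bytes() const { return _size_in_bytes; }
};

int main() {
  ToyArena area;
  {
    ToyResourceMark rm(area);
    area.alloc(100);               // temporary allocations...
    area.alloc(28);
    std::printf("used under mark: %zu\n", area.size_in_bytes());  // 128
  }                                // ...all released when rm goes out of scope
  std::printf("used after mark: %zu\n", area.size_in_bytes());    // 0
  return 0;
}
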
--- a/src/share/vm/memory/restore.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/restore.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -132,7 +132,7 @@
   buffer += sizeof(intptr_t);
   int number_of_entries = *(intptr_t*)buffer;
   buffer += sizeof(intptr_t);
-  SymbolTable::create_table((HashtableBucket*)buffer, symbolTableLen,
+  SymbolTable::create_table((HashtableBucket<mtSymbol>*)buffer, symbolTableLen,
                             number_of_entries);
   buffer += symbolTableLen;
 
@@ -144,7 +144,7 @@
   buffer += sizeof(intptr_t);
   number_of_entries = *(intptr_t*)buffer;
   buffer += sizeof(intptr_t);
-  StringTable::create_table((HashtableBucket*)buffer, stringTableLen,
+  StringTable::create_table((HashtableBucket<mtSymbol>*)buffer, stringTableLen,
                             number_of_entries);
   buffer += stringTableLen;
 
@@ -157,7 +157,7 @@
   buffer += sizeof(intptr_t);
   number_of_entries = *(intptr_t*)buffer;
   buffer += sizeof(intptr_t);
-  SystemDictionary::set_shared_dictionary((HashtableBucket*)buffer,
+  SystemDictionary::set_shared_dictionary((HashtableBucket<mtClass>*)buffer,
                                           sharedDictionaryLen,
                                           number_of_entries);
   buffer += sharedDictionaryLen;
@@ -171,7 +171,7 @@
   buffer += sizeof(intptr_t);
   number_of_entries = *(intptr_t*)buffer;
   buffer += sizeof(intptr_t);
-  ClassLoader::create_package_info_table((HashtableBucket*)buffer, pkgInfoLen,
+  ClassLoader::create_package_info_table((HashtableBucket<mtClass>*)buffer, pkgInfoLen,
                                          number_of_entries);
   buffer += pkgInfoLen;
   ClassLoader::verify();
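
Note: each table restored above follows the same cursor discipline: an intptr_t byte length, an intptr_t entry count, then the raw bucket payload, after which the cursor advances by the byte length. A standalone sketch of that walk; the buffer contents and helper names are invented, only the [length, count, payload] pattern is taken from the code above, and memcpy replaces the raw pointer casts to sidestep alignment concerns:

#include <cstdint>
#include <cstdio>
#include <cstring>

// One serialized segment: [intptr_t len][intptr_t n_entries][len bytes of data].
struct Segment { intptr_t len; intptr_t n_entries; const char* data; };

Segment read_segment(char*& buffer) {
  Segment s;
  std::memcpy(&s.len, buffer, sizeof s.len);             buffer += sizeof s.len;
  std::memcpy(&s.n_entries, buffer, sizeof s.n_entries); buffer += sizeof s.n_entries;
  s.data = buffer;                                       buffer += s.len;  // skip payload
  return s;
}

int main() {
  // Build a tiny two-segment buffer by hand.
  char buf[128];
  char* w = buf;
  intptr_t len1 = 8, n1 = 2, len2 = 4, n2 = 1;
  std::memcpy(w, &len1, sizeof len1); w += sizeof len1;
  std::memcpy(w, &n1, sizeof n1);     w += sizeof n1;
  std::memset(w, 0xAA, len1);         w += len1;
  std::memcpy(w, &len2, sizeof len2); w += sizeof len2;
  std::memcpy(w, &n2, sizeof n2);     w += sizeof n2;
  std::memset(w, 0xBB, len2);         w += len2;

  char* r = buf;
  Segment s1 = read_segment(r);
  Segment s2 = read_segment(r);
  std::printf("seg1: %ld bytes, %ld entries; seg2: %ld bytes, %ld entries\n",
              (long)s1.len, (long)s1.n_entries, (long)s2.len, (long)s2.n_entries);
  return 0;
}
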
--- a/src/share/vm/memory/space.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/space.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -531,7 +531,7 @@
               bottom(), top(), _offsets.threshold(), end());
 }
 
-void ContiguousSpace::verify(bool allow_dirty) const {
+void ContiguousSpace::verify() const {
   HeapWord* p = bottom();
   HeapWord* t = top();
   HeapWord* prev_p = NULL;
@@ -965,27 +965,12 @@
   initialize(mr, SpaceDecorator::Clear, SpaceDecorator::Mangle);
 }
 
-
-class VerifyOldOopClosure : public OopClosure {
- public:
-  oop  _the_obj;
-  bool _allow_dirty;
-  void do_oop(oop* p) {
-    _the_obj->verify_old_oop(p, _allow_dirty);
-  }
-  void do_oop(narrowOop* p) {
-    _the_obj->verify_old_oop(p, _allow_dirty);
-  }
-};
-
 #define OBJ_SAMPLE_INTERVAL 0
 #define BLOCK_SAMPLE_INTERVAL 100
 
-void OffsetTableContigSpace::verify(bool allow_dirty) const {
+void OffsetTableContigSpace::verify() const {
   HeapWord* p = bottom();
   HeapWord* prev_p = NULL;
-  VerifyOldOopClosure blk;      // Does this do anything?
-  blk._allow_dirty = allow_dirty;
   int objs = 0;
   int blocks = 0;
 
@@ -1007,8 +992,6 @@
 
     if (objs == OBJ_SAMPLE_INTERVAL) {
       oop(p)->verify();
-      blk._the_obj = oop(p);
-      oop(p)->oop_iterate(&blk);
       objs = 0;
     } else {
       objs++;
--- a/src/share/vm/memory/space.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/space.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -105,7 +105,7 @@
 // bottom() <= top() <= end()
 // top() is inclusive and end() is exclusive.
 
-class Space: public CHeapObj {
+class Space: public CHeapObj<mtGC> {
   friend class VMStructs;
  protected:
   HeapWord* _bottom;
@@ -306,7 +306,7 @@
   }
 
   // Debugging
-  virtual void verify(bool allow_dirty) const = 0;
+  virtual void verify() const = 0;
 };
 
 // A MemRegionClosure (ResourceObj) whose "do_MemRegion" function applies an
@@ -880,10 +880,17 @@
   void object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl);
   // iterates on objects up to the safe limit
   HeapWord* object_iterate_careful(ObjectClosureCareful* cl);
-  inline HeapWord* concurrent_iteration_safe_limit();
+  HeapWord* concurrent_iteration_safe_limit() {
+    assert(_concurrent_iteration_safe_limit <= top(),
+           "_concurrent_iteration_safe_limit update missed");
+    return _concurrent_iteration_safe_limit;
+  }
   // changes the safe limit, all objects from bottom() to the new
   // limit should be properly initialized
-  inline void set_concurrent_iteration_safe_limit(HeapWord* new_limit);
+  void set_concurrent_iteration_safe_limit(HeapWord* new_limit) {
+    assert(new_limit <= top(), "uninitialized objects in the safe range");
+    _concurrent_iteration_safe_limit = new_limit;
+  }
 
 #ifndef SERIALGC
   // In support of parallel oop_iterate.
@@ -948,7 +955,7 @@
   }
 
   // Debugging
-  virtual void verify(bool allow_dirty) const;
+  virtual void verify() const;
 
   // Used to increase collection frequency.  "factor" of 0 means entire
   // space.
@@ -1100,7 +1107,7 @@
   virtual void print_on(outputStream* st) const;
 
   // Debugging
-  void verify(bool allow_dirty) const;
+  void verify() const;
 
   // Shared space support
   void serialize_block_offset_array_offsets(SerializeOopClosure* soc);
--- a/src/share/vm/memory/space.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/space.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -67,17 +67,4 @@
   return _offsets.block_start(p);
 }
 
-inline HeapWord* ContiguousSpace::concurrent_iteration_safe_limit()
-{
-  assert(_concurrent_iteration_safe_limit <= top(),
-         "_concurrent_iteration_safe_limit update missed");
-  return _concurrent_iteration_safe_limit;
-}
-
-inline void ContiguousSpace::set_concurrent_iteration_safe_limit(HeapWord* new_limit)
-{
-  assert(new_limit <= top(), "uninitialized objects in the safe range");
-  _concurrent_iteration_safe_limit = new_limit;
-}
-
 #endif // SHARE_VM_MEMORY_SPACE_INLINE_HPP
--- a/src/share/vm/memory/tenuredGeneration.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/tenuredGeneration.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
 #include "gc_implementation/shared/collectorCounters.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/blockOffsetTable.inline.hpp"
 #include "memory/generation.inline.hpp"
@@ -65,7 +65,7 @@
   if (UseParNewGC && ParallelGCThreads > 0) {
     typedef ParGCAllocBufferWithBOT* ParGCAllocBufferWithBOTPtr;
     _alloc_buffers = NEW_C_HEAP_ARRAY(ParGCAllocBufferWithBOTPtr,
-                                      ParallelGCThreads);
+                                      ParallelGCThreads, mtGC);
     if (_alloc_buffers == NULL)
       vm_exit_during_initialization("Could not allocate alloc_buffers");
     for (uint i = 0; i < ParallelGCThreads; i++) {
--- a/src/share/vm/memory/threadLocalAllocBuffer.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/threadLocalAllocBuffer.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -36,7 +36,7 @@
 //            It is thread-private at any time, but may be multiplexed over
 //            time across multiple threads. The park()/unpark() pair is
 //            used to make it available for such multiplexing.
-class ThreadLocalAllocBuffer: public CHeapObj {
+class ThreadLocalAllocBuffer: public CHeapObj<mtThread> {
   friend class VMStructs;
 private:
   HeapWord* _start;                              // address of TLAB
@@ -172,7 +172,7 @@
   void verify();
 };
 
-class GlobalTLABStats: public CHeapObj {
+class GlobalTLABStats: public CHeapObj<mtThread> {
 private:
 
   // Accumulate perfdata in private variables because
--- a/src/share/vm/memory/universe.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/universe.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -764,7 +764,7 @@
 
   FileMapInfo* mapinfo = NULL;
   if (UseSharedSpaces) {
-    mapinfo = NEW_C_HEAP_OBJ(FileMapInfo);
+    mapinfo = NEW_C_HEAP_OBJ(FileMapInfo, mtInternal);
     memset(mapinfo, 0, sizeof(FileMapInfo));
 
     // Open the shared archive file, read and validate the header. If
@@ -1326,7 +1326,7 @@
   st->print_cr("}");
 }
 
-void Universe::verify(bool allow_dirty, bool silent, VerifyOption option) {
+void Universe::verify(bool silent, VerifyOption option) {
   if (SharedSkipVerify) {
     return;
   }
@@ -1350,7 +1350,7 @@
   if (!silent) gclog_or_tty->print("[Verifying ");
   if (!silent) gclog_or_tty->print("threads ");
   Threads::verify();
-  heap()->verify(allow_dirty, silent, option);
+  heap()->verify(silent, option);
 
   if (!silent) gclog_or_tty->print("syms ");
   SymbolTable::verify();
@@ -1546,7 +1546,7 @@
     // This is the first previous version so make some space.
     // Start with 2 elements under the assumption that the class
     // won't be redefined much.
-    _prev_methods = new (ResourceObj::C_HEAP) GrowableArray<jweak>(2, true);
+    _prev_methods = new (ResourceObj::C_HEAP, mtClass) GrowableArray<jweak>(2, true);
   }
 
   // RC_TRACE macro has an embedded ResourceMark
--- a/src/share/vm/memory/universe.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/memory/universe.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -43,7 +43,7 @@
 // Common parts of a methodOop cache. This cache safely interacts with
 // the RedefineClasses API.
 //
-class CommonMethodOopCache : public CHeapObj {
+class CommonMethodOopCache : public CHeapObj<mtClass> {
   // We save the klassOop and the idnum of methodOop in order to get
   // the current cached methodOop.
  private:
@@ -412,7 +412,7 @@
 
   // Debugging
   static bool verify_in_progress() { return _verify_in_progress; }
-  static void verify(bool allow_dirty = true, bool silent = false,
+  static void verify(bool silent = false,
                      VerifyOption option = VerifyOption_Default );
   static int  verify_count()       { return _verify_count; }
   // The default behavior is to call print_on() on gclog_or_tty.
@@ -455,7 +455,7 @@
   static int base_vtable_size()               { return _base_vtable_size; }
 };
 
-class DeferredObjAllocEvent : public CHeapObj {
+class DeferredObjAllocEvent : public CHeapObj<mtInternal> {
   private:
     oop    _oop;
     size_t _bytesize;
--- a/src/share/vm/oops/constMethodKlass.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/constMethodKlass.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -65,6 +65,7 @@
 constMethodOop constMethodKlass::allocate(int byte_code_size,
                                           int compressed_line_number_size,
                                           int localvariable_table_length,
+                                          int exception_table_length,
                                           int checked_exceptions_length,
                                           bool is_conc_safe,
                                           TRAPS) {
@@ -72,6 +73,7 @@
   int size = constMethodOopDesc::object_size(byte_code_size,
                                              compressed_line_number_size,
                                              localvariable_table_length,
+                                             exception_table_length,
                                              checked_exceptions_length);
   KlassHandle h_k(THREAD, as_klassOop());
   constMethodOop cm = (constMethodOop)
@@ -80,14 +82,14 @@
   No_Safepoint_Verifier no_safepoint;
   cm->set_interpreter_kind(Interpreter::invalid);
   cm->init_fingerprint();
-  cm->set_method(NULL);
+  cm->set_constants(NULL);
   cm->set_stackmap_data(NULL);
-  cm->set_exception_table(NULL);
   cm->set_code_size(byte_code_size);
   cm->set_constMethod_size(size);
   cm->set_inlined_tables_length(checked_exceptions_length,
                                 compressed_line_number_size,
-                                localvariable_table_length);
+                                localvariable_table_length,
+                                exception_table_length);
   assert(cm->size() == size, "wrong size for object");
   cm->set_is_conc_safe(is_conc_safe);
   cm->set_partially_loaded();
@@ -98,9 +100,8 @@
 void constMethodKlass::oop_follow_contents(oop obj) {
   assert (obj->is_constMethod(), "object must be constMethod");
   constMethodOop cm = constMethodOop(obj);
-  MarkSweep::mark_and_push(cm->adr_method());
+  MarkSweep::mark_and_push(cm->adr_constants());
   MarkSweep::mark_and_push(cm->adr_stackmap_data());
-  MarkSweep::mark_and_push(cm->adr_exception_table());
   // Performance tweak: We skip iterating over the klass pointer since we
   // know that Universe::constMethodKlassObj never moves.
 }
@@ -110,9 +111,8 @@
                                            oop obj) {
   assert (obj->is_constMethod(), "object must be constMethod");
   constMethodOop cm_oop = constMethodOop(obj);
-  PSParallelCompact::mark_and_push(cm, cm_oop->adr_method());
+  PSParallelCompact::mark_and_push(cm, cm_oop->adr_constants());
   PSParallelCompact::mark_and_push(cm, cm_oop->adr_stackmap_data());
-  PSParallelCompact::mark_and_push(cm, cm_oop->adr_exception_table());
   // Performance tweak: We skip iterating over the klass pointer since we
   // know that Universe::constMethodKlassObj never moves.
 }
@@ -121,9 +121,8 @@
 int constMethodKlass::oop_oop_iterate(oop obj, OopClosure* blk) {
   assert (obj->is_constMethod(), "object must be constMethod");
   constMethodOop cm = constMethodOop(obj);
-  blk->do_oop(cm->adr_method());
+  blk->do_oop(cm->adr_constants());
   blk->do_oop(cm->adr_stackmap_data());
-  blk->do_oop(cm->adr_exception_table());
   // Get size before changing pointers.
   // Don't call size() or oop_size() since that is a virtual call.
   int size = cm->object_size();
@@ -135,12 +134,10 @@
   assert (obj->is_constMethod(), "object must be constMethod");
   constMethodOop cm = constMethodOop(obj);
   oop* adr;
-  adr = cm->adr_method();
+  adr = cm->adr_constants();
   if (mr.contains(adr)) blk->do_oop(adr);
   adr = cm->adr_stackmap_data();
   if (mr.contains(adr)) blk->do_oop(adr);
-  adr = cm->adr_exception_table();
-  if (mr.contains(adr)) blk->do_oop(adr);
   // Get size before changing pointers.
   // Don't call size() or oop_size() since that is a virtual call.
   int size = cm->object_size();
@@ -153,9 +150,8 @@
 int constMethodKlass::oop_adjust_pointers(oop obj) {
   assert(obj->is_constMethod(), "should be constMethod");
   constMethodOop cm = constMethodOop(obj);
-  MarkSweep::adjust_pointer(cm->adr_method());
+  MarkSweep::adjust_pointer(cm->adr_constants());
   MarkSweep::adjust_pointer(cm->adr_stackmap_data());
-  MarkSweep::adjust_pointer(cm->adr_exception_table());
   // Get size before changing pointers.
   // Don't call size() or oop_size() since that is a virtual call.
   int size = cm->object_size();
@@ -188,9 +184,8 @@
   assert(obj->is_constMethod(), "must be constMethod");
   Klass::oop_print_on(obj, st);
   constMethodOop m = constMethodOop(obj);
-  st->print(" - method:       " INTPTR_FORMAT " ", (address)m->method());
-  m->method()->print_value_on(st); st->cr();
-  st->print(" - exceptions:   " INTPTR_FORMAT "\n", (address)m->exception_table());
+  st->print(" - constants:       " INTPTR_FORMAT " ", (address)m->constants());
+  m->constants()->print_value_on(st); st->cr();
   if (m->has_stackmap_table()) {
     st->print(" - stackmap data:       ");
     m->stackmap_data()->print_value_on(st);
@@ -223,13 +218,11 @@
   // Verification can occur during oop construction before the method or
   // other fields have been initialized.
   if (!obj->partially_loaded()) {
-    guarantee(m->method()->is_perm(), "should be in permspace");
-    guarantee(m->method()->is_method(), "should be method");
+    guarantee(m->constants()->is_perm(), "should be in permspace");
+    guarantee(m->constants()->is_constantPool(), "should be constant pool");
     typeArrayOop stackmap_data = m->stackmap_data();
     guarantee(stackmap_data == NULL ||
               stackmap_data->is_perm(),  "should be in permspace");
-    guarantee(m->exception_table()->is_perm(), "should be in permspace");
-    guarantee(m->exception_table()->is_typeArray(), "should be type array");
 
     address m_end = (address)((oop*) m + m->size());
     address compressed_table_start = m->code_end();
@@ -244,11 +237,15 @@
       compressed_table_end += stream.position();
     }
     guarantee(compressed_table_end <= m_end, "invalid method layout");
-    // Verify checked exceptions and local variable tables
+    // Verify checked exceptions, exception table and local variable tables
     if (m->has_checked_exceptions()) {
       u2* addr = m->checked_exceptions_length_addr();
       guarantee(*addr > 0 && (address) addr >= compressed_table_end && (address) addr < m_end, "invalid method layout");
     }
+    if (m->has_exception_handler()) {
+      u2* addr = m->exception_table_length_addr();
+      guarantee(*addr > 0 && (address) addr >= compressed_table_end && (address) addr < m_end, "invalid method layout");
+    }
     if (m->has_localvariable_table()) {
       u2* addr = m->localvariable_table_length_addr();
       guarantee(*addr > 0 && (address) addr >= compressed_table_end && (address) addr < m_end, "invalid method layout");
@@ -257,12 +254,12 @@
     u2* uncompressed_table_start;
     if (m->has_localvariable_table()) {
       uncompressed_table_start = (u2*) m->localvariable_table_start();
-    } else {
-      if (m->has_checked_exceptions()) {
+    } else if (m->has_exception_handler()) {
+      uncompressed_table_start = (u2*) m->exception_table_start();
+    } else if (m->has_checked_exceptions()) {
         uncompressed_table_start = (u2*) m->checked_exceptions_start();
-      } else {
+    } else {
         uncompressed_table_start = (u2*) m_end;
-      }
     }
     int gap = (intptr_t) uncompressed_table_start - (intptr_t) compressed_table_end;
     int max_gap = align_object_size(1)*BytesPerWord;
@@ -273,8 +270,8 @@
 bool constMethodKlass::oop_partially_loaded(oop obj) const {
   assert(obj->is_constMethod(), "object must be klass");
   constMethodOop m = constMethodOop(obj);
-  // check whether exception_table points to self (flag for partially loaded)
-  return m->exception_table() == (typeArrayOop)obj;
+  // check whether stackmap_data points to self (flag for partially loaded)
+  return m->stackmap_data() == (typeArrayOop)obj;
 }
 
 
@@ -282,6 +279,6 @@
 void constMethodKlass::oop_set_partially_loaded(oop obj) {
   assert(obj->is_constMethod(), "object must be klass");
   constMethodOop m = constMethodOop(obj);
-  // Temporarily set exception_table to point to self
-  m->set_exception_table((typeArrayOop)obj);
+  // Temporarily set stackmap_data to point to self
+  m->set_stackmap_data((typeArrayOop)obj);
 }
--- a/src/share/vm/oops/constMethodKlass.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/constMethodKlass.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -46,6 +46,7 @@
   DEFINE_ALLOCATE_PERMANENT(constMethodKlass);
   constMethodOop allocate(int byte_code_size, int compressed_line_number_size,
                           int localvariable_table_length,
+                          int exception_table_length,
                           int checked_exceptions_length,
                           bool is_conc_safe,
                           TRAPS);
--- a/src/share/vm/oops/constMethodOop.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/constMethodOop.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,6 +35,7 @@
 int constMethodOopDesc::object_size(int code_size,
                                     int compressed_line_number_size,
                                     int local_variable_table_length,
+                                    int exception_table_length,
                                     int checked_exceptions_length) {
   int extra_bytes = code_size;
   if (compressed_line_number_size > 0) {
@@ -49,10 +50,18 @@
     extra_bytes +=
               local_variable_table_length * sizeof(LocalVariableTableElement);
   }
+  if (exception_table_length > 0) {
+    extra_bytes += sizeof(u2);
+    extra_bytes += exception_table_length * sizeof(ExceptionTableElement);
+  }
   int extra_words = align_size_up(extra_bytes, BytesPerWord) / BytesPerWord;
   return align_object_size(header_size() + extra_words);
 }
 
+methodOop constMethodOopDesc::method() const {
+  return instanceKlass::cast(_constants->pool_holder())->method_with_idnum(
+                             _method_idnum);
+}
 
 // linenumber table - note that length is unknown until decompression,
 // see class CompressedLineNumberReadStream.
@@ -69,23 +78,40 @@
   return last_u2_element();
 }
 
-u2* constMethodOopDesc::localvariable_table_length_addr() const {
-  assert(has_localvariable_table(), "called only if table is present");
+u2* constMethodOopDesc::exception_table_length_addr() const {
+  assert(has_exception_handler(), "called only if table is present");
   if (has_checked_exceptions()) {
     // If checked_exception present, locate immediately before them.
     return (u2*) checked_exceptions_start() - 1;
   } else {
-    // Else, the linenumber table is at the end of the constMethod.
+    // Else, the exception table is at the end of the constMethod.
     return last_u2_element();
   }
 }
 
+u2* constMethodOopDesc::localvariable_table_length_addr() const {
+  assert(has_localvariable_table(), "called only if table is present");
+  if (has_exception_handler()) {
+    // If the exception table is present, locate immediately before it.
+    return (u2*) exception_table_start() - 1;
+  } else {
+    if (has_checked_exceptions()) {
+      // If checked exceptions are present, locate immediately before them.
+      return (u2*) checked_exceptions_start() - 1;
+    } else {
+      // Else, the local variable table is at the end of the constMethod.
+      return last_u2_element();
+    }
+  }
+}
+
 
 // Update the flags to indicate the presence of these optional fields.
 void constMethodOopDesc::set_inlined_tables_length(
                                               int checked_exceptions_len,
                                               int compressed_line_number_size,
-                                              int localvariable_table_len) {
+                                              int localvariable_table_len,
+                                              int exception_table_len) {
   // Must be done in the order below, otherwise length_addr accessors
   // will not work. Only set bit in header if length is positive.
   assert(_flags == 0, "Error");
@@ -96,6 +122,10 @@
     _flags |= _has_checked_exceptions;
     *(checked_exceptions_length_addr()) = checked_exceptions_len;
   }
+  if (exception_table_len > 0) {
+    _flags |= _has_exception_table;
+    *(exception_table_length_addr()) = exception_table_len;
+  }
   if (localvariable_table_len > 0) {
     _flags |= _has_localvariable_table;
     *(localvariable_table_length_addr()) = localvariable_table_len;
@@ -129,3 +159,15 @@
   addr -= length * sizeof(LocalVariableTableElement) / sizeof(u2);
   return (LocalVariableTableElement*) addr;
 }
+
+int constMethodOopDesc::exception_table_length() const {
+  return has_exception_handler() ? *(exception_table_length_addr()) : 0;
+}
+
+ExceptionTableElement* constMethodOopDesc::exception_table_start() const {
+  u2* addr = exception_table_length_addr();
+  u2 length = *addr;
+  assert(length > 0, "should only be called if table is present");
+  addr -= length * sizeof(ExceptionTableElement) / sizeof(u2);
+  return (ExceptionTableElement*)addr;
+}
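
Note: the accessors above encode the layout discipline for inlined tables: each optional table sits at the end of the constMethodOop, its u2 length word is stored after its elements ("length last"), and a table is found by walking backwards from the end of the object. A standalone sketch of the same back-to-front addressing over a plain buffer; ExceptionTableElement mirrors the struct added in this changeset, while the toy buffer and layout code are invented and assume no checked-exceptions table follows:

#include <cstdint>
#include <cstdio>
#include <cstring>

typedef uint16_t u2;

// Mirrors the diff's ExceptionTableElement: 4-tuples of u2.
struct ExceptionTableElement { u2 start_pc, end_pc, handler_pc, catch_type_index; };

// Toy object: a flat buffer whose tail holds [table elements..., length],
// exactly the "length last, indexed from end" scheme in constMethodOop.
struct ToyConstMethod {
  u2 buf[64];
  u2* last_u2_element() { return buf + 63; }           // last u2 in the object

  // Length word of the exception table, assuming no checked-exceptions
  // table follows it (otherwise we would first step past that one).
  u2* exception_table_length_addr() { return last_u2_element(); }

  ExceptionTableElement* exception_table_start() {
    u2* addr = exception_table_length_addr();
    u2  len  = *addr;
    addr -= len * sizeof(ExceptionTableElement) / sizeof(u2);  // walk back
    return reinterpret_cast<ExceptionTableElement*>(addr);
  }
};

int main() {
  ToyConstMethod m;
  std::memset(m.buf, 0, sizeof(m.buf));
  // Lay out one handler entry followed by the length word (length last).
  ExceptionTableElement e = {0, 10, 12, 3};
  *m.exception_table_length_addr() = 1;
  std::memcpy(m.exception_table_length_addr() - 4, &e, sizeof(e));

  ExceptionTableElement* t = m.exception_table_start();
  std::printf("handler_pc=%u catch_type=%u\n", t[0].handler_pc, t[0].catch_type_index);
  return 0;
}
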
--- a/src/share/vm/oops/constMethodOop.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/constMethodOop.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,9 +41,8 @@
 // |------------------------------------------------------|
 // | fingerprint 1                                        |
 // | fingerprint 2                                        |
-// | method                         (oop)                 |
+// | constants                      (oop)                 |
 // | stackmap_data                  (oop)                 |
-// | exception_table                (oop)                 |
 // | constMethod_size                                     |
 // | interp_kind  | flags    | code_size                  |
 // | name index              | signature index            |
@@ -64,7 +63,13 @@
 // |  (length is u2, elements are 6-tuples of u2)         |
 // |  (see class LocalVariableTableElement)               |
 // |  (access flags bit tells whether table is present)   |
-// |  (indexed from end of contMethodOop)                 |
+// |  (indexed from end of constMethodOop)                |
+// |------------------------------------------------------|
+// | exception table + length (length last)               |
+// |  (length is u2, elements are 4-tuples of u2)         |
+// |  (see class ExceptionTableElement)                   |
+// |  (access flags bit tells whether table is present)   |
+// |  (indexed from end of constMethodOop)                |
 // |------------------------------------------------------|
 // | checked exceptions elements + length (length last)   |
 // |  (length is u2, elements are u2)                     |
@@ -93,6 +98,15 @@
 };
 
 
+// Utility class describing elements in the exception table
+class ExceptionTableElement VALUE_OBJ_CLASS_SPEC {
+ public:
+  u2 start_pc;
+  u2 end_pc;
+  u2 handler_pc;
+  u2 catch_type_index;
+};
+
 class constMethodOopDesc : public oopDesc {
   friend class constMethodKlass;
   friend class VMStructs;
@@ -100,7 +114,8 @@
   enum {
     _has_linenumber_table = 1,
     _has_checked_exceptions = 2,
-    _has_localvariable_table = 4
+    _has_localvariable_table = 4,
+    _has_exception_table = 8
   };
 
   // Bit vector of signature
@@ -113,25 +128,19 @@
   volatile bool     _is_conc_safe; // if true, safe for concurrent GC processing
 
 public:
-  oop* oop_block_beg() const { return adr_method(); }
-  oop* oop_block_end() const { return adr_exception_table() + 1; }
+  oop* oop_block_beg() const { return adr_constants(); }
+  oop* oop_block_end() const { return adr_stackmap_data() + 1; }
 
 private:
   //
   // The oop block.  See comment in klass.hpp before making changes.
   //
 
-  // Backpointer to non-const methodOop (needed for some JVMTI operations)
-  methodOop         _method;
+  constantPoolOop   _constants;                  // Constant pool
 
   // Raw stackmap data for the method
   typeArrayOop      _stackmap_data;
 
-  // The exception handler table. 4-tuples of ints [start_pc, end_pc,
-  // handler_pc, catch_type index] For methods with no exceptions the
-  // table is pointing to Universe::the_empty_int_array
-  typeArrayOop      _exception_table;
-
   //
   // End of the oop block.
   //
@@ -153,7 +162,8 @@
   // Inlined tables
   void set_inlined_tables_length(int checked_exceptions_len,
                                  int compressed_line_number_size,
-                                 int localvariable_table_len);
+                                 int localvariable_table_len,
+                                 int exception_table_len);
 
   bool has_linenumber_table() const
     { return (_flags & _has_linenumber_table) != 0; }
@@ -164,13 +174,19 @@
   bool has_localvariable_table() const
     { return (_flags & _has_localvariable_table) != 0; }
 
+  bool has_exception_handler() const
+    { return (_flags & _has_exception_table) != 0; }
+
   void set_interpreter_kind(int kind)      { _interpreter_kind = kind; }
   int  interpreter_kind(void) const        { return _interpreter_kind; }
 
-  // backpointer to non-const methodOop
-  methodOop method() const                 { return _method; }
-  void set_method(methodOop m)             { oop_store_without_check((oop*)&_method, (oop) m); }
+  // constant pool
+  constantPoolOop constants() const        { return _constants; }
+  void set_constants(constantPoolOop c)    {
+    oop_store_without_check((oop*)&_constants, (oop)c);
+  }
 
+  methodOop method() const;
 
   // stackmap table data
   typeArrayOop stackmap_data() const { return _stackmap_data; }
@@ -179,11 +195,6 @@
   }
   bool has_stackmap_table() const { return _stackmap_data != NULL; }
 
-  // exception handler table
-  typeArrayOop exception_table() const           { return _exception_table; }
-  void set_exception_table(typeArrayOop e)       { oop_store_without_check((oop*) &_exception_table, (oop) e); }
-  bool has_exception_handler() const             { return exception_table() != NULL && exception_table()->length() > 0; }
-
   void init_fingerprint() {
     const uint64_t initval = CONST64(0x8000000000000000);
     _fingerprint = initval;
@@ -233,6 +244,7 @@
   // Object size needed
   static int object_size(int code_size, int compressed_line_number_size,
                          int local_variable_table_length,
+                         int exception_table_length,
                          int checked_exceptions_length);
 
   int object_size() const                 { return _constMethod_size; }
@@ -254,6 +266,7 @@
   u_char* compressed_linenumber_table() const;         // not preserved by gc
   u2* checked_exceptions_length_addr() const;
   u2* localvariable_table_length_addr() const;
+  u2* exception_table_length_addr() const;
 
   // checked exceptions
   int checked_exceptions_length() const;
@@ -263,6 +276,10 @@
   int localvariable_table_length() const;
   LocalVariableTableElement* localvariable_table_start() const;
 
+  // exception table
+  int exception_table_length() const;
+  ExceptionTableElement* exception_table_start() const;
+
   // byte codes
   void    set_code(address code) {
     if (code_size() > 0) {
@@ -278,13 +295,12 @@
                             { return in_ByteSize(sizeof(constMethodOopDesc)); }
 
   // interpreter support
-  static ByteSize exception_table_offset()
-               { return byte_offset_of(constMethodOopDesc, _exception_table); }
+  static ByteSize constants_offset()
+               { return byte_offset_of(constMethodOopDesc, _constants); }
 
   // Garbage collection support
-  oop*  adr_method() const             { return (oop*)&_method;          }
+  oop*  adr_constants() const          { return (oop*)&_constants; }
   oop*  adr_stackmap_data() const      { return (oop*)&_stackmap_data;   }
-  oop*  adr_exception_table() const    { return (oop*)&_exception_table; }
   bool is_conc_safe() { return _is_conc_safe; }
   void set_is_conc_safe(bool v) { _is_conc_safe = v; }
 
--- a/src/share/vm/oops/constantPoolOop.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/constantPoolOop.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -267,25 +267,61 @@
 
 
 methodOop constantPoolOopDesc::method_at_if_loaded(constantPoolHandle cpool,
-                                                   int which, Bytecodes::Code invoke_code) {
+                                                   int which) {
   assert(!constantPoolCacheOopDesc::is_secondary_index(which), "no indy instruction here");
   if (cpool->cache() == NULL)  return NULL;  // nothing to load yet
-  int cache_index = which - CPCACHE_INDEX_TAG;
+  int cache_index = get_cpcache_index(which);
   if (!(cache_index >= 0 && cache_index < cpool->cache()->length())) {
     if (PrintMiscellaneous && (Verbose||WizardMode)) {
-      tty->print_cr("bad operand %d for %d in:", which, invoke_code); cpool->print();
+      tty->print_cr("bad operand %d in:", which); cpool->print();
     }
     return NULL;
   }
   ConstantPoolCacheEntry* e = cpool->cache()->entry_at(cache_index);
-  if (invoke_code != Bytecodes::_illegal)
-    return e->get_method_if_resolved(invoke_code, cpool);
-  Bytecodes::Code bc;
-  if ((bc = e->bytecode_1()) != (Bytecodes::Code)0)
-    return e->get_method_if_resolved(bc, cpool);
-  if ((bc = e->bytecode_2()) != (Bytecodes::Code)0)
-    return e->get_method_if_resolved(bc, cpool);
-  return NULL;
+  return e->method_if_resolved(cpool);
+}
+
+
+bool constantPoolOopDesc::has_appendix_at_if_loaded(constantPoolHandle cpool, int which) {
+  if (cpool->cache() == NULL)  return false;  // nothing to load yet
+  // XXX Is there a simpler way to get to the secondary entry?
+  ConstantPoolCacheEntry* e;
+  if (constantPoolCacheOopDesc::is_secondary_index(which)) {
+    e = cpool->cache()->secondary_entry_at(which);
+  } else {
+    int cache_index = get_cpcache_index(which);
+    if (!(cache_index >= 0 && cache_index < cpool->cache()->length())) {
+      if (PrintMiscellaneous && (Verbose||WizardMode)) {
+        tty->print_cr("bad operand %d in:", which); cpool->print();
+      }
+      return false;
+    }
+    e = cpool->cache()->entry_at(cache_index);
+  }
+  return e->has_appendix();
+}
+
+
+oop constantPoolOopDesc::appendix_at_if_loaded(constantPoolHandle cpool, int which) {
+  if (cpool->cache() == NULL)  return NULL;  // nothing to load yet
+  // XXX Is there a simpler way to get to the secondary entry?
+  ConstantPoolCacheEntry* e;
+  if (constantPoolCacheOopDesc::is_secondary_index(which)) {
+    e = cpool->cache()->secondary_entry_at(which);
+  } else {
+    int cache_index = get_cpcache_index(which);
+    if (!(cache_index >= 0 && cache_index < cpool->cache()->length())) {
+      if (PrintMiscellaneous && (Verbose||WizardMode)) {
+        tty->print_cr("bad operand %d in:", which); cpool->print();
+      }
+      return NULL;
+    }
+    e = cpool->cache()->entry_at(cache_index);
+  }
+  if (!e->has_appendix()) {
+    return NULL;
+  }
+  return e->f1_as_instance();
 }
 
 
@@ -481,7 +517,7 @@
   if (cache_index >= 0) {
     assert(index == _no_index_sentinel, "only one kind of index at a time");
     ConstantPoolCacheEntry* cpc_entry = this_oop->cache()->entry_at(cache_index);
-    result_oop = cpc_entry->f1();
+    result_oop = cpc_entry->f1_as_instance();
     if (result_oop != NULL) {
       return decode_exception_from_f1(result_oop, THREAD);
       // That was easy...
@@ -553,12 +589,7 @@
                       index, this_oop->method_type_index_at(index),
                       signature->as_C_string());
       KlassHandle klass(THREAD, this_oop->pool_holder());
-      bool ignore_is_on_bcp = false;
-      Handle value = SystemDictionary::find_method_handle_type(signature,
-                                                               klass,
-                                                               false,
-                                                               ignore_is_on_bcp,
-                                                               THREAD);
+      Handle value = SystemDictionary::find_method_handle_type(signature, klass, THREAD);
       if (HAS_PENDING_EXCEPTION) {
         throw_exception = Handle(THREAD, PENDING_EXCEPTION);
         CLEAR_PENDING_EXCEPTION;
@@ -608,7 +639,7 @@
     result_oop = NULL;  // safety
     ObjectLocker ol(this_oop, THREAD);
     ConstantPoolCacheEntry* cpc_entry = this_oop->cache()->entry_at(cache_index);
-    result_oop = cpc_entry->f1();
+    result_oop = cpc_entry->f1_as_instance();
     // Benign race condition:  f1 may already be filled in while we were trying to lock.
     // The important thing here is that all threads pick up the same result.
     // It doesn't matter which racing thread wins, as long as only one
@@ -627,6 +658,45 @@
   }
 }
 
+
+oop constantPoolOopDesc::resolve_bootstrap_specifier_at_impl(constantPoolHandle this_oop, int index, TRAPS) {
+  assert(this_oop->tag_at(index).is_invoke_dynamic(), "Corrupted constant pool");
+
+  Handle bsm;
+  int argc;
+  {
+    // JVM_CONSTANT_InvokeDynamic is an ordered pair of [bootm, name&type], plus optional arguments
+    // The bootm, being a JVM_CONSTANT_MethodHandle, has its own cache entry.
+    // It is accompanied by the optional arguments.
+    int bsm_index = this_oop->invoke_dynamic_bootstrap_method_ref_index_at(index);
+    oop bsm_oop = this_oop->resolve_possibly_cached_constant_at(bsm_index, CHECK_NULL);
+    if (!java_lang_invoke_MethodHandle::is_instance(bsm_oop)) {
+      THROW_MSG_NULL(vmSymbols::java_lang_LinkageError(), "BSM not a MethodHandle");
+    }
+
+    // Extract the optional static arguments.
+    argc = this_oop->invoke_dynamic_argument_count_at(index);
+    if (argc == 0)  return bsm_oop;
+
+    bsm = Handle(THREAD, bsm_oop);
+  }
+
+  objArrayHandle info;
+  {
+    objArrayOop info_oop = oopFactory::new_objArray(SystemDictionary::Object_klass(), 1+argc, CHECK_NULL);
+    info = objArrayHandle(THREAD, info_oop);
+  }
+
+  info->obj_at_put(0, bsm());
+  for (int i = 0; i < argc; i++) {
+    int arg_index = this_oop->invoke_dynamic_argument_index_at(index, i);
+    oop arg_oop = this_oop->resolve_possibly_cached_constant_at(arg_index, CHECK_NULL);
+    info->obj_at_put(1+i, arg_oop);
+  }
+
+  return info();
+}
+
 oop constantPoolOopDesc::string_at_impl(constantPoolHandle this_oop, int which, TRAPS) {
   oop str = NULL;
   CPSlot entry = this_oop->slot_at(which);
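
Note: resolve_bootstrap_specifier_at_impl above returns one of two shapes: the bare bootstrap-method handle when the InvokeDynamic entry carries no static arguments, or an Object[] of length 1+argc with the method handle in slot 0 and the resolved static arguments in slots 1..argc. A small sketch of building and reading that shape with ordinary values; the Handle stand-in and the names are invented, only the array layout is taken from the code above:

#include <cstdio>
#include <vector>

struct Handle { const char* desc; };   // stand-in for an oop/Handle

// Pack a bootstrap specifier the way resolve_bootstrap_specifier_at_impl does:
// slot 0 = BSM, slots 1..argc = static arguments (argc may be 0 -> bare BSM).
std::vector<Handle> pack_bsm_info(Handle bsm, const std::vector<Handle>& args) {
  std::vector<Handle> info;
  info.reserve(1 + args.size());
  info.push_back(bsm);                             // info[0]   = bootstrap method
  for (const Handle& a : args) info.push_back(a);  // info[1+i] = static argument i
  return info;
}

int main() {
  Handle bsm = {"LambdaMetafactory.metafactory"};
  std::vector<Handle> args = {{"samMethodType"}, {"implMethod"}, {"instantiatedType"}};
  std::vector<Handle> info = pack_bsm_info(bsm, args);
  std::printf("bsm=%s, %zu static args\n", info[0].desc, info.size() - 1);
  return 0;
}
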
--- a/src/share/vm/oops/constantPoolOop.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/constantPoolOop.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -642,6 +642,11 @@
     return resolve_constant_at_impl(h_this, pool_index, _possible_index_sentinel, THREAD);
   }
 
+  oop resolve_bootstrap_specifier_at(int index, TRAPS) {
+    constantPoolHandle h_this(THREAD, this);
+    return resolve_bootstrap_specifier_at_impl(h_this, index, THREAD);
+  }
+
   // Klass name matches name at offset
   bool klass_name_at_matches(instanceKlassHandle k, int which);
 
@@ -666,12 +671,13 @@
   friend class SystemDictionary;
 
   // Used by compiler to prevent classloading.
-  static methodOop method_at_if_loaded        (constantPoolHandle this_oop, int which,
-                                               Bytecodes::Code bc = Bytecodes::_illegal);
-  static klassOop klass_at_if_loaded          (constantPoolHandle this_oop, int which);
-  static klassOop klass_ref_at_if_loaded      (constantPoolHandle this_oop, int which);
+  static methodOop       method_at_if_loaded      (constantPoolHandle this_oop, int which);
+  static bool      has_appendix_at_if_loaded      (constantPoolHandle this_oop, int which);
+  static oop           appendix_at_if_loaded      (constantPoolHandle this_oop, int which);
+  static klassOop         klass_at_if_loaded      (constantPoolHandle this_oop, int which);
+  static klassOop     klass_ref_at_if_loaded      (constantPoolHandle this_oop, int which);
   // Same as above - but does LinkResolving.
-  static klassOop klass_ref_at_if_loaded_check(constantPoolHandle this_oop, int which, TRAPS);
+  static klassOop     klass_ref_at_if_loaded_check(constantPoolHandle this_oop, int which, TRAPS);
 
   // Routines currently used for annotations (only called by jvm.cpp) but which might be used in the
   // future by other Java code. These take constant pool indices rather than
@@ -697,6 +703,8 @@
   enum { CPCACHE_INDEX_TAG = 0 };        // in product mode, this zero value is a no-op
 #endif //ASSERT
 
+  static int get_cpcache_index(int index) { return index - CPCACHE_INDEX_TAG; }
+
  private:
 
   Symbol* impl_name_ref_at(int which, bool uncached);
@@ -729,6 +737,7 @@
   static void resolve_string_constants_impl(constantPoolHandle this_oop, TRAPS);
 
   static oop resolve_constant_at_impl(constantPoolHandle this_oop, int index, int cache_index, TRAPS);
+  static oop resolve_bootstrap_specifier_at_impl(constantPoolHandle this_oop, int index, TRAPS);
 
  public:
   // Merging constantPoolOop support:
@@ -764,7 +773,7 @@
                         unsigned char *bytes);
 };
 
-class SymbolHashMapEntry : public CHeapObj {
+class SymbolHashMapEntry : public CHeapObj<mtSymbol> {
  private:
   unsigned int        _hash;   // 32-bit hash for item
   SymbolHashMapEntry* _next;   // Next element in the linked list for this bucket
@@ -790,7 +799,7 @@
 }; // End SymbolHashMapEntry class
 
 
-class SymbolHashMapBucket : public CHeapObj {
+class SymbolHashMapBucket : public CHeapObj<mtSymbol> {
 
 private:
   SymbolHashMapEntry*    _entry;
@@ -803,7 +812,7 @@
 }; // End SymbolHashMapBucket class
 
 
-class SymbolHashMap: public CHeapObj {
+class SymbolHashMap: public CHeapObj<mtSymbol> {
 
  private:
   // Default number of entries in the table
@@ -816,7 +825,7 @@
 
   void initialize_table(int table_size) {
     _table_size = table_size;
-    _buckets = NEW_C_HEAP_ARRAY(SymbolHashMapBucket, table_size);
+    _buckets = NEW_C_HEAP_ARRAY(SymbolHashMapBucket, table_size, mtSymbol);
     for (int index = 0; index < table_size; index++) {
       _buckets[index].clear();
     }
--- a/src/share/vm/oops/cpCacheOop.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/cpCacheOop.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,7 @@
 #include "oops/objArrayOop.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/jvmtiRedefineClassesTrace.hpp"
+#include "prims/methodHandles.hpp"
 #include "runtime/handles.inline.hpp"
 
 
@@ -44,68 +45,61 @@
 
 void ConstantPoolCacheEntry::initialize_secondary_entry(int main_index) {
   assert(0 <= main_index && main_index < 0x10000, "sanity check");
-  _indices = (main_index << 16);
+  _indices = (main_index << main_cp_index_bits);
   assert(main_entry_index() == main_index, "");
 }
 
-int ConstantPoolCacheEntry::as_flags(TosState state, bool is_final,
-                    bool is_vfinal, bool is_volatile,
-                    bool is_method_interface, bool is_method) {
-  int f = state;
-
-  assert( state < number_of_states, "Invalid state in as_flags");
-
-  f <<= 1;
-  if (is_final) f |= 1;
-  f <<= 1;
-  if (is_vfinal) f |= 1;
-  f <<= 1;
-  if (is_volatile) f |= 1;
-  f <<= 1;
-  if (is_method_interface) f |= 1;
-  f <<= 1;
-  if (is_method) f |= 1;
-  f <<= ConstantPoolCacheEntry::hotSwapBit;
+int ConstantPoolCacheEntry::make_flags(TosState state,
+                                       int option_bits,
+                                       int field_index_or_method_params) {
+  assert(state < number_of_states, "Invalid state in make_flags");
+  int f = ((int)state << tos_state_shift) | option_bits | field_index_or_method_params;
   // Preserve existing flag bit values
+  // The low bits are a field index, or else the method parameter size.
 #ifdef ASSERT
-  int old_state = ((_flags >> tosBits) & 0x0F);
-  assert(old_state == 0 || old_state == state,
+  TosState old_state = flag_state();
+  assert(old_state == (TosState)0 || old_state == state,
          "inconsistent cpCache flags state");
 #endif
   return (_flags | f) ;
 }
 
 void ConstantPoolCacheEntry::set_bytecode_1(Bytecodes::Code code) {
+  assert(!is_secondary_entry(), "must not overwrite main_entry_index");
 #ifdef ASSERT
   // Read once.
   volatile Bytecodes::Code c = bytecode_1();
   assert(c == 0 || c == code || code == 0, "update must be consistent");
 #endif
   // Need to flush pending stores here before bytecode is written.
-  OrderAccess::release_store_ptr(&_indices, _indices | ((u_char)code << 16));
+  OrderAccess::release_store_ptr(&_indices, _indices | ((u_char)code << bytecode_1_shift));
 }
 
 void ConstantPoolCacheEntry::set_bytecode_2(Bytecodes::Code code) {
+  assert(!is_secondary_entry(), "must not overwrite main_entry_index");
 #ifdef ASSERT
   // Read once.
   volatile Bytecodes::Code c = bytecode_2();
   assert(c == 0 || c == code || code == 0, "update must be consistent");
 #endif
   // Need to flush pending stores here before bytecode is written.
-  OrderAccess::release_store_ptr(&_indices, _indices | ((u_char)code << 24));
+  OrderAccess::release_store_ptr(&_indices, _indices | ((u_char)code << bytecode_2_shift));
 }
 
-// Atomically sets f1 if it is still NULL, otherwise it keeps the
-// current value.
-void ConstantPoolCacheEntry::set_f1_if_null_atomic(oop f1) {
+// Sets f1, ordering with previous writes.
+void ConstantPoolCacheEntry::release_set_f1(oop f1) {
   // Use barriers as in oop_store
+  assert(f1 != NULL, "");
   oop* f1_addr = (oop*) &_f1;
   update_barrier_set_pre(f1_addr, f1);
-  void* result = Atomic::cmpxchg_ptr(f1, f1_addr, NULL);
-  bool success = (result == NULL);
-  if (success) {
-    update_barrier_set((void*) f1_addr, f1);
-  }
+  OrderAccess::release_store_ptr((intptr_t*)f1_addr, f1);
+  update_barrier_set((void*) f1_addr, f1);
+}
+
+// Sets flags, but only if the value was previously zero.
+bool ConstantPoolCacheEntry::init_flags_atomic(intptr_t flags) {
+  intptr_t result = Atomic::cmpxchg_ptr(flags, &_flags, 0);
+  return (result == 0);
 }
 
 #ifdef ASSERT
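
Note: make_flags above replaces the old shift-and-or chain in as_flags with one composition: the TosState occupies a fixed field at tos_state_shift, the boolean options arrive pre-shifted, and the low bits carry either the field index or the method's parameter size. A standalone sketch of that packing; the bit positions below are invented for illustration (the real constants are declared in cpCacheOop.hpp, which this section of the changeset does not show):

#include <cstdio>

typedef int TosState;                       // stand-in for HotSpot's TosState enum

// Invented bit positions; the real ones are declared in cpCacheOop.hpp.
enum {
  parameter_size_bits = 8,                  // low bits: field index / param size
  parameter_size_mask = (1 << parameter_size_bits) - 1,
  is_final_shift      = 8,
  is_volatile_shift   = 9,
  tos_state_shift     = 10
};

// Mirrors ConstantPoolCacheEntry::make_flags: one OR of three fields.
int make_flags(TosState state, int option_bits, int field_index_or_params) {
  return (state << tos_state_shift) | option_bits
       | (field_index_or_params & parameter_size_mask);
}

int main() {
  int flags = make_flags(/*state=*/3,
                         (1 << is_final_shift) | (1 << is_volatile_shift),
                         /*field_index=*/42);
  std::printf("state=%d final=%d volatile=%d low=%d\n",
              flags >> tos_state_shift,
              (flags >> is_final_shift) & 1,
              (flags >> is_volatile_shift) & 1,
              flags & parameter_size_mask);
  return 0;
}
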
@@ -135,17 +129,32 @@
                                        bool is_volatile) {
   set_f1(field_holder()->java_mirror());
   set_f2(field_offset);
-  assert(field_index <= field_index_mask,
+  assert((field_index & field_index_mask) == field_index,
          "field index does not fit in low flag bits");
-  set_flags(as_flags(field_type, is_final, false, is_volatile, false, false) |
-            (field_index & field_index_mask));
+  set_field_flags(field_type,
+                  ((is_volatile ? 1 : 0) << is_volatile_shift) |
+                  ((is_final    ? 1 : 0) << is_final_shift),
+                  field_index);
   set_bytecode_1(get_code);
   set_bytecode_2(put_code);
   NOT_PRODUCT(verify(tty));
 }
 
-int  ConstantPoolCacheEntry::field_index() const {
-  return (_flags & field_index_mask);
+void ConstantPoolCacheEntry::set_parameter_size(int value) {
+  // This routine is called only in corner cases where the CPCE is not yet initialized.
+  // See AbstractInterpreter::deopt_continue_after_entry.
+  assert(_flags == 0 || parameter_size() == 0 || parameter_size() == value,
+         err_msg("size must not change: parameter_size=%d, value=%d", parameter_size(), value));
+  // Setting the parameter size by itself is only safe if the
+  // current value of _flags is 0, otherwise another thread may have
+  // updated it and we don't want to overwrite that value.  Don't
+  // bother trying to update it once it's nonzero but always make
+  // sure that the final parameter size agrees with what was passed.
+  if (_flags == 0) {
+    Atomic::cmpxchg_ptr((value & parameter_size_mask), &_flags, 0);
+  }
+  guarantee(parameter_size() == value,
+            err_msg("size must not change: parameter_size=%d, value=%d", parameter_size(), value));
 }
 
 void ConstantPoolCacheEntry::set_method(Bytecodes::Code invoke_code,
@@ -154,51 +163,51 @@
   assert(!is_secondary_entry(), "");
   assert(method->interpreter_entry() != NULL, "should have been set at this point");
   assert(!method->is_obsolete(),  "attempt to write obsolete method to cpCache");
-  bool change_to_virtual = (invoke_code == Bytecodes::_invokeinterface);
 
   int byte_no = -1;
-  bool needs_vfinal_flag = false;
+  bool change_to_virtual = false;
+
   switch (invoke_code) {
+    case Bytecodes::_invokeinterface:
+      // We get here from InterpreterRuntime::resolve_invoke when an invokeinterface
+      // instruction somehow links to a non-interface method (in Object).
+      // In that case, the method has no itable index and must be invoked as a virtual.
+      // Set a flag to keep track of this corner case.
+      change_to_virtual = true;
+
+      // ...and fall through as if we were handling invokevirtual:
     case Bytecodes::_invokevirtual:
-    case Bytecodes::_invokeinterface: {
+      {
         if (method->can_be_statically_bound()) {
-          set_f2((intptr_t)method());
-          needs_vfinal_flag = true;
+          // set_f2_as_vfinal_method checks that the is_vfinal flag is true.
+          set_method_flags(as_TosState(method->result_type()),
+                           (                             1      << is_vfinal_shift) |
+                           ((method->is_final_method() ? 1 : 0) << is_final_shift)  |
+                           ((change_to_virtual         ? 1 : 0) << is_forced_virtual_shift),
+                           method()->size_of_parameters());
+          set_f2_as_vfinal_method(method());
         } else {
           assert(vtable_index >= 0, "valid index");
+          assert(!method->is_final_method(), "sanity");
+          set_method_flags(as_TosState(method->result_type()),
+                           ((change_to_virtual ? 1 : 0) << is_forced_virtual_shift),
+                           method()->size_of_parameters());
           set_f2(vtable_index);
         }
         byte_no = 2;
         break;
-    }
-
-    case Bytecodes::_invokedynamic:  // similar to _invokevirtual
-      if (TraceInvokeDynamic) {
-        tty->print_cr("InvokeDynamic set_method%s method="PTR_FORMAT" index=%d",
-                      (is_secondary_entry() ? " secondary" : ""),
-                      (intptr_t)method(), vtable_index);
-        method->print();
-        this->print(tty, 0);
       }
-      assert(method->can_be_statically_bound(), "must be a MH invoker method");
-      assert(_f2 >= constantPoolOopDesc::CPCACHE_INDEX_TAG, "BSM index initialized");
-      // SystemDictionary::find_method_handle_invoke only caches
-      // methods which signature classes are on the boot classpath,
-      // otherwise the newly created method is returned.  To avoid
-      // races in that case we store the first one coming in into the
-      // cp-cache atomically if it's still unset.
-      set_f1_if_null_atomic(method());
-      needs_vfinal_flag = false;  // _f2 is not an oop
-      assert(!is_vfinal(), "f2 not an oop");
-      byte_no = 1;  // coordinate this with bytecode_number & is_resolved
-      break;
 
     case Bytecodes::_invokespecial:
-      // Preserve the value of the vfinal flag on invokevirtual bytecode
-      // which may be shared with this constant pool cache entry.
-      needs_vfinal_flag = is_resolved(Bytecodes::_invokevirtual) && is_vfinal();
-      // fall through
     case Bytecodes::_invokestatic:
+      // Note:  Read and preserve the value of the is_vfinal flag on any
+      // invokevirtual bytecode shared with this constant pool cache entry.
+      // It is cheap and safe to consult is_vfinal() at all times.
+      // Once is_vfinal is set, it must stay that way, lest we get a dangling oop.
+      set_method_flags(as_TosState(method->result_type()),
+                       ((is_vfinal()               ? 1 : 0) << is_vfinal_shift) |
+                       ((method->is_final_method() ? 1 : 0) << is_final_shift),
+                       method()->size_of_parameters());
       set_f1(method());
       byte_no = 1;
       break;
@@ -207,19 +216,14 @@
       break;
   }
 
-  set_flags(as_flags(as_TosState(method->result_type()),
-                     method->is_final_method(),
-                     needs_vfinal_flag,
-                     false,
-                     change_to_virtual,
-                     true)|
-            method()->size_of_parameters());
-
   // Note:  byte_no also appears in TemplateTable::resolve.
   if (byte_no == 1) {
+    assert(invoke_code != Bytecodes::_invokevirtual &&
+           invoke_code != Bytecodes::_invokeinterface, "");
     set_bytecode_1(invoke_code);
   } else if (byte_no == 2)  {
     if (change_to_virtual) {
+      assert(invoke_code == Bytecodes::_invokeinterface, "");
       // NOTE: THIS IS A HACK - BE VERY CAREFUL!!!
       //
       // Workaround for the case where we encounter an invokeinterface, but we
@@ -235,10 +239,11 @@
       // Otherwise, the method needs to be reresolved with caller for each
       // interface call.
       if (method->is_public()) set_bytecode_1(invoke_code);
-      set_bytecode_2(Bytecodes::_invokevirtual);
     } else {
-      set_bytecode_2(invoke_code);
+      assert(invoke_code == Bytecodes::_invokevirtual, "");
     }
+    // set up for invokevirtual, even if linking for invokeinterface also:
+    set_bytecode_2(Bytecodes::_invokevirtual);
   } else {
     ShouldNotReachHere();
   }
@@ -250,73 +255,129 @@
   assert(!is_secondary_entry(), "");
   klassOop interf = method->method_holder();
   assert(instanceKlass::cast(interf)->is_interface(), "must be an interface");
+  assert(!method->is_final_method(), "interfaces do not have final methods; cannot link to one here");
   set_f1(interf);
   set_f2(index);
-  set_flags(as_flags(as_TosState(method->result_type()), method->is_final_method(), false, false, false, true) | method()->size_of_parameters());
+  set_method_flags(as_TosState(method->result_type()),
+                   0,  // no option bits
+                   method()->size_of_parameters());
   set_bytecode_1(Bytecodes::_invokeinterface);
 }
 
 
-void ConstantPoolCacheEntry::initialize_bootstrap_method_index_in_cache(int bsm_cache_index) {
-  assert(!is_secondary_entry(), "only for JVM_CONSTANT_InvokeDynamic main entry");
-  assert(_f2 == 0, "initialize once");
-  assert(bsm_cache_index == (int)(u2)bsm_cache_index, "oob");
-  set_f2(bsm_cache_index + constantPoolOopDesc::CPCACHE_INDEX_TAG);
+void ConstantPoolCacheEntry::set_method_handle(methodHandle adapter, Handle appendix) {
+  assert(!is_secondary_entry(), "");
+  set_method_handle_common(Bytecodes::_invokehandle, adapter, appendix);
 }
 
-int ConstantPoolCacheEntry::bootstrap_method_index_in_cache() {
-  assert(!is_secondary_entry(), "only for JVM_CONSTANT_InvokeDynamic main entry");
-  intptr_t bsm_cache_index = (intptr_t) _f2 - constantPoolOopDesc::CPCACHE_INDEX_TAG;
-  assert(bsm_cache_index == (intptr_t)(u2)bsm_cache_index, "oob");
-  return (int) bsm_cache_index;
+void ConstantPoolCacheEntry::set_dynamic_call(methodHandle adapter, Handle appendix) {
+  assert(is_secondary_entry(), "");
+  set_method_handle_common(Bytecodes::_invokedynamic, adapter, appendix);
 }
 
-void ConstantPoolCacheEntry::set_dynamic_call(Handle call_site, methodHandle signature_invoker) {
-  assert(is_secondary_entry(), "");
-  // NOTE: it's important that all other values are set before f1 is
-  // set since some users short circuit on f1 being set
-  // (i.e. non-null) and that may result in uninitialized values for
-  // other racing threads (e.g. flags).
-  int param_size = signature_invoker->size_of_parameters();
-  assert(param_size >= 1, "method argument size must include MH.this");
-  param_size -= 1;  // do not count MH.this; it is not stacked for invokedynamic
-  bool is_final = true;
-  assert(signature_invoker->is_final_method(), "is_final");
-  int flags = as_flags(as_TosState(signature_invoker->result_type()), is_final, false, false, false, true) | param_size;
-  assert(_flags == 0 || _flags == flags, "flags should be the same");
-  set_flags(flags);
-  // do not do set_bytecode on a secondary CP cache entry
-  //set_bytecode_1(Bytecodes::_invokedynamic);
-  set_f1_if_null_atomic(call_site());  // This must be the last one to set (see NOTE above)!
+void ConstantPoolCacheEntry::set_method_handle_common(Bytecodes::Code invoke_code, methodHandle adapter, Handle appendix) {
+  // NOTE: This CPCE can be the subject of data races.
+  // There are three words to update: flags, f2, f1 (in that order).
+  // Writers must store all other values before f1.
+  // Readers must test f1 first for non-null before reading other fields.
+  // Competing writers must acquire exclusive access on the first
+  // write, to flags, using a compare/exchange.
+  // A losing writer must spin until the winner writes f1,
+  // so that when it returns, it can use the linked cache entry.
+
+  bool has_appendix = appendix.not_null();
+  if (!has_appendix) {
+    // The extra argument is not used, but we need a non-null value to signify linkage state.
+    // Set it to something benign that will never leak memory.
+    appendix = Universe::void_mirror();
+  }
+
+  bool owner =
+    init_method_flags_atomic(as_TosState(adapter->result_type()),
+                   ((has_appendix ?  1 : 0) << has_appendix_shift) |
+                   (                 1      << is_vfinal_shift)    |
+                   (                 1      << is_final_shift),
+                   adapter->size_of_parameters());
+  if (!owner) {
+    while (is_f1_null()) {
+      // Pause momentarily on a low-level lock, to allow the racing thread to win.
+      MutexLockerEx mu(Patching_lock, Mutex::_no_safepoint_check_flag);
+      os::yield();
+    }
+    return;
+  }
+
+  if (TraceInvokeDynamic) {
+    tty->print_cr("set_method_handle bc=%d appendix="PTR_FORMAT"%s method="PTR_FORMAT" ",
+                  invoke_code,
+                  (intptr_t)appendix(), (has_appendix ? "" : " (unused)"),
+                  (intptr_t)adapter());
+    adapter->print();
+    if (has_appendix)  appendix()->print();
+  }
+
+  // Method handle invokes and invokedynamic sites use both cp cache words.
+  // f1, if not null, contains a value passed as a trailing argument to the adapter.
+  // In the general case, this could be the call site's MethodType,
+  // for use with java.lang.invoke.Invokers.checkExactType, or else a CallSite object.
+  // f2 contains the adapter method which manages the actual call.
+  // In the general case, this is a compiled LambdaForm.
+  // (The Java code is free to optimize these calls by binding other
+  // sorts of methods and appendices to call sites.)
+  // JVM-level linking is via f2, as if for invokevfinal, and signatures are erased.
+  // The appendix argument (if any) is added to the signature, and is counted in the parameter_size bits.
+  // In principle this means that the method (with appendix) could take up to 256 parameter slots.
+  //
+  // This means that given a call site like (List)mh.invoke("foo"),
+  // the f2 method has signature '(Ljl/Object;Ljl/invoke/MethodType;)Ljl/Object;',
+  // not '(Ljava/lang/String;)Ljava/util/List;'.
+  // The fact that String and List are involved is encoded in the MethodType in f1.
+  // This allows us to create fewer method oops, while keeping type safety.
+  //
+  set_f2_as_vfinal_method(adapter());
+  assert(appendix.not_null(), "needed for linkage state");
+  release_set_f1(appendix());  // This must be the last one to set (see NOTE above)!
+  if (!is_secondary_entry()) {
+    // The interpreter assembly code does not check byte_2,
+    // but it is used by is_resolved, method_if_resolved, etc.
+    set_bytecode_2(invoke_code);
+  }
+  NOT_PRODUCT(verify(tty));
+  if (TraceInvokeDynamic) {
+    this->print(tty, 0);
+  }
 }
 
-
-methodOop ConstantPoolCacheEntry::get_method_if_resolved(Bytecodes::Code invoke_code, constantPoolHandle cpool) {
-  assert(invoke_code > (Bytecodes::Code)0, "bad query");
+methodOop ConstantPoolCacheEntry::method_if_resolved(constantPoolHandle cpool) {
   if (is_secondary_entry()) {
-    return cpool->cache()->entry_at(main_entry_index())->get_method_if_resolved(invoke_code, cpool);
+    if (!is_f1_null())
+      return f2_as_vfinal_method();
+    return NULL;
   }
   // Decode the action of set_method and set_interface_call
-  if (bytecode_1() == invoke_code) {
+  Bytecodes::Code invoke_code = bytecode_1();
+  if (invoke_code != (Bytecodes::Code)0) {
     oop f1 = _f1;
     if (f1 != NULL) {
       switch (invoke_code) {
       case Bytecodes::_invokeinterface:
         assert(f1->is_klass(), "");
-        return klassItable::method_for_itable_index(klassOop(f1), (int) f2());
+        return klassItable::method_for_itable_index(klassOop(f1), f2_as_index());
       case Bytecodes::_invokestatic:
       case Bytecodes::_invokespecial:
+        assert(!has_appendix(), "");
         assert(f1->is_method(), "");
         return methodOop(f1);
       }
     }
   }
-  if (bytecode_2() == invoke_code) {
+  invoke_code = bytecode_2();
+  if (invoke_code != (Bytecodes::Code)0) {
     switch (invoke_code) {
     case Bytecodes::_invokevirtual:
       if (is_vfinal()) {
         // invokevirtual
-        methodOop m = methodOop((intptr_t) f2());
+        methodOop m = f2_as_vfinal_method();
         assert(m->is_method(), "");
         return m;
       } else {
@@ -325,16 +386,19 @@
           klassOop klass = cpool->resolved_klass_at(holder_index);
           if (!Klass::cast(klass)->oop_is_instance())
             klass = SystemDictionary::Object_klass();
-          return instanceKlass::cast(klass)->method_at_vtable((int) f2());
+          return instanceKlass::cast(klass)->method_at_vtable(f2_as_index());
         }
       }
+      break;
+    case Bytecodes::_invokehandle:
+    case Bytecodes::_invokedynamic:
+      return f2_as_vfinal_method();
     }
   }
   return NULL;
 }
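// Illustrative usage of the accessor above (hedged sketch; assumes a
// constantPoolHandle cpool and a cache index are already in hand):
//   ConstantPoolCacheEntry* e = cpool->cache()->entry_at(cache_index);
//   methodOop m = e->method_if_resolved(cpool);
//   if (m != NULL) {
//     // linked: m is the target method (for invokehandle/invokedynamic
//     // entries it is the vfinal adapter method stored in f2)
//   }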
 
 
-
 class LocalOopClosure: public OopClosure {
  private:
   void (*_f)(oop*);
@@ -419,9 +483,10 @@
        methodOop new_method, bool * trace_name_printed) {
 
   if (is_vfinal()) {
-    // virtual and final so f2() contains method ptr instead of vtable index
-    if (f2() == (intptr_t)old_method) {
+    // virtual and final so _f2 contains method ptr instead of vtable index
+    if (f2_as_vfinal_method() == old_method) {
       // match old_method so need an update
+      // NOTE: can't use set_f2_as_vfinal_method as it asserts on different values
       _f2 = (intptr_t)new_method;
       if (RC_TRACE_IN_RANGE(0x00100000, 0x00400000)) {
         if (!(*trace_name_printed)) {
@@ -479,16 +544,17 @@
   methodOop m = NULL;
   if (is_vfinal()) {
     // virtual and final so _f2 contains method ptr instead of vtable index
-    m = (methodOop)_f2;
-  } else if ((oop)_f1 == NULL) {
+    m = f2_as_vfinal_method();
+  } else if (is_f1_null()) {
     // NULL _f1 means this is a virtual entry so also not interesting
     return false;
   } else {
-    if (!((oop)_f1)->is_method()) {
+    oop f1 = _f1;  // _f1 is volatile
+    if (!f1->is_method()) {
       // _f1 can also contain a klassOop for an interface
       return false;
     }
-    m = (methodOop)_f1;
+    m = f1_as_method();
   }
 
   assert(m != NULL && m->is_method(), "sanity check");
@@ -504,17 +570,17 @@
 
 void ConstantPoolCacheEntry::print(outputStream* st, int index) const {
   // print separator
-  if (index == 0) tty->print_cr("                 -------------");
+  if (index == 0) st->print_cr("                 -------------");
   // print entry
-  tty->print("%3d  ("PTR_FORMAT")  ", index, (intptr_t)this);
+  st->print("%3d  ("PTR_FORMAT")  ", index, (intptr_t)this);
   if (is_secondary_entry())
-    tty->print_cr("[%5d|secondary]", main_entry_index());
+    st->print_cr("[%5d|secondary]", main_entry_index());
   else
-    tty->print_cr("[%02x|%02x|%5d]", bytecode_2(), bytecode_1(), constant_pool_index());
-  tty->print_cr("                 [   "PTR_FORMAT"]", (intptr_t)(oop)_f1);
-  tty->print_cr("                 [   "PTR_FORMAT"]", (intptr_t)_f2);
-  tty->print_cr("                 [   "PTR_FORMAT"]", (intptr_t)_flags);
-  tty->print_cr("                 -------------");
+    st->print_cr("[%02x|%02x|%5d]", bytecode_2(), bytecode_1(), constant_pool_index());
+  st->print_cr("                 [   "PTR_FORMAT"]", (intptr_t)(oop)_f1);
+  st->print_cr("                 [   "PTR_FORMAT"]", (intptr_t)_f2);
+  st->print_cr("                 [   "PTR_FORMAT"]", (intptr_t)_flags);
+  st->print_cr("                 -------------");
 }
 
 void ConstantPoolCacheEntry::verify(outputStream* st) const {
--- a/src/share/vm/oops/cpCacheOop.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/cpCacheOop.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -38,13 +38,14 @@
 // bit number |31                0|
 // bit length |-8--|-8--|---16----|
 // --------------------------------
-// _indices   [ b2 | b1 |  index  ]
-// _f1        [  entry specific   ]
-// _f2        [  entry specific   ]
-// _flags     [t|f|vf|v|m|h|unused|field_index] (for field entries)
-// bit length |4|1|1 |1|1|0|---7--|----16-----]
-// _flags     [t|f|vf|v|m|h|unused|eidx|psze] (for method entries)
-// bit length |4|1|1 |1|1|1|---7--|-8--|-8--]
+// _indices   [ b2 | b1 |  index  ]  index = constant_pool_index (!= 0, normal entries only)
+// _indices   [  index  |  00000  ]  index = main_entry_index (secondary entries only)
+// _f1        [  entry specific   ]  method, klass, or oop (MethodType or CallSite)
+// _f2        [  entry specific   ]  vtable index or vfinal method
+// _flags     [tos|0|1 |0 |0 |f|v|0 |unused|field_index] (for field entries)
+// bit length [ 4 |1|1 |1 |1 |1|1|1 |---5--|----16-----]
+// _flags     [tos|0|0 |fv|ea|f|0|f2|unused|00000000|psize] (for method entries)
+// bit length [ 4 |1|1 |1 |1 |1|1|1 |---5--|---8----|--8--]
 
 // --------------------------------
 //
@@ -52,24 +53,23 @@
 // index  = original constant pool index
 // b1     = bytecode 1
 // b2     = bytecode 2
-// psze   = parameters size (method entries only)
-// eidx   = interpreter entry index (method entries only)
+// psize  = parameter size (method entries only)
 // field_index = index into field information in holder instanceKlass
 //          The index max is 0xffff (max number of fields in constant pool)
 //          and is multiplied by (instanceKlass::next_offset) when accessing.
 // t      = TosState (see below)
 // f      = field is marked final (see below)
-// vf     = virtual, final (method entries only : is_vfinal())
+// f2     = virtual but final (method entries only: is_vfinal())
 // v      = field is volatile (see below)
 // m      = invokeinterface used for method in class Object (see below)
 // h      = RedefineClasses/Hotswap bit (see below)
 //
 // The flags after TosState have the following interpretation:
-// bit 27: f flag  true if field is marked final
-// bit 26: vf flag true if virtual final method
-// bit 25: v flag true if field is volatile (only for fields)
-// bit 24: m flag true if invokeinterface used for method in class Object
-// bit 23: 0 for fields, 1 for methods
+// bit 26: 1 for fields, 0 for methods (the is_field_entry bit)
+// f  flag true if field is marked final
+// v  flag true if field is volatile (only for fields)
+// f2 flag true if _f2 contains an oop (e.g., a virtual final method)
+// fv flag true if invokeinterface used for method in class Object
+// ea flag true if a call-site appendix is stacked (method entries only)
 //
 // The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the
 // following mapping to the TosState states:
@@ -86,25 +86,26 @@
 //
 // Entry specific: field entries:
 // _indices = get (b1 section) and put (b2 section) bytecodes, original constant pool index
-// _f1      = field holder
-// _f2      = field offset in words
-// _flags   = field type information, original field index in field holder
+// _f1      = field holder (as a java.lang.Class, not a klassOop)
+// _f2      = field offset in bytes
+// _flags   = field type information, original FieldInfo index in field holder
 //            (field_index section)
 //
 // Entry specific: method entries:
 // _indices = invoke code for f1 (b1 section), invoke code for f2 (b2 section),
 //            original constant pool index
-// _f1      = method for all but virtual calls, unused by virtual calls
-//            (note: for interface calls, which are essentially virtual,
-//             contains klassOop for the corresponding interface.
-//            for invokedynamic, f1 contains the CallSite object for the invocation
-// _f2      = method/vtable index for virtual calls only, unused by all other
-//            calls.  The vf flag indicates this is a method pointer not an
-//            index.
-// _flags   = field type info (f section),
-//            virtual final entry (vf),
-//            interpreter entry index (eidx section),
-//            parameter size (psze section)
+// _f1      = methodOop for non-virtual calls, unused by virtual calls.
+//            for interface calls, which are essentially virtual but need a klass,
+//            contains klassOop for the corresponding interface.
+//            for invokedynamic, f1 contains a site-specific CallSite object (as an appendix)
+//            for invokehandle, f1 contains a site-specific MethodType object (as an appendix)
+//            (upcoming metadata changes will move the appendix to a separate array)
+// _f2      = vtable/itable index (or final methodOop) for virtual calls only,
+//            unused by non-virtual.  The is_vfinal flag indicates this is a
+//            method pointer for a final method, not an index.
+// _flags   = method type info (t section),
+//            virtual final bit (vfinal),
+//            parameter size (psize section)
 //
 // Note: invokevirtual & invokespecial bytecodes can share the same constant
 //       pool entry and thus the same constant pool cache entry. All invoke
@@ -138,30 +139,61 @@
     assert(existing_f1 == NULL || existing_f1 == f1, "illegal field change");
     oop_store(&_f1, f1);
   }
-  void set_f1_if_null_atomic(oop f1);
-  void set_f2(intx f2)                           { assert(_f2 == 0    || _f2 == f2, "illegal field change"); _f2 = f2; }
-  int as_flags(TosState state, bool is_final, bool is_vfinal, bool is_volatile,
-               bool is_method_interface, bool is_method);
+  void release_set_f1(oop f1);
+  void set_f2(intx f2)                           { assert(_f2 == 0 || _f2 == f2,            "illegal field change"); _f2 = f2; }
+  void set_f2_as_vfinal_method(methodOop f2)     { assert(_f2 == 0 || _f2 == (intptr_t) f2, "illegal field change"); assert(is_vfinal(), "flags must be set"); _f2 = (intptr_t) f2; }
+  int make_flags(TosState state, int option_bits, int field_index_or_method_params);
   void set_flags(intx flags)                     { _flags = flags; }
+  bool init_flags_atomic(intx flags);
+  void set_field_flags(TosState field_type, int option_bits, int field_index) {
+    assert((field_index & field_index_mask) == field_index, "field_index in range");
+    set_flags(make_flags(field_type, option_bits | (1 << is_field_entry_shift), field_index));
+  }
+  void set_method_flags(TosState return_type, int option_bits, int method_params) {
+    assert((method_params & parameter_size_mask) == method_params, "method_params in range");
+    set_flags(make_flags(return_type, option_bits, method_params));
+  }
+  bool init_method_flags_atomic(TosState return_type, int option_bits, int method_params) {
+    assert((method_params & parameter_size_mask) == method_params, "method_params in range");
+    return init_flags_atomic(make_flags(return_type, option_bits, method_params));
+  }
 
  public:
-  // specific bit values in flag field
-  // Note: the interpreter knows this layout!
-  enum FlagBitValues {
-    hotSwapBit    = 23,
-    methodInterface = 24,
-    volatileField = 25,
-    vfinalMethod  = 26,
-    finalField    = 27
+  // specific bit definitions for the flags field:
+  // (Note: the interpreter must use these definitions to access the CP cache.)
+  enum {
+    // high order bits are the TosState corresponding to field type or method return type
+    tos_state_bits             = 4,
+    tos_state_mask             = right_n_bits(tos_state_bits),
+    tos_state_shift            = BitsPerInt - tos_state_bits,  // see verify_tos_state_shift below
+    // misc. option bits; can be any bit position in [16..27]
+    is_vfinal_shift            = 21,
+    is_volatile_shift          = 22,
+    is_final_shift             = 23,
+    has_appendix_shift         = 24,
+    is_forced_virtual_shift    = 25,
+    is_field_entry_shift       = 26,
+    // low order bits give field index (for FieldInfo) or method parameter size:
+    field_index_bits           = 16,
+    field_index_mask           = right_n_bits(field_index_bits),
+    parameter_size_bits        = 8,  // subset of field_index_mask, range is 0..255
+    parameter_size_mask        = right_n_bits(parameter_size_bits),
+    option_bits_mask           = ~(((-1) << tos_state_shift) | (field_index_mask | parameter_size_mask))
   };
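  // Worked example (illustrative only): for a method entry returning an
  // object (atos), virtual-final, with parameter_size 3, the word composes
  // under the definitions above as
  //   _flags = (atos << tos_state_shift)
  //          | (1 << is_vfinal_shift) | (1 << is_final_shift)
  //          | 3 /* parameter_size */;
  // and decodes as
  //   tos    = (_flags >> tos_state_shift) & tos_state_mask;
  //   vfinal = (_flags & (1 << is_vfinal_shift)) != 0;
  //   psize  =  _flags & parameter_size_mask;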
 
-  enum { field_index_mask = 0xFFFF };
+  // specific bit definitions for the indices field:
+  enum {
+    main_cp_index_bits         = 2*BitsPerByte,
+    main_cp_index_mask         = right_n_bits(main_cp_index_bits),
+    bytecode_1_shift           = main_cp_index_bits,
+    bytecode_1_mask            = right_n_bits(BitsPerByte), // == (u1)0xFF
+    bytecode_2_shift           = main_cp_index_bits + BitsPerByte,
+    bytecode_2_mask            = right_n_bits(BitsPerByte), // == (u1)0xFF
+    // the secondary cp index overlaps with bytecodes 1 and 2:
+    secondary_cp_index_shift   = bytecode_1_shift,
+    secondary_cp_index_bits    = BitsPerInt - main_cp_index_bits
+  };
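  // Worked example (illustrative only): a primary entry linked for both
  // invokespecial (b1 = 0xb7) and invokevirtual (b2 = 0xb6) at cp index 42:
  //   _indices = 42
  //            | (0xb7 << bytecode_1_shift)
  //            | (0xb6 << bytecode_2_shift);
  // A secondary entry instead keeps the low 16 bits zero, so
  //   is_secondary_entry() == ((_indices & main_cp_index_mask) == 0)
  // and main_entry_index() == ((uintx)_indices >> secondary_cp_index_shift).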
 
-  // start of type bits in flags
-  // Note: the interpreter knows this layout!
-  enum FlagValues {
-    tosBits      = 28
-  };
 
   // Initialization
   void initialize_entry(int original_index);     // initialize primary entry
@@ -189,30 +221,40 @@
     int index                                    // Method index into interface
   );
 
+  void set_method_handle(
+    methodHandle method,                         // adapter for invokeExact, etc.
+    Handle appendix                              // stored in f1; could be a java.lang.invoke.MethodType
+  );
+
   void set_dynamic_call(
-    Handle call_site,                            // Resolved java.lang.invoke.CallSite (f1)
-    methodHandle signature_invoker               // determines signature information
+    methodHandle method,                         // adapter for this call site
+    Handle appendix                              // stored in f1; could be a java.lang.invoke.CallSite
   );
 
-  methodOop get_method_if_resolved(Bytecodes::Code invoke_code, constantPoolHandle cpool);
-
-  // For JVM_CONSTANT_InvokeDynamic cache entries:
-  void initialize_bootstrap_method_index_in_cache(int bsm_cache_index);
-  int  bootstrap_method_index_in_cache();
+  // Common code for invokedynamic and MH invocations.
 
-  void set_parameter_size(int value) {
-    assert(parameter_size() == 0 || parameter_size() == value,
-           "size must not change");
-    // Setting the parameter size by itself is only safe if the
-    // current value of _flags is 0, otherwise another thread may have
-    // updated it and we don't want to overwrite that value.  Don't
-    // bother trying to update it once it's nonzero but always make
-    // sure that the final parameter size agrees with what was passed.
-    if (_flags == 0) {
-      Atomic::cmpxchg_ptr((value & 0xFF), &_flags, 0);
-    }
-    guarantee(parameter_size() == value, "size must not change");
-  }
+  // The "appendix" is an optional call-site-specific parameter which is
+  // pushed by the JVM at the end of the argument list.  This argument may
+  // be a MethodType for the MH.invokes and a CallSite for an invokedynamic
+  // instruction.  However, its exact type and use depend on the Java upcall,
+  // which simply returns a compiled LambdaForm along with any reference
+  // that LambdaForm needs to complete the call.  If the upcall returns a
+  // null appendix, the argument is not passed at all.
+  //
+  // The appendix is *not* represented in the signature of the symbolic
+  // reference for the call site, but (if present) it *is* represented in
+  // the methodOop bound to the site.  This means that static and dynamic
+  // resolution logic needs to make slightly different assessments about the
+  // number and types of arguments.
+  void set_method_handle_common(
+    Bytecodes::Code invoke_code,                 // _invokehandle or _invokedynamic
+    methodHandle adapter,                        // invoker method (f2)
+    Handle appendix                              // appendix such as CallSite, MethodType, etc. (f1)
+  );
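  // Conceptual sketch (not code from this file) of what the interpreter does
  // at a linked invokehandle/invokedynamic site, given the comment above:
  //   push(arguments per the erased symbolic signature);
  //   if (entry->has_appendix())
  //     push(entry->f1_appendix());          // trailing MethodType or CallSite
  //   invoke(entry->f2_as_vfinal_method());  // adapter; parameter_size already
  //                                          // counts the appendix slot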
+
+  methodOop method_if_resolved(constantPoolHandle cpool);
+
+  void set_parameter_size(int value);
 
   // Which bytecode number (1 or 2) in the index field is valid for this bytecode?
   // Returns -1 if neither is valid.
@@ -222,10 +264,11 @@
       case Bytecodes::_getfield        :    // fall through
       case Bytecodes::_invokespecial   :    // fall through
       case Bytecodes::_invokestatic    :    // fall through
-      case Bytecodes::_invokedynamic   :    // fall through
       case Bytecodes::_invokeinterface : return 1;
       case Bytecodes::_putstatic       :    // fall through
       case Bytecodes::_putfield        :    // fall through
+      case Bytecodes::_invokehandle    :    // fall through
+      case Bytecodes::_invokedynamic   :    // fall through
       case Bytecodes::_invokevirtual   : return 2;
       default                          : break;
     }
@@ -242,31 +285,43 @@
   }
 
   // Accessors
-  bool is_secondary_entry() const                { return (_indices & 0xFFFF) == 0; }
-  int constant_pool_index() const                { assert((_indices & 0xFFFF) != 0, "must be main entry");
-                                                   return (_indices & 0xFFFF); }
-  int main_entry_index() const                   { assert((_indices & 0xFFFF) == 0, "must be secondary entry");
-                                                   return ((uintx)_indices >> 16); }
-  Bytecodes::Code bytecode_1() const             { return Bytecodes::cast((_indices >> 16) & 0xFF); }
-  Bytecodes::Code bytecode_2() const             { return Bytecodes::cast((_indices >> 24) & 0xFF); }
-  volatile oop  f1() const                       { return _f1; }
-  bool is_f1_null() const                        { return (oop)_f1 == NULL; }  // classifies a CPC entry as unbound
-  intx f2() const                                { return _f2; }
-  int  field_index() const;
-  int  parameter_size() const                    { return _flags & 0xFF; }
-  bool is_vfinal() const                         { return ((_flags & (1 << vfinalMethod)) == (1 << vfinalMethod)); }
-  bool is_volatile() const                       { return ((_flags & (1 << volatileField)) == (1 << volatileField)); }
-  bool is_methodInterface() const                { return ((_flags & (1 << methodInterface)) == (1 << methodInterface)); }
-  bool is_byte() const                           { return (((uintx) _flags >> tosBits) == btos); }
-  bool is_char() const                           { return (((uintx) _flags >> tosBits) == ctos); }
-  bool is_short() const                          { return (((uintx) _flags >> tosBits) == stos); }
-  bool is_int() const                            { return (((uintx) _flags >> tosBits) == itos); }
-  bool is_long() const                           { return (((uintx) _flags >> tosBits) == ltos); }
-  bool is_float() const                          { return (((uintx) _flags >> tosBits) == ftos); }
-  bool is_double() const                         { return (((uintx) _flags >> tosBits) == dtos); }
-  bool is_object() const                         { return (((uintx) _flags >> tosBits) == atos); }
-  TosState flag_state() const                    { assert( ( (_flags >> tosBits) & 0x0F ) < number_of_states, "Invalid state in as_flags");
-                                                   return (TosState)((_flags >> tosBits) & 0x0F); }
+  bool is_secondary_entry() const                { return (_indices & main_cp_index_mask) == 0; }
+  int main_entry_index() const                   { assert(is_secondary_entry(), "must be secondary entry");
+                                                   return ((uintx)_indices >> secondary_cp_index_shift); }
+  int primary_entry_indices() const              { assert(!is_secondary_entry(), "must be main entry");
+                                                   return _indices; }
+  int constant_pool_index() const                { return (primary_entry_indices() & main_cp_index_mask); }
+  Bytecodes::Code bytecode_1() const             { return Bytecodes::cast((primary_entry_indices() >> bytecode_1_shift)
+                                                                          & bytecode_1_mask); }
+  Bytecodes::Code bytecode_2() const             { return Bytecodes::cast((primary_entry_indices() >> bytecode_2_shift)
+                                                                          & bytecode_2_mask); }
+  methodOop f1_as_method() const                 { oop f1 = _f1; assert(f1 == NULL || f1->is_method(), ""); return methodOop(f1); }
+  klassOop  f1_as_klass() const                  { oop f1 = _f1; assert(f1 == NULL || f1->is_klass(), ""); return klassOop(f1); }
+  oop       f1_as_klass_mirror() const           { oop f1 = f1_as_instance(); return f1; }  // i.e., return a java_mirror
+  oop       f1_as_instance() const               { oop f1 = _f1; assert(f1 == NULL || f1->is_instance() || f1->is_array(), ""); return f1; }
+  oop       f1_appendix() const                  { assert(has_appendix(), ""); return f1_as_instance(); }
+  bool      is_f1_null() const                   { oop f1 = _f1; return f1 == NULL; }  // classifies a CPC entry as unbound
+  int       f2_as_index() const                  { assert(!is_vfinal(), ""); return (int) _f2; }
+  methodOop f2_as_vfinal_method() const          { assert(is_vfinal(), ""); return methodOop(_f2); }
+  int  field_index() const                       { assert(is_field_entry(),  ""); return (_flags & field_index_mask); }
+  int  parameter_size() const                    { assert(is_method_entry(), ""); return (_flags & parameter_size_mask); }
+  bool is_volatile() const                       { return (_flags & (1 << is_volatile_shift))       != 0; }
+  bool is_final() const                          { return (_flags & (1 << is_final_shift))          != 0; }
+  bool has_appendix() const                      { return (_flags & (1 << has_appendix_shift))     != 0; }
+  bool is_forced_virtual() const                 { return (_flags & (1 << is_forced_virtual_shift)) != 0; }
+  bool is_vfinal() const                         { return (_flags & (1 << is_vfinal_shift))         != 0; }
+  bool is_method_entry() const                   { return (_flags & (1 << is_field_entry_shift))    == 0; }
+  bool is_field_entry() const                    { return (_flags & (1 << is_field_entry_shift))    != 0; }
+  bool is_byte() const                           { return flag_state() == btos; }
+  bool is_char() const                           { return flag_state() == ctos; }
+  bool is_short() const                          { return flag_state() == stos; }
+  bool is_int() const                            { return flag_state() == itos; }
+  bool is_long() const                           { return flag_state() == ltos; }
+  bool is_float() const                          { return flag_state() == ftos; }
+  bool is_double() const                         { return flag_state() == dtos; }
+  bool is_object() const                         { return flag_state() == atos; }
+  TosState flag_state() const                    { assert((uint)number_of_states <= (uint)tos_state_mask+1, "");
+                                                   return (TosState)((_flags >> tos_state_shift) & tos_state_mask); }
 
   // Code generation support
   static WordSize size()                         { return in_WordSize(sizeof(ConstantPoolCacheEntry) / HeapWordSize); }
@@ -299,15 +354,14 @@
   bool adjust_method_entry(methodOop old_method, methodOop new_method,
          bool * trace_name_printed);
   bool is_interesting_method_entry(klassOop k);
-  bool is_field_entry() const                    { return (_flags & (1 << hotSwapBit)) == 0; }
-  bool is_method_entry() const                   { return (_flags & (1 << hotSwapBit)) != 0; }
 
   // Debugging & Printing
   void print (outputStream* st, int index) const;
   void verify(outputStream* st) const;
 
-  static void verify_tosBits() {
-    assert(tosBits == 28, "interpreter now assumes tosBits is 28");
+  static void verify_tos_state_shift() {
+    // When shifting flags as a 32-bit int, make sure we don't need an extra mask for tos_state:
+    assert((((u4)-1 >> tos_state_shift) & ~tos_state_mask) == 0, "no need for tos_state mask");
   }
 };
 
--- a/src/share/vm/oops/fieldInfo.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/fieldInfo.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -50,8 +50,7 @@
     initval_index_offset     = 3,
     low_offset               = 4,
     high_offset              = 5,
-    generic_signature_offset = 6,
-    field_slots              = 7
+    field_slots              = 6
   };
 
  private:
@@ -60,29 +59,28 @@
   void set_name_index(u2 val)                    { _shorts[name_index_offset] = val;         }
   void set_signature_index(u2 val)               { _shorts[signature_index_offset] = val;    }
   void set_initval_index(u2 val)                 { _shorts[initval_index_offset] = val;      }
-  void set_generic_signature_index(u2 val)       { _shorts[generic_signature_offset] = val;  }
 
   u2 name_index() const                          { return _shorts[name_index_offset];        }
   u2 signature_index() const                     { return _shorts[signature_index_offset];   }
   u2 initval_index() const                       { return _shorts[initval_index_offset];     }
-  u2 generic_signature_index() const             { return _shorts[generic_signature_offset]; }
 
  public:
   static FieldInfo* from_field_array(typeArrayOop fields, int index) {
     return ((FieldInfo*)fields->short_at_addr(index * field_slots));
   }
+  static FieldInfo* from_field_array(u2* fields, int index) {
+    return ((FieldInfo*)(fields + index * field_slots));
+  }
 
   void initialize(u2 access_flags,
                   u2 name_index,
                   u2 signature_index,
                   u2 initval_index,
-                  u2 generic_signature_index,
                   u4 offset) {
     _shorts[access_flags_offset] = access_flags;
     _shorts[name_index_offset] = name_index;
     _shorts[signature_index_offset] = signature_index;
     _shorts[initval_index_offset] = initval_index;
-    _shorts[generic_signature_offset] = generic_signature_index;
     set_offset(offset);
   }
 
@@ -105,14 +103,6 @@
     return cp->symbol_at(index);
   }
 
-  Symbol* generic_signature(constantPoolHandle cp) const {
-    int index = generic_signature_index();
-    if (index == 0) {
-      return NULL;
-    }
-    return cp->symbol_at(index);
-  }
-
   void set_access_flags(u2 val)                  { _shorts[access_flags_offset] = val;             }
   void set_offset(u4 val)                        {
     _shorts[low_offset] = extract_low_short_from_int(val);
--- a/src/share/vm/oops/fieldStreams.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/fieldStreams.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -42,21 +42,57 @@
   constantPoolHandle  _constants;
   int                 _index;
   int                 _limit;
+  int                 _generic_signature_slot;
 
   FieldInfo* field() const { return FieldInfo::from_field_array(_fields(), _index); }
 
+  int init_generic_signature_start_slot() {
+    int length = _fields->length();
+    int num_fields = 0;
+    int skipped_generic_signature_slots = 0;
+    FieldInfo* fi;
+    AccessFlags flags;
+    /* Scan from 0 to the current _index. Count the number of generic
+       signature slots for field[0] to field[_index - 1]. */
+    for (int i = 0; i < _index; i++) {
+      fi = FieldInfo::from_field_array(_fields(), i);
+      flags.set_flags(fi->access_flags());
+      if (flags.field_has_generic_signature()) {
+        length --;
+        skipped_generic_signature_slots ++;
+      }
+    }
+    /* Scan from the current _index. */
+    for (int i = _index; i*FieldInfo::field_slots < length; i++) {
+      fi = FieldInfo::from_field_array(_fields(), i);
+      flags.set_flags(fi->access_flags());
+      if (flags.field_has_generic_signature()) {
+        length --;
+      }
+      num_fields ++;
+    }
+    _generic_signature_slot = length + skipped_generic_signature_slots;
+    assert(_generic_signature_slot <= _fields->length(), "");
+    return num_fields;
+  }
+
   FieldStreamBase(typeArrayHandle fields, constantPoolHandle constants, int start, int limit) {
     _fields = fields;
     _constants = constants;
     _index = start;
-    _limit = limit;
+    int num_fields = init_generic_signature_start_slot();
+    if (limit < start) {
+      _limit = num_fields;
+    } else {
+      _limit = limit;
+    }
   }
 
   FieldStreamBase(typeArrayHandle fields, constantPoolHandle constants) {
     _fields = fields;
     _constants = constants;
     _index = 0;
-    _limit = fields->length() / FieldInfo::field_slots;
+    _limit = init_generic_signature_start_slot();
   }
 
  public:
@@ -65,18 +101,26 @@
     _constants = klass->constants();
     _index = 0;
     _limit = klass->java_fields_count();
+    init_generic_signature_start_slot();
   }
   FieldStreamBase(instanceKlassHandle klass) {
     _fields = klass->fields();
     _constants = klass->constants();
     _index = 0;
     _limit = klass->java_fields_count();
+    init_generic_signature_start_slot();
   }
 
   // accessors
   int index() const                 { return _index; }
 
-  void next() { _index += 1; }
+  void next() {
+    if (access_flags().field_has_generic_signature()) {
+      _generic_signature_slot ++;
+      assert(_generic_signature_slot <= _fields->length(), "");
+    }
+    _index += 1;
+  }
   bool done() const { return _index >= _limit; }
 
   // Accessors for current field
@@ -103,7 +147,13 @@
   }
 
   Symbol* generic_signature() const {
-    return field()->generic_signature(_constants);
+    if (access_flags().field_has_generic_signature()) {
+      assert(_generic_signature_slot < _fields->length(), "out of bounds");
+      int index = _fields->short_at(_generic_signature_slot);
+      return _constants->symbol_at(index);
+    } else {
+      return NULL;
+    }
   }
 
   int offset() const {
@@ -139,11 +189,19 @@
   }
   int generic_signature_index() const {
     assert(!field()->is_internal(), "regular only");
-    return field()->generic_signature_index();
+    if (access_flags().field_has_generic_signature()) {
+      assert(_generic_signature_slot < _fields->length(), "out of bounds");
+      return _fields->short_at(_generic_signature_slot);
+    } else {
+      return 0;
+    }
   }
   void set_generic_signature_index(int index) {
     assert(!field()->is_internal(), "regular only");
-    field()->set_generic_signature_index(index);
+    if (access_flags().field_has_generic_signature()) {
+      assert(_generic_signature_slot < _fields->length(), "out of bounds");
+      _fields->short_at_put(_generic_signature_slot, index);
+    }
   }
   int initval_index() const {
     assert(!field()->is_internal(), "regular only");
@@ -159,8 +217,8 @@
 // Iterate over only the internal fields
 class InternalFieldStream : public FieldStreamBase {
  public:
-  InternalFieldStream(instanceKlass* k):      FieldStreamBase(k->fields(), k->constants(), k->java_fields_count(), k->all_fields_count()) {}
-  InternalFieldStream(instanceKlassHandle k): FieldStreamBase(k->fields(), k->constants(), k->java_fields_count(), k->all_fields_count()) {}
+  InternalFieldStream(instanceKlass* k):      FieldStreamBase(k->fields(), k->constants(), k->java_fields_count(), 0) {}
+  InternalFieldStream(instanceKlassHandle k): FieldStreamBase(k->fields(), k->constants(), k->java_fields_count(), 0) {}
 };
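// Illustrative usage (hedged sketch): the streams are consumed in the usual
// done()/next() style; generic_signature() transparently fetches the index
// from the slots appended after the fixed 6-short field records.  Assuming
// an instanceKlassHandle k, plus the JavaFieldStream subclass and signature()
// accessor defined elsewhere in this file:
//   for (JavaFieldStream fs(k); !fs.done(); fs.next()) {
//     Symbol* sig  = fs.signature();
//     Symbol* gsig = fs.generic_signature();  // NULL if the field has none
//     // ...
//   }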
 
 
--- a/src/share/vm/oops/generateOopMap.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/generateOopMap.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,7 @@
 #include "runtime/java.hpp"
 #include "runtime/relocator.hpp"
 #include "utilities/bitMap.inline.hpp"
+#include "prims/methodHandles.hpp"
 
 //
 //
@@ -400,10 +401,9 @@
   bool fellThrough = false;  // False to get first BB marked.
 
   // First mark all exception handlers as start of a basic-block
-  typeArrayOop excps = method()->exception_table();
-  for(int i = 0; i < excps->length(); i += 4) {
-    int handler_pc_idx = i+2;
-    bb_mark_fct(this, excps->int_at(handler_pc_idx), NULL);
+  ExceptionTable excps(method());
+  for(int i = 0; i < excps.length(); i ++) {
+    bb_mark_fct(this, excps.handler_pc(i), NULL);
   }
 
   // Then iterate through the code
@@ -450,10 +450,9 @@
 
   // Mark entry basic block as alive and all exception handlers
   _basic_blocks[0].mark_as_alive();
-  typeArrayOop excps = method()->exception_table();
-  for(int i = 0; i < excps->length(); i += 4) {
-    int handler_pc_idx = i+2;
-    BasicBlock *bb = get_basic_block_at(excps->int_at(handler_pc_idx));
+  ExceptionTable excps(method());
+  for(int i = 0; i < excps.length(); i++) {
+    BasicBlock *bb = get_basic_block_at(excps.handler_pc(i));
     // If block is not already alive (due to multiple exception handlers to same bb), then
     // make it alive
     if (bb->is_dead()) bb->mark_as_alive();
@@ -1181,12 +1180,12 @@
 
   if (_has_exceptions) {
     int bci = itr->bci();
-    typeArrayOop exct  = method()->exception_table();
-    for(int i = 0; i< exct->length(); i+=4) {
-      int start_pc   = exct->int_at(i);
-      int end_pc     = exct->int_at(i+1);
-      int handler_pc = exct->int_at(i+2);
-      int catch_type = exct->int_at(i+3);
+    ExceptionTable exct(method());
+    for(int i = 0; i< exct.length(); i++) {
+      int start_pc   = exct.start_pc(i);
+      int end_pc     = exct.end_pc(i);
+      int handler_pc = exct.handler_pc(i);
+      int catch_type = exct.catch_type_index(i);
 
       if (start_pc <= bci && bci < end_pc) {
         BasicBlock *excBB = get_basic_block_at(handler_pc);
@@ -2055,7 +2054,7 @@
   _conflict       = false;
   _max_locals     = method()->max_locals();
   _max_stack      = method()->max_stack();
-  _has_exceptions = (method()->exception_table()->length() > 0);
+  _has_exceptions = (method()->has_exception_handler());
   _nof_refval_conflicts = 0;
   _init_vars      = new GrowableArray<intptr_t>(5);  // There are seldom more than 5 init_vars
   _report_result  = false;
@@ -2070,9 +2069,10 @@
     if (Verbose) {
       _method->print_codes();
       tty->print_cr("Exception table:");
-      typeArrayOop excps = method()->exception_table();
-      for(int i = 0; i < excps->length(); i += 4) {
-        tty->print_cr("[%d - %d] -> %d", excps->int_at(i + 0), excps->int_at(i + 1), excps->int_at(i + 2));
+      ExceptionTable excps(method());
+      for(int i = 0; i < excps.length(); i ++) {
+        tty->print_cr("[%d - %d] -> %d",
+                      excps.start_pc(i), excps.end_pc(i), excps.handler_pc(i));
       }
     }
   }
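// Illustrative aside: each hunk above makes the same mechanical substitution.
// The old code walked a flat typeArrayOop with a stride of four ints per
// handler; the ExceptionTable wrapper names the columns:
//   old:  excps->int_at(4*i + 2);       // handler_pc of entry i
//   new:  ExceptionTable excps(method());
//         excps.handler_pc(i);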
--- a/src/share/vm/oops/instanceKlass.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/instanceKlass.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -567,8 +567,18 @@
   ol.notify_all(CHECK);
 }
 
+// The embedded _implementor field can only record one implementor.
+// When there is more than one implementor, the _implementor field
+// is set to the interface klassOop itself. The possible values of
+// the _implementor field are:
+//   NULL                  - no implementor
+//   implementor klassOop  - one implementor
+//   self                  - more than one implementor
+//
+// The _implementor field only exists for interfaces.
 void instanceKlass::add_implementor(klassOop k) {
   assert(Compile_lock->owned_by_self(), "");
+  assert(is_interface(), "not interface");
   // Filter out my subinterfaces.
   // (Note: Interfaces are never on the subklass list.)
   if (instanceKlass::cast(k)->is_interface()) return;
@@ -583,17 +593,13 @@
     // Any supers of the super have the same (or fewer) transitive_interfaces.
     return;
 
-  // Update number of implementors
-  int i = _nof_implementors++;
-
-  // Record this implementor, if there are not too many already
-  if (i < implementors_limit) {
-    assert(_implementors[i] == NULL, "should be exactly one implementor");
-    oop_store_without_check((oop*)&_implementors[i], k);
-  } else if (i == implementors_limit) {
-    // clear out the list on first overflow
-    for (int i2 = 0; i2 < implementors_limit; i2++)
-      oop_store_without_check((oop*)&_implementors[i2], NULL);
+  klassOop ik = implementor();
+  if (ik == NULL) {
+    set_implementor(k);
+  } else if (ik != this->as_klassOop()) {
+    // There is already an implementor. Use itself as an indicator of
+    // more than one implementors.
+    set_implementor(this->as_klassOop());
   }
 
   // The implementor also implements the transitive_interfaces
@@ -603,9 +609,9 @@
 }
 
 void instanceKlass::init_implementor() {
-  for (int i = 0; i < implementors_limit; i++)
-    oop_store_without_check((oop*)&_implementors[i], NULL);
-  _nof_implementors = 0;
+  if (is_interface()) {
+    set_implementor(NULL);
+  }
 }
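// Illustrative decode of the encoding above (hedged; mirrors what an
// nof_implementors()-style accessor would compute from the single word):
//   klassOop ik = implementor();
//   if      (ik == NULL)                 nof = 0;  // no implementor
//   else if (ik == this->as_klassOop())  nof = 2;  // "self" means 2 or more
//   else                                 nof = 1;  // exactly one, namely ik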
 
 
@@ -982,7 +988,7 @@
   fieldDescriptor fd;
   int length = java_fields_count();
   // In DebugInfo nonstatic fields are sorted by offset.
-  int* fields_sorted = NEW_C_HEAP_ARRAY(int, 2*(length+1));
+  int* fields_sorted = NEW_C_HEAP_ARRAY(int, 2*(length+1), mtClass);
   int j = 0;
   for (int i = 0; i < length; i += 1) {
     fd.initialize(as_klassOop(), i);
@@ -1002,7 +1008,7 @@
       cl->do_field(&fd);
     }
   }
-  FREE_C_HEAP_ARRAY(int, fields_sorted);
+  FREE_C_HEAP_ARRAY(int, fields_sorted, mtClass);
 }
 
 
@@ -1132,6 +1138,36 @@
   return probe;
 }
 
+u2 instanceKlass::enclosing_method_data(int offset) {
+  typeArrayOop inner_class_list = inner_classes();
+  if (inner_class_list == NULL) {
+    return 0;
+  }
+  int length = inner_class_list->length();
+  if (length % inner_class_next_offset == 0) {
+    return 0;
+  } else {
+    int index = length - enclosing_method_attribute_size;
+    typeArrayHandle inner_class_list_h(inner_class_list);
+    assert(offset < enclosing_method_attribute_size, "invalid offset");
+    return inner_class_list_h->ushort_at(index + offset);
+  }
+}
+
+void instanceKlass::set_enclosing_method_indices(u2 class_index,
+                                                 u2 method_index) {
+  typeArrayOop inner_class_list = inner_classes();
+  assert (inner_class_list != NULL, "_inner_classes list is not set up");
+  int length = inner_class_list->length();
+  if (length % inner_class_next_offset == enclosing_method_attribute_size) {
+    int index = length - enclosing_method_attribute_size;
+    typeArrayHandle inner_class_list_h(inner_class_list);
+    inner_class_list_h->ushort_at_put(
+      index + enclosing_method_class_index_offset, class_index);
+    inner_class_list_h->ushort_at_put(
+      index + enclosing_method_method_index_offset, method_index);
+  }
+}
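// Both routines above assume the same packed layout of inner_classes
// (a typeArrayOop of u2): n four-short inner-class records, optionally
// followed by a two-short enclosing-method suffix, so that
// length % inner_class_next_offset != 0 exactly when the suffix exists:
//   [inner_info, outer_info, inner_name, access] * n
//   [enclosing_class_index, enclosing_method_index]      // optional
// with the suffix starting at length - enclosing_method_attribute_size.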
 
 // Lookup or create a jmethodID.
 // This code is called by the VMThread and JavaThreads so the
@@ -1199,7 +1235,7 @@
     if (length <= idnum) {
       // allocate a new cache that might be used
       size_t size = MAX2(idnum+1, (size_t)ik_h->idnum_allocated_count());
-      new_jmeths = NEW_C_HEAP_ARRAY(jmethodID, size+1);
+      new_jmeths = NEW_C_HEAP_ARRAY(jmethodID, size+1, mtClass);
       memset(new_jmeths, 0, (size+1)*sizeof(jmethodID));
       // cache size is stored in element[0], other elements offset by one
       new_jmeths[0] = (jmethodID)size;
@@ -1360,7 +1396,7 @@
     // cache size is stored in element[0], other elements offset by one
     if (indices == NULL || (length = (size_t)indices[0]) <= idnum) {
       size_t size = MAX2(idnum+1, (size_t)idnum_allocated_count());
-      int* new_indices = NEW_C_HEAP_ARRAY(int, size+1);
+      int* new_indices = NEW_C_HEAP_ARRAY(int, size+1, mtClass);
       new_indices[0] = (int)size;
       // copy any existing entries
       size_t i;
@@ -1818,24 +1854,22 @@
 void instanceKlass::follow_weak_klass_links(
   BoolObjectClosure* is_alive, OopClosure* keep_alive) {
   assert(is_alive->do_object_b(as_klassOop()), "this oop should be live");
-  if (ClassUnloading) {
-    for (int i = 0; i < implementors_limit; i++) {
-      klassOop impl = _implementors[i];
-      if (impl == NULL)  break;  // no more in the list
-      if (!is_alive->do_object_b(impl)) {
-        // remove this guy from the list by overwriting him with the tail
-        int lasti = --_nof_implementors;
-        assert(lasti >= i && lasti < implementors_limit, "just checking");
-        _implementors[i] = _implementors[lasti];
-        _implementors[lasti] = NULL;
-        --i; // rerun the loop at this index
+
+  if (is_interface()) {
+    if (ClassUnloading) {
+      klassOop impl = implementor();
+      if (impl != NULL) {
+        if (!is_alive->do_object_b(impl)) {
+          // remove this guy
+          *adr_implementor() = NULL;
+        }
       }
-    }
-  } else {
-    for (int i = 0; i < implementors_limit; i++) {
-      keep_alive->do_oop(&adr_implementors()[i]);
+    } else {
+      assert(adr_implementor() != NULL, "just checking");
+      keep_alive->do_oop(adr_implementor());
     }
   }
+
   Klass::follow_weak_klass_links(is_alive, keep_alive);
 }
 
@@ -1898,7 +1932,7 @@
 
   // deallocate the cached class file
   if (_cached_class_file_bytes != NULL) {
-    os::free(_cached_class_file_bytes);
+    os::free(_cached_class_file_bytes, mtClass);
     _cached_class_file_bytes = NULL;
     _cached_class_file_len = 0;
   }
@@ -1912,7 +1946,7 @@
   // walk constant pool and decrement symbol reference counts
   _constants->unreference_symbols();
 
-  if (_source_debug_extension != NULL) FREE_C_HEAP_ARRAY(char, _source_debug_extension);
+  if (_source_debug_extension != NULL) FREE_C_HEAP_ARRAY(char, _source_debug_extension, mtClass);
 }
 
 void instanceKlass::set_source_file_name(Symbol* n) {
@@ -1929,7 +1963,7 @@
     // already coded with an u4 in the classfile, but in practice, it's
     // unlikely to happen.
     assert((length+1) > length, "Overflow checking");
-    char* sde = NEW_C_HEAP_ARRAY(char, (length + 1));
+    char* sde = NEW_C_HEAP_ARRAY(char, (length + 1), mtClass);
     for (int i = 0; i < length; i++) {
       sde[i] = array[i];
     }
@@ -2120,28 +2154,21 @@
   jint access = access_flags().as_int();
 
   // But check if it happens to be member class.
-  typeArrayOop inner_class_list = inner_classes();
-  int length = (inner_class_list == NULL) ? 0 : inner_class_list->length();
-  assert (length % instanceKlass::inner_class_next_offset == 0, "just checking");
-  if (length > 0) {
-    typeArrayHandle inner_class_list_h(THREAD, inner_class_list);
-    instanceKlassHandle ik(THREAD, k);
-    for (int i = 0; i < length; i += instanceKlass::inner_class_next_offset) {
-      int ioff = inner_class_list_h->ushort_at(
-                      i + instanceKlass::inner_class_inner_class_info_offset);
-
-      // Inner class attribute can be zero, skip it.
-      // Strange but true:  JVM spec. allows null inner class refs.
-      if (ioff == 0) continue;
-
-      // only look at classes that are already loaded
-      // since we are looking for the flags for our self.
-      Symbol* inner_name = ik->constants()->klass_name_at(ioff);
-      if ((ik->name() == inner_name)) {
-        // This is really a member class.
-        access = inner_class_list_h->ushort_at(i + instanceKlass::inner_class_access_flags_offset);
-        break;
-      }
+  instanceKlassHandle ik(THREAD, k);
+  InnerClassesIterator iter(ik);
+  for (; !iter.done(); iter.next()) {
+    int ioff = iter.inner_class_info_index();
+    // Inner class attribute can be zero, skip it.
+    // Strange but true:  JVM spec. allows null inner class refs.
+    if (ioff == 0) continue;
+
+    // only look at classes that are already loaded
+    // since we are looking for the flags for ourselves.
+    Symbol* inner_name = ik->constants()->klass_name_at(ioff);
+    if ((ik->name() == inner_name)) {
+      // This is really a member class.
+      access = iter.inner_access_flags();
+      break;
     }
   }
   // Remember to strip ACC_SUPER bit
@@ -2402,6 +2429,22 @@
   } else if (java_lang_boxing_object::is_instance(obj)) {
     st->print(" = ");
     java_lang_boxing_object::print(obj, st);
+  } else if (as_klassOop() == SystemDictionary::LambdaForm_klass()) {
+    oop vmentry = java_lang_invoke_LambdaForm::vmentry(obj);
+    if (vmentry != NULL) {
+      st->print(" => ");
+      vmentry->print_value_on(st);
+    }
+  } else if (as_klassOop() == SystemDictionary::MemberName_klass()) {
+    oop vmtarget = java_lang_invoke_MemberName::vmtarget(obj);
+    if (vmtarget != NULL) {
+      st->print(" = ");
+      vmtarget->print_value_on(st);
+    } else {
+      java_lang_invoke_MemberName::clazz(obj)->print_value_on(st);
+      st->print(".");
+      java_lang_invoke_MemberName::name(obj)->print_value_on(st);
+    }
   }
 }
 
@@ -2516,7 +2559,7 @@
     // This is the first previous version so make some space.
     // Start with 2 elements under the assumption that the class
     // won't be redefined much.
-    _previous_versions =  new (ResourceObj::C_HEAP)
+    _previous_versions =  new (ResourceObj::C_HEAP, mtClass)
                             GrowableArray<PreviousVersionNode *>(2, true);
   }
 
@@ -2542,7 +2585,7 @@
       ("add: all methods are obsolete; flushing any EMCP weak refs"));
   } else {
     int local_count = 0;
-    GrowableArray<jweak>* method_refs = new (ResourceObj::C_HEAP)
+    GrowableArray<jweak>* method_refs = new (ResourceObj::C_HEAP, mtClass)
       GrowableArray<jweak>(emcp_method_count, true);
     for (int i = 0; i < old_methods->length(); i++) {
       if (emcp_methods->at(i)) {
@@ -2934,7 +2977,7 @@
 
   while (_current_index < length) {
     PreviousVersionNode * pv_node = _previous_versions->at(_current_index++);
-    PreviousVersionInfo * pv_info = new (ResourceObj::C_HEAP)
+    PreviousVersionInfo * pv_info = new (ResourceObj::C_HEAP, mtClass)
                                           PreviousVersionInfo(pv_node);
 
     constantPoolHandle cp_h = pv_info->prev_constant_pool_handle();
--- a/src/share/vm/oops/instanceKlass.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/instanceKlass.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -56,8 +56,6 @@
 //    [methods                    ]
 //    [local interfaces           ]
 //    [transitive interfaces      ]
-//    [number of implementors     ]
-//    [implementors               ] klassOop[2]
 //    [fields                     ]
 //    [constants                  ]
 //    [class loader               ]
@@ -77,9 +75,10 @@
 //    [oop map cache (stack maps) ]
 //    [EMBEDDED Java vtable             ] size in words = vtable_len
 //    [EMBEDDED nonstatic oop-map blocks] size in words = nonstatic_oop_map_size
-//
-//    The embedded nonstatic oop-map blocks are short pairs (offset, length) indicating
-//    where oops are located in instances of this klass.
+//      The embedded nonstatic oop-map blocks are short pairs (offset, length)
+//      indicating where oops are located in instances of this klass.
+//    [EMBEDDED implementor of the interface] only exists for interfaces
+//    [EMBEDDED host klass        ] only exists for an anonymous class (JSR 292 enabled)
 
 
 // forward declaration for class -- see below for definition
@@ -153,10 +152,6 @@
   oop* oop_block_beg() const { return adr_array_klasses(); }
   oop* oop_block_end() const { return adr_methods_default_annotations() + 1; }
 
-  enum {
-    implementors_limit = 2              // how many implems can we track?
-  };
-
  protected:
   //
   // The oop block.  See comment in klass.hpp before making changes.
@@ -173,8 +168,19 @@
   objArrayOop     _local_interfaces;
   // Interface (klassOops) this class implements transitively.
   objArrayOop     _transitive_interfaces;
-  // Instance and static variable information, 5-tuples of shorts [access, name
-  // index, sig index, initval index, offset].
+  // Instance and static variable information, starts with 6-tuples of shorts
+  // [access, name index, sig index, initval index, low_offset, high_offset]
+  // for all fields, followed by the generic signature data at the end of
+  // the array. Only fields with generic signature attributes have the generic
+  // signature data set in the array. The fields array looks like the following:
+  //
+  // f1: [access, name index, sig index, initial value index, low_offset, high_offset]
+  // f2: [access, name index, sig index, initial value index, low_offset, high_offset]
+  //      ...
+  // fn: [access, name index, sig index, initial value index, low_offset, high_offset]
+  //     [generic signature index]
+  //     [generic signature index]
+  //     ...
   typeArrayOop    _fields;
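  // A minimal sketch of the indexing the layout above implies. The helper
  // name below is hypothetical (not part of this change); field_count means
  // all fields, injected ones included.
  static int generic_signature_data_start_sketch(int field_count) {
    const int field_slots = 6;  // [access, name, sig, initval, low_offset, high_offset]
    return field_count * field_slots;  // index of the first generic signature short
  }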
   // Constant pool for this class.
   constantPoolOop _constants;
@@ -182,16 +188,20 @@
   oop             _class_loader;
   // Protection domain.
   oop             _protection_domain;
-  // Host class, which grants its access privileges to this class also.
-  // This is only non-null for an anonymous class (JSR 292 enabled).
-  // The host class is either named, or a previously loaded anonymous class.
-  klassOop        _host_klass;
   // Class signers.
   objArrayOop     _signers;
-  // inner_classes attribute.
+  // The InnerClasses attribute and EnclosingMethod attribute. The
+  // _inner_classes is an array of shorts. If the class has an InnerClasses
+  // attribute, then the _inner_classes array begins with 4-tuples of shorts
+  // [inner_class_info_index, outer_class_info_index,
+  // inner_name_index, inner_class_access_flags] for the InnerClasses
+  // attribute. If the EnclosingMethod attribute exists, it occupies the
+  // last two shorts [class_index, method_index] of the array. If only
+  // the InnerClasses attribute exists, the _inner_classes array length is
+  // number_of_inner_classes * 4. If the class has both InnerClasses
+  // and EnclosingMethod attributes, the _inner_classes array length is
+  // number_of_inner_classes * 4 + enclosing_method_attribute_size.
   typeArrayOop    _inner_classes;
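  // A minimal sketch of the packing rule just described; the helper name is
  // hypothetical, and the InnerClassesIterator added at the end of this
  // header is the in-tree equivalent.
  static int inner_classes_tuple_count_sketch(int array_length) {
    const int tuple_size = 4;  // inner_class_next_offset
    const int em_size    = 2;  // enclosing_method_attribute_size
    if (array_length % tuple_size == em_size) {
      array_length -= em_size;  // strip the [class_index, method_index] tail
    }
    assert(array_length % tuple_size == 0, "layout invariant described above");
    return array_length / tuple_size;
  }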
-  // Implementors of this interface (not valid if it overflows)
-  klassOop        _implementors[implementors_limit];
   // Annotations for this class, or null if none.
   typeArrayOop    _class_annotations;
   // Annotation objects (byte arrays) for fields, or null if no annotations.
@@ -234,9 +244,13 @@
   int             _nonstatic_oop_map_size;// size in words of nonstatic oop map blocks
 
   bool            _is_marked_dependent;  // used for marking during flushing and deoptimization
-  bool            _rewritten;            // methods rewritten.
-  bool            _has_nonstatic_fields; // for sizing with UseCompressedOops
-  bool            _should_verify_class;  // allow caching of preverification
+  enum {
+    _misc_rewritten            = 1 << 0, // methods rewritten.
+    _misc_has_nonstatic_fields = 1 << 1, // for sizing with UseCompressedOops
+    _misc_should_verify_class  = 1 << 2, // allow caching of preverification
+    _misc_is_anonymous         = 1 << 3  // has embedded _inner_classes field
+  };
+  u2              _misc_flags;
   u2              _minor_version;        // minor version number of class file
   u2              _major_version;        // major version number of class file
   Thread*         _init_thread;          // Pointer to current thread doing initialization (to handle recursive initialization)
@@ -249,12 +263,9 @@
   nmethodBucket*  _dependencies;         // list of dependent nmethods
   nmethod*        _osr_nmethods_head;    // Head of list of on-stack replacement nmethods for this class
   BreakpointInfo* _breakpoints;          // bpt lists, managed by methodOop
-  int             _nof_implementors;     // No of implementors of this interface (zero if not an interface)
   // Array of interesting part(s) of the previous version(s) of this
   // instanceKlass. See PreviousVersionWalker below.
   GrowableArray<PreviousVersionNode *>* _previous_versions;
-  u2              _enclosing_method_class_index;  // Constant pool index for class of enclosing method, or 0 if none
-  u2              _enclosing_method_method_index; // Constant pool index for name and type of enclosing method, or 0 if none
   // JVMTI fields can be moved to their own structure - see 6315920
   unsigned char * _cached_class_file_bytes;       // JVMTI: cached class file, before retransformable agent modified it in CFLH
   jint            _cached_class_file_len;         // JVMTI: length of above
@@ -272,13 +283,36 @@
   // embedded Java itables follows here
   // embedded static fields follows here
   // embedded nonstatic oop-map blocks follows here
+  // embedded implementor of this interface follows here
+  //   The embedded implementor only exists if the current klass is an
+  //   interface. The possible values of the implementor fall into the
+  //   following three cases:
+  //     NULL: no implementor.
+  //     A klassOop that's not itself: exactly one implementor.
+  //     Itself: more than one implementor.
+  // embedded host klass follows here
+  //   The embedded host klass only exists in an anonymous class for
+  //   dynamic language support (JSR 292 enabled). The host class grants
+  //   its access privileges to this class also. The host class is either
+  //   named, or a previously loaded anonymous class. A non-anonymous class
+  //   or an anonymous class loaded through normal classloading does not
+  //   have this embedded field.
+  //
 
   friend class instanceKlassKlass;
   friend class SystemDictionary;
 
  public:
-  bool has_nonstatic_fields() const        { return _has_nonstatic_fields; }
-  void set_has_nonstatic_fields(bool b)    { _has_nonstatic_fields = b; }
+  bool has_nonstatic_fields() const        {
+    return (_misc_flags & _misc_has_nonstatic_fields) != 0;
+  }
+  void set_has_nonstatic_fields(bool b)    {
+    if (b) {
+      _misc_flags |= _misc_has_nonstatic_fields;
+    } else {
+      _misc_flags &= ~_misc_has_nonstatic_fields;
+    }
+  }
 
   // field sizes
   int nonstatic_field_size() const         { return _nonstatic_field_size; }
@@ -330,9 +364,6 @@
   // Number of Java declared fields
   int java_fields_count() const           { return (int)_java_fields_count; }
 
-  // Number of fields including any injected fields
-  int all_fields_count() const            { return _fields->length() / sizeof(FieldInfo::field_slots); }
-
   typeArrayOop fields() const              { return _fields; }
 
   void set_fields(typeArrayOop f, u2 java_fields_count) {
@@ -353,6 +384,12 @@
     inner_class_next_offset = 4
   };
 
+  enum EnclosingMethodAttributeOffset {
+    enclosing_method_class_index_offset = 0,
+    enclosing_method_method_index_offset = 1,
+    enclosing_method_attribute_size = 2
+  };
+
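  // A sketch, under the packing described for _inner_classes, of reading the
  // EnclosingMethod shorts from the array tail. The name below is
  // hypothetical; the real accessor is enclosing_method_data(int), declared
  // further down and defined in instanceKlass.cpp.
  u2 read_enclosing_method_short_sketch(int offset) {
    if (_inner_classes == NULL) return 0;  // no attribute data at all
    int length = _inner_classes->length();
    if (length % inner_class_next_offset != enclosing_method_attribute_size) {
      return 0;                            // no EnclosingMethod tail present
    }
    return _inner_classes->ushort_at(
               length - enclosing_method_attribute_size + offset);
  }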
   // method override check
   bool is_override(methodHandle super_method, Handle targetclassloader, Symbol* targetclassname, TRAPS);
 
@@ -386,11 +423,19 @@
   bool is_in_error_state() const           { return _init_state == initialization_error; }
   bool is_reentrant_initialization(Thread *thread)  { return thread == _init_thread; }
   ClassState  init_state()                 { return (ClassState)_init_state; }
-  bool is_rewritten() const                { return _rewritten; }
+  bool is_rewritten() const                { return (_misc_flags & _misc_rewritten) != 0; }
 
   // defineClass specified verification
-  bool should_verify_class() const         { return _should_verify_class; }
-  void set_should_verify_class(bool value) { _should_verify_class = value; }
+  bool should_verify_class() const         {
+    return (_misc_flags & _misc_should_verify_class) != 0;
+  }
+  void set_should_verify_class(bool value) {
+    if (value) {
+      _misc_flags |= _misc_should_verify_class;
+    } else {
+      _misc_flags &= ~_misc_should_verify_class;
+    }
+  }
 
   // marking
   bool is_marked_dependent() const         { return _is_marked_dependent; }
@@ -459,9 +504,30 @@
   void set_protection_domain(oop pd)       { oop_store((oop*) &_protection_domain, pd); }
 
   // host class
-  oop host_klass() const                   { return _host_klass; }
-  void set_host_klass(oop host)            { oop_store((oop*) &_host_klass, host); }
-  bool is_anonymous() const                { return _host_klass != NULL; }
+  oop host_klass() const                   {
+    oop* hk = adr_host_klass();
+    if (hk == NULL) {
+      return NULL;
+    } else {
+      return *hk;
+    }
+  }
+  void set_host_klass(oop host)            {
+    assert(is_anonymous(), "not anonymous");
+    oop* addr = adr_host_klass();
+    assert(addr != NULL, "no reserved space");
+    oop_store(addr, host);
+  }
+  bool is_anonymous() const                {
+    return (_misc_flags & _misc_is_anonymous) != 0;
+  }
+  void set_is_anonymous(bool value)        {
+    if (value) {
+      _misc_flags |= _misc_is_anonymous;
+    } else {
+      _misc_flags &= ~_misc_is_anonymous;
+    }
+  }
 
   // signers
   objArrayOop signers() const              { return _signers; }
@@ -535,11 +601,15 @@
   Symbol* generic_signature() const                   { return _generic_signature; }
   void set_generic_signature(Symbol* sig)             { _generic_signature = sig; }
 
-  u2 enclosing_method_class_index() const             { return _enclosing_method_class_index; }
-  u2 enclosing_method_method_index() const            { return _enclosing_method_method_index; }
+  u2 enclosing_method_data(int offset);
+  u2 enclosing_method_class_index() {
+    return enclosing_method_data(enclosing_method_class_index_offset);
+  }
+  u2 enclosing_method_method_index() {
+    return enclosing_method_data(enclosing_method_method_index_offset);
+  }
   void set_enclosing_method_indices(u2 class_index,
-                                    u2 method_index)  { _enclosing_method_class_index  = class_index;
-                                                        _enclosing_method_method_index = method_index; }
+                                    u2 method_index);
 
   // jmethodID support
   static jmethodID get_jmethod_id(instanceKlassHandle ik_h,
@@ -628,19 +698,40 @@
 
   // support for stub routines
   static ByteSize init_state_offset()  { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _init_state)); }
+  TRACE_DEFINE_OFFSET;
   static ByteSize init_thread_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _init_thread)); }
 
   // subclass/subinterface checks
   bool implements_interface(klassOop k) const;
 
-  // Access to implementors of an interface. We only store the count
-  // of implementors, and in case, there are only a few
-  // implementors, we store them in a short list.
-  // This accessor returns NULL if we walk off the end of the list.
-  klassOop implementor(int i) const {
-    return (i < implementors_limit)? _implementors[i]: (klassOop) NULL;
+  // Access to the implementor of an interface.
+  klassOop implementor() const
+  {
+    klassOop* k = (klassOop*)adr_implementor();
+    if (k == NULL) {
+      return NULL;
+    } else {
+      return *k;
+    }
   }
-  int  nof_implementors() const       { return _nof_implementors; }
+
+  void set_implementor(klassOop k) {
+    assert(is_interface(), "not interface");
+    oop* addr = adr_implementor();
+    oop_store_without_check(addr, k);
+  }
+
+  int  nof_implementors() const       {
+    klassOop k = implementor();
+    if (k == NULL) {
+      return 0;
+    } else if (k != this->as_klassOop()) {
+      return 1;
+    } else {
+      return 2;
+    }
+  }
+
   void add_implementor(klassOop k);  // k is a new class that implements this interface
   void init_implementor();           // initialize
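  // A minimal sketch of the assumed add_implementor semantics implied by the
  // NULL / one / self encoding above (the real definition lives in
  // instanceKlass.cpp, outside this hunk): the first implementor is stored
  // directly; a second, different one saturates the slot to the interface
  // itself.
  void add_implementor_sketch(klassOop k) {
    klassOop cur = implementor();
    if (cur == NULL) {
      set_implementor(k);               // 0 -> 1: remember the implementor
    } else if (cur != as_klassOop()) {
      set_implementor(as_klassOop());   // 1 -> 2+: saturate to self
    }
    // already saturated: nothing to update
  }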
 
@@ -677,7 +768,17 @@
 
   // Sizing (in words)
   static int header_size()            { return align_object_offset(oopDesc::header_size() + sizeof(instanceKlass)/HeapWordSize); }
-  int object_size() const             { return object_size(align_object_offset(vtable_length()) + align_object_offset(itable_length()) + nonstatic_oop_map_size()); }
+
+  int object_size() const
+  {
+    return object_size(align_object_offset(vtable_length()) +
+                       align_object_offset(itable_length()) +
+                       ((is_interface() || is_anonymous()) ?
+                         align_object_offset(nonstatic_oop_map_size()) :
+                         nonstatic_oop_map_size()) +
+                       (is_interface() ? (int)sizeof(klassOop)/HeapWordSize : 0) +
+                       (is_anonymous() ? (int)sizeof(klassOop)/HeapWordSize : 0));
+  }
   static int vtable_start_offset()    { return header_size(); }
   static int vtable_length_offset()   { return oopDesc::header_size() + offset_of(instanceKlass, _vtable_len) / HeapWordSize; }
   static int object_size(int extra)   { return align_object_size(header_size() + extra); }
@@ -694,6 +795,29 @@
     return (OopMapBlock*)(start_of_itable() + align_object_offset(itable_length()));
   }
 
+  oop* adr_implementor() const {
+    if (is_interface()) {
+      return (oop*)(start_of_nonstatic_oop_maps() +
+                    nonstatic_oop_map_count());
+    } else {
+      return NULL;
+    }
+  }
+
+  oop* adr_host_klass() const {
+    if (is_anonymous()) {
+      oop* adr_impl = adr_implementor();
+      if (adr_impl != NULL) {
+        return adr_impl + 1;
+      } else {
+        return (oop*)(start_of_nonstatic_oop_maps() +
+                      nonstatic_oop_map_count());
+      }
+    } else {
+      return NULL;
+    }
+  }
+
   // Allocation profiling support
   juint alloc_size() const            { return _alloc_count * size_helper(); }
   void set_alloc_size(juint n)        {}
@@ -767,7 +891,7 @@
 #else
   void set_init_state(ClassState state) { _init_state = (u1)state; }
 #endif
-  void set_rewritten()                  { _rewritten = true; }
+  void set_rewritten()                  { _misc_flags |= _misc_rewritten; }
   void set_init_thread(Thread *thread)  { _init_thread = thread; }
 
   u2 idnum_allocated_count() const      { return _idnum_allocated_count; }
@@ -800,10 +924,8 @@
   oop* adr_constants() const         { return (oop*)&this->_constants;}
   oop* adr_class_loader() const      { return (oop*)&this->_class_loader;}
   oop* adr_protection_domain() const { return (oop*)&this->_protection_domain;}
-  oop* adr_host_klass() const        { return (oop*)&this->_host_klass;}
   oop* adr_signers() const           { return (oop*)&this->_signers;}
   oop* adr_inner_classes() const     { return (oop*)&this->_inner_classes;}
-  oop* adr_implementors() const      { return (oop*)&this->_implementors[0];}
   oop* adr_methods_jmethod_ids() const             { return (oop*)&this->_methods_jmethod_ids;}
   oop* adr_methods_cached_itable_indices() const   { return (oop*)&this->_methods_cached_itable_indices;}
   oop* adr_class_annotations() const   { return (oop*)&this->_class_annotations;}
@@ -888,7 +1010,7 @@
 
 
 /* JNIid class for jfieldIDs only */
-class JNIid: public CHeapObj {
+class JNIid: public CHeapObj<mtClass> {
   friend class VMStructs;
  private:
   klassOop           _holder;
@@ -939,7 +1061,7 @@
 // reference must be used because a weak reference would be seen as
 // collectible. A GrowableArray of PreviousVersionNodes is attached
 // to the instanceKlass as needed. See PreviousVersionWalker below.
-class PreviousVersionNode : public CHeapObj {
+class PreviousVersionNode : public CHeapObj<mtClass> {
  private:
   // A shared ConstantPool is never collected so we'll always have
   // a reference to it so we can update items in the cache. We'll
@@ -1034,7 +1156,7 @@
 // noticed since an nmethod should be removed as many times are it's
 // added.
 //
-class nmethodBucket: public CHeapObj {
+class nmethodBucket: public CHeapObj<mtClass> {
   friend class VMStructs;
  private:
   nmethod*       _nmethod;
@@ -1055,4 +1177,83 @@
   nmethod* get_nmethod()                  { return _nmethod; }
 };
 
+// An iterator used to access the inner-class indices in the
+// instanceKlass::_inner_classes array.
+class InnerClassesIterator : public StackObj {
+ private:
+  typeArrayHandle _inner_classes;
+  int _length;
+  int _idx;
+ public:
+
+  InnerClassesIterator(instanceKlassHandle k) {
+    _inner_classes = k->inner_classes();
+    if (k->inner_classes() != NULL) {
+      _length = _inner_classes->length();
+      // The inner class array's length should be a multiple of
+      // inner_class_next_offset if it only contains the InnerClasses
+      // attribute data, or it should be
+      // n*inner_class_next_offset+enclosing_method_attribute_size
+      // if it also contains the EnclosingMethod data.
+      assert((_length % instanceKlass::inner_class_next_offset == 0 ||
+              _length % instanceKlass::inner_class_next_offset == instanceKlass::enclosing_method_attribute_size),
+             "just checking");
+      // Remove the enclosing_method portion if it exists.
+      if (_length % instanceKlass::inner_class_next_offset == instanceKlass::enclosing_method_attribute_size) {
+        _length -= instanceKlass::enclosing_method_attribute_size;
+      }
+    } else {
+      _length = 0;
+    }
+    _idx = 0;
+  }
+
+  int length() const {
+    return _length;
+  }
+
+  void next() {
+    _idx += instanceKlass::inner_class_next_offset;
+  }
+
+  bool done() const {
+    return (_idx >= _length);
+  }
+
+  u2 inner_class_info_index() const {
+    return _inner_classes->ushort_at(
+               _idx + instanceKlass::inner_class_inner_class_info_offset);
+  }
+
+  void set_inner_class_info_index(u2 index) {
+    _inner_classes->ushort_at_put(
+               _idx + instanceKlass::inner_class_inner_class_info_offset, index);
+  }
+
+  u2 outer_class_info_index() const {
+    return _inner_classes->ushort_at(
+               _idx + instanceKlass::inner_class_outer_class_info_offset);
+  }
+
+  void set_outer_class_info_index(u2 index) {
+    _inner_classes->ushort_at_put(
+               _idx + instanceKlass::inner_class_outer_class_info_offset, index);
+  }
+
+  u2 inner_name_index() const {
+    return _inner_classes->ushort_at(
+               _idx + instanceKlass::inner_class_inner_name_offset);
+  }
+
+  void set_inner_name_index(u2 index) {
+    _inner_classes->ushort_at_put(
+               _idx + instanceKlass::inner_class_inner_name_offset, index);
+  }
+
+  u2 inner_access_flags() const {
+    return _inner_classes->ushort_at(
+               _idx + instanceKlass::inner_class_access_flags_offset);
+  }
+};
+
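// A usage sketch for the iterator above; the caller shape is hypothetical.
// The iterator hides any EnclosingMethod tail, so the loop only sees
// complete 4-short InnerClasses tuples.
inline void print_inner_classes_sketch(instanceKlassHandle ik, outputStream* st) {
  InnerClassesIterator iter(ik);
  for (; !iter.done(); iter.next()) {
    u2 ioff = iter.inner_class_info_index();
    if (ioff == 0) continue;  // zero entries are legal per the JVM spec
    st->print_cr("inner class cp index: %d, access flags: 0x%x",
                 ioff, iter.inner_access_flags());
  }
}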
 #endif // SHARE_VM_OOPS_INSTANCEKLASS_HPP
--- a/src/share/vm/oops/instanceKlassKlass.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/instanceKlassKlass.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -103,7 +103,9 @@
   MarkSweep::mark_and_push(ik->adr_class_loader());
   MarkSweep::mark_and_push(ik->adr_inner_classes());
   MarkSweep::mark_and_push(ik->adr_protection_domain());
-  MarkSweep::mark_and_push(ik->adr_host_klass());
+  if (ik->adr_host_klass() != NULL) {
+    MarkSweep::mark_and_push(ik->adr_host_klass());
+  }
   MarkSweep::mark_and_push(ik->adr_signers());
   MarkSweep::mark_and_push(ik->adr_class_annotations());
   MarkSweep::mark_and_push(ik->adr_fields_annotations());
@@ -111,7 +113,7 @@
   MarkSweep::mark_and_push(ik->adr_methods_parameter_annotations());
   MarkSweep::mark_and_push(ik->adr_methods_default_annotations());
 
-  // We do not follow adr_implementors() here. It is followed later
+  // We do not follow adr_implementor() here. It is followed later
   // in instanceKlass::follow_weak_klass_links()
 
   klassKlass::oop_follow_contents(obj);
@@ -139,7 +141,9 @@
   PSParallelCompact::mark_and_push(cm, ik->adr_class_loader());
   PSParallelCompact::mark_and_push(cm, ik->adr_inner_classes());
   PSParallelCompact::mark_and_push(cm, ik->adr_protection_domain());
-  PSParallelCompact::mark_and_push(cm, ik->adr_host_klass());
+  if (ik->adr_host_klass() != NULL) {
+    PSParallelCompact::mark_and_push(cm, ik->adr_host_klass());
+  }
   PSParallelCompact::mark_and_push(cm, ik->adr_signers());
   PSParallelCompact::mark_and_push(cm, ik->adr_class_annotations());
   PSParallelCompact::mark_and_push(cm, ik->adr_fields_annotations());
@@ -177,11 +181,13 @@
   blk->do_oop(ik->adr_constants());
   blk->do_oop(ik->adr_class_loader());
   blk->do_oop(ik->adr_protection_domain());
-  blk->do_oop(ik->adr_host_klass());
+  if (ik->adr_host_klass() != NULL) {
+    blk->do_oop(ik->adr_host_klass());
+  }
   blk->do_oop(ik->adr_signers());
   blk->do_oop(ik->adr_inner_classes());
-  for (int i = 0; i < instanceKlass::implementors_limit; i++) {
-    blk->do_oop(&ik->adr_implementors()[i]);
+  if (ik->adr_implementor() != NULL) {
+    blk->do_oop(ik->adr_implementor());
   }
   blk->do_oop(ik->adr_class_annotations());
   blk->do_oop(ik->adr_fields_annotations());
@@ -227,15 +233,13 @@
   adr = ik->adr_protection_domain();
   if (mr.contains(adr)) blk->do_oop(adr);
   adr = ik->adr_host_klass();
-  if (mr.contains(adr)) blk->do_oop(adr);
+  if (adr != NULL && mr.contains(adr)) blk->do_oop(adr);
   adr = ik->adr_signers();
   if (mr.contains(adr)) blk->do_oop(adr);
   adr = ik->adr_inner_classes();
   if (mr.contains(adr)) blk->do_oop(adr);
-  adr = ik->adr_implementors();
-  for (int i = 0; i < instanceKlass::implementors_limit; i++) {
-    if (mr.contains(&adr[i])) blk->do_oop(&adr[i]);
-  }
+  adr = ik->adr_implementor();
+  if (adr != NULL && mr.contains(adr)) blk->do_oop(adr);
   adr = ik->adr_class_annotations();
   if (mr.contains(adr)) blk->do_oop(adr);
   adr = ik->adr_fields_annotations();
@@ -270,11 +274,13 @@
   MarkSweep::adjust_pointer(ik->adr_constants());
   MarkSweep::adjust_pointer(ik->adr_class_loader());
   MarkSweep::adjust_pointer(ik->adr_protection_domain());
-  MarkSweep::adjust_pointer(ik->adr_host_klass());
+  if (ik->adr_host_klass() != NULL) {
+    MarkSweep::adjust_pointer(ik->adr_host_klass());
+  }
   MarkSweep::adjust_pointer(ik->adr_signers());
   MarkSweep::adjust_pointer(ik->adr_inner_classes());
-  for (int i = 0; i < instanceKlass::implementors_limit; i++) {
-    MarkSweep::adjust_pointer(&ik->adr_implementors()[i]);
+  if (ik->adr_implementor() != NULL) {
+    MarkSweep::adjust_pointer(ik->adr_implementor());
   }
   MarkSweep::adjust_pointer(ik->adr_class_annotations());
   MarkSweep::adjust_pointer(ik->adr_fields_annotations());
@@ -302,7 +308,7 @@
   }
 
   oop* hk_addr = ik->adr_host_klass();
-  if (PSScavenge::should_scavenge(hk_addr)) {
+  if (hk_addr != NULL && PSScavenge::should_scavenge(hk_addr)) {
     pm->claim_or_forward_depth(hk_addr);
   }
 
@@ -328,6 +334,13 @@
   for (oop* cur_oop = beg_oop; cur_oop < end_oop; ++cur_oop) {
     PSParallelCompact::adjust_pointer(cur_oop);
   }
+  // embedded oops
+  if (ik->adr_implementor() != NULL) {
+    PSParallelCompact::adjust_pointer(ik->adr_implementor());
+  }
+  if (ik->adr_host_klass() != NULL) {
+    PSParallelCompact::adjust_pointer(ik->adr_host_klass());
+  }
 
   OopClosure* closure = PSParallelCompact::adjust_root_pointer_closure();
   iterate_c_heap_oops(ik, closure);
@@ -342,11 +355,25 @@
 instanceKlassKlass::allocate_instance_klass(Symbol* name, int vtable_len, int itable_len,
                                             int static_field_size,
                                             unsigned nonstatic_oop_map_count,
-                                            ReferenceType rt, TRAPS) {
+                                            AccessFlags access_flags,
+                                            ReferenceType rt,
+                                            KlassHandle host_klass, TRAPS) {
 
   const int nonstatic_oop_map_size =
     instanceKlass::nonstatic_oop_map_size(nonstatic_oop_map_count);
-  int size = instanceKlass::object_size(align_object_offset(vtable_len) + align_object_offset(itable_len) + nonstatic_oop_map_size);
+  int size = align_object_offset(vtable_len) + align_object_offset(itable_len);
+  if (access_flags.is_interface() || !host_klass.is_null()) {
+    size += align_object_offset(nonstatic_oop_map_size);
+  } else {
+    size += nonstatic_oop_map_size;
+  }
+  if (access_flags.is_interface()) {
+    size += (int)sizeof(klassOop)/HeapWordSize;
+  }
+  if (!host_klass.is_null()) {
+    size += (int)sizeof(klassOop)/HeapWordSize;
+  }
+  size = instanceKlass::object_size(size);
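  // Worked accounting for the sizing above, assuming a build where
  // sizeof(klassOop)/HeapWordSize == 1:
  //   plain class:      vtable' + itable' + oop_maps
  //   interface:        vtable' + itable' + align(oop_maps) + 1  (implementor)
  //   anonymous class:  vtable' + itable' + align(oop_maps) + 1  (host klass)
  //   both flags set:   vtable' + itable' + align(oop_maps) + 2
  // where ' marks align_object_offset(...), before the final object_size().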
 
   // Allocation
   KlassHandle h_this_klass(THREAD, as_klassOop());
@@ -378,6 +405,8 @@
     ik->set_itable_length(itable_len);
     ik->set_static_field_size(static_field_size);
     ik->set_nonstatic_oop_map_size(nonstatic_oop_map_size);
+    ik->set_access_flags(access_flags);
+    ik->set_is_anonymous(!host_klass.is_null());
     assert(k()->size() == size, "wrong size for object");
 
     ik->set_array_klasses(NULL);
@@ -390,7 +419,6 @@
     ik->set_constants(NULL);
     ik->set_class_loader(NULL);
     ik->set_protection_domain(NULL);
-    ik->set_host_klass(NULL);
     ik->set_signers(NULL);
     ik->set_source_file_name(NULL);
     ik->set_source_debug_extension(NULL, 0);
@@ -415,7 +443,6 @@
     ik->set_methods_annotations(NULL);
     ik->set_methods_parameter_annotations(NULL);
     ik->set_methods_default_annotations(NULL);
-    ik->set_enclosing_method_indices(0, 0);
     ik->set_jvmti_cached_class_field_map(NULL);
     ik->set_initial_method_idnum(0);
     assert(k()->is_parsable(), "should be parsable here.");
@@ -470,16 +497,12 @@
 
   if (ik->is_interface()) {
     st->print_cr(BULLET"nof implementors:  %d", ik->nof_implementors());
-    int print_impl = 0;
-    for (int i = 0; i < instanceKlass::implementors_limit; i++) {
-      if (ik->implementor(i) != NULL) {
-        if (++print_impl == 1)
-          st->print_cr(BULLET"implementor:    ");
-        st->print("   ");
-        ik->implementor(i)->print_value_on(st);
-      }
+    if (ik->nof_implementors() == 1) {
+      st->print_cr(BULLET"implementor:    ");
+      st->print("   ");
+      ik->implementor()->print_value_on(st);
+      st->cr();
     }
-    if (print_impl > 0)  st->cr();
   }
 
   st->print(BULLET"arrays:            "); ik->array_klasses()->print_value_on(st);     st->cr();
@@ -496,7 +519,9 @@
   st->print(BULLET"constants:         "); ik->constants()->print_value_on(st);         st->cr();
   st->print(BULLET"class loader:      "); ik->class_loader()->print_value_on(st);      st->cr();
   st->print(BULLET"protection domain: "); ik->protection_domain()->print_value_on(st); st->cr();
-  st->print(BULLET"host class:        "); ik->host_klass()->print_value_on(st);        st->cr();
+  if (ik->host_klass() != NULL) {
+    st->print(BULLET"host class:        "); ik->host_klass()->print_value_on(st);        st->cr();
+  }
   st->print(BULLET"signers:           "); ik->signers()->print_value_on(st);           st->cr();
   if (ik->source_file_name() != NULL) {
     st->print(BULLET"source file:       ");
@@ -640,16 +665,12 @@
     }
 
     // Verify implementor fields
-    bool saw_null_impl = false;
-    for (int i = 0; i < instanceKlass::implementors_limit; i++) {
-      klassOop im = ik->implementor(i);
-      if (im == NULL) { saw_null_impl = true; continue; }
-      guarantee(!saw_null_impl, "non-nulls must preceded all nulls");
+    klassOop im = ik->implementor();
+    if (im != NULL) {
       guarantee(ik->is_interface(), "only interfaces should have implementor set");
-      guarantee(i < ik->nof_implementors(), "should only have one implementor");
       guarantee(im->is_perm(),  "should be in permspace");
       guarantee(im->is_klass(), "should be klass");
-      guarantee(!Klass::cast(klassOop(im))->is_interface(), "implementors cannot be interfaces");
+      guarantee(!Klass::cast(klassOop(im))->is_interface() || im == ik->as_klassOop(), "implementors cannot be interfaces");
     }
 
     // Verify local interfaces
--- a/src/share/vm/oops/instanceKlassKlass.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/instanceKlassKlass.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -46,7 +46,9 @@
                                    int itable_len,
                                    int static_field_size,
                                    unsigned int nonstatic_oop_map_count,
+                                   AccessFlags access_flags,
                                    ReferenceType rt,
+                                   KlassHandle host_klass,
                                    TRAPS);
 
   // Casting from klassOop
--- a/src/share/vm/oops/instanceRefKlass.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/instanceRefKlass.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -497,36 +497,12 @@
 
   if (referent != NULL) {
     guarantee(referent->is_oop(), "referent field heap failed");
-    if (gch != NULL && !gch->is_in_young(obj)) {
-      // We do a specific remembered set check here since the referent
-      // field is not part of the oop mask and therefore skipped by the
-      // regular verify code.
-      if (UseCompressedOops) {
-        narrowOop* referent_addr = (narrowOop*)java_lang_ref_Reference::referent_addr(obj);
-        obj->verify_old_oop(referent_addr, true);
-      } else {
-        oop* referent_addr = (oop*)java_lang_ref_Reference::referent_addr(obj);
-        obj->verify_old_oop(referent_addr, true);
-      }
-    }
   }
   // Verify next field
   oop next = java_lang_ref_Reference::next(obj);
   if (next != NULL) {
     guarantee(next->is_oop(), "next field verify failed");
     guarantee(next->is_instanceRef(), "next field verify failed");
-    if (gch != NULL && !gch->is_in_young(obj)) {
-      // We do a specific remembered set check here since the next field is
-      // not part of the oop mask and therefore skipped by the regular
-      // verify code.
-      if (UseCompressedOops) {
-        narrowOop* next_addr = (narrowOop*)java_lang_ref_Reference::next_addr(obj);
-        obj->verify_old_oop(next_addr, true);
-      } else {
-        oop* next_addr = (oop*)java_lang_ref_Reference::next_addr(obj);
-        obj->verify_old_oop(next_addr, true);
-      }
-    }
   }
 }
 
@@ -539,6 +515,12 @@
 void instanceRefKlass::acquire_pending_list_lock(BasicLock *pending_list_basic_lock) {
   // we may enter this with pending exception set
   PRESERVE_EXCEPTION_MARK;  // exceptions are never thrown, needed for TRAPS argument
+
+  // Create a HandleMark in case we retry a GC multiple times.
+  // Each time we attempt the GC, we allocate the handle below
+  // to hold the pending list lock. We want to free this handle.
+  HandleMark hm;
+
   Handle h_lock(THREAD, java_lang_ref_Reference::pending_list_lock());
   ObjectSynchronizer::fast_enter(h_lock, pending_list_basic_lock, false, THREAD);
   assert(ObjectSynchronizer::current_thread_holds_lock(
@@ -551,7 +533,12 @@
   BasicLock *pending_list_basic_lock) {
   // we may enter this with pending exception set
   PRESERVE_EXCEPTION_MARK;  // exceptions are never thrown, needed for TRAPS argument
-  //
+
+  // Create a HandleMark in case we retry a GC multiple times.
+  // Each time we attempt the GC, we allocate the handle below
+  // to hold the pending list lock. We want to free this handle.
+  HandleMark hm;
+
   Handle h_lock(THREAD, java_lang_ref_Reference::pending_list_lock());
   assert(ObjectSynchronizer::current_thread_holds_lock(
            JavaThread::current(), h_lock),
--- a/src/share/vm/oops/klass.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/klass.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -174,10 +174,9 @@
 }
 
 void Klass_vtbl::post_new_init_klass(KlassHandle& klass,
-                                     klassOop new_klass,
-                                     int size) const {
+                                     klassOop new_klass) const {
   assert(!new_klass->klass_part()->null_vtbl(), "Not a complete klass");
-  CollectedHeap::post_allocation_install_obj_klass(klass, new_klass, size);
+  CollectedHeap::post_allocation_install_obj_klass(klass, new_klass);
 }
 
 void* Klass_vtbl::operator new(size_t ignored, KlassHandle& klass,
@@ -582,14 +581,6 @@
   guarantee(obj->klass()->is_klass(), "klass field is not a klass");
 }
 
-
-void Klass::oop_verify_old_oop(oop obj, oop* p, bool allow_dirty) {
-  /* $$$ I think this functionality should be handled by verification of
-  RememberedSet::verify_old_oop(obj, p, allow_dirty, false);
-  the card table. */
-}
-void Klass::oop_verify_old_oop(oop obj, narrowOop* p, bool allow_dirty) { }
-
 #ifndef PRODUCT
 
 void Klass::verify_vtable_index(int i) {
--- a/src/share/vm/oops/klass.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/klass.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -149,7 +149,7 @@
   // by the shared "base_create" subroutines.
   //
   virtual void* allocate_permanent(KlassHandle& klass, int size, TRAPS) const = 0;
-  void post_new_init_klass(KlassHandle& klass, klassOop obj, int size) const;
+  void post_new_init_klass(KlassHandle& klass, klassOop obj) const;
 
   // Every subclass on which vtbl_value is called must include this macro.
   // Delay the installation of the klassKlass pointer until after the
@@ -160,7 +160,7 @@
     if (HAS_PENDING_EXCEPTION) return NULL;                                   \
     klassOop new_klass = ((Klass*) result)->as_klassOop();                    \
     OrderAccess::storestore();                                                \
-    post_new_init_klass(klass_klass, new_klass, size);                        \
+    post_new_init_klass(klass_klass, new_klass);                              \
     return result;                                                            \
   }
 
@@ -805,8 +805,6 @@
   // Verification
   virtual const char* internal_name() const = 0;
   virtual void oop_verify_on(oop obj, outputStream* st);
-  virtual void oop_verify_old_oop(oop obj, oop* p, bool allow_dirty);
-  virtual void oop_verify_old_oop(oop obj, narrowOop* p, bool allow_dirty);
   // tells whether obj is partially constructed (gc during class loading)
   virtual bool oop_partially_loaded(oop obj) const { return false; }
   virtual void oop_set_partially_loaded(oop obj) {};
--- a/src/share/vm/oops/methodKlass.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/methodKlass.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -112,11 +112,6 @@
 
   assert(m->is_parsable(), "must be parsable here.");
   assert(m->size() == size, "wrong size for object");
-  // We should not publish an uprasable object's reference
-  // into one that is parsable, since that presents problems
-  // for the concurrent parallel marking and precleaning phases
-  // of concurrent gc (CMS).
-  xconst->set_method(m);
   return m;
 }
 
@@ -127,7 +122,6 @@
   // Performance tweak: We skip iterating over the klass pointer since we
   // know that Universe::methodKlassObj never moves.
   MarkSweep::mark_and_push(m->adr_constMethod());
-  MarkSweep::mark_and_push(m->adr_constants());
   if (m->method_data() != NULL) {
     MarkSweep::mark_and_push(m->adr_method_data());
   }
@@ -141,7 +135,6 @@
   // Performance tweak: We skip iterating over the klass pointer since we
   // know that Universe::methodKlassObj never moves.
   PSParallelCompact::mark_and_push(cm, m->adr_constMethod());
-  PSParallelCompact::mark_and_push(cm, m->adr_constants());
 #ifdef COMPILER2
   if (m->method_data() != NULL) {
     PSParallelCompact::mark_and_push(cm, m->adr_method_data());
@@ -159,7 +152,6 @@
   // Performance tweak: We skip iterating over the klass pointer since we
   // know that Universe::methodKlassObj never moves
   blk->do_oop(m->adr_constMethod());
-  blk->do_oop(m->adr_constants());
   if (m->method_data() != NULL) {
     blk->do_oop(m->adr_method_data());
   }
@@ -178,8 +170,6 @@
   oop* adr;
   adr = m->adr_constMethod();
   if (mr.contains(adr)) blk->do_oop(adr);
-  adr = m->adr_constants();
-  if (mr.contains(adr)) blk->do_oop(adr);
   if (m->method_data() != NULL) {
     adr = m->adr_method_data();
     if (mr.contains(adr)) blk->do_oop(adr);
@@ -197,7 +187,6 @@
   // Performance tweak: We skip iterating over the klass pointer since we
   // know that Universe::methodKlassObj never moves.
   MarkSweep::adjust_pointer(m->adr_constMethod());
-  MarkSweep::adjust_pointer(m->adr_constants());
   if (m->method_data() != NULL) {
     MarkSweep::adjust_pointer(m->adr_method_data());
   }
@@ -213,7 +202,6 @@
   assert(obj->is_method(), "should be method");
   methodOop m = methodOop(obj);
   PSParallelCompact::adjust_pointer(m->adr_constMethod());
-  PSParallelCompact::adjust_pointer(m->adr_constants());
 #ifdef COMPILER2
   if (m->method_data() != NULL) {
     PSParallelCompact::adjust_pointer(m->adr_method_data());
@@ -250,7 +238,11 @@
     st->print_cr(" - highest level:     %d", m->highest_comp_level());
   st->print_cr(" - vtable index:      %d",   m->_vtable_index);
   st->print_cr(" - i2i entry:         " INTPTR_FORMAT, m->interpreter_entry());
-  st->print_cr(" - adapter:           " INTPTR_FORMAT, m->adapter());
+  st->print(   " - adapters:          ");
+  if (m->adapter() == NULL)
+    st->print_cr(INTPTR_FORMAT, m->adapter());
+  else
+    m->adapter()->print_adapter_on(st);
   st->print_cr(" - compiled entry     " INTPTR_FORMAT, m->from_compiled_entry());
   st->print_cr(" - code size:         %d",   m->code_size());
   if (m->code_size() != 0) {
@@ -298,13 +290,8 @@
   if (m->code() != NULL) {
     st->print   (" - compiled code: ");
     m->code()->print_value_on(st);
-    st->cr();
   }
-  if (m->is_method_handle_invoke()) {
-    st->print_cr(" - invoke method type: " INTPTR_FORMAT, (address) m->method_handle_type());
-    // m is classified as native, but it does not have an interesting
-    // native_function or signature handler
-  } else if (m->is_native()) {
+  if (m->is_native()) {
     st->print_cr(" - native function:   " INTPTR_FORMAT, m->native_function());
     st->print_cr(" - signature handler: " INTPTR_FORMAT, m->signature_handler());
   }
@@ -339,8 +326,6 @@
   if (!obj->partially_loaded()) {
     methodOop m = methodOop(obj);
     guarantee(m->is_perm(),  "should be in permspace");
-    guarantee(m->constants()->is_perm(), "should be in permspace");
-    guarantee(m->constants()->is_constantPool(), "should be constant pool");
     guarantee(m->constMethod()->is_constMethod(), "should be constMethodOop");
     guarantee(m->constMethod()->is_perm(), "should be in permspace");
     methodDataOop method_data = m->method_data();
--- a/src/share/vm/oops/methodOop.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/methodOop.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -40,7 +40,7 @@
 #include "oops/oop.inline.hpp"
 #include "oops/symbol.hpp"
 #include "prims/jvmtiExport.hpp"
-#include "prims/methodHandleWalk.hpp"
+#include "prims/methodHandles.hpp"
 #include "prims/nativeLookup.hpp"
 #include "runtime/arguments.hpp"
 #include "runtime/compilationPolicy.hpp"
@@ -111,25 +111,21 @@
 
 int  methodOopDesc::fast_exception_handler_bci_for(KlassHandle ex_klass, int throw_bci, TRAPS) {
   // exception table holds quadruple entries of the form (beg_bci, end_bci, handler_bci, klass_index)
-  const int beg_bci_offset     = 0;
-  const int end_bci_offset     = 1;
-  const int handler_bci_offset = 2;
-  const int klass_index_offset = 3;
-  const int entry_size         = 4;
   // access exception table
-  typeArrayHandle table (THREAD, constMethod()->exception_table());
-  int length = table->length();
-  assert(length % entry_size == 0, "exception table format has changed");
+  ExceptionTable table(this);
+  int length = table.length();
   // iterate through all entries sequentially
   constantPoolHandle pool(THREAD, constants());
-  for (int i = 0; i < length; i += entry_size) {
-    int beg_bci = table->int_at(i + beg_bci_offset);
-    int end_bci = table->int_at(i + end_bci_offset);
+  for (int i = 0; i < length; i++) {
+    // Reacquire the table in case a GC happened.
+    ExceptionTable table(this);
+    int beg_bci = table.start_pc(i);
+    int end_bci = table.end_pc(i);
     assert(beg_bci <= end_bci, "inconsistent exception table");
     if (beg_bci <= throw_bci && throw_bci < end_bci) {
       // exception handler bci range covers throw_bci => investigate further
-      int handler_bci = table->int_at(i + handler_bci_offset);
-      int klass_index = table->int_at(i + klass_index_offset);
+      int handler_bci = table.handler_pc(i);
+      int klass_index = table.catch_type_index(i);
       if (klass_index == 0) {
         return handler_bci;
       } else if (ex_klass.is_null()) {
@@ -532,9 +528,9 @@
 
 
 bool methodOopDesc::is_klass_loaded_by_klass_index(int klass_index) const {
-  if( _constants->tag_at(klass_index).is_unresolved_klass() ) {
+  if( constants()->tag_at(klass_index).is_unresolved_klass() ) {
     Thread *thread = Thread::current();
-    Symbol* klass_name = _constants->klass_name_at(klass_index);
+    Symbol* klass_name = constants()->klass_name_at(klass_index);
     Handle loader(thread, instanceKlass::cast(method_holder())->class_loader());
     Handle prot  (thread, Klass::cast(method_holder())->protection_domain());
     return SystemDictionary::find(klass_name, loader, prot, thread) != NULL;
@@ -545,7 +541,7 @@
 
 
 bool methodOopDesc::is_klass_loaded(int refinfo_index, bool must_be_resolved) const {
-  int klass_index = _constants->klass_ref_index_at(refinfo_index);
+  int klass_index = constants()->klass_ref_index_at(refinfo_index);
   if (must_be_resolved) {
     // Make sure klass is resolved in constantpool.
     if (constants()->tag_at(klass_index).is_unresolved_klass()) return false;
@@ -556,6 +552,7 @@
 
 void methodOopDesc::set_native_function(address function, bool post_event_flag) {
   assert(function != NULL, "use clear_native_function to unregister natives");
+  assert(!is_method_handle_intrinsic() || function == SharedRuntime::native_method_throw_unsatisfied_link_error_entry(), "");
   address* native_function = native_function_addr();
 
   // We can see racers trying to place the same native function into place. Once
@@ -585,12 +582,15 @@
 
 
 bool methodOopDesc::has_native_function() const {
+  if (is_method_handle_intrinsic())
+    return false;  // special-cased in SharedRuntime::generate_native_wrapper
   address func = native_function();
   return (func != NULL && func != SharedRuntime::native_method_throw_unsatisfied_link_error_entry());
 }
 
 
 void methodOopDesc::clear_native_function() {
+  // Note: is_method_handle_intrinsic() is allowed here.
   set_native_function(
     SharedRuntime::native_method_throw_unsatisfied_link_error_entry(),
     !native_bind_event_is_interesting);
@@ -610,12 +610,11 @@
 
 
 bool methodOopDesc::is_not_compilable(int comp_level) const {
-  if (is_method_handle_invoke()) {
-    // compilers must recognize this method specially, or not at all
+  if (number_of_breakpoints() > 0) {
     return true;
   }
-  if (number_of_breakpoints() > 0) {
-    return true;
+  if (is_method_handle_intrinsic()) {
+    return !is_synthetic();  // the generated adapters must be compiled
   }
   if (comp_level == CompLevel_any) {
     return is_not_c1_compilable() || is_not_c2_compilable();
@@ -713,7 +712,7 @@
   assert(entry != NULL, "interpreter entry must be non-null");
   // Sets both _i2i_entry and _from_interpreted_entry
   set_interpreter_entry(entry);
-  if (is_native() && !is_method_handle_invoke()) {
+  if (is_native() && !is_method_handle_intrinsic()) {
     set_native_function(
       SharedRuntime::native_method_throw_unsatisfied_link_error_entry(),
       !native_bind_event_is_interesting);
@@ -801,13 +800,13 @@
   OrderAccess::storestore();
 #ifdef SHARK
   mh->_from_interpreted_entry = code->insts_begin();
-#else
+#else //!SHARK
   mh->_from_compiled_entry = code->verified_entry_point();
   OrderAccess::storestore();
   // Instantly compiled code can execute.
-  mh->_from_interpreted_entry = mh->get_i2c_entry();
-#endif // SHARK
-
+  if (!mh->is_method_handle_intrinsic())
+    mh->_from_interpreted_entry = mh->get_i2c_entry();
+#endif //!SHARK
 }
 
 
@@ -859,104 +858,51 @@
   return false;
 }
 
-bool methodOopDesc::is_method_handle_invoke_name(vmSymbols::SID name_sid) {
-  switch (name_sid) {
-  case vmSymbols::VM_SYMBOL_ENUM_NAME(invokeExact_name):
-  case vmSymbols::VM_SYMBOL_ENUM_NAME(invoke_name):
-    return true;
-  }
-  if (AllowInvokeGeneric
-      && name_sid == vmSymbols::VM_SYMBOL_ENUM_NAME(invokeGeneric_name))
-    return true;
-  return false;
-}
-
 // Constant pool structure for invoke methods:
 enum {
-  _imcp_invoke_name = 1,        // utf8: 'invokeExact' or 'invokeGeneric'
+  _imcp_invoke_name = 1,        // utf8: 'invokeExact', etc.
   _imcp_invoke_signature,       // utf8: (variable Symbol*)
-  _imcp_method_type_value,      // string: (variable java/lang/invoke/MethodType, sic)
   _imcp_limit
 };
 
-oop methodOopDesc::method_handle_type() const {
-  if (!is_method_handle_invoke()) { assert(false, "caller resp."); return NULL; }
-  oop mt = constants()->resolved_string_at(_imcp_method_type_value);
-  assert(mt->klass() == SystemDictionary::MethodType_klass(), "");
-  return mt;
-}
-
-jint* methodOopDesc::method_type_offsets_chain() {
-  static jint pchase[] = { -1, -1, -1 };
-  if (pchase[0] == -1) {
-    jint step0 = in_bytes(constants_offset());
-    jint step1 = (constantPoolOopDesc::header_size() + _imcp_method_type_value) * HeapWordSize;
-    // do this in reverse to avoid races:
-    OrderAccess::release_store(&pchase[1], step1);
-    OrderAccess::release_store(&pchase[0], step0);
-  }
-  return pchase;
+// Test if this method is an MH adapter frame generated by Java code.
+// Cf. java/lang/invoke/InvokerBytecodeGenerator
+bool methodOopDesc::is_compiled_lambda_form() const {
+  return intrinsic_id() == vmIntrinsics::_compiledLambdaForm;
 }
 
-//------------------------------------------------------------------------------
-// methodOopDesc::is_method_handle_adapter
-//
-// Tests if this method is an internal adapter frame from the
-// MethodHandleCompiler.
-// Must be consistent with MethodHandleCompiler::get_method_oop().
-bool methodOopDesc::is_method_handle_adapter() const {
-  if (is_synthetic() &&
-      !is_native() &&   // has code from MethodHandleCompiler
-      is_method_handle_invoke_name(name()) &&
-      MethodHandleCompiler::klass_is_method_handle_adapter_holder(method_holder())) {
-    assert(!is_method_handle_invoke(), "disjoint");
-    return true;
-  } else {
-    return false;
-  }
+// Test if this method is an internal MH primitive method.
+bool methodOopDesc::is_method_handle_intrinsic() const {
+  vmIntrinsics::ID iid = intrinsic_id();
+  return (MethodHandles::is_signature_polymorphic(iid) &&
+          MethodHandles::is_signature_polymorphic_intrinsic(iid));
 }
 
-methodHandle methodOopDesc::make_invoke_method(KlassHandle holder,
-                                               Symbol* name,
-                                               Symbol* signature,
-                                               Handle method_type, TRAPS) {
+bool methodOopDesc::has_member_arg() const {
+  vmIntrinsics::ID iid = intrinsic_id();
+  return (MethodHandles::is_signature_polymorphic(iid) &&
+          MethodHandles::has_member_arg(iid));
+}
+
+// Make an instance of a signature-polymorphic internal MH primitive.
+methodHandle methodOopDesc::make_method_handle_intrinsic(vmIntrinsics::ID iid,
+                                                         Symbol* signature,
+                                                         TRAPS) {
   ResourceMark rm;
   methodHandle empty;
 
-  assert(holder() == SystemDictionary::MethodHandle_klass(),
-         "must be a JSR 292 magic type");
-
+  KlassHandle holder = SystemDictionary::MethodHandle_klass();
+  Symbol* name = MethodHandles::signature_polymorphic_intrinsic_name(iid);
+  assert(iid == MethodHandles::signature_polymorphic_name_id(name), "");
   if (TraceMethodHandles) {
-    tty->print("Creating invoke method for ");
-    signature->print_value();
-    tty->cr();
+    tty->print_cr("make_method_handle_intrinsic MH.%s%s", name->as_C_string(), signature->as_C_string());
   }
 
   // invariant:   cp->symbol_at_put is preceded by a refcount increment (more usually a lookup)
   name->increment_refcount();
   signature->increment_refcount();
 
-  // record non-BCP method types in the constant pool
-  GrowableArray<KlassHandle>* extra_klasses = NULL;
-  for (int i = -1, len = java_lang_invoke_MethodType::ptype_count(method_type()); i < len; i++) {
-    oop ptype = (i == -1
-                 ? java_lang_invoke_MethodType::rtype(method_type())
-                 : java_lang_invoke_MethodType::ptype(method_type(), i));
-    klassOop klass = check_non_bcp_klass(java_lang_Class::as_klassOop(ptype));
-    if (klass != NULL) {
-      if (extra_klasses == NULL)
-        extra_klasses = new GrowableArray<KlassHandle>(len+1);
-      bool dup = false;
-      for (int j = 0; j < extra_klasses->length(); j++) {
-        if (extra_klasses->at(j) == klass) { dup = true; break; }
-      }
-      if (!dup)
-        extra_klasses->append(KlassHandle(THREAD, klass));
-    }
-  }
-
-  int extra_klass_count = (extra_klasses == NULL ? 0 : extra_klasses->length());
-  int cp_length = _imcp_limit + extra_klass_count;
+  int cp_length = _imcp_limit;
   constantPoolHandle cp;
   {
     constantPoolOop cp_oop = oopFactory::new_constantPool(cp_length, IsSafeConc, CHECK_(empty));
@@ -964,54 +910,44 @@
   }
   cp->symbol_at_put(_imcp_invoke_name,       name);
   cp->symbol_at_put(_imcp_invoke_signature,  signature);
-  cp->string_at_put(_imcp_method_type_value, Universe::the_null_string());
-  for (int j = 0; j < extra_klass_count; j++) {
-    KlassHandle klass = extra_klasses->at(j);
-    cp->klass_at_put(_imcp_limit + j, klass());
-  }
   cp->set_preresolution();
   cp->set_pool_holder(holder());
 
-  // set up the fancy stuff:
-  cp->pseudo_string_at_put(_imcp_method_type_value, method_type());
+  // decide on access bits:  public or not?
+  int flags_bits = (JVM_ACC_NATIVE | JVM_ACC_SYNTHETIC | JVM_ACC_FINAL);
+  bool must_be_static = MethodHandles::is_signature_polymorphic_static(iid);
+  if (must_be_static)  flags_bits |= JVM_ACC_STATIC;
+  assert((flags_bits & JVM_ACC_PUBLIC) == 0, "do not expose these methods");
+
   methodHandle m;
   {
-    int flags_bits = (JVM_MH_INVOKE_BITS | JVM_ACC_PUBLIC | JVM_ACC_FINAL);
     methodOop m_oop = oopFactory::new_method(0, accessFlags_from(flags_bits),
-                                             0, 0, 0, IsSafeConc, CHECK_(empty));
+                                             0, 0, 0, 0, IsSafeConc, CHECK_(empty));
     m = methodHandle(THREAD, m_oop);
   }
   m->set_constants(cp());
   m->set_name_index(_imcp_invoke_name);
   m->set_signature_index(_imcp_invoke_signature);
-  assert(is_method_handle_invoke_name(m->name()), "");
+  assert(MethodHandles::is_signature_polymorphic_name(m->name()), "");
   assert(m->signature() == signature, "");
-  assert(m->is_method_handle_invoke(), "");
 #ifdef CC_INTERP
   ResultTypeFinder rtf(signature);
   m->set_result_index(rtf.type());
 #endif
   m->compute_size_of_parameters(THREAD);
-  m->set_exception_table(Universe::the_empty_int_array());
   m->init_intrinsic_id();
-  assert(m->intrinsic_id() == vmIntrinsics::_invokeExact ||
-         m->intrinsic_id() == vmIntrinsics::_invokeGeneric, "must be an invoker");
+  assert(m->is_method_handle_intrinsic(), "");
+#ifdef ASSERT
+  if (!MethodHandles::is_signature_polymorphic(m->intrinsic_id()))  m->print();
+  assert(MethodHandles::is_signature_polymorphic(m->intrinsic_id()), "must be an invoker");
+  assert(m->intrinsic_id() == iid, "correctly predicted iid");
+#endif //ASSERT
 
   // Finally, set up its entry points.
-  assert(m->method_handle_type() == method_type(), "");
   assert(m->can_be_statically_bound(), "");
   m->set_vtable_index(methodOopDesc::nonvirtual_vtable_index);
   m->link_method(m, CHECK_(empty));
 
-#ifdef ASSERT
-  // Make sure the pointer chase works.
-  address p = (address) m();
-  for (jint* pchase = method_type_offsets_chain(); (*pchase) != -1; pchase++) {
-    p = *(address*)(p + (*pchase));
-  }
-  assert((oop)p == method_type(), "pointer chase is correct");
-#endif
-
   if (TraceMethodHandles && (Verbose || WizardMode))
     m->print_on(tty);
 
@@ -1028,7 +964,7 @@
 }
 
 
-methodHandle methodOopDesc:: clone_with_new_data(methodHandle m, u_char* new_code, int new_code_length,
+methodHandle methodOopDesc::clone_with_new_data(methodHandle m, u_char* new_code, int new_code_length,
                                                 u_char* new_compressed_linenumber_table, int new_compressed_linenumber_size, TRAPS) {
   // Code below does not work for native methods - they should never get rewritten anyway
   assert(!m->is_native(), "cannot rewrite native methods");
@@ -1036,6 +972,7 @@
   AccessFlags flags = m->access_flags();
   int checked_exceptions_len = m->checked_exceptions_length();
   int localvariable_len = m->localvariable_table_length();
+  int exception_table_len = m->exception_table_length();
   // Allocate newm_oop with the is_conc_safe parameter set
   // to IsUnsafeConc to indicate that newm_oop is not yet
   // safe for concurrent processing by a GC.
@@ -1043,6 +980,7 @@
                                               flags,
                                               new_compressed_linenumber_size,
                                               localvariable_len,
+                                              exception_table_len,
                                               checked_exceptions_len,
                                               IsUnsafeConc,
                                               CHECK_(methodHandle()));
@@ -1077,14 +1015,13 @@
   assert(m->constMethod()->is_parsable(), "Should remain parsable");
 
   // Reset correct method/const method, method size, and parameter info
-  newcm->set_method(newm());
   newm->set_constMethod(newcm);
-  assert(newcm->method() == newm(), "check");
   newm->constMethod()->set_code_size(new_code_length);
   newm->constMethod()->set_constMethod_size(new_const_method_size);
   newm->set_method_size(new_method_size);
   assert(newm->code_size() == new_code_length, "check");
   assert(newm->checked_exceptions_length() == checked_exceptions_len, "check");
+  assert(newm->exception_table_length() == exception_table_len, "check");
   assert(newm->localvariable_table_length() == localvariable_len, "check");
   // Copy new byte codes
   memcpy(newm->code_base(), new_code, new_code_length);
@@ -1100,6 +1037,12 @@
            m->checked_exceptions_start(),
            checked_exceptions_len * sizeof(CheckedExceptionElement));
   }
+  // Copy exception table
+  if (exception_table_len > 0) {
+    memcpy(newm->exception_table_start(),
+           m->exception_table_start(),
+           exception_table_len * sizeof(ExceptionTableElement));
+  }
   // Copy local variable number table
   if (localvariable_len > 0) {
     memcpy(newm->localvariable_table_start(),
@@ -1138,7 +1081,9 @@
 
   // ditto for method and signature:
   vmSymbols::SID  name_id = vmSymbols::find_sid(name());
-  if (name_id == vmSymbols::NO_SID)  return;
+  if (klass_id != vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_MethodHandle)
+      && name_id == vmSymbols::NO_SID)
+    return;
   vmSymbols::SID   sig_id = vmSymbols::find_sid(signature());
   if (klass_id != vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_MethodHandle)
       && sig_id == vmSymbols::NO_SID)  return;
@@ -1167,21 +1112,10 @@
 
   // Signature-polymorphic methods: MethodHandle.invoke*, InvokeDynamic.*.
   case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_MethodHandle):
-    if (is_static() || !is_native())  break;
-    switch (name_id) {
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(invokeGeneric_name):
-      if (!AllowInvokeGeneric)  break;
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(invoke_name):
-      id = vmIntrinsics::_invokeGeneric;
-      break;
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(invokeExact_name):
-      id = vmIntrinsics::_invokeExact;
-      break;
-    }
-    break;
-  case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_InvokeDynamic):
-    if (!is_static() || !is_native())  break;
-    id = vmIntrinsics::_invokeDynamic;
+    if (!is_native())  break;
+    id = MethodHandles::signature_polymorphic_name_id(method_holder(), name());
+    if (is_static() != MethodHandles::is_signature_polymorphic_static(id))
+      id = vmIntrinsics::_none;
     break;
   }
 
@@ -1194,6 +1128,12 @@
 
 // These two methods are static since a GC may move the methodOopDesc
 bool methodOopDesc::load_signature_classes(methodHandle m, TRAPS) {
+  if (THREAD->is_Compiler_thread()) {
+    // There is nothing useful this routine can do from within the Compiler thread.
+    // Hopefully, the signature contains only well-known classes.
+    // We could scan for this and return true/false, but the caller won't care.
+    return false;
+  }
   bool sig_is_loaded = true;
   Handle class_loader(THREAD, instanceKlass::cast(m->method_holder())->class_loader());
   Handle protection_domain(THREAD, Klass::cast(m->method_holder())->protection_domain());
@@ -1247,6 +1187,8 @@
 #endif
   name()->print_symbol_on(st);
   if (WizardMode) signature()->print_symbol_on(st);
+  else if (MethodHandles::is_signature_polymorphic(intrinsic_id()))
+    MethodHandles::print_as_basic_type_signature_on(st, signature(), true);
 }
 
 // This is only done during class loading, so it is OK to assume method_idnum matches the methods() array
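
A hedged sketch of the access-bits rule that make_method_handle_intrinsic now applies; treating the linkTo* linker forms as the static ones is an assumption read off is_signature_polymorphic_static above, and the helper name is illustrative:

    // Minimal sketch, not the VM's exact code: compose flags for an MH intrinsic.
    static int intrinsic_flags(vmIntrinsics::ID iid) {
      int flags = JVM_ACC_NATIVE | JVM_ACC_SYNTHETIC | JVM_ACC_FINAL;
      if (MethodHandles::is_signature_polymorphic_static(iid))
        flags |= JVM_ACC_STATIC;    // assumed: linkToStatic and friends
      return flags;                 // JVM_ACC_PUBLIC stays off: never user-visible
    }
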
--- a/src/share/vm/oops/methodOop.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/methodOop.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -64,7 +64,6 @@
 // | klass                                                |
 // |------------------------------------------------------|
 // | constMethodOop                 (oop)                 |
-// | constants                      (oop)                 |
 // |------------------------------------------------------|
 // | methodData                     (oop)                 |
 // | interp_invocation_count                              |
@@ -110,7 +109,6 @@
  friend class VMStructs;
  private:
   constMethodOop    _constMethod;                // Method read-only data.
-  constantPoolOop   _constants;                  // Constant pool
   methodDataOop     _method_data;
   int               _interpreter_invocation_count; // Count of times invoked (reused as prev_event_count in tiered)
   AccessFlags       _access_flags;               // Access flags
@@ -124,8 +122,11 @@
   u2                _max_locals;                 // Number of local variables used by this method
   u2                _size_of_parameters;         // size of the parameter block (receiver + arguments) in words
   u1                _intrinsic_id;               // vmSymbols::intrinsic_id (0 == _none)
-  u1                _jfr_towrite : 1,            // Flags
-                                 : 7;
+  u1                _jfr_towrite  : 1,           // Flags
+                    _force_inline : 1,
+                    _hidden       : 1,
+                    _dont_inline  : 1,
+                                  : 4;
   u2                _interpreter_throwout_count; // Count of times method was exited via exception while interpreting
   u2                _number_of_breakpoints;      // fullspeed debugging support
   InvocationCounter _invocation_counter;         // Incremented before each activation of the method - used to trigger frequency-based optimizations
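
On a hedged reading, the three new flag bits cache the java.lang.invoke inlining annotations on the method; the mapping below is inferred from the accessors added further down, not shown in this hunk:

    //   _force_inline  -- @ForceInline: inline regardless of size heuristics
    //   _dont_inline   -- @DontInline:  never inline
    //   _hidden        -- hidden frames (e.g. LambdaForm code) omitted from stack traces
    // All three share the byte already holding _jfr_towrite, so methodOopDesc
    // does not grow.
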
@@ -170,17 +171,17 @@
   void set_access_flags(AccessFlags flags)       { _access_flags = flags; }
 
   // name
-  Symbol* name() const                           { return _constants->symbol_at(name_index()); }
+  Symbol* name() const                           { return constants()->symbol_at(name_index()); }
   int name_index() const                         { return constMethod()->name_index();         }
   void set_name_index(int index)                 { constMethod()->set_name_index(index);       }
 
   // signature
-  Symbol* signature() const                      { return _constants->symbol_at(signature_index()); }
+  Symbol* signature() const                      { return constants()->symbol_at(signature_index()); }
   int signature_index() const                    { return constMethod()->signature_index();         }
   void set_signature_index(int index)            { constMethod()->set_signature_index(index);       }
 
   // generics support
-  Symbol* generic_signature() const              { int idx = generic_signature_index(); return ((idx != 0) ? _constants->symbol_at(idx) : (Symbol*)NULL); }
+  Symbol* generic_signature() const              { int idx = generic_signature_index(); return ((idx != 0) ? constants()->symbol_at(idx) : (Symbol*)NULL); }
   int generic_signature_index() const            { return constMethod()->generic_signature_index(); }
   void set_generic_signature_index(int index)    { constMethod()->set_generic_signature_index(index); }
 
@@ -242,12 +243,14 @@
   }
 
   // constant pool for klassOop holding this method
-  constantPoolOop constants() const              { return _constants; }
-  void set_constants(constantPoolOop c)          { oop_store_without_check((oop*)&_constants, c); }
+  constantPoolOop constants() const              { return constMethod()->constants(); }
+  void set_constants(constantPoolOop c)          { constMethod()->set_constants(c); }
 
   // max stack
-  int  max_stack() const                         { return _max_stack; }
-  void set_max_stack(int size)                   { _max_stack = size; }
+  // return original max stack size for method verification
+  int  verifier_max_stack() const                { return _max_stack; }
+  int           max_stack() const                { return _max_stack + extra_stack_entries(); }
+  void      set_max_stack(int size)              {        _max_stack = size; }
 
   // max locals
   int  max_locals() const                        { return _max_locals; }
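
A worked example of the split max-stack accounting above (values illustrative; assumes EnableInvokeDynamic, under which extra_stack_entries() is 2 per the hunk further down):

    // a method whose class file declares max_stack = 5:
    assert(m->verifier_max_stack() == 5,     "raw class-file value, for the verifier");
    assert(m->max_stack()          == 5 + 2, "frame sizing adds MH/indy headroom");
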
@@ -284,12 +287,12 @@
   }
 
   // exception handler table
-  typeArrayOop exception_table() const
-                                   { return constMethod()->exception_table(); }
-  void set_exception_table(typeArrayOop e)
-                                     { constMethod()->set_exception_table(e); }
   bool has_exception_handler() const
                              { return constMethod()->has_exception_handler(); }
+  int exception_table_length() const
+                             { return constMethod()->exception_table_length(); }
+  ExceptionTableElement* exception_table_start() const
+                             { return constMethod()->exception_table_start(); }
 
   // Finds the first entry point bci of an exception handler for an
   // exception of klass ex_klass thrown at throw_bci. A value of NULL
@@ -453,7 +456,7 @@
                        { return constMethod()->compressed_linenumber_table(); }
 
   // method holder (the klassOop holding this method)
-  klassOop method_holder() const                 { return _constants->pool_holder(); }
+  klassOop method_holder() const                 { return constants()->pool_holder(); }
 
   void compute_size_of_parameters(Thread *thread); // word size of parameters (receiver if any + arguments)
   Symbol* klass_name() const;                    // returns the name of the method holder
@@ -544,7 +547,6 @@
 
   // interpreter support
   static ByteSize const_offset()                 { return byte_offset_of(methodOopDesc, _constMethod       ); }
-  static ByteSize constants_offset()             { return byte_offset_of(methodOopDesc, _constants         ); }
   static ByteSize access_flags_offset()          { return byte_offset_of(methodOopDesc, _access_flags      ); }
 #ifdef CC_INTERP
   static ByteSize result_index_offset()          { return byte_offset_of(methodOopDesc, _result_index ); }
@@ -592,28 +594,19 @@
   bool is_overridden_in(klassOop k) const;
 
   // JSR 292 support
-  bool is_method_handle_invoke() const              { return access_flags().is_method_handle_invoke(); }
-  static bool is_method_handle_invoke_name(vmSymbols::SID name_sid);
-  static bool is_method_handle_invoke_name(Symbol* name) {
-    return is_method_handle_invoke_name(vmSymbols::find_sid(name));
-  }
-  // Tests if this method is an internal adapter frame from the
-  // MethodHandleCompiler.
-  bool is_method_handle_adapter() const;
-  static methodHandle make_invoke_method(KlassHandle holder,
-                                         Symbol* name, //invokeExact or invokeGeneric
-                                         Symbol* signature, //anything at all
-                                         Handle method_type,
-                                         TRAPS);
+  bool is_method_handle_intrinsic() const;          // MethodHandles::is_signature_polymorphic_intrinsic(intrinsic_id)
+  bool is_compiled_lambda_form() const;             // intrinsic_id() == vmIntrinsics::_compiledLambdaForm
+  bool has_member_arg() const;                      // intrinsic_id() == vmIntrinsics::_linkToSpecial, etc.
+  static methodHandle make_method_handle_intrinsic(vmIntrinsics::ID iid, // _invokeBasic, _linkToVirtual
+                                                   Symbol* signature, //anything at all
+                                                   TRAPS);
   static klassOop check_non_bcp_klass(klassOop klass);
   // these operate only on invoke methods:
-  oop method_handle_type() const;
-  static jint* method_type_offsets_chain();  // series of pointer-offsets, terminated by -1
   // presize interpreter frames for extra interpreter stack entries, if needed
   // method handles want to be able to push a few extra values (e.g., a bound receiver), and
   // invokedynamic sometimes needs to push a bootstrap method, call site, and arglist,
   // all without checking for a stack overflow
-  static int extra_stack_entries() { return EnableInvokeDynamic ? (int) MethodHandlePushLimit + 3 : 0; }
+  static int extra_stack_entries() { return EnableInvokeDynamic ? 2 : 0; }
   static int extra_stack_words();  // = extra_stack_entries() * Interpreter::stackElementSize()
 
   // RedefineClasses() support:
@@ -658,6 +651,13 @@
   bool jfr_towrite()                 { return _jfr_towrite; }
   void set_jfr_towrite(bool towrite) { _jfr_towrite = towrite; }
 
+  bool     force_inline()       { return _force_inline;     }
+  void set_force_inline(bool x) {        _force_inline = x; }
+  bool     dont_inline()        { return _dont_inline;      }
+  void set_dont_inline(bool x)  {        _dont_inline = x;  }
+  bool  is_hidden()             { return _hidden;           }
+  void set_hidden(bool x)       {        _hidden = x;       }
+
   // On-stack replacement support
   bool has_osr_nmethod(int level, bool match_level) {
    return instanceKlass::cast(method_holder())->lookup_osr_nmethod(this, InvocationEntryBci, level, match_level) != NULL;
@@ -703,8 +703,8 @@
   static bool has_unloaded_classes_in_signature(methodHandle m, TRAPS);
 
   // Printing
-  void print_short_name(outputStream* st)        /*PRODUCT_RETURN*/; // prints as klassname::methodname; Exposed so field engineers can debug VM
-  void print_name(outputStream* st)              PRODUCT_RETURN; // prints as "virtual void foo(int)"
+  void print_short_name(outputStream* st = tty)  /*PRODUCT_RETURN*/; // prints as klassname::methodname; Exposed so field engineers can debug VM
+  void print_name(outputStream* st = tty)        PRODUCT_RETURN; // prints as "virtual void foo(int)"
 
   // Helper routine used for method sorting
   static void sort_methods(objArrayOop methods,
@@ -723,7 +723,6 @@
 
   // Garbage collection support
   oop*  adr_constMethod() const                  { return (oop*)&_constMethod;     }
-  oop*  adr_constants() const                    { return (oop*)&_constants;       }
   oop*  adr_method_data() const                  { return (oop*)&_method_data;     }
 };
 
@@ -805,7 +804,7 @@
 // breakpoints are written only at safepoints, and are read
 // concurrently only outside of safepoints.
 
-class BreakpointInfo : public CHeapObj {
+class BreakpointInfo : public CHeapObj<mtClass> {
   friend class VMStructs;
  private:
   Bytecodes::Code  _orig_bytecode;
@@ -839,4 +838,66 @@
   void clear(methodOop method);
 };
 
+// Utility class for accessing exception handlers
+class ExceptionTable : public StackObj {
+ private:
+  ExceptionTableElement* _table;
+  u2  _length;
+
+ public:
+  ExceptionTable(methodOop m) {
+    if (m->has_exception_handler()) {
+      _table = m->exception_table_start();
+      _length = m->exception_table_length();
+    } else {
+      _table = NULL;
+      _length = 0;
+    }
+  }
+
+  int length() const {
+    return _length;
+  }
+
+  u2 start_pc(int idx) const {
+    assert(idx < _length, "out of bounds");
+    return _table[idx].start_pc;
+  }
+
+  void set_start_pc(int idx, u2 value) {
+    assert(idx < _length, "out of bounds");
+    _table[idx].start_pc = value;
+  }
+
+  u2 end_pc(int idx) const {
+    assert(idx < _length, "out of bounds");
+    return _table[idx].end_pc;
+  }
+
+  void set_end_pc(int idx, u2 value) {
+    assert(idx < _length, "out of bounds");
+    _table[idx].end_pc = value;
+  }
+
+  u2 handler_pc(int idx) const {
+    assert(idx < _length, "out of bounds");
+    return _table[idx].handler_pc;
+  }
+
+  void set_handler_pc(int idx, u2 value) {
+    assert(idx < _length, "out of bounds");
+    _table[idx].handler_pc = value;
+  }
+
+  u2 catch_type_index(int idx) const {
+    assert(idx < _length, "out of bounds");
+    return _table[idx].catch_type_index;
+  }
+
+  void set_catch_type_index(int idx, u2 value) {
+    assert(idx < _length, "out of bounds");
+    _table[idx].catch_type_index = value;
+  }
+};
+
 #endif // SHARE_VM_OOPS_METHODOOP_HPP
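
A minimal usage sketch for the ExceptionTable accessor defined above; the helper name and its parameters are illustrative:

    static void print_exception_handlers(methodOop m, outputStream* st) {
      ExceptionTable table(m);            // length() == 0 when m has no handlers
      for (int i = 0; i < table.length(); i++) {
        st->print_cr("  pc [%d, %d) -> handler %d (catch_type cp index %d)",
                     table.start_pc(i), table.end_pc(i),
                     table.handler_pc(i), table.catch_type_index(i));
      }
    }
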
--- a/src/share/vm/oops/objArrayKlass.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/objArrayKlass.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -545,10 +545,3 @@
     guarantee(oa->obj_at(index)->is_oop_or_null(), "should be oop");
   }
 }
-
-void objArrayKlass::oop_verify_old_oop(oop obj, oop* p, bool allow_dirty) {
-  /* $$$ move into remembered set verification?
-  RememberedSet::verify_old_oop(obj, p, allow_dirty, true);
-  */
-}
-void objArrayKlass::oop_verify_old_oop(oop obj, narrowOop* p, bool allow_dirty) {}
--- a/src/share/vm/oops/objArrayKlass.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/objArrayKlass.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -144,8 +144,6 @@
   // Verification
   const char* internal_name() const;
   void oop_verify_on(oop obj, outputStream* st);
-  void oop_verify_old_oop(oop obj, oop* p, bool allow_dirty);
-  void oop_verify_old_oop(oop obj, narrowOop* p, bool allow_dirty);
 };
 
 #endif // SHARE_VM_OOPS_OBJARRAYKLASS_HPP
--- a/src/share/vm/oops/objArrayKlassKlass.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/objArrayKlassKlass.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -137,7 +137,7 @@
       new_str[idx++] = ';';
     }
     new_str[idx++] = '\0';
-    name = SymbolTable::new_symbol(new_str, CHECK_0);
+    name = SymbolTable::new_permanent_symbol(new_str, CHECK_0);
     if (element_klass->oop_is_instance()) {
       instanceKlass* ik = instanceKlass::cast(element_klass());
       ik->set_array_name(name);
--- a/src/share/vm/oops/oop.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/oop.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -107,16 +107,6 @@
   verify_on(tty);
 }
 
-
-// XXX verify_old_oop doesn't do anything (should we remove?)
-void oopDesc::verify_old_oop(oop* p, bool allow_dirty) {
-  blueprint()->oop_verify_old_oop(this, p, allow_dirty);
-}
-
-void oopDesc::verify_old_oop(narrowOop* p, bool allow_dirty) {
-  blueprint()->oop_verify_old_oop(this, p, allow_dirty);
-}
-
 bool oopDesc::partially_loaded() {
   return blueprint()->oop_partially_loaded(this);
 }
--- a/src/share/vm/oops/oop.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/oop.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -293,8 +293,6 @@
   // verification operations
   void verify_on(outputStream* st);
   void verify();
-  void verify_old_oop(oop* p, bool allow_dirty);
-  void verify_old_oop(narrowOop* p, bool allow_dirty);
 
   // tells whether this oop is partially constructed (gc during class loading)
   bool partially_loaded();
--- a/src/share/vm/oops/symbol.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/symbol.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,15 +29,25 @@
 #include "runtime/os.hpp"
 #include "memory/allocation.inline.hpp"
 
-Symbol::Symbol(const u1* name, int length) : _refcount(0), _length(length) {
+Symbol::Symbol(const u1* name, int length, int refcount) : _refcount(refcount), _length(length) {
   _identity_hash = os::random();
   for (int i = 0; i < _length; i++) {
     byte_at_put(i, name[i]);
   }
 }
 
-void* Symbol::operator new(size_t size, int len) {
-  return (void *) AllocateHeap(object_size(len) * HeapWordSize, "symbol");
+void* Symbol::operator new(size_t sz, int len, TRAPS) {
+  int alloc_size = object_size(len)*HeapWordSize;
+  address res = (address) AllocateHeap(alloc_size, mtSymbol);
+  DEBUG_ONLY(set_allocation_type(res, ResourceObj::C_HEAP);)
+  return res;
+}
+
+void* Symbol::operator new(size_t sz, int len, Arena* arena, TRAPS) {
+  int alloc_size = object_size(len)*HeapWordSize;
+  address res = (address)arena->Amalloc(alloc_size);
+  DEBUG_ONLY(set_allocation_type(res, ResourceObj::ARENA);)
+  return res;
 }
 
 // ------------------------------------------------------------------
@@ -86,7 +96,7 @@
   address scan = bytes + i;
   if (scan > limit)
     return -1;
-  for (;;) {
+  for (; scan <= limit; scan++) {
     scan = (address) memchr(scan, first_char, (limit + 1 - scan));
     if (scan == NULL)
       return -1;  // not found
@@ -94,6 +104,7 @@
     if (memcmp(scan, str, len) == 0)
       return (int)(scan - bytes);
   }
+  return -1;
 }
 
 
@@ -206,26 +217,5 @@
   }
 }
 
-void Symbol::increment_refcount() {
-  // Only increment the refcount if positive.  If negative either
-  // overflow has occurred or it is a permanent symbol in a read only
-  // shared archive.
-  if (_refcount >= 0) {
-    Atomic::inc(&_refcount);
-    NOT_PRODUCT(Atomic::inc(&_total_count);)
-  }
-}
-
-void Symbol::decrement_refcount() {
-  if (_refcount >= 0) {
-    Atomic::dec(&_refcount);
-#ifdef ASSERT
-    if (_refcount < 0) {
-      print();
-      assert(false, "reference count underflow for symbol");
-    }
-#endif
-  }
-}
-
+// SymbolTable prints this in its statistics
 NOT_PRODUCT(int Symbol::_total_count = 0;)
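
For reference, a self-contained sketch of the bounded substring scan the fixed loop above implements; the plain-char signature is illustrative, not the Symbol API:

    #include <string.h>

    // Returns the first index at which str[0..len) occurs in bytes[0..length), else -1.
    static int index_of(const char* bytes, int length, const char* str, int len) {
      if (len <= 0 || len > length) return -1;
      const char* limit = bytes + length - len;      // last legal start position
      for (const char* scan = bytes; scan <= limit; scan++) {
        scan = (const char*) memchr(scan, str[0], limit + 1 - scan);
        if (scan == NULL) return -1;                 // first char not found
        if (memcmp(scan, str, len) == 0)
          return (int)(scan - bytes);
      }
      return -1;                                     // the new bound: scan passed limit
    }
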
--- a/src/share/vm/oops/symbol.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/symbol.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
 
 #include "utilities/utf8.hpp"
 #include "memory/allocation.hpp"
+#include "runtime/atomic.hpp"
 
 // A Symbol is a canonicalized string.
 // All Symbols reside in global SymbolTable and are reference counted.
@@ -95,7 +96,7 @@
 // TempNewSymbol (passed in as a parameter) so the reference count on its symbol
 // will be decremented when it goes out of scope.
 
-class Symbol : public CHeapObj {
+class Symbol : public ResourceObj {
   friend class VMStructs;
   friend class SymbolTable;
   friend class MoveSymbols;
@@ -111,7 +112,7 @@
   };
 
   static int object_size(int length) {
-    size_t size = heap_word_size(sizeof(Symbol) + length);
+    size_t size = heap_word_size(sizeof(Symbol) + (length > 0 ? length - 1 : 0));
     return align_object_size(size);
   }
 
@@ -120,28 +121,25 @@
     _body[index] = value;
   }
 
-  Symbol(const u1* name, int length);
-  void* operator new(size_t size, int len);
+  Symbol(const u1* name, int length, int refcount);
+  void* operator new(size_t size, int len, TRAPS);
+  void* operator new(size_t size, int len, Arena* arena, TRAPS);
 
  public:
   // Low-level access (used with care, since not GC-safe)
   const jbyte* base() const { return &_body[0]; }
 
-  int object_size() { return object_size(utf8_length()); }
+  int object_size()         { return object_size(utf8_length()); }
 
   // Returns the largest size symbol we can safely hold.
-  static int max_length() {
-    return max_symbol_length;
-  }
+  static int max_length()   { return max_symbol_length; }
 
-  int identity_hash() {
-    return _identity_hash;
-  }
+  int identity_hash()       { return _identity_hash; }
 
   // Reference counting.  See comments above this class for when to use.
-  int refcount() const { return _refcount; }
-  void increment_refcount();
-  void decrement_refcount();
+  int refcount() const      { return _refcount; }
+  inline void increment_refcount();
+  inline void decrement_refcount();
 
   int byte_at(int index) const {
     assert(index >=0 && index < _length, "symbol index overflow");
@@ -220,4 +218,26 @@
  return (((uintptr_t)this < (uintptr_t)other) ? -1
    : ((uintptr_t)this == (uintptr_t) other) ? 0 : 1);
 }
+
+inline void Symbol::increment_refcount() {
+  // Only increment the refcount if positive.  If negative either
+  // overflow has occurred or it is a permanent symbol in a read only
+  // shared archive.
+  if (_refcount >= 0) {
+    Atomic::inc(&_refcount);
+    NOT_PRODUCT(Atomic::inc(&_total_count);)
+  }
+}
+
+inline void Symbol::decrement_refcount() {
+  if (_refcount >= 0) {
+    Atomic::dec(&_refcount);
+#ifdef ASSERT
+    if (_refcount < 0) {
+      print();
+      assert(false, "reference count underflow for symbol");
+    }
+#endif
+  }
+}
 #endif // SHARE_VM_OOPS_SYMBOL_HPP
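
A hedged usage sketch of the counting discipline the header comment describes: TempNewSymbol (declared in symbolTable.hpp) pairs the lookup's increment with a decrement at scope exit, and the guard in decrement_refcount above makes the same code safe for permanent symbols:

    {
      TempNewSymbol name = SymbolTable::new_symbol("Ljava/lang/String;", CHECK);
      // ... use name like a Symbol* while in scope ...
    }  // destructor calls decrement_refcount(); a no-op once _refcount < 0
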
--- a/src/share/vm/oops/typeArrayKlass.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/oops/typeArrayKlass.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -55,7 +55,7 @@
 
   Symbol* sym = NULL;
   if (name_str != NULL) {
-    sym = SymbolTable::new_symbol(name_str, CHECK_NULL);
+    sym = SymbolTable::new_permanent_symbol(name_str, CHECK_NULL);
   }
   KlassHandle klassklass (THREAD, Universe::typeArrayKlassKlassObj());
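
Both array-name call sites (objArrayKlassKlass.cpp above and typeArrayKlass.cpp here) now intern through new_permanent_symbol; a hedged sketch of the observable difference, assuming permanent entries carry the negative refcount the Symbol comments describe:

    Symbol* name = SymbolTable::new_permanent_symbol("[I", CHECK_NULL);
    name->increment_refcount();   // guarded no-op for a permanent symbol
    name->decrement_refcount();   // likewise: the entry can never be freed
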
 
--- a/src/share/vm/opto/bytecodeInfo.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/bytecodeInfo.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -93,7 +93,7 @@
          );
 }
 
-// positive filter: should send be inlined?  returns NULL, if yes, or rejection msg
+// positive filter: should callee be inlined?  returns NULL if yes, or a rejection msg
 const char* InlineTree::should_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const {
   // Allows targeted inlining
   if(callee_method->should_inline()) {
@@ -131,33 +131,6 @@
   int call_site_count  = method()->scale_count(profile.count());
   int invoke_count     = method()->interpreter_invocation_count();
 
-  // Bytecoded method handle adapters do not have interpreter
-  // profiling data but only made up MDO data.  Get the counter from
-  // there.
-  if (caller_method->is_method_handle_adapter()) {
-    assert(method()->method_data_or_null(), "must have an MDO");
-    ciMethodData* mdo = method()->method_data();
-    ciProfileData* mha_profile = mdo->bci_to_data(caller_bci);
-    assert(mha_profile, "must exist");
-    CounterData* cd = mha_profile->as_CounterData();
-    invoke_count = cd->count();
-    if (invoke_count == 0) {
-      return "method handle not reached";
-    }
-
-    if (_caller_jvms != NULL && _caller_jvms->method() != NULL &&
-        _caller_jvms->method()->method_data() != NULL &&
-        !_caller_jvms->method()->method_data()->is_empty()) {
-      ciMethodData* mdo = _caller_jvms->method()->method_data();
-      ciProfileData* mha_profile = mdo->bci_to_data(_caller_jvms->bci());
-      assert(mha_profile, "must exist");
-      CounterData* cd = mha_profile->as_CounterData();
-      call_site_count = cd->count();
-    } else {
-      call_site_count = invoke_count;  // use the same value
-    }
-  }
-
   assert(invoke_count != 0, "require invocation count greater than zero");
   int freq = call_site_count / invoke_count;
 
@@ -189,15 +162,16 @@
 }
 
 
-// negative filter: should send NOT be inlined?  returns NULL, ok to inline, or rejection msg
+// negative filter: should callee NOT be inlined?  returns NULL (ok to inline) or a rejection msg
 const char* InlineTree::should_not_inline(ciMethod *callee_method, ciMethod* caller_method, WarmCallInfo* wci_result) const {
   // negative filter: should callee NOT be inlined?  returns NULL (--> inline) or rejection msg
   if (!UseOldInlining) {
     const char* fail = NULL;
-    if (callee_method->is_abstract())               fail = "abstract method";
+    if ( callee_method->is_abstract())               fail = "abstract method";
     // note: we allow ik->is_abstract()
-    if (!callee_method->holder()->is_initialized()) fail = "method holder not initialized";
-    if (callee_method->is_native())                 fail = "native method";
+    if (!callee_method->holder()->is_initialized())  fail = "method holder not initialized";
+    if ( callee_method->is_native())                 fail = "native method";
+    if ( callee_method->dont_inline())               fail = "don't inline by annotation";
 
     if (fail) {
       *wci_result = *(WarmCallInfo::always_cold());
@@ -217,7 +191,8 @@
       }
     }
 
-    if (callee_method->has_compiled_code() && callee_method->instructions_size(CompLevel_full_optimization) > InlineSmallCode) {
+    if (callee_method->has_compiled_code() &&
+        callee_method->instructions_size(CompLevel_full_optimization) > InlineSmallCode) {
       wci_result->set_profit(wci_result->profit() * 0.1);
       // %%% adjust wci_result->size()?
     }
@@ -225,26 +200,25 @@
     return NULL;
   }
 
-  // Always inline MethodHandle methods and generated MethodHandle adapters.
-  if (callee_method->is_method_handle_invoke() || callee_method->is_method_handle_adapter())
-    return NULL;
+  // First check all inlining restrictions which are required for correctness
+  if ( callee_method->is_abstract())                        return "abstract method";
+  // note: we allow ik->is_abstract()
+  if (!callee_method->holder()->is_initialized())           return "method holder not initialized";
+  if ( callee_method->is_native())                          return "native method";
+  if ( callee_method->dont_inline())                        return "don't inline by annotation";
+  if ( callee_method->has_unloaded_classes_in_signature())  return "unloaded signature classes";
 
-  // First check all inlining restrictions which are required for correctness
-  if (callee_method->is_abstract())               return "abstract method";
-  // note: we allow ik->is_abstract()
-  if (!callee_method->holder()->is_initialized()) return "method holder not initialized";
-  if (callee_method->is_native())                 return "native method";
-  if (callee_method->has_unloaded_classes_in_signature()) return "unloaded signature classes";
-
-  if (callee_method->should_inline()) {
+  if (callee_method->force_inline() || callee_method->should_inline()) {
     // ignore heuristic controls on inlining
     return NULL;
   }
 
   // Now perform checks which are heuristic
 
-  if( callee_method->has_compiled_code() && callee_method->instructions_size(CompLevel_full_optimization) > InlineSmallCode )
+  if (callee_method->has_compiled_code() &&
+      callee_method->instructions_size(CompLevel_full_optimization) > InlineSmallCode) {
     return "already compiled into a big method";
+  }
 
   // don't inline exception code unless the top method belongs to an
   // exception class
@@ -257,8 +231,20 @@
       return "exception method";
   }
 
+  if (callee_method->should_not_inline()) {
+    return "disallowed by CompilerOracle";
+  }
+
+  if (UseStringCache) {
+    // Do not inline StringCache::profile() method used only at the beginning.
+    if (callee_method->name() == ciSymbol::profile_name() &&
+        callee_method->holder()->name() == ciSymbol::java_lang_StringCache()) {
+      return "profiling method";
+    }
+  }
+
   // use frequency-based objections only for non-trivial methods
-  if (callee_method->code_size_for_inlining() <= MaxTrivialSize) return NULL;
+  if (callee_method->code_size() <= MaxTrivialSize) return NULL;
 
   // don't use counts with -Xcomp or CTW
   if (UseInterpreter && !CompileTheWorld) {
@@ -278,18 +264,6 @@
     }
   }
 
-  if (callee_method->should_not_inline()) {
-    return "disallowed by CompilerOracle";
-  }
-
-  if (UseStringCache) {
-    // Do not inline StringCache::profile() method used only at the beginning.
-    if (callee_method->name() == ciSymbol::profile_name() &&
-        callee_method->holder()->name() == ciSymbol::java_lang_StringCache()) {
-      return "profiling method";
-    }
-  }
-
   return NULL;
 }
 
@@ -319,7 +293,7 @@
   }
 
   // suppress a few checks for accessors and trivial methods
-  if (callee_method->code_size_for_inlining() > MaxTrivialSize) {
+  if (callee_method->code_size() > MaxTrivialSize) {
 
     // don't inline into giant methods
     if (C->unique() > (uint)NodeCountInliningCutoff) {
@@ -346,7 +320,7 @@
   }
 
   // detect direct and indirect recursive inlining
-  {
+  if (!callee_method->is_compiled_lambda_form()) {
     // count the current method and the callee
     int inline_level = (method() == callee_method) ? 1 : 0;
     if (inline_level > MaxRecursiveInlineLevel)
@@ -412,6 +386,7 @@
 const char* InlineTree::check_can_parse(ciMethod* callee) {
   // Certain methods cannot be parsed at all:
   if ( callee->is_native())                     return "native method";
+  if ( callee->is_abstract())                   return "abstract method";
   if (!callee->can_be_compiled())               return "not compilable (disabled)";
   if (!callee->has_balanced_monitors())         return "not compilable (unbalanced monitors)";
   if ( callee->get_flow_analysis()->failing())  return "not compilable (flow analysis failed)";
@@ -426,7 +401,7 @@
   if (Verbose && callee_method) {
     const InlineTree *top = this;
     while( top->caller_tree() != NULL ) { top = top->caller_tree(); }
-    tty->print("  bcs: %d+%d  invoked: %d", top->count_inline_bcs(), callee_method->code_size(), callee_method->interpreter_invocation_count());
+    //tty->print("  bcs: %d+%d  invoked: %d", top->count_inline_bcs(), callee_method->code_size(), callee_method->interpreter_invocation_count());
   }
 }
 
@@ -449,10 +424,7 @@
 
   // Do some initial checks.
   if (!pass_initial_checks(caller_method, caller_bci, callee_method)) {
-    if (PrintInlining) {
-      failure_msg = "failed_initial_checks";
-      print_inlining(callee_method, caller_bci, failure_msg);
-    }
+    if (PrintInlining)  print_inlining(callee_method, caller_bci, "failed initial checks");
     return NULL;
   }
 
@@ -539,9 +511,10 @@
   }
   int max_inline_level_adjust = 0;
   if (caller_jvms->method() != NULL) {
-    if (caller_jvms->method()->is_method_handle_adapter())
+    if (caller_jvms->method()->is_compiled_lambda_form())
       max_inline_level_adjust += 1;  // don't count actions in MH or indy adapter frames
-    else if (callee_method->is_method_handle_invoke()) {
+    else if (callee_method->is_method_handle_intrinsic() ||
+             callee_method->is_compiled_lambda_form()) {
+      max_inline_level_adjust += 1;  // don't count method handle calls from java.lang.invoke implementation
     }
     if (max_inline_level_adjust != 0 && PrintInlining && (Verbose || WizardMode)) {
@@ -590,7 +563,7 @@
 // Given a jvms, which determines a call chain from the root method,
 // find the corresponding inline tree.
 // Note: This method will be removed or replaced as InlineTree goes away.
-InlineTree* InlineTree::find_subtree_from_root(InlineTree* root, JVMState* jvms, ciMethod* callee, bool create_if_not_found) {
+InlineTree* InlineTree::find_subtree_from_root(InlineTree* root, JVMState* jvms, ciMethod* callee) {
   InlineTree* iltp = root;
   uint depth = jvms && jvms->has_method() ? jvms->depth() : 0;
   for (uint d = 1; d <= depth; d++) {
@@ -599,12 +572,12 @@
     assert(jvmsp->method() == iltp->method(), "tree still in sync");
     ciMethod* d_callee = (d == depth) ? callee : jvms->of_depth(d+1)->method();
     InlineTree* sub = iltp->callee_at(jvmsp->bci(), d_callee);
-    if (!sub) {
-      if (create_if_not_found && d == depth) {
-        return iltp->build_inline_tree_for_callee(d_callee, jvmsp, jvmsp->bci());
+    if (sub == NULL) {
+      if (d == depth) {
+        sub = iltp->build_inline_tree_for_callee(d_callee, jvmsp, jvmsp->bci());
       }
-      assert(sub != NULL, "should be a sub-ilt here");
-      return NULL;
+      guarantee(sub != NULL, "should be a sub-ilt here");
+      return sub;
     }
     iltp = sub;
   }
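
A condensed sketch of the should_not_inline ordering after this change (rejection messages from the hunk; structure simplified): correctness rejections always win, and force_inline bypasses only the heuristics that follow it:

    static const char* should_not_inline_sketch(ciMethod* callee) {
      // required-for-correctness checks: never overridden
      if ( callee->is_abstract())                       return "abstract method";
      if (!callee->holder()->is_initialized())          return "method holder not initialized";
      if ( callee->is_native())                         return "native method";
      if ( callee->dont_inline())                       return "don't inline by annotation";
      if ( callee->has_unloaded_classes_in_signature()) return "unloaded signature classes";
      if (callee->force_inline() || callee->should_inline())
        return NULL;   // @ForceInline / CompilerOracle: skip the heuristics
      // ... size, frequency and CompilerOracle rejections follow here ...
      return NULL;
    }
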
--- a/src/share/vm/opto/c2_globals.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/c2_globals.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -81,6 +81,13 @@
   product(intx, MaxLoopPad, (OptoLoopAlignment-1),                          \
           "Align a loop if padding size in bytes is less or equal to this value") \
                                                                             \
+  product(intx, MaxVectorSize, 32,                                          \
+          "Max vector size in bytes, "                                      \
+          "actual size could be less depending on elements type")           \
+                                                                            \
+  product(bool, AlignVector, false,                                         \
+          "Perform vector store/load alignment in loop")                    \
+                                                                            \
   product(intx, NumberOfLoopInstrToAlign, 4,                                \
           "Number of first instructions in a loop to align")                \
                                                                             \
@@ -292,9 +299,12 @@
   develop(bool, SuperWordRTDepCheck, false,                                 \
           "Enable runtime dependency checks.")                              \
                                                                             \
-  product(bool, TraceSuperWord, false,                                      \
+  notproduct(bool, TraceSuperWord, false,                                   \
           "Trace superword transforms")                                     \
                                                                             \
+  notproduct(bool, TraceNewVectors, false,                                  \
+          "Trace creation of Vector nodes")                                 \
+                                                                            \
   product_pd(bool, OptoBundling,                                            \
           "Generate nops to fill i-cache lines")                            \
                                                                             \
@@ -343,27 +353,9 @@
   develop(bool, StressRecompilation, false,                                 \
           "Recompile each compiled method without subsuming loads or escape analysis.") \
                                                                             \
-  /* controls for tier 1 compilations */                                    \
-                                                                            \
-  develop(bool, Tier1CountInvocations, true,                                \
-          "Generate code, during tier 1, to update invocation counter")     \
-                                                                            \
-  product(intx, Tier1Inline, false,                                         \
-          "enable inlining during tier 1")                                  \
-                                                                            \
-  product(intx, Tier1MaxInlineSize, 8,                                      \
-          "maximum bytecode size of a method to be inlined, during tier 1") \
-                                                                            \
-  product(intx, Tier1FreqInlineSize, 35,                                    \
-          "max bytecode size of a frequent method to be inlined, tier 1")   \
-                                                                            \
   develop(intx, ImplicitNullCheckThreshold, 3,                              \
           "Don't do implicit null checks if NPE's in a method exceeds limit") \
                                                                             \
- /* controls for loop optimization */                                       \
-  product(intx, Tier1LoopOptsCount, 0,                                      \
-          "Set level of loop optimization for tier 1 compiles")             \
-                                                                            \
   product(intx, LoopOptsCount, 43,                                          \
           "Set level of loop optimization for tier 1 compiles")             \
                                                                             \
@@ -465,6 +457,9 @@
   notproduct(bool, PrintOptimizePtrCompare, false,                          \
           "Print information about optimized pointers compare")             \
                                                                             \
+  notproduct(bool, VerifyConnectionGraph , true,                            \
+          "Verify Connection Graph construction in Escape Analysis")        \
+                                                                            \
   product(bool, UseOptoBiasInlining, true,                                  \
           "Generate biased locking code in C2 ideal graph")                 \
                                                                             \
@@ -492,6 +487,116 @@
                                                                             \
   product(bool, BlockLayoutRotateLoops, true,                               \
           "Allow back branches to be fall throughs in the block layour")    \
+                                                                            \
+  develop(bool, InlineReflectionGetCallerClass, true,                       \
+          "inline sun.reflect.Reflection.getCallerClass(), known to be part "\
+          "of base library DLL")                                            \
+                                                                            \
+  develop(bool, InlineObjectCopy, true,                                     \
+          "inline Object.clone and Arrays.copyOf[Range] intrinsics")        \
+                                                                            \
+  develop(bool, SpecialStringCompareTo, true,                               \
+          "special version of string compareTo")                            \
+                                                                            \
+  develop(bool, SpecialStringIndexOf, true,                                 \
+          "special version of string indexOf")                              \
+                                                                            \
+  develop(bool, SpecialStringEquals, true,                                  \
+          "special version of string equals")                               \
+                                                                            \
+  develop(bool, SpecialArraysEquals, true,                                  \
+          "special version of Arrays.equals(char[],char[])")                \
+                                                                            \
+  develop(bool, BailoutToInterpreterForThrows, false,                       \
+          "Compiled methods which throws/catches exceptions will be "       \
+          "deopt and intp.")                                                \
+                                                                            \
+  develop(bool, ConvertCmpD2CmpF, true,                                     \
+          "Convert cmpD to cmpF when one input is constant in float range") \
+                                                                            \
+  develop(bool, ConvertFloat2IntClipping, true,                             \
+          "Convert float2int clipping idiom to integer clipping")           \
+                                                                            \
+  develop(bool, Use24BitFPMode, true,                                       \
+          "Set 24-bit FPU mode on a per-compile basis ")                    \
+                                                                            \
+  develop(bool, Use24BitFP, true,                                           \
+          "use FP instructions that produce 24-bit precise results")        \
+                                                                            \
+  develop(bool, MonomorphicArrayCheck, true,                                \
+          "Uncommon-trap array store checks that require full type check")  \
+                                                                            \
+  notproduct(bool, TracePhaseCCP, false,                                    \
+          "Print progress during Conditional Constant Propagation")         \
+                                                                            \
+  develop(bool, PrintDominators, false,                                     \
+          "Print out dominator trees for GVN")                              \
+                                                                            \
+  notproduct(bool, TraceSpilling, false,                                    \
+          "Trace spilling")                                                 \
+                                                                            \
+  notproduct(bool, TraceTypeProfile, false,                                 \
+          "Trace type profile")                                             \
+                                                                            \
+  develop(bool, PoisonOSREntry, true,                                       \
+           "Detect abnormal calls to OSR code")                             \
+                                                                            \
+  product(bool, UseCondCardMark, false,                                     \
+          "Check for already marked card before updating card table")       \
+                                                                            \
+  develop(bool, SoftMatchFailure, trueInProduct,                            \
+          "If the DFA fails to match a node, print a message and bail out") \
+                                                                            \
+  develop(bool, InlineAccessors, true,                                      \
+          "inline accessor methods (get/set)")                              \
+                                                                            \
+  product(intx, TypeProfileMajorReceiverPercent, 90,                        \
+          "% of major receiver type to all profiled receivers")             \
+                                                                            \
+  notproduct(bool, TimeCompiler2, false,                                    \
+          "detailed time the compiler (requires +TimeCompiler)")            \
+                                                                            \
+  diagnostic(bool, PrintIntrinsics, false,                                  \
+          "prints attempted and successful inlining of intrinsics")         \
+                                                                            \
+  diagnostic(ccstrlist, DisableIntrinsic, "",                               \
+          "do not expand intrinsics whose (internal) names appear here")    \
+                                                                            \
+  develop(bool, StressReflectiveCode, false,                                \
+          "Use inexact types at allocations, etc., to test reflection")     \
+                                                                            \
+  diagnostic(bool, DebugInlinedCalls, true,                                 \
+         "If false, restricts profiled locations to the root method only")  \
+                                                                            \
+  notproduct(bool, VerifyLoopOptimizations, false,                          \
+          "verify major loop optimizations")                                \
+                                                                            \
+  diagnostic(bool, ProfileDynamicTypes, true,                               \
+          "do extra type profiling and use it more aggressively")           \
+                                                                            \
+  develop(bool, TraceIterativeGVN, false,                                   \
+          "Print progress during Iterative Global Value Numbering")         \
+                                                                            \
+  develop(bool, VerifyIterativeGVN, false,                                  \
+          "Verify Def-Use modifications during sparse Iterative Global "    \
+          "Value Numbering")                                                \
+                                                                            \
+  notproduct(bool, TraceCISCSpill, false,                                   \
+          "Trace allocators use of cisc spillable instructions")            \
+                                                                            \
+  product(bool, SplitIfBlocks, true,                                        \
+          "Clone compares and control flow through merge points to fold "   \
+          "some branches")                                                  \
+                                                                            \
+  develop(intx, FreqCountInvocations,  1,                                   \
+          "Scaling factor for branch frequencies (deprecated)")             \
+                                                                            \
+  product(intx, AliasLevel,     3,                                          \
+          "0 for no aliasing, 1 for oop/field/static/array split, "         \
+          "2 for class split, 3 for unique instances")                      \
+                                                                            \
+  develop(bool, VerifyAliases, false,                                       \
+          "perform extra checks on the results of alias analysis")          \
 
 C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
 
--- a/src/share/vm/opto/callGenerator.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/callGenerator.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 #include "ci/bcEscapeAnalyzer.hpp"
 #include "ci/ciCallSite.hpp"
 #include "ci/ciCPCache.hpp"
+#include "ci/ciMemberName.hpp"
 #include "ci/ciMethodHandle.hpp"
 #include "classfile/javaClasses.hpp"
 #include "compiler/compileLog.hpp"
@@ -39,9 +40,6 @@
 #include "opto/runtime.hpp"
 #include "opto/subnode.hpp"
 
-CallGenerator::CallGenerator(ciMethod* method) {
-  _method = method;
-}
 
 // Utility function.
 const TypeFunc* CallGenerator::tf() const {
@@ -137,6 +135,7 @@
   }
 
   CallStaticJavaNode *call = new (kit.C, tf()->domain()->cnt()) CallStaticJavaNode(tf(), target, method(), kit.bci());
+  _call_node = call;  // Save the call node in case we need it later
   if (!is_static) {
     // Make an explicit receiver null_check as part of this call.
     // Since we share a map with the caller, his JVMS gets adjusted.
@@ -147,7 +146,8 @@
     }
     // Mark the call node as virtual, sort of:
     call->set_optimized_virtual(true);
-    if (method()->is_method_handle_invoke()) {
+    if (method()->is_method_handle_intrinsic() ||
+        method()->is_compiled_lambda_form()) {
       call->set_method_handle_invoke(true);
     }
   }
@@ -155,75 +155,6 @@
   kit.set_edges_for_java_call(call, false, _separate_io_proj);
   Node* ret = kit.set_results_for_java_call(call, _separate_io_proj);
   kit.push_node(method()->return_type()->basic_type(), ret);
-  _call_node = call;  // Save the call node in case we need it later
-  return kit.transfer_exceptions_into_jvms();
-}
-
-//---------------------------DynamicCallGenerator-----------------------------
-// Internal class which handles all out-of-line invokedynamic calls.
-class DynamicCallGenerator : public CallGenerator {
-public:
-  DynamicCallGenerator(ciMethod* method)
-    : CallGenerator(method)
-  {
-  }
-  virtual JVMState* generate(JVMState* jvms);
-};
-
-JVMState* DynamicCallGenerator::generate(JVMState* jvms) {
-  GraphKit kit(jvms);
-  Compile* C = kit.C;
-  PhaseGVN& gvn = kit.gvn();
-
-  if (C->log() != NULL) {
-    C->log()->elem("dynamic_call bci='%d'", jvms->bci());
-  }
-
-  // Get the constant pool cache from the caller class.
-  ciMethod* caller_method = jvms->method();
-  ciBytecodeStream str(caller_method);
-  str.force_bci(jvms->bci());  // Set the stream to the invokedynamic bci.
-  assert(str.cur_bc() == Bytecodes::_invokedynamic, "wrong place to issue a dynamic call!");
-  ciCPCache* cpcache = str.get_cpcache();
-
-  // Get the offset of the CallSite from the constant pool cache
-  // pointer.
-  int index = str.get_method_index();
-  size_t call_site_offset = cpcache->get_f1_offset(index);
-
-  // Load the CallSite object from the constant pool cache.
-  const TypeOopPtr* cpcache_type   = TypeOopPtr::make_from_constant(cpcache);  // returns TypeAryPtr of type T_OBJECT
-  const TypeOopPtr* call_site_type = TypeOopPtr::make_from_klass(C->env()->CallSite_klass());
-  Node* cpcache_adr   = kit.makecon(cpcache_type);
-  Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, call_site_offset);
-  // The oops in the constant pool cache are not compressed; load then as raw pointers.
-  Node* call_site     = kit.make_load(kit.control(), call_site_adr, call_site_type, T_ADDRESS, Compile::AliasIdxRaw);
-
-  // Load the target MethodHandle from the CallSite object.
-  const TypeOopPtr* target_type = TypeOopPtr::make_from_klass(C->env()->MethodHandle_klass());
-  Node* target_mh_adr = kit.basic_plus_adr(call_site, java_lang_invoke_CallSite::target_offset_in_bytes());
-  Node* target_mh     = kit.make_load(kit.control(), target_mh_adr, target_type, T_OBJECT);
-
-  address resolve_stub = SharedRuntime::get_resolve_opt_virtual_call_stub();
-
-  CallStaticJavaNode* call = new (C, tf()->domain()->cnt()) CallStaticJavaNode(tf(), resolve_stub, method(), kit.bci());
-  // invokedynamic is treated as an optimized invokevirtual.
-  call->set_optimized_virtual(true);
-  // Take extra care (in the presence of argument motion) not to trash the SP:
-  call->set_method_handle_invoke(true);
-
-  // Pass the target MethodHandle as first argument and shift the
-  // other arguments.
-  call->init_req(0 + TypeFunc::Parms, target_mh);
-  uint nargs = call->method()->arg_size();
-  for (uint i = 1; i < nargs; i++) {
-    Node* arg = kit.argument(i - 1);
-    call->init_req(i + TypeFunc::Parms, arg);
-  }
-
-  kit.set_edges_for_java_call(call);
-  Node* ret = kit.set_results_for_java_call(call);
-  kit.push_node(method()->return_type()->basic_type(), ret);
   return kit.transfer_exceptions_into_jvms();
 }
 
@@ -325,15 +256,10 @@
 
 CallGenerator* CallGenerator::for_virtual_call(ciMethod* m, int vtable_index) {
   assert(!m->is_static(), "for_virtual_call mismatch");
-  assert(!m->is_method_handle_invoke(), "should be a direct call");
+  assert(!m->is_method_handle_intrinsic(), "should be a direct call");
   return new VirtualCallGenerator(m, vtable_index);
 }
 
-CallGenerator* CallGenerator::for_dynamic_call(ciMethod* m) {
-  assert(m->is_method_handle_invoke() || m->is_method_handle_adapter(), "for_dynamic_call mismatch");
-  return new DynamicCallGenerator(m);
-}
-
 // Allow inlining decisions to be delayed
 class LateInlineCallGenerator : public DirectCallGenerator {
   CallGenerator* _inline_cg;
@@ -347,7 +273,7 @@
   // Convert the CallStaticJava into an inline
   virtual void do_late_inline();
 
-  JVMState* generate(JVMState* jvms) {
+  virtual JVMState* generate(JVMState* jvms) {
     // Record that this call site should be revisited once the main
     // parse is finished.
     Compile::current()->add_late_inline(this);
@@ -654,272 +580,96 @@
 }
 
 
-//------------------------PredictedDynamicCallGenerator-----------------------
-// Internal class which handles all out-of-line calls checking receiver type.
-class PredictedDynamicCallGenerator : public CallGenerator {
-  ciMethodHandle* _predicted_method_handle;
-  CallGenerator*  _if_missed;
-  CallGenerator*  _if_hit;
-  float           _hit_prob;
-
-public:
-  PredictedDynamicCallGenerator(ciMethodHandle* predicted_method_handle,
-                                CallGenerator* if_missed,
-                                CallGenerator* if_hit,
-                                float hit_prob)
-    : CallGenerator(if_missed->method()),
-      _predicted_method_handle(predicted_method_handle),
-      _if_missed(if_missed),
-      _if_hit(if_hit),
-      _hit_prob(hit_prob)
-  {}
-
-  virtual bool is_inline()   const { return _if_hit->is_inline(); }
-  virtual bool is_deferred() const { return _if_hit->is_deferred(); }
-
-  virtual JVMState* generate(JVMState* jvms);
-};
-
-
-CallGenerator* CallGenerator::for_predicted_dynamic_call(ciMethodHandle* predicted_method_handle,
-                                                         CallGenerator* if_missed,
-                                                         CallGenerator* if_hit,
-                                                         float hit_prob) {
-  return new PredictedDynamicCallGenerator(predicted_method_handle, if_missed, if_hit, hit_prob);
-}
-
-
-CallGenerator* CallGenerator::for_method_handle_call(Node* method_handle, JVMState* jvms,
-                                                     ciMethod* caller, ciMethod* callee, ciCallProfile profile) {
-  assert(callee->is_method_handle_invoke() || callee->is_method_handle_adapter(), "for_method_handle_call mismatch");
-  CallGenerator* cg = CallGenerator::for_method_handle_inline(method_handle, jvms, caller, callee, profile);
+CallGenerator* CallGenerator::for_method_handle_call(JVMState* jvms, ciMethod* caller, ciMethod* callee) {
+  assert(callee->is_method_handle_intrinsic() ||
+         callee->is_compiled_lambda_form(), "for_method_handle_call mismatch");
+  CallGenerator* cg = CallGenerator::for_method_handle_inline(jvms, caller, callee);
   if (cg != NULL)
     return cg;
   return CallGenerator::for_direct_call(callee);
 }
 
-CallGenerator* CallGenerator::for_method_handle_inline(Node* method_handle, JVMState* jvms,
-                                                       ciMethod* caller, ciMethod* callee, ciCallProfile profile) {
-  if (method_handle->Opcode() == Op_ConP) {
-    const TypeOopPtr* oop_ptr = method_handle->bottom_type()->is_oopptr();
-    ciObject* const_oop = oop_ptr->const_oop();
-    ciMethodHandle* method_handle = const_oop->as_method_handle();
-
-    // Set the callee to have access to the class and signature in
-    // the MethodHandleCompiler.
-    method_handle->set_callee(callee);
-    method_handle->set_caller(caller);
-    method_handle->set_call_profile(profile);
-
-    // Get an adapter for the MethodHandle.
-    ciMethod* target_method = method_handle->get_method_handle_adapter();
-    if (target_method != NULL) {
-      CallGenerator* cg = Compile::current()->call_generator(target_method, -1, false, jvms, true, PROB_ALWAYS);
-      if (cg != NULL && cg->is_inline())
-        return cg;
-    }
-  } else if (method_handle->Opcode() == Op_Phi && method_handle->req() == 3 &&
-             method_handle->in(1)->Opcode() == Op_ConP && method_handle->in(2)->Opcode() == Op_ConP) {
-    float prob = PROB_FAIR;
-    Node* meth_region = method_handle->in(0);
-    if (meth_region->is_Region() &&
-        meth_region->in(1)->is_Proj() && meth_region->in(2)->is_Proj() &&
-        meth_region->in(1)->in(0) == meth_region->in(2)->in(0) &&
-        meth_region->in(1)->in(0)->is_If()) {
-      // If diamond, so grab the probability of the test to drive the inlining below
-      prob = meth_region->in(1)->in(0)->as_If()->_prob;
-      if (meth_region->in(1)->is_IfTrue()) {
-        prob = 1 - prob;
+CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee) {
+  GraphKit kit(jvms);
+  PhaseGVN& gvn = kit.gvn();
+  Compile* C = kit.C;
+  vmIntrinsics::ID iid = callee->intrinsic_id();
+  switch (iid) {
+  case vmIntrinsics::_invokeBasic:
+    {
+      // get MethodHandle receiver
+      Node* receiver = kit.argument(0);
+      if (receiver->Opcode() == Op_ConP) {
+        const TypeOopPtr* oop_ptr = receiver->bottom_type()->is_oopptr();
+        ciMethod* target = oop_ptr->const_oop()->as_method_handle()->get_vmtarget();
+        guarantee(!target->is_method_handle_intrinsic(), "should not happen");  // XXX remove
+        const int vtable_index = methodOopDesc::invalid_vtable_index;
+        CallGenerator* cg = C->call_generator(target, vtable_index, false, jvms, true, PROB_ALWAYS);
+        if (cg != NULL && cg->is_inline())
+          return cg;
+      } else {
+        if (PrintInlining)  CompileTask::print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
       }
     }
+    break;
 
-    // selectAlternative idiom merging two constant MethodHandles.
-    // Generate a guard so that each can be inlined.  We might want to
-    // do more inputs at later point but this gets the most common
-    // case.
-    CallGenerator* cg1 = for_method_handle_call(method_handle->in(1), jvms, caller, callee, profile.rescale(1.0 - prob));
-    CallGenerator* cg2 = for_method_handle_call(method_handle->in(2), jvms, caller, callee, profile.rescale(prob));
-    if (cg1 != NULL && cg2 != NULL) {
-      const TypeOopPtr* oop_ptr = method_handle->in(1)->bottom_type()->is_oopptr();
-      ciObject* const_oop = oop_ptr->const_oop();
-      ciMethodHandle* mh = const_oop->as_method_handle();
-      return new PredictedDynamicCallGenerator(mh, cg2, cg1, prob);
-    }
-  }
-  return NULL;
-}
+  case vmIntrinsics::_linkToVirtual:
+  case vmIntrinsics::_linkToStatic:
+  case vmIntrinsics::_linkToSpecial:
+  case vmIntrinsics::_linkToInterface:
+    {
+      // pop MemberName argument
+      Node* member_name = kit.argument(callee->arg_size() - 1);
+      if (member_name->Opcode() == Op_ConP) {
+        const TypeOopPtr* oop_ptr = member_name->bottom_type()->is_oopptr();
+        ciMethod* target = oop_ptr->const_oop()->as_member_name()->get_vmtarget();
 
-CallGenerator* CallGenerator::for_invokedynamic_call(JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile) {
-  assert(callee->is_method_handle_invoke() || callee->is_method_handle_adapter(), "for_invokedynamic_call mismatch");
-  // Get the CallSite object.
-  ciBytecodeStream str(caller);
-  str.force_bci(jvms->bci());  // Set the stream to the invokedynamic bci.
-  ciCallSite* call_site = str.get_call_site();
-  CallGenerator* cg = CallGenerator::for_invokedynamic_inline(call_site, jvms, caller, callee, profile);
-  if (cg != NULL)
-    return cg;
-  return CallGenerator::for_dynamic_call(callee);
-}
-
-CallGenerator* CallGenerator::for_invokedynamic_inline(ciCallSite* call_site, JVMState* jvms,
-                                                       ciMethod* caller, ciMethod* callee, ciCallProfile profile) {
-  ciMethodHandle* method_handle = call_site->get_target();
+        // In lambda forms we erase signature types to avoid resolution
+        // issues involving class loaders.  When we optimize a method handle
+        // invoke to a direct call we must cast the receiver and arguments
+        // to their actual types.
+        ciSignature* signature = target->signature();
+        const int receiver_skip = target->is_static() ? 0 : 1;
+        // Cast receiver to its type.
+        if (!target->is_static()) {
+          Node* arg = kit.argument(0);
+          const TypeOopPtr* arg_type = arg->bottom_type()->isa_oopptr();
+          const Type*       sig_type = TypeOopPtr::make_from_klass(signature->accessing_klass());
+          if (arg_type != NULL && !arg_type->higher_equal(sig_type)) {
+            Node* cast_obj = gvn.transform(new (C, 2) CheckCastPPNode(kit.control(), arg, sig_type));
+            kit.set_argument(0, cast_obj);
+          }
+        }
+        // Cast reference arguments to their types.
+        for (int i = 0; i < signature->count(); i++) {
+          ciType* t = signature->type_at(i);
+          if (t->is_klass()) {
+            Node* arg = kit.argument(receiver_skip + i);
+            const TypeOopPtr* arg_type = arg->bottom_type()->isa_oopptr();
+            const Type*       sig_type = TypeOopPtr::make_from_klass(t->as_klass());
+            if (arg_type != NULL && !arg_type->higher_equal(sig_type)) {
+              Node* cast_obj = gvn.transform(new (C, 2) CheckCastPPNode(kit.control(), arg, sig_type));
+              kit.set_argument(receiver_skip + i, cast_obj);
+            }
+          }
+        }
+        const int vtable_index = methodOopDesc::invalid_vtable_index;
+        const bool call_is_virtual = target->is_abstract();  // FIXME workaround
+        CallGenerator* cg = C->call_generator(target, vtable_index, call_is_virtual, jvms, true, PROB_ALWAYS);
+        if (cg != NULL && cg->is_inline())
+          return cg;
+      }
+    }
+    break;
 
-  // Set the callee to have access to the class and signature in the
-  // MethodHandleCompiler.
-  method_handle->set_callee(callee);
-  method_handle->set_caller(caller);
-  method_handle->set_call_profile(profile);
-
-  // Get an adapter for the MethodHandle.
-  ciMethod* target_method = method_handle->get_invokedynamic_adapter();
-  if (target_method != NULL) {
-    Compile *C = Compile::current();
-    CallGenerator* cg = C->call_generator(target_method, -1, false, jvms, true, PROB_ALWAYS);
-    if (cg != NULL && cg->is_inline()) {
-      // Add a dependence for invalidation of the optimization.
-      if (!call_site->is_constant_call_site()) {
-        C->dependencies()->assert_call_site_target_value(call_site, method_handle);
-      }
-      return cg;
-    }
+  default:
+    fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
+    break;
   }
   return NULL;
 }
 
 
-JVMState* PredictedDynamicCallGenerator::generate(JVMState* jvms) {
-  GraphKit kit(jvms);
-  Compile* C = kit.C;
-  PhaseGVN& gvn = kit.gvn();
-
-  CompileLog* log = C->log();
-  if (log != NULL) {
-    log->elem("predicted_dynamic_call bci='%d'", jvms->bci());
-  }
-
-  const TypeOopPtr* predicted_mh_ptr = TypeOopPtr::make_from_constant(_predicted_method_handle, true);
-  Node* predicted_mh = kit.makecon(predicted_mh_ptr);
-
-  Node* bol = NULL;
-  int bc = jvms->method()->java_code_at_bci(jvms->bci());
-  if (bc != Bytecodes::_invokedynamic) {
-    // This is the selectAlternative idiom for guardWithTest or
-    // similar idioms.
-    Node* receiver = kit.argument(0);
-
-    // Check if the MethodHandle is the expected one
-    Node* cmp = gvn.transform(new (C, 3) CmpPNode(receiver, predicted_mh));
-    bol = gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq) );
-  } else {
-    // Get the constant pool cache from the caller class.
-    ciMethod* caller_method = jvms->method();
-    ciBytecodeStream str(caller_method);
-    str.force_bci(jvms->bci());  // Set the stream to the invokedynamic bci.
-    ciCPCache* cpcache = str.get_cpcache();
-
-    // Get the offset of the CallSite from the constant pool cache
-    // pointer.
-    int index = str.get_method_index();
-    size_t call_site_offset = cpcache->get_f1_offset(index);
-
-    // Load the CallSite object from the constant pool cache.
-    const TypeOopPtr* cpcache_type   = TypeOopPtr::make_from_constant(cpcache);  // returns TypeAryPtr of type T_OBJECT
-    const TypeOopPtr* call_site_type = TypeOopPtr::make_from_klass(C->env()->CallSite_klass());
-    Node* cpcache_adr   = kit.makecon(cpcache_type);
-    Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, call_site_offset);
-    // The oops in the constant pool cache are not compressed; load then as raw pointers.
-    Node* call_site     = kit.make_load(kit.control(), call_site_adr, call_site_type, T_ADDRESS, Compile::AliasIdxRaw);
-
-    // Load the target MethodHandle from the CallSite object.
-    const TypeOopPtr* target_type = TypeOopPtr::make_from_klass(C->env()->MethodHandle_klass());
-    Node* target_adr = kit.basic_plus_adr(call_site, call_site, java_lang_invoke_CallSite::target_offset_in_bytes());
-    Node* target_mh  = kit.make_load(kit.control(), target_adr, target_type, T_OBJECT);
-
-    // Check if the MethodHandle is still the same.
-    Node* cmp = gvn.transform(new (C, 3) CmpPNode(target_mh, predicted_mh));
-    bol = gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq) );
-  }
-  IfNode* iff = kit.create_and_xform_if(kit.control(), bol, _hit_prob, COUNT_UNKNOWN);
-  kit.set_control( gvn.transform(new (C, 1) IfTrueNode (iff)));
-  Node* slow_ctl = gvn.transform(new (C, 1) IfFalseNode(iff));
-
-  SafePointNode* slow_map = NULL;
-  JVMState* slow_jvms;
-  { PreserveJVMState pjvms(&kit);
-    kit.set_control(slow_ctl);
-    if (!kit.stopped()) {
-      slow_jvms = _if_missed->generate(kit.sync_jvms());
-      if (kit.failing())
-        return NULL;  // might happen because of NodeCountInliningCutoff
-      assert(slow_jvms != NULL, "must be");
-      kit.add_exception_states_from(slow_jvms);
-      kit.set_map(slow_jvms->map());
-      if (!kit.stopped())
-        slow_map = kit.stop();
-    }
-  }
-
-  if (kit.stopped()) {
-    // Instance exactly does not matches the desired type.
-    kit.set_jvms(slow_jvms);
-    return kit.transfer_exceptions_into_jvms();
-  }
-
-  // Make the hot call:
-  JVMState* new_jvms = _if_hit->generate(kit.sync_jvms());
-  if (new_jvms == NULL) {
-    // Inline failed, so make a direct call.
-    assert(_if_hit->is_inline(), "must have been a failed inline");
-    CallGenerator* cg = CallGenerator::for_direct_call(_if_hit->method());
-    new_jvms = cg->generate(kit.sync_jvms());
-  }
-  kit.add_exception_states_from(new_jvms);
-  kit.set_jvms(new_jvms);
-
-  // Need to merge slow and fast?
-  if (slow_map == NULL) {
-    // The fast path is the only path remaining.
-    return kit.transfer_exceptions_into_jvms();
-  }
-
-  if (kit.stopped()) {
-    // Inlined method threw an exception, so it's just the slow path after all.
-    kit.set_jvms(slow_jvms);
-    return kit.transfer_exceptions_into_jvms();
-  }
-
-  // Finish the diamond.
-  kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
-  RegionNode* region = new (C, 3) RegionNode(3);
-  region->init_req(1, kit.control());
-  region->init_req(2, slow_map->control());
-  kit.set_control(gvn.transform(region));
-  Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
-  iophi->set_req(2, slow_map->i_o());
-  kit.set_i_o(gvn.transform(iophi));
-  kit.merge_memory(slow_map->merged_memory(), region, 2);
-  uint tos = kit.jvms()->stkoff() + kit.sp();
-  uint limit = slow_map->req();
-  for (uint i = TypeFunc::Parms; i < limit; i++) {
-    // Skip unused stack slots; fast forward to monoff();
-    if (i == tos) {
-      i = kit.jvms()->monoff();
-      if( i >= limit ) break;
-    }
-    Node* m = kit.map()->in(i);
-    Node* n = slow_map->in(i);
-    if (m != n) {
-      const Type* t = gvn.type(m)->meet(gvn.type(n));
-      Node* phi = PhiNode::make(region, m, t);
-      phi->set_req(2, n);
-      kit.map()->set_req(i, gvn.transform(phi));
-    }
-  }
-  return kit.transfer_exceptions_into_jvms();
-}
-
-
 //-------------------------UncommonTrapCallGenerator-----------------------------
 // Internal class which replaces an out-of-line call with an uncommon trap.
 class UncommonTrapCallGenerator : public CallGenerator {
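
For orientation, the new for_method_handle_inline() path above re-narrows each lambda-form argument with a CheckCastPP only when its erased type is not already at least as specific as the signature type. A minimal standalone sketch of that per-argument decision, with a toy type lattice standing in for the compiler's higher_equal() check (hypothetical names, not part of this changeset):

    #include <iostream>
    #include <string>

    // Toy stand-in for the compiler's type lattice: 'a' higher_equal 'b'
    // when 'a' is 'b' or a (single-inheritance) subtype of it.
    struct Type {
      std::string name;
      const Type* super;  // NULL at the root
    };

    static bool higher_equal(const Type* a, const Type* b) {
      for (const Type* t = a; t != NULL; t = t->super)
        if (t == b) return true;
      return false;
    }

    // Mirrors the per-argument decision above: insert a checked cast only
    // when the erased argument type is not already as narrow as the
    // signature type.
    static bool needs_cast(const Type* arg_type, const Type* sig_type) {
      return arg_type != NULL && !higher_equal(arg_type, sig_type);
    }

    int main() {
      Type object = { "java/lang/Object", NULL };
      Type string = { "java/lang/String", &object };
      std::cout << needs_cast(&object, &string) << "\n";  // 1: cast required
      std::cout << needs_cast(&string, &string) << "\n";  // 0: already exact
      return 0;
    }
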
--- a/src/share/vm/opto/callGenerator.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/callGenerator.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_OPTO_CALLGENERATOR_HPP
 #define SHARE_VM_OPTO_CALLGENERATOR_HPP
 
+#include "compiler/compileBroker.hpp"
 #include "opto/callnode.hpp"
 #include "opto/compile.hpp"
 #include "opto/type.hpp"
@@ -44,7 +45,7 @@
   ciMethod*             _method;                // The method being called.
 
  protected:
-  CallGenerator(ciMethod* method);
+  CallGenerator(ciMethod* method) : _method(method) {}
 
  public:
   // Accessors
@@ -111,11 +112,8 @@
   static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index);  // virtual, interface
   static CallGenerator* for_dynamic_call(ciMethod* m);   // invokedynamic
 
-  static CallGenerator* for_method_handle_call(Node* method_handle, JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile);
-  static CallGenerator* for_invokedynamic_call(                     JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile);
-
-  static CallGenerator* for_method_handle_inline(Node* method_handle,   JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile);
-  static CallGenerator* for_invokedynamic_inline(ciCallSite* call_site, JVMState* jvms, ciMethod* caller, ciMethod* callee, ciCallProfile profile);
+  static CallGenerator* for_method_handle_call(  JVMState* jvms, ciMethod* caller, ciMethod* callee);
+  static CallGenerator* for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee);
 
   // How to replace a direct call with an inline version
   static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg);
@@ -145,13 +143,21 @@
   // Registry for intrinsics:
   static CallGenerator* for_intrinsic(ciMethod* m);
   static void register_intrinsic(ciMethod* m, CallGenerator* cg);
+
+  static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) {
+    if (PrintInlining)
+      CompileTask::print_inlining(callee, inline_level, bci, msg);
+  }
 };
 
-class InlineCallGenerator : public CallGenerator {
-  virtual bool      is_inline() const           { return true; }
 
+//------------------------InlineCallGenerator----------------------------------
+class InlineCallGenerator : public CallGenerator {
  protected:
-  InlineCallGenerator(ciMethod* method) : CallGenerator(method) { }
+  InlineCallGenerator(ciMethod* method) : CallGenerator(method) {}
+
+ public:
+  virtual bool      is_inline() const           { return true; }
 };
 
 
--- a/src/share/vm/opto/callnode.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/callnode.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -231,9 +231,9 @@
 }
 
 //=============================================================================
-JVMState::JVMState(ciMethod* method, JVMState* caller) {
+JVMState::JVMState(ciMethod* method, JVMState* caller) :
+  _method(method) {
   assert(method != NULL, "must be valid call site");
-  _method = method;
   _reexecute = Reexecute_Undefined;
   debug_only(_bci = -99);  // random garbage value
   debug_only(_map = (SafePointNode*)-1);
@@ -246,8 +246,8 @@
   _endoff = _monoff;
   _sp = 0;
 }
-JVMState::JVMState(int stack_size) {
-  _method = NULL;
+JVMState::JVMState(int stack_size) :
+  _method(NULL) {
   _bci = InvocationEntryBci;
   _reexecute = Reexecute_Undefined;
   debug_only(_map = (SafePointNode*)-1);
@@ -526,8 +526,8 @@
     }
     _map->dump(2);
   }
-  st->print("JVMS depth=%d loc=%d stk=%d mon=%d scalar=%d end=%d mondepth=%d sp=%d bci=%d reexecute=%s method=",
-             depth(), locoff(), stkoff(), monoff(), scloff(), endoff(), monitor_depth(), sp(), bci(), should_reexecute()?"true":"false");
+  st->print("JVMS depth=%d loc=%d stk=%d arg=%d mon=%d scalar=%d end=%d mondepth=%d sp=%d bci=%d reexecute=%s method=",
+             depth(), locoff(), stkoff(), argoff(), monoff(), scloff(), endoff(), monitor_depth(), sp(), bci(), should_reexecute()?"true":"false");
   if (_method == NULL) {
     st->print_cr("(none)");
   } else {
@@ -1538,10 +1538,7 @@
     // If we are locking an unescaped object, the lock/unlock is unnecessary
     //
     ConnectionGraph *cgr = phase->C->congraph();
-    PointsToNode::EscapeState es = PointsToNode::GlobalEscape;
-    if (cgr != NULL)
-      es = cgr->escape_state(obj_node());
-    if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
+    if (cgr != NULL && cgr->not_global_escape(obj_node())) {
       assert(!is_eliminated() || is_coarsened(), "sanity");
       // The lock could be marked eliminated by lock coarsening
       // code during first IGVN before EA. Replace coarsened flag
@@ -1680,10 +1677,7 @@
     // If we are unlocking an unescaped object, the lock/unlock is unnecessary.
     //
     ConnectionGraph *cgr = phase->C->congraph();
-    PointsToNode::EscapeState es = PointsToNode::GlobalEscape;
-    if (cgr != NULL)
-      es = cgr->escape_state(obj_node());
-    if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
+    if (cgr != NULL && cgr->not_global_escape(obj_node())) {
       assert(!is_eliminated() || is_coarsened(), "sanity");
       // The lock could be marked eliminated by lock coarsening
       // code during first IGVN before EA. Replace coarsened flag
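
The lock/unlock hunks above fold the old three-line escape-state test into a single ConnectionGraph::not_global_escape() query. A sketch of the semantics assumed here (the enum values mirror the points-to escape states; the free function is a stand-in, not the changeset's code):

    #include <cassert>

    // Assumed semantics: the old open-coded test
    //   es != UnknownEscape && es != GlobalEscape
    // folded into one predicate.
    enum EscapeState { UnknownEscape, NoEscape, ArgEscape, GlobalEscape };

    static bool not_global_escape(EscapeState es) {
      return es != UnknownEscape && es != GlobalEscape;
    }

    int main() {
      assert(not_global_escape(NoEscape));        // elision allowed
      assert(not_global_escape(ArgEscape));
      assert(!not_global_escape(UnknownEscape));  // stay conservative
      assert(!not_global_escape(GlobalEscape));
      return 0;
    }
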
--- a/src/share/vm/opto/callnode.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/callnode.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -197,7 +197,7 @@
 
 private:
   JVMState*         _caller;    // List pointer for forming scope chains
-  uint              _depth;     // One mroe than caller depth, or one.
+  uint              _depth;     // One more than caller depth, or one.
   uint              _locoff;    // Offset to locals in input edge mapping
   uint              _stkoff;    // Offset to stack in input edge mapping
   uint              _monoff;    // Offset to monitors in input edge mapping
@@ -223,6 +223,8 @@
   JVMState(int stack_size);  // root state; has a null method
 
   // Access functions for the JVM
+  // ... --|--- loc ---|--- stk ---|--- arg ---|--- mon ---|--- scl ---|
+  //       \ locoff    \ stkoff    \ argoff    \ monoff    \ scloff    \ endoff
   uint              locoff() const { return _locoff; }
   uint              stkoff() const { return _stkoff; }
   uint              argoff() const { return _stkoff + _sp; }
@@ -231,15 +233,16 @@
   uint              endoff() const { return _endoff; }
   uint              oopoff() const { return debug_end(); }
 
-  int            loc_size() const { return _stkoff - _locoff; }
-  int            stk_size() const { return _monoff - _stkoff; }
-  int            mon_size() const { return _scloff - _monoff; }
-  int            scl_size() const { return _endoff - _scloff; }
+  int            loc_size() const { return stkoff() - locoff(); }
+  int            stk_size() const { return monoff() - stkoff(); }
+  int            arg_size() const { return monoff() - argoff(); }
+  int            mon_size() const { return scloff() - monoff(); }
+  int            scl_size() const { return endoff() - scloff(); }
 
-  bool        is_loc(uint i) const { return i >= _locoff && i < _stkoff; }
-  bool        is_stk(uint i) const { return i >= _stkoff && i < _monoff; }
-  bool        is_mon(uint i) const { return i >= _monoff && i < _scloff; }
-  bool        is_scl(uint i) const { return i >= _scloff && i < _endoff; }
+  bool        is_loc(uint i) const { return locoff() <= i && i < stkoff(); }
+  bool        is_stk(uint i) const { return stkoff() <= i && i < monoff(); }
+  bool        is_mon(uint i) const { return monoff() <= i && i < scloff(); }
+  bool        is_scl(uint i) const { return scloff() <= i && i < endoff(); }
 
   uint                      sp() const { return _sp; }
   int                      bci() const { return _bci; }
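
The new layout comment and the accessor-based size/range predicates can be exercised in isolation. A toy model of the input-edge layout, assuming hypothetical offset values (not HotSpot code):

    #include <cassert>

    // Toy model of the JVMState input-edge layout:
    // ... --|--- loc ---|--- stk ---|--- arg ---|--- mon ---|--- scl ---|
    struct Layout {
      unsigned locoff, stkoff, sp, monoff, scloff, endoff;
      unsigned argoff() const { return stkoff + sp; }  // args sit atop the stack
      int stk_size() const { return monoff - stkoff; }
      int arg_size() const { return monoff - argoff(); }  // the new accessor
      bool is_stk(unsigned i) const { return stkoff <= i && i < monoff; }
    };

    int main() {
      Layout l = { 4, 10, 3, 16, 20, 24 };  // hypothetical offsets
      assert(l.argoff() == 13);
      assert(l.arg_size() == 3);            // outgoing args occupy [13, 16)
      assert(l.is_stk(12) && !l.is_stk(16));
      return 0;
    }
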
@@ -546,6 +549,12 @@
   // or result projection is there are several CheckCastPP
   // or returns NULL if there is no one.
   Node *result_cast();
+  // Does this node return a pointer?
+  bool returns_pointer() const {
+    const TypeTuple *r = tf()->range();
+    return (r->cnt() > TypeFunc::Parms &&
+            r->field_at(TypeFunc::Parms)->isa_ptr());
+  }
 
   // Collect all the interesting edges from a call for use in
   // replacing the call by something else.  Used by macro expansion
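
The new returns_pointer() helper just inspects the call's return tuple past the fixed TypeFunc::Parms fields. A simplified stand-in, with Parms and the field kinds invented for illustration:

    #include <cassert>
    #include <vector>

    // Simplified stand-in: a call's return tuple has fixed control/memory
    // fields up to Parms, then (optionally) the result.
    enum Kind { Control, Memory, IntVal, PtrVal };
    static const int Parms = 2;  // hypothetical field count

    static bool returns_pointer(const std::vector<Kind>& range) {
      return (int)range.size() > Parms && range[Parms] == PtrVal;
    }

    int main() {
      std::vector<Kind> oop_ret;
      oop_ret.push_back(Control); oop_ret.push_back(Memory); oop_ret.push_back(PtrVal);
      std::vector<Kind> void_ret;
      void_ret.push_back(Control); void_ret.push_back(Memory);
      assert(returns_pointer(oop_ret));    // call returns an oop
      assert(!returns_pointer(void_ret));  // void return
      return 0;
    }
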
--- a/src/share/vm/opto/chaitin.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/chaitin.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -75,6 +75,7 @@
   // Flags
   if( _is_oop ) tty->print("Oop ");
   if( _is_float ) tty->print("Float ");
+  if( _is_vector ) tty->print("Vector ");
   if( _was_spilled1 ) tty->print("Spilled ");
   if( _was_spilled2 ) tty->print("Spilled2 ");
   if( _direct_conflict ) tty->print("Direct_conflict ");
@@ -221,6 +222,7 @@
   _alternate = 0;
   _matcher._allocation_started = true;
 
+  ResourceArea split_arena;     // Arena for Split local resources
   ResourceArea live_arena;      // Arena for liveness & IFG info
   ResourceMark rm(&live_arena);
 
@@ -323,7 +325,7 @@
     // Bail out if unique gets too large (ie - unique > MaxNodeLimit)
     C->check_node_count(10*must_spill, "out of nodes before split");
     if (C->failing())  return;
-    _maxlrg = Split( _maxlrg );        // Split spilling LRG everywhere
+    _maxlrg = Split(_maxlrg, &split_arena);  // Split spilling LRG everywhere
     // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
     // or we failed to split
     C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split");
@@ -389,7 +391,7 @@
     }
 
     if( !_maxlrg ) return;
-    _maxlrg = Split( _maxlrg );        // Split spilling LRG everywhere
+    _maxlrg = Split(_maxlrg, &split_arena);  // Split spilling LRG everywhere
     // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
     C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after split");
     if (C->failing())  return;
@@ -479,26 +481,37 @@
 
   // Move important info out of the live_arena to longer lasting storage.
   alloc_node_regs(_names.Size());
-  for( uint i=0; i < _names.Size(); i++ ) {
-    if( _names[i] ) {           // Live range associated with Node?
-      LRG &lrg = lrgs( _names[i] );
-      if( lrg.num_regs() == 1 ) {
-        _node_regs[i].set1( lrg.reg() );
-      } else {                  // Must be a register-pair
-        if( !lrg._fat_proj ) {  // Must be aligned adjacent register pair
+  for (uint i=0; i < _names.Size(); i++) {
+    if (_names[i]) {           // Live range associated with Node?
+      LRG &lrg = lrgs(_names[i]);
+      if (!lrg.alive()) {
+        set_bad(i);
+      } else if (lrg.num_regs() == 1) {
+        set1(i, lrg.reg());
+      } else {                  // Must be a register-set
+        if (!lrg._fat_proj) {   // Must be aligned adjacent register set
           // Live ranges record the highest register in their mask.
           // We want the low register for the AD file writer's convenience.
-          _node_regs[i].set2( OptoReg::add(lrg.reg(),-1) );
+          OptoReg::Name hi = lrg.reg(); // Get hi register
+          OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
+          // We have to use the pair [lo,lo+1] even for wide vectors because
+          // the rest of code generation works only with pairs. It is safe
+          // since only 'lo' is used for register encoding.
+          // The second register of the pair is used in ScheduleAndBundle on
+          // SPARC, where the maximum vector size is 8 bytes, which
+          // corresponds to a register pair.
+          // It is also used in BuildOopMaps, but oop operations are not
+          // vectorized.
+          set2(i, lo);
         } else {                // Misaligned; extract 2 bits
           OptoReg::Name hi = lrg.reg(); // Get hi register
           lrg.Remove(hi);       // Yank from mask
           int lo = lrg.mask().find_first_elem(); // Find lo
-          _node_regs[i].set_pair( hi, lo );
+          set_pair(i, hi, lo);
         }
       }
       if( lrg._is_oop ) _node_oops.set(i);
     } else {
-      _node_regs[i].set_bad();
+      set_bad(i);
     }
   }
 
@@ -568,7 +581,7 @@
         // Check for float-vs-int live range (used in register-pressure
         // calculations)
         const Type *n_type = n->bottom_type();
-        if( n_type->is_floatingpoint() )
+        if (n_type->is_floatingpoint())
           lrg._is_float = 1;
 
         // Check for twice prior spilling.  Once prior spilling might have
@@ -599,18 +612,28 @@
         // Limit result register mask to acceptable registers
         const RegMask &rm = n->out_RegMask();
         lrg.AND( rm );
-        // Check for bound register masks
-        const RegMask &lrgmask = lrg.mask();
-        if( lrgmask.is_bound1() || lrgmask.is_bound2() )
-          lrg._is_bound = 1;
-
-        // Check for maximum frequency value
-        if( lrg._maxfreq < b->_freq )
-          lrg._maxfreq = b->_freq;
 
         int ireg = n->ideal_reg();
         assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP,
                 "oops must be in Op_RegP's" );
+
+        // Check for vector live range (only if vector register is used).
+        // On SPARC vectors use RegD, which could be misaligned, so they are
+        // not processed as vectors in the RA.
+        if (RegMask::is_vector(ireg))
+          lrg._is_vector = 1;
+        assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD,
+               "vector must be in vector registers");
+
+        // Check for bound register masks
+        const RegMask &lrgmask = lrg.mask();
+        if (lrgmask.is_bound(ireg))
+          lrg._is_bound = 1;
+
+        // Check for maximum frequency value
+        if (lrg._maxfreq < b->_freq)
+          lrg._maxfreq = b->_freq;
+
         // Check for oop-iness, or long/double
         // Check for multi-kill projection
         switch( ireg ) {
@@ -689,7 +712,7 @@
           // AND changes how we count interferences.  A mis-aligned
           // double can interfere with TWO aligned pairs, or effectively
           // FOUR registers!
-          if( rm.is_misaligned_Pair() ) {
+          if (rm.is_misaligned_pair()) {
             lrg._fat_proj = 1;
             lrg._is_bound = 1;
           }
@@ -706,6 +729,33 @@
           lrg.set_reg_pressure(1);
 #endif
           break;
+        case Op_VecS:
+          assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
+          assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
+          lrg.set_num_regs(RegMask::SlotsPerVecS);
+          lrg.set_reg_pressure(1);
+          break;
+        case Op_VecD:
+          assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecD), "sanity");
+          assert(RegMask::num_registers(Op_VecD) == RegMask::SlotsPerVecD, "sanity");
+          assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecD), "vector should be aligned");
+          lrg.set_num_regs(RegMask::SlotsPerVecD);
+          lrg.set_reg_pressure(1);
+          break;
+        case Op_VecX:
+          assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecX), "sanity");
+          assert(RegMask::num_registers(Op_VecX) == RegMask::SlotsPerVecX, "sanity");
+          assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecX), "vector should be aligned");
+          lrg.set_num_regs(RegMask::SlotsPerVecX);
+          lrg.set_reg_pressure(1);
+          break;
+        case Op_VecY:
+          assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecY), "sanity");
+          assert(RegMask::num_registers(Op_VecY) == RegMask::SlotsPerVecY, "sanity");
+          assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecY), "vector should be aligned");
+          lrg.set_num_regs(RegMask::SlotsPerVecY);
+          lrg.set_reg_pressure(1);
+          break;
         default:
           ShouldNotReachHere();
         }
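
The four new cases size a vector live range by its slot count while charging a register pressure of 1. A sketch assuming the usual 32-bit-slot counts for RegMask::SlotsPerVec{S,D,X,Y} (values assumed, not taken from this diff):

    #include <iostream>

    // Assumed slot counts per vector width: VecS one 32-bit slot, VecD two,
    // VecX four, VecY eight.
    enum IdealVec { VecS = 1, VecD = 2, VecX = 4, VecY = 8 };

    struct SizedLRG {
      int num_regs;      // slots the live range occupies
      int reg_pressure;  // vectors charge 1, per the cases above
    };

    static SizedLRG size_vector_lrg(IdealVec kind) {
      SizedLRG lrg = { (int)kind, 1 };
      return lrg;
    }

    int main() {
      SizedLRG lrg = size_vector_lrg(VecX);  // 128-bit vector
      std::cout << lrg.num_regs << " slots, pressure "
                << lrg.reg_pressure << "\n";  // 4 slots, pressure 1
      return 0;
    }
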
@@ -763,24 +813,38 @@
         } else {
           lrg.AND( rm );
         }
+
         // Check for bound register masks
         const RegMask &lrgmask = lrg.mask();
-        if( lrgmask.is_bound1() || lrgmask.is_bound2() )
+        int kreg = n->in(k)->ideal_reg();
+        bool is_vect = RegMask::is_vector(kreg);
+        assert(n->in(k)->bottom_type()->isa_vect() == NULL ||
+               is_vect || kreg == Op_RegD,
+               "vector must be in vector registers");
+        if (lrgmask.is_bound(kreg))
           lrg._is_bound = 1;
+
         // If this use of a double forces a mis-aligned double,
         // flag as '_fat_proj' - really flag as allowing misalignment
         // AND changes how we count interferences.  A mis-aligned
         // double can interfere with TWO aligned pairs, or effectively
         // FOUR registers!
-        if( lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_Pair() ) {
+#ifdef ASSERT
+        if (is_vect) {
+          assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned");
+          assert(!lrg._fat_proj, "sanity");
+          assert(RegMask::num_registers(kreg) == lrg.num_regs(), "sanity");
+        }
+#endif
+        if (!is_vect && lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_pair()) {
           lrg._fat_proj = 1;
           lrg._is_bound = 1;
         }
         // if the LRG is an unaligned pair, we will have to spill
         // so clear the LRG's register mask if it is not already spilled
-        if ( !n->is_SpillCopy() &&
-               (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&
-               lrgmask.is_misaligned_Pair()) {
+        if (!is_vect && !n->is_SpillCopy() &&
+            (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&
+            lrgmask.is_misaligned_pair()) {
           lrg.Clear();
         }
 
@@ -793,12 +857,14 @@
   } // end for all blocks
 
   // Final per-liverange setup
-  for( uint i2=0; i2<_maxlrg; i2++ ) {
+  for (uint i2=0; i2<_maxlrg; i2++) {
     LRG &lrg = lrgs(i2);
-    if( lrg.num_regs() == 2 && !lrg._fat_proj )
-      lrg.ClearToPairs();
+    assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
+    if (lrg.num_regs() > 1 && !lrg._fat_proj) {
+      lrg.clear_to_sets();
+    }
     lrg.compute_set_mask_size();
-    if( lrg.not_free() ) {      // Handle case where we lose from the start
+    if (lrg.not_free()) {      // Handle case where we lose from the start
       lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
       lrg._direct_conflict = 1;
     }
@@ -1065,6 +1131,33 @@
 
 }
 
+//------------------------------is_legal_reg-----------------------------------
+// Is register 'reg' legal for 'lrg'?
+static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
+  if (reg >= chunk && reg < (chunk + RegMask::CHUNK_SIZE) &&
+      lrg.mask().Member(OptoReg::add(reg,-chunk))) {
+    // The RA uses OptoReg, which represents the highest element of a
+    // register set. For example, vectorX (128-bit) on x86 uses the
+    // [XMM,XMMb,XMMc,XMMd] set, in which XMMd is used by the RA to
+    // represent such vectors. A double value uses the [XMM,XMMb] pair,
+    // and XMMb is used by the RA for it.
+    // The register mask allocates bits for the largest of the overlapping
+    // register sets; on x86 with AVX it uses 8 bits for each XMM register
+    // set.
+    //
+    // The 'lrg' already has a cleared-to-set register mask (done in
+    // Select() before calling choose_color()). Passing the mask.Member(reg)
+    // check above indicates that the size (num_regs) of the 'reg' set is
+    // less than or equal to the 'lrg' set size.
+    // For set size 1, any register which is a member of the 'lrg' mask is
+    // legal.
+    if (lrg.num_regs()==1)
+      return true;
+    // For larger sets only an aligned register with the same set size is legal.
+    int mask = lrg.num_regs()-1;
+    if ((reg&mask) == mask)
+      return true;
+  }
+  return false;
+}
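
The aligned-set test at the end of is_legal_reg() relies on 'reg' naming the highest slot of its set and num_regs being a power of two, so (reg & (num_regs-1)) == num_regs-1 holds exactly at set boundaries. A small worked check:

    #include <iostream>

    // 'reg' names the HIGHEST slot of its set; for a power-of-two set size
    // an aligned set ends at an offset of num_regs-1 within its chunk.
    static bool aligned_set_end(int reg, int num_regs) {
      int mask = num_regs - 1;
      return (reg & mask) == mask;
    }

    int main() {
      // A 4-slot set may end at slot 3, 7, 11, ... but not at slot 5.
      std::cout << aligned_set_end(3, 4)
                << aligned_set_end(7, 4)
                << aligned_set_end(5, 4) << "\n";  // prints 110
      return 0;
    }
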
+
 //------------------------------bias_color-------------------------------------
 // Choose a color using the biasing heuristic
 OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
@@ -1081,10 +1174,7 @@
     while ((datum = elements.next()) != 0) {
       OptoReg::Name reg = lrgs(datum).reg();
       // If this LRG's register is legal for us, choose it
-      if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE &&
-          lrg.mask().Member(OptoReg::add(reg,-chunk)) &&
-          (lrg.num_regs()==1 || // either size 1
-           (reg&1) == 1) )      // or aligned (adjacent reg is available since we already cleared-to-pairs)
+      if (is_legal_reg(lrg, reg, chunk))
         return reg;
     }
   }
@@ -1095,31 +1185,23 @@
     if( !(*(_ifg->_yanked))[copy_lrg] ) {
       OptoReg::Name reg = lrgs(copy_lrg).reg();
       //  And it is legal for you,
-      if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE &&
-          lrg.mask().Member(OptoReg::add(reg,-chunk)) &&
-          (lrg.num_regs()==1 || // either size 1
-           (reg&1) == 1) )      // or aligned (adjacent reg is available since we already cleared-to-pairs)
+      if (is_legal_reg(lrg, reg, chunk))
         return reg;
     } else if( chunk == 0 ) {
       // Choose a color which is legal for him
       RegMask tempmask = lrg.mask();
       tempmask.AND(lrgs(copy_lrg).mask());
-      OptoReg::Name reg;
-      if( lrg.num_regs() == 1 ) {
-        reg = tempmask.find_first_elem();
-      } else {
-        tempmask.ClearToPairs();
-        reg = tempmask.find_first_pair();
-      }
-      if( OptoReg::is_valid(reg) )
+      tempmask.clear_to_sets(lrg.num_regs());
+      OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
+      if (OptoReg::is_valid(reg))
         return reg;
     }
   }
 
   // If no bias info exists, just go with the register selection ordering
-  if( lrg.num_regs() == 2 ) {
-    // Find an aligned pair
-    return OptoReg::add(lrg.mask().find_first_pair(),chunk);
+  if (lrg._is_vector || lrg.num_regs() == 2) {
+    // Find an aligned set
+    return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
   }
 
   // CNC - Fun hack.  Alternate 1st and 2nd selection.  Enables post-allocate
@@ -1149,6 +1231,7 @@
     // Use a heuristic to "bias" the color choice
     return bias_color(lrg, chunk);
 
+    assert(!lrg._is_vector, "should not be a vector here");
   assert( lrg.num_regs() >= 2, "dead live ranges do not color" );
 
   // Fat-proj case or misaligned double argument.
@@ -1238,14 +1321,16 @@
     }
     //assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness");
     // Aligned pairs need aligned masks
-    if( lrg->num_regs() == 2 && !lrg->_fat_proj )
-      lrg->ClearToPairs();
+    assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
+    if (lrg->num_regs() > 1 && !lrg->_fat_proj) {
+      lrg->clear_to_sets();
+    }
 
     // Check if a color is available and if so pick the color
     OptoReg::Name reg = choose_color( *lrg, chunk );
 #ifdef SPARC
     debug_only(lrg->compute_set_mask_size());
-    assert(lrg->num_regs() != 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");
+    assert(lrg->num_regs() < 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");
 #endif
 
     //---------------
@@ -1277,17 +1362,16 @@
       // If the live range is not bound, then we actually had some choices
       // to make.  In this case, the mask has more bits in it than the colors
       // chosen.  Restrict the mask to just what was picked.
-      if( lrg->num_regs() == 1 ) { // Size 1 live range
+      int n_regs = lrg->num_regs();
+      assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
+      if (n_regs == 1 || !lrg->_fat_proj) {
+        assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecY, "sanity");
         lrg->Clear();           // Clear the mask
         lrg->Insert(reg);       // Set regmask to match selected reg
-        lrg->set_mask_size(1);
-      } else if( !lrg->_fat_proj ) {
-        // For pairs, also insert the low bit of the pair
-        assert( lrg->num_regs() == 2, "unbound fatproj???" );
-        lrg->Clear();           // Clear the mask
-        lrg->Insert(reg);       // Set regmask to match selected reg
-        lrg->Insert(OptoReg::add(reg,-1));
-        lrg->set_mask_size(2);
+        // For vectors and pairs, also insert the low registers of the set
+        for (int i = 1; i < n_regs; i++)
+          lrg->Insert(OptoReg::add(reg,-i));
+        lrg->set_mask_size(n_regs);
       } else {                  // Else fatproj
         // mask must be equal to fatproj bits, by definition
       }
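
After a color is chosen for an unbound live range, the mask is narrowed to exactly the selected set: the chosen register is the highest slot, and the n_regs-1 slots below it are inserted too. A sketch with a fixed-width bitset standing in for RegMask (illustrative only):

    #include <bitset>
    #include <iostream>

    // A 32-slot chunk stands in for RegMask: the chosen 'reg' is the highest
    // slot, and the n_regs-1 slots below it are inserted as well.
    static std::bitset<32> restrict_to_set(int reg, int n_regs) {
      std::bitset<32> mask;
      for (int i = 0; i < n_regs; i++)
        mask.set(reg - i);
      return mask;
    }

    int main() {
      // A 4-slot vector colored at slot 7 keeps exactly slots 4..7.
      std::cout << restrict_to_set(7, 4) << "\n";
      return 0;
    }
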
@@ -1483,7 +1567,7 @@
 
   // Check for AddP-related opcodes
   if( !derived->is_Phi() ) {
-    assert(derived->as_Mach()->ideal_Opcode() == Op_AddP, err_msg("but is: %s", derived->Name()));
+    assert(derived->as_Mach()->ideal_Opcode() == Op_AddP, err_msg_res("but is: %s", derived->Name()));
     Node *base = derived->in(AddPNode::Base);
     derived_base_map[derived->_idx] = base;
     return base;
@@ -1860,12 +1944,20 @@
       sprintf(buf,"L%d",lidx);  // No register binding yet
     } else if( !lidx ) {        // Special, not allocated value
       strcpy(buf,"Special");
-    } else if( (lrgs(lidx).num_regs() == 1)
-                ? !lrgs(lidx).mask().is_bound1()
-                : !lrgs(lidx).mask().is_bound2() ) {
-      sprintf(buf,"L%d",lidx); // No register binding yet
-    } else {                    // Hah!  We have a bound machine register
-      print_reg( lrgs(lidx).reg(), this, buf );
+    } else {
+      if (lrgs(lidx)._is_vector) {
+        if (lrgs(lidx).mask().is_bound_set(lrgs(lidx).num_regs()))
+          print_reg( lrgs(lidx).reg(), this, buf ); // a bound machine register
+        else
+          sprintf(buf,"L%d",lidx); // No register binding yet
+      } else if( (lrgs(lidx).num_regs() == 1)
+                 ? lrgs(lidx).mask().is_bound1()
+                 : lrgs(lidx).mask().is_bound_pair() ) {
+        // Hah!  We have a bound machine register
+        print_reg( lrgs(lidx).reg(), this, buf );
+      } else {
+        sprintf(buf,"L%d",lidx); // No register binding yet
+      }
     }
   }
   return buf+strlen(buf);
--- a/src/share/vm/opto/chaitin.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/chaitin.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -99,8 +99,15 @@
   void set_mask_size( int size ) {
     assert((size == 65535) || (size == (int)_mask.Size()), "");
     _mask_size = size;
-    debug_only(_msize_valid=1;)
-    debug_only( if( _num_regs == 2 && !_fat_proj ) _mask.VerifyPairs(); )
+#ifdef ASSERT
+    _msize_valid=1;
+    if (_is_vector) {
+      assert(!_fat_proj, "sanity");
+      _mask.verify_sets(_num_regs);
+    } else if (_num_regs == 2 && !_fat_proj) {
+      _mask.verify_pairs();
+    }
+#endif
   }
   void compute_set_mask_size() { set_mask_size(compute_mask_size()); }
   int mask_size() const { assert( _msize_valid, "mask size not valid" );
@@ -116,7 +123,8 @@
   void Set_All() { _mask.Set_All(); debug_only(_msize_valid=1); _mask_size = RegMask::CHUNK_SIZE; }
   void Insert( OptoReg::Name reg ) { _mask.Insert(reg);  debug_only(_msize_valid=0;) }
   void Remove( OptoReg::Name reg ) { _mask.Remove(reg);  debug_only(_msize_valid=0;) }
-  void ClearToPairs() { _mask.ClearToPairs(); debug_only(_msize_valid=0;) }
+  void clear_to_pairs() { _mask.clear_to_pairs(); debug_only(_msize_valid=0;) }
+  void clear_to_sets()  { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) }
 
   // Number of registers this live range uses when it colors
 private:
@@ -150,6 +158,7 @@
 
   uint   _is_oop:1,             // Live-range holds an oop
          _is_float:1,           // True if in float registers
+         _is_vector:1,          // True if in vector registers
          _was_spilled1:1,       // True if prior spilling on def
          _was_spilled2:1,       // True if twice prior spilling on def
          _is_bound:1,           // live range starts life with no
@@ -461,7 +470,7 @@
 
   // Split uncolorable live ranges
   // Return new number of live ranges
-  uint Split( uint maxlrg );
+  uint Split(uint maxlrg, ResourceArea* split_arena);
 
   // Copy 'was_spilled'-edness from one Node to another.
   void copy_was_spilled( Node *src, Node *dst );
--- a/src/share/vm/opto/classes.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/classes.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -147,7 +147,6 @@
 macro(LoadL)
 macro(LoadL_unaligned)
 macro(LoadPLocked)
-macro(LoadLLocked)
 macro(LoadP)
 macro(LoadN)
 macro(LoadRange)
@@ -246,92 +245,60 @@
 macro(XorL)
 macro(Vector)
 macro(AddVB)
-macro(AddVC)
 macro(AddVS)
 macro(AddVI)
 macro(AddVL)
 macro(AddVF)
 macro(AddVD)
 macro(SubVB)
-macro(SubVC)
 macro(SubVS)
 macro(SubVI)
 macro(SubVL)
 macro(SubVF)
 macro(SubVD)
+macro(MulVS)
+macro(MulVI)
 macro(MulVF)
 macro(MulVD)
 macro(DivVF)
 macro(DivVD)
 macro(LShiftVB)
-macro(LShiftVC)
 macro(LShiftVS)
 macro(LShiftVI)
+macro(LShiftVL)
+macro(RShiftVB)
+macro(RShiftVS)
+macro(RShiftVI)
+macro(RShiftVL)
 macro(URShiftVB)
-macro(URShiftVC)
 macro(URShiftVS)
 macro(URShiftVI)
+macro(URShiftVL)
 macro(AndV)
 macro(OrV)
 macro(XorV)
-macro(VectorLoad)
-macro(Load16B)
-macro(Load8B)
-macro(Load4B)
-macro(Load8C)
-macro(Load4C)
-macro(Load2C)
-macro(Load8S)
-macro(Load4S)
-macro(Load2S)
-macro(Load4I)
-macro(Load2I)
-macro(Load2L)
-macro(Load4F)
-macro(Load2F)
-macro(Load2D)
-macro(VectorStore)
-macro(Store16B)
-macro(Store8B)
-macro(Store4B)
-macro(Store8C)
-macro(Store4C)
-macro(Store2C)
-macro(Store4I)
-macro(Store2I)
-macro(Store2L)
-macro(Store4F)
-macro(Store2F)
-macro(Store2D)
+macro(LoadVector)
+macro(StoreVector)
 macro(Pack)
 macro(PackB)
 macro(PackS)
-macro(PackC)
 macro(PackI)
 macro(PackL)
 macro(PackF)
 macro(PackD)
-macro(Pack2x1B)
-macro(Pack2x2B)
-macro(Replicate16B)
-macro(Replicate8B)
-macro(Replicate4B)
-macro(Replicate8S)
-macro(Replicate4S)
-macro(Replicate2S)
-macro(Replicate8C)
-macro(Replicate4C)
-macro(Replicate2C)
-macro(Replicate4I)
-macro(Replicate2I)
-macro(Replicate2L)
-macro(Replicate4F)
-macro(Replicate2F)
-macro(Replicate2D)
+macro(Pack2L)
+macro(Pack2D)
+macro(ReplicateB)
+macro(ReplicateS)
+macro(ReplicateI)
+macro(ReplicateL)
+macro(ReplicateF)
+macro(ReplicateD)
 macro(Extract)
 macro(ExtractB)
+macro(ExtractUB)
+macro(ExtractC)
 macro(ExtractS)
-macro(ExtractC)
 macro(ExtractI)
 macro(ExtractL)
 macro(ExtractF)
--- a/src/share/vm/opto/compile.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/compile.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1707,7 +1707,6 @@
       if (major_progress()) print_method("PhaseIdealLoop before EA", 2);
       if (failing())  return;
     }
-    TracePhase t2("escapeAnalysis", &_t_escapeAnalysis, true);
     ConnectionGraph::do_analysis(this, &igvn);
 
     if (failing())  return;
@@ -1719,6 +1718,7 @@
     if (failing())  return;
 
     if (congraph() != NULL && macro_count() > 0) {
+      NOT_PRODUCT( TracePhase t2("macroEliminate", &_t_macroEliminate, TimeCompiler); )
       PhaseMacroExpand mexp(igvn);
       mexp.eliminate_macro_nodes();
       igvn.set_delay_transform(false);
@@ -1875,10 +1875,10 @@
 
     cfg.Estimate_Block_Frequency();
     cfg.GlobalCodeMotion(m,unique(),proj_list);
+    if (failing())  return;
 
     print_method("Global code motion", 2);
 
-    if (failing())  return;
     NOT_PRODUCT( verify_graph_edges(); )
 
     debug_only( cfg.verify(); )
@@ -2297,7 +2297,6 @@
   case Op_LoadL:
   case Op_LoadL_unaligned:
   case Op_LoadPLocked:
-  case Op_LoadLLocked:
   case Op_LoadP:
   case Op_LoadN:
   case Op_LoadRange:
@@ -2592,38 +2591,12 @@
     }
     break;
 
-  case Op_Load16B:
-  case Op_Load8B:
-  case Op_Load4B:
-  case Op_Load8S:
-  case Op_Load4S:
-  case Op_Load2S:
-  case Op_Load8C:
-  case Op_Load4C:
-  case Op_Load2C:
-  case Op_Load4I:
-  case Op_Load2I:
-  case Op_Load2L:
-  case Op_Load4F:
-  case Op_Load2F:
-  case Op_Load2D:
-  case Op_Store16B:
-  case Op_Store8B:
-  case Op_Store4B:
-  case Op_Store8C:
-  case Op_Store4C:
-  case Op_Store2C:
-  case Op_Store4I:
-  case Op_Store2I:
-  case Op_Store2L:
-  case Op_Store4F:
-  case Op_Store2F:
-  case Op_Store2D:
+  case Op_LoadVector:
+  case Op_StoreVector:
     break;
 
   case Op_PackB:
   case Op_PackS:
-  case Op_PackC:
   case Op_PackI:
   case Op_PackF:
   case Op_PackL:
@@ -2631,7 +2604,7 @@
     if (n->req()-1 > 2) {
       // Replace many operand PackNodes with a binary tree for matching
       PackNode* p = (PackNode*) n;
-      Node* btp = p->binaryTreePack(Compile::current(), 1, n->req());
+      Node* btp = p->binary_tree_pack(Compile::current(), 1, n->req());
       n->subsume_by(btp);
     }
     break;
@@ -3165,7 +3138,7 @@
     default: ShouldNotReachHere();
     }
     assert(constant_addr, "consts section too small");
-    assert((constant_addr - _masm.code()->consts()->start()) == con.offset(), err_msg("must be: %d == %d", constant_addr - _masm.code()->consts()->start(), con.offset()));
+    assert((constant_addr - _masm.code()->consts()->start()) == con.offset(), err_msg_res("must be: %d == %d", constant_addr - _masm.code()->consts()->start(), con.offset()));
   }
 }
 
@@ -3226,7 +3199,7 @@
   if (Compile::current()->in_scratch_emit_size())  return;
 
   assert(labels.is_nonempty(), "must be");
-  assert((uint) labels.length() == n->outcnt(), err_msg("must be equal: %d == %d", labels.length(), n->outcnt()));
+  assert((uint) labels.length() == n->outcnt(), err_msg_res("must be equal: %d == %d", labels.length(), n->outcnt()));
 
   // Since MachConstantNode::constant_offset() also contains
   // table_base_offset() we need to subtract the table_base_offset()
@@ -3238,7 +3211,7 @@
 
   for (uint i = 0; i < n->outcnt(); i++) {
     address* constant_addr = &jump_table_base[i];
-    assert(*constant_addr == (((address) n) + i), err_msg("all jump-table entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, *constant_addr, (((address) n) + i)));
+    assert(*constant_addr == (((address) n) + i), err_msg_res("all jump-table entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, *constant_addr, (((address) n) + i)));
     *constant_addr = cb.consts()->target(*labels.at(i), (address) constant_addr);
     cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type);
   }
--- a/src/share/vm/opto/compile.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/compile.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -631,7 +631,7 @@
 
   // Decide how to build a call.
   // The profile factor is a discount to apply to this site's interp. profile.
-  CallGenerator*    call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor);
+  CallGenerator*    call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor, bool allow_intrinsics = true);
   bool should_delay_inlining(ciMethod* call_method, JVMState* jvms);
 
   // Report if there were too many traps at a current method and bci.
--- a/src/share/vm/opto/doCall.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/doCall.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -59,13 +59,13 @@
 }
 #endif
 
-CallGenerator* Compile::call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual,
+CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool call_is_virtual,
                                        JVMState* jvms, bool allow_inline,
-                                       float prof_factor) {
+                                       float prof_factor, bool allow_intrinsics) {
   ciMethod*       caller   = jvms->method();
   int             bci      = jvms->bci();
   Bytecodes::Code bytecode = caller->java_code_at_bci(bci);
-  guarantee(call_method != NULL, "failed method resolution");
+  guarantee(callee != NULL, "failed method resolution");
 
   // Dtrace currently doesn't work unless all calls are vanilla
   if (env()->dtrace_method_probes()) {
@@ -91,7 +91,7 @@
     int rid = (receiver_count >= 0)? log->identify(profile.receiver(0)): -1;
     int r2id = (rid != -1 && profile.has_receiver(1))? log->identify(profile.receiver(1)):-1;
     log->begin_elem("call method='%d' count='%d' prof_factor='%g'",
-                    log->identify(call_method), site_count, prof_factor);
+                    log->identify(callee), site_count, prof_factor);
     if (call_is_virtual)  log->print(" virtual='1'");
     if (allow_inline)     log->print(" inline='1'");
     if (receiver_count >= 0) {
@@ -108,8 +108,8 @@
   // then we return it as the inlined version of the call.
   // We do this before the strict f.p. check below because the
   // intrinsics handle strict f.p. correctly.
-  if (allow_inline) {
-    CallGenerator* cg = find_intrinsic(call_method, call_is_virtual);
+  if (allow_inline && allow_intrinsics) {
+    CallGenerator* cg = find_intrinsic(callee, call_is_virtual);
     if (cg != NULL)  return cg;
   }
 
@@ -117,19 +117,12 @@
   // NOTE: This must happen before normal inlining logic below since
   // MethodHandle.invoke* are native methods which obviously don't
   // have bytecodes and so normal inlining fails.
-  if (call_method->is_method_handle_invoke()) {
-    if (bytecode != Bytecodes::_invokedynamic) {
-      GraphKit kit(jvms);
-      Node* method_handle = kit.argument(0);
-      return CallGenerator::for_method_handle_call(method_handle, jvms, caller, call_method, profile);
-    }
-    else {
-      return CallGenerator::for_invokedynamic_call(jvms, caller, call_method, profile);
-    }
+  if (callee->is_method_handle_intrinsic()) {
+    return CallGenerator::for_method_handle_call(jvms, caller, callee);
   }
 
   // Do not inline strict fp into non-strict code, or the reverse
-  if (caller->is_strict() ^ call_method->is_strict()) {
+  if (caller->is_strict() ^ callee->is_strict()) {
     allow_inline = false;
   }
 
@@ -155,26 +148,26 @@
       }
       WarmCallInfo scratch_ci;
       if (!UseOldInlining)
-        scratch_ci.init(jvms, call_method, profile, prof_factor);
-      WarmCallInfo* ci = ilt->ok_to_inline(call_method, jvms, profile, &scratch_ci);
+        scratch_ci.init(jvms, callee, profile, prof_factor);
+      WarmCallInfo* ci = ilt->ok_to_inline(callee, jvms, profile, &scratch_ci);
       assert(ci != &scratch_ci, "do not let this pointer escape");
       bool allow_inline   = (ci != NULL && !ci->is_cold());
       bool require_inline = (allow_inline && ci->is_hot());
 
       if (allow_inline) {
-        CallGenerator* cg = CallGenerator::for_inline(call_method, expected_uses);
-        if (require_inline && cg != NULL && should_delay_inlining(call_method, jvms)) {
+        CallGenerator* cg = CallGenerator::for_inline(callee, expected_uses);
+        if (require_inline && cg != NULL && should_delay_inlining(callee, jvms)) {
           // Delay the inlining of this method to give us the
           // opportunity to perform some high level optimizations
           // first.
-          return CallGenerator::for_late_inline(call_method, cg);
+          return CallGenerator::for_late_inline(callee, cg);
         }
         if (cg == NULL) {
           // Fall through.
         } else if (require_inline || !InlineWarmCalls) {
           return cg;
         } else {
-          CallGenerator* cold_cg = call_generator(call_method, vtable_index, call_is_virtual, jvms, false, prof_factor);
+          CallGenerator* cold_cg = call_generator(callee, vtable_index, call_is_virtual, jvms, false, prof_factor);
           return CallGenerator::for_warm_call(ci, cold_cg, cg);
         }
       }
@@ -189,7 +182,7 @@
           (profile.morphism() == 2 && UseBimorphicInlining)) {
         // receiver_method = profile.method();
         // Profiles do not suggest methods now.  Look it up in the major receiver.
-        receiver_method = call_method->resolve_invoke(jvms->method()->holder(),
+        receiver_method = callee->resolve_invoke(jvms->method()->holder(),
                                                       profile.receiver(0));
       }
       if (receiver_method != NULL) {
@@ -201,7 +194,7 @@
           CallGenerator* next_hit_cg = NULL;
           ciMethod* next_receiver_method = NULL;
           if (profile.morphism() == 2 && UseBimorphicInlining) {
-            next_receiver_method = call_method->resolve_invoke(jvms->method()->holder(),
+            next_receiver_method = callee->resolve_invoke(jvms->method()->holder(),
                                                                profile.receiver(1));
             if (next_receiver_method != NULL) {
               next_hit_cg = this->call_generator(next_receiver_method,
@@ -224,12 +217,12 @@
              ) {
             // Generate uncommon trap for class check failure path
             // in case of monomorphic or bimorphic virtual call site.
-            miss_cg = CallGenerator::for_uncommon_trap(call_method, reason,
+            miss_cg = CallGenerator::for_uncommon_trap(callee, reason,
                         Deoptimization::Action_maybe_recompile);
           } else {
             // Generate virtual call for class check failure path
             // in case of polymorphic virtual call site.
-            miss_cg = CallGenerator::for_virtual_call(call_method, vtable_index);
+            miss_cg = CallGenerator::for_virtual_call(callee, vtable_index);
           }
           if (miss_cg != NULL) {
             if (next_hit_cg != NULL) {
@@ -252,11 +245,11 @@
   // There was no special inlining tactic, or it bailed out.
   // Use a more generic tactic, like a simple call.
   if (call_is_virtual) {
-    return CallGenerator::for_virtual_call(call_method, vtable_index);
+    return CallGenerator::for_virtual_call(callee, vtable_index);
   } else {
     // Class Hierarchy Analysis or Type Profile reveals a unique target,
     // or it is a static or special call.
-    return CallGenerator::for_direct_call(call_method, should_delay_inlining(call_method, jvms));
+    return CallGenerator::for_direct_call(callee, should_delay_inlining(callee, jvms));
   }
 }
 
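The monomorphic/bimorphic branch above keeps its shape after the rename: a
class check guards the devirtualized hit path, and the miss path is either an
uncommon trap or an out-of-line virtual call. A toy illustration of that
dispatch structure, with hypothetical Animal/profiled_speak names (not
HotSpot code):

    #include <cstdio>

    struct Animal { virtual const char* speak() const = 0; virtual ~Animal() {} };
    struct Dog : Animal { const char* speak() const { return "woof"; } };
    struct Cat : Animal { const char* speak() const { return "meow"; } };

    // Hypothetical profile-guided dispatch: if the receiver matches the hot
    // class recorded by the profile, take the devirtualized ("inlined") path
    // (the hit_cg analogue); otherwise fall back to a virtual call (miss_cg).
    static const char* profiled_speak(const Animal* a) {
      if (dynamic_cast<const Dog*>(a) != NULL) {
        return "woof";           // hit path: stands in for the inlined body
      }
      return a->speak();         // miss path: plain virtual dispatch
    }

    int main() {
      Dog d; Cat c;
      std::printf("%s %s\n", profiled_speak(&d), profiled_speak(&c));
      return 0;
    }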
@@ -348,40 +341,48 @@
   kill_dead_locals();
 
   // Set frequently used booleans
-  bool is_virtual = bc() == Bytecodes::_invokevirtual;
-  bool is_virtual_or_interface = is_virtual || bc() == Bytecodes::_invokeinterface;
-  bool has_receiver = is_virtual_or_interface || bc() == Bytecodes::_invokespecial;
-  bool is_invokedynamic = bc() == Bytecodes::_invokedynamic;
+  const bool is_virtual = bc() == Bytecodes::_invokevirtual;
+  const bool is_virtual_or_interface = is_virtual || bc() == Bytecodes::_invokeinterface;
+  const bool has_receiver = is_virtual_or_interface || bc() == Bytecodes::_invokespecial;
 
   // Find target being called
   bool             will_link;
-  ciMethod*        dest_method   = iter().get_method(will_link);
-  ciInstanceKlass* holder_klass  = dest_method->holder();
-  ciKlass* holder = iter().get_declared_method_holder();
+  ciSignature*     declared_signature = NULL;
+  ciMethod*        orig_callee  = iter().get_method(will_link, &declared_signature);  // callee in the bytecode
+  ciInstanceKlass* holder_klass = orig_callee->holder();
+  ciKlass*         holder       = iter().get_declared_method_holder();
   ciInstanceKlass* klass = ciEnv::get_instance_klass_for_declared_method_holder(holder);
-
-  int nargs = dest_method->arg_size();
-  if (is_invokedynamic)  nargs -= 1;
+  assert(declared_signature != NULL, "cannot be null");
 
   // uncommon-trap when callee is unloaded, uninitialized or will not link
   // bailout when too many arguments for register representation
-  if (!will_link || can_not_compile_call_site(dest_method, klass)) {
+  if (!will_link || can_not_compile_call_site(orig_callee, klass)) {
 #ifndef PRODUCT
     if (PrintOpto && (Verbose || WizardMode)) {
       method()->print_name(); tty->print_cr(" can not compile call at bci %d to:", bci());
-      dest_method->print_name(); tty->cr();
+      orig_callee->print_name(); tty->cr();
     }
 #endif
     return;
   }
   assert(holder_klass->is_loaded(), "");
-  assert((dest_method->is_static() || is_invokedynamic) == !has_receiver , "must match bc");
+  //assert((bc_callee->is_static() || is_invokedynamic) == !has_receiver , "must match bc");  // XXX invokehandle (cur_bc_raw)
   // Note: this takes into account invokeinterface of methods declared in java/lang/Object,
   // which should be invokevirtuals but according to the VM spec may be invokeinterfaces
   assert(holder_klass->is_interface() || holder_klass->super() == NULL || (bc() != Bytecodes::_invokeinterface), "must match bc");
   // Note:  In the absence of miranda methods, an abstract class K can perform
   // an invokevirtual directly on an interface method I.m if K implements I.
 
+  const int nargs = orig_callee->arg_size();
+
+  // Push the appendix argument (MethodType, CallSite, etc.), if there is one.
+  if (iter().has_appendix()) {
+    ciObject* appendix_arg = iter().get_appendix();
+    const TypeOopPtr* appendix_arg_type = TypeOopPtr::make_from_constant(appendix_arg);
+    Node* appendix_arg_node = _gvn.makecon(appendix_arg_type);
+    push(appendix_arg_node);
+  }
+
   // ---------------------
   // Does Class Hierarchy Analysis reveal only a single target of a v-call?
   // Then we may inline or make a static call, but become dependent on there being only 1 target.
@@ -392,21 +393,21 @@
   // Choose call strategy.
   bool call_is_virtual = is_virtual_or_interface;
   int vtable_index = methodOopDesc::invalid_vtable_index;
-  ciMethod* call_method = dest_method;
+  ciMethod* callee = orig_callee;
 
   // Try to get the most accurate receiver type
   if (is_virtual_or_interface) {
     Node*             receiver_node = stack(sp() - nargs);
     const TypeOopPtr* receiver_type = _gvn.type(receiver_node)->isa_oopptr();
-    ciMethod* optimized_virtual_method = optimize_inlining(method(), bci(), klass, dest_method, receiver_type);
+    ciMethod* optimized_virtual_method = optimize_inlining(method(), bci(), klass, orig_callee, receiver_type);
 
    // Has the call been sufficiently improved such that it is no longer virtual?
     if (optimized_virtual_method != NULL) {
-      call_method     = optimized_virtual_method;
+      callee          = optimized_virtual_method;
       call_is_virtual = false;
-    } else if (!UseInlineCaches && is_virtual && call_method->is_loaded()) {
+    } else if (!UseInlineCaches && is_virtual && callee->is_loaded()) {
       // We can make a vtable call at this site
-      vtable_index = call_method->resolve_vtable_index(method()->holder(), klass);
+      vtable_index = callee->resolve_vtable_index(method()->holder(), klass);
     }
   }
 
@@ -416,22 +417,25 @@
   bool try_inline = (C->do_inlining() || InlineAccessors);
 
   // ---------------------
-  inc_sp(- nargs);              // Temporarily pop args for JVM state of call
+  dec_sp(nargs);              // Temporarily pop args for JVM state of call
   JVMState* jvms = sync_jvms();
 
   // ---------------------
   // Decide call tactic.
   // This call checks with CHA, the interpreter profile, intrinsics table, etc.
   // It decides whether inlining is desirable or not.
-  CallGenerator* cg = C->call_generator(call_method, vtable_index, call_is_virtual, jvms, try_inline, prof_factor());
+  CallGenerator* cg = C->call_generator(callee, vtable_index, call_is_virtual, jvms, try_inline, prof_factor());
+
+  // NOTE:  Don't use orig_callee and callee after this point!  Use cg->method() instead.
+  orig_callee = callee = NULL;
 
   // ---------------------
   // Round double arguments before call
-  round_double_arguments(dest_method);
+  round_double_arguments(cg->method());
 
 #ifndef PRODUCT
   // bump global counters for calls
-  count_compiled_calls(false/*at_method_entry*/, cg->is_inline());
+  count_compiled_calls(/*at_method_entry*/ false, cg->is_inline());
 
   // Record first part of parsing work for this call
   parse_histogram()->record_change();
@@ -447,29 +451,20 @@
   // because exceptions don't return to the call site.)
   profile_call(receiver);
 
-  JVMState* new_jvms;
-  if ((new_jvms = cg->generate(jvms)) == NULL) {
+  JVMState* new_jvms = cg->generate(jvms);
+  if (new_jvms == NULL) {
     // When inlining attempt fails (e.g., too many arguments),
     // it may contaminate the current compile state, making it
     // impossible to pull back and try again.  Once we call
     // cg->generate(), we are committed.  If it fails, the whole
     // compilation task is compromised.
     if (failing())  return;
-#ifndef PRODUCT
-    if (PrintOpto || PrintOptoInlining || PrintInlining) {
-      // Only one fall-back, so if an intrinsic fails, ignore any bytecodes.
-      if (cg->is_intrinsic() && call_method->code_size() > 0) {
-        tty->print("Bailed out of intrinsic, will not inline: ");
-        call_method->print_name(); tty->cr();
-      }
-    }
-#endif
+
     // This can happen if a library intrinsic is available, but refuses
     // the call site, perhaps because it did not match a pattern the
-    // intrinsic was expecting to optimize.  The fallback position is
-    // to call out-of-line.
-    try_inline = false;  // Inline tactic bailed out.
-    cg = C->call_generator(call_method, vtable_index, call_is_virtual, jvms, try_inline, prof_factor());
+    // intrinsic was expecting to optimize. It should always be possible
+    // to get back a normal Java call that may inline in that case.
+    cg = C->call_generator(cg->method(), vtable_index, call_is_virtual, jvms, try_inline, prof_factor(), /* allow_intrinsics= */ false);
     if ((new_jvms = cg->generate(jvms)) == NULL) {
       guarantee(failing(), "call failed to generate:  calls should work");
       return;
@@ -478,8 +473,8 @@
 
   if (cg->is_inline()) {
     // Accumulate has_loops estimate
-    C->set_has_loops(C->has_loops() || call_method->has_loops());
-    C->env()->notice_inlined_method(call_method);
+    C->set_has_loops(C->has_loops() || cg->method()->has_loops());
+    C->env()->notice_inlined_method(cg->method());
   }
 
   // Reset parser state from [new_]jvms, which now carries results of the call.
@@ -501,20 +496,72 @@
     }
 
     // Round double result after a call from strict to non-strict code
-    round_double_result(dest_method);
+    round_double_result(cg->method());
+
+    ciType* rtype = cg->method()->return_type();
+    if (Bytecodes::has_optional_appendix(iter().cur_bc_raw())) {
+      // Be careful here with return types.
+      ciType* ctype = declared_signature->return_type();
+      if (ctype != rtype) {
+        BasicType rt = rtype->basic_type();
+        BasicType ct = ctype->basic_type();
+        Node* retnode = peek();
+        if (ct == T_VOID) {
+          // It's OK for a method to return a value that is discarded.
+          // The discarding does not require any special action from the caller.
+          // The Java code knows this, at VerifyType.isNullConversion.
+          pop_node(rt);  // whatever it was, pop it
+          retnode = top();
+        } else if (rt == T_INT || is_subword_type(rt)) {
+          // FIXME: This logic should be factored out.
+          if (ct == T_BOOLEAN) {
+            retnode = _gvn.transform( new (C, 3) AndINode(retnode, intcon(0x1)) );
+          } else if (ct == T_CHAR) {
+            retnode = _gvn.transform( new (C, 3) AndINode(retnode, intcon(0xFFFF)) );
+          } else if (ct == T_BYTE) {
+            retnode = _gvn.transform( new (C, 3) LShiftINode(retnode, intcon(24)) );
+            retnode = _gvn.transform( new (C, 3) RShiftINode(retnode, intcon(24)) );
+          } else if (ct == T_SHORT) {
+            retnode = _gvn.transform( new (C, 3) LShiftINode(retnode, intcon(16)) );
+            retnode = _gvn.transform( new (C, 3) RShiftINode(retnode, intcon(16)) );
+          } else {
+            assert(ct == T_INT, err_msg_res("rt=%s, ct=%s", type2name(rt), type2name(ct)));
+          }
+        } else if (rt == T_OBJECT || rt == T_ARRAY) {
+          assert(ct == T_OBJECT || ct == T_ARRAY, err_msg_res("rt=%s, ct=%s", type2name(rt), type2name(ct)));
+          if (ctype->is_loaded()) {
+            const TypeOopPtr* arg_type = TypeOopPtr::make_from_klass(rtype->as_klass());
+            const Type*       sig_type = TypeOopPtr::make_from_klass(ctype->as_klass());
+            if (arg_type != NULL && !arg_type->higher_equal(sig_type)) {
+              Node* cast_obj = _gvn.transform(new (C, 2) CheckCastPPNode(control(), retnode, sig_type));
+              pop();
+              push(cast_obj);
+            }
+          }
+        } else {
+          assert(ct == rt, err_msg_res("unexpected mismatch rt=%d, ct=%d", rt, ct));
+          // push a zero; it's better than getting an oop/int mismatch
+          retnode = pop_node(rt);
+          retnode = zerocon(ct);
+          push_node(ct, retnode);
+        }
+        // Now that the value is well-behaved, continue with the call-site type.
+        rtype = ctype;
+      }
+    }
 
     // If the return type of the method is not loaded, assert that the
     // value we got is a null.  Otherwise, we need to recompile.
-    if (!dest_method->return_type()->is_loaded()) {
+    if (!rtype->is_loaded()) {
 #ifndef PRODUCT
       if (PrintOpto && (Verbose || WizardMode)) {
         method()->print_name(); tty->print_cr(" asserting nullness of result at bci: %d", bci());
-        dest_method->print_name(); tty->cr();
+        cg->method()->print_name(); tty->cr();
       }
 #endif
       if (C->log() != NULL) {
         C->log()->elem("assert_null reason='return' klass='%d'",
-                       C->log()->identify(dest_method->return_type()));
+                       C->log()->identify(rtype));
       }
       // If there is going to be a trap, put it at the next bytecode:
       set_bci(iter().next_bci());
@@ -594,8 +641,8 @@
 #ifndef PRODUCT
       // We do not expect the same handler bci to take both cold unloaded
       // and hot loaded exceptions.  But, watch for it.
-      if (extype->is_loaded()) {
-        tty->print_cr("Warning: Handler @%d takes mixed loaded/unloaded exceptions in ");
+      if ((Verbose || WizardMode) && extype->is_loaded()) {
+        tty->print("Warning: Handler @%d takes mixed loaded/unloaded exceptions in ", bci());
         method()->print_name(); tty->cr();
       } else if (PrintOpto && (Verbose || WizardMode)) {
         tty->print("Bailing out on unloaded exception type ");
@@ -789,7 +836,7 @@
     if( at_method_entry ) {
       // bump invocation counter if top method (for statistics)
       if (CountCompiledCalls && depth() == 1) {
-        const TypeInstPtr* addr_type = TypeInstPtr::make(method());
+        const TypeOopPtr* addr_type = TypeOopPtr::make_from_constant(method());
         Node* adr1 = makecon(addr_type);
         Node* adr2 = basic_plus_adr(adr1, adr1, in_bytes(methodOopDesc::compiled_invocation_counter_offset()));
         increment_counter(adr2);
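The return-type adaptation added to Parse::do_call() above narrows an
int-typed result to the call site's declared subword type: boolean and char
by masking, byte and short by a shift-left/arithmetic-shift-right pair. The
same arithmetic as a self-contained C++ sketch, assuming nothing beyond
two's-complement 32-bit ints:

    #include <cassert>
    #include <cstdint>

    // Same narrowing the AndI/LShiftI/RShiftI node pairs above perform.
    static int32_t narrow_to_boolean(int32_t v) { return v & 0x1; }
    static int32_t narrow_to_char(int32_t v)    { return v & 0xFFFF; }
    static int32_t narrow_to_byte(int32_t v) {   // sign-extend low 8 bits
      return (int32_t)((uint32_t)v << 24) >> 24;
    }
    static int32_t narrow_to_short(int32_t v) {  // sign-extend low 16 bits
      return (int32_t)((uint32_t)v << 16) >> 16;
    }

    int main() {
      assert(narrow_to_boolean(3) == 1);
      assert(narrow_to_char(-1) == 0xFFFF);
      assert(narrow_to_byte(0x1FF) == -1);     // 0xFF sign-extends to -1
      assert(narrow_to_short(0x1FFFF) == -1);  // 0xFFFF sign-extends to -1
      return 0;
    }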
--- a/src/share/vm/opto/domgraph.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/domgraph.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -465,15 +465,11 @@
           // Kill dead input path
           assert( !visited.test(whead->in(i)->_idx),
                   "input with no loop must be dead" );
-          _igvn.hash_delete(whead);
-          whead->del_req(i);
-          _igvn._worklist.push(whead);
+          _igvn.delete_input_of(whead, i);
           for (DUIterator_Fast jmax, j = whead->fast_outs(jmax); j < jmax; j++) {
             Node* p = whead->fast_out(j);
             if( p->is_Phi() ) {
-              _igvn.hash_delete(p);
-              p->del_req(i);
-              _igvn._worklist.push(p);
+              _igvn.delete_input_of(p, i);
             }
           }
           i--;                  // Rerun same iteration
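PhaseIterGVN::delete_input_of used in the domgraph.cpp change bundles the
three steps the old code spelled out at each site: drop the node from the
value-numbering hash, delete the input, and re-queue the node. A toy model of
that shape, with hypothetical Node/Igvn types standing in for the HotSpot
classes:

    #include <vector>

    // Toy stand-ins for Node and PhaseIterGVN, just to show the refactor shape.
    struct Node { std::vector<Node*> in; };

    struct Igvn {
      std::vector<Node*> worklist;
      void hash_delete(Node*) { /* remove from value-numbering table */ }
      // Bundles the three steps the old code spelled out at every call site.
      void delete_input_of(Node* n, size_t i) {
        hash_delete(n);                  // was: _igvn.hash_delete(whead);
        n->in.erase(n->in.begin() + i);  // was: whead->del_req(i);
        worklist.push_back(n);           // was: _igvn._worklist.push(whead);
      }
    };

    int main() {
      Node a, b; b.in = { &a, &a };
      Igvn igvn;
      igvn.delete_input_of(&b, 1);
      return (b.in.size() == 1 && igvn.worklist.size() == 1) ? 0 : 1;
    }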
--- a/src/share/vm/opto/escape.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/escape.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "ci/bcEscapeAnalyzer.hpp"
+#include "compiler/compileLog.hpp"
 #include "libadt/vectset.hpp"
 #include "memory/allocation.hpp"
 #include "opto/c2compiler.hpp"
@@ -34,125 +35,1949 @@
 #include "opto/phaseX.hpp"
 #include "opto/rootnode.hpp"
 
-void PointsToNode::add_edge(uint targIdx, PointsToNode::EdgeType et) {
-  uint v = (targIdx << EdgeShift) + ((uint) et);
-  if (_edges == NULL) {
-     Arena *a = Compile::current()->comp_arena();
-    _edges = new(a) GrowableArray<uint>(a, INITIAL_EDGE_COUNT, 0, 0);
-  }
-  _edges->append_if_missing(v);
-}
-
-void PointsToNode::remove_edge(uint targIdx, PointsToNode::EdgeType et) {
-  uint v = (targIdx << EdgeShift) + ((uint) et);
-
-  _edges->remove(v);
-}
-
-#ifndef PRODUCT
-static const char *node_type_names[] = {
-  "UnknownType",
-  "JavaObject",
-  "LocalVar",
-  "Field"
-};
-
-static const char *esc_names[] = {
-  "UnknownEscape",
-  "NoEscape",
-  "ArgEscape",
-  "GlobalEscape"
-};
-
-static const char *edge_type_suffix[] = {
- "?", // UnknownEdge
- "P", // PointsToEdge
- "D", // DeferredEdge
- "F"  // FieldEdge
-};
-
-void PointsToNode::dump(bool print_state) const {
-  NodeType nt = node_type();
-  tty->print("%s ", node_type_names[(int) nt]);
-  if (print_state) {
-    EscapeState es = escape_state();
-    tty->print("%s %s ", esc_names[(int) es], _scalar_replaceable ? "":"NSR");
-  }
-  tty->print("[[");
-  for (uint i = 0; i < edge_count(); i++) {
-    tty->print(" %d%s", edge_target(i), edge_type_suffix[(int) edge_type(i)]);
-  }
-  tty->print("]]  ");
-  if (_node == NULL)
-    tty->print_cr("<null>");
-  else
-    _node->dump();
-}
-#endif
-
 ConnectionGraph::ConnectionGraph(Compile * C, PhaseIterGVN *igvn) :
-  _nodes(C->comp_arena(), C->unique(), C->unique(), PointsToNode()),
-  _processed(C->comp_arena()),
-  pt_ptset(C->comp_arena()),
-  pt_visited(C->comp_arena()),
-  pt_worklist(C->comp_arena(), 4, 0, 0),
+  _nodes(C->comp_arena(), C->unique(), C->unique(), NULL),
   _collecting(true),
-  _progress(false),
+  _verify(false),
   _compile(C),
   _igvn(igvn),
   _node_map(C->comp_arena()) {
-
-  _phantom_object = C->top()->_idx,
-  add_node(C->top(), PointsToNode::JavaObject, PointsToNode::GlobalEscape,true);
-
+  // Add unknown java object.
+  add_java_object(C->top(), PointsToNode::GlobalEscape);
+  phantom_obj = ptnode_adr(C->top()->_idx)->as_JavaObject();
   // Add ConP(#NULL) and ConN(#NULL) nodes.
   Node* oop_null = igvn->zerocon(T_OBJECT);
-  _oop_null = oop_null->_idx;
-  assert(_oop_null < nodes_size(), "should be created already");
-  add_node(oop_null, PointsToNode::JavaObject, PointsToNode::NoEscape, true);
-
+  assert(oop_null->_idx < nodes_size(), "should be created already");
+  add_java_object(oop_null, PointsToNode::NoEscape);
+  null_obj = ptnode_adr(oop_null->_idx)->as_JavaObject();
   if (UseCompressedOops) {
     Node* noop_null = igvn->zerocon(T_NARROWOOP);
-    _noop_null = noop_null->_idx;
-    assert(_noop_null < nodes_size(), "should be created already");
-    add_node(noop_null, PointsToNode::JavaObject, PointsToNode::NoEscape, true);
-  } else {
-    _noop_null = _oop_null; // Should be initialized
+    assert(noop_null->_idx < nodes_size(), "should be created already");
+    map_ideal_node(noop_null, null_obj);
   }
   _pcmp_neq = NULL; // Should be initialized
   _pcmp_eq  = NULL;
 }
 
-void ConnectionGraph::add_pointsto_edge(uint from_i, uint to_i) {
-  PointsToNode *f = ptnode_adr(from_i);
-  PointsToNode *t = ptnode_adr(to_i);
+bool ConnectionGraph::has_candidates(Compile *C) {
+  // EA brings benefits only when the code has allocations and/or locks which
+  // are represented by ideal Macro nodes.
+  int cnt = C->macro_count();
+  for( int i=0; i < cnt; i++ ) {
+    Node *n = C->macro_node(i);
+    if ( n->is_Allocate() )
+      return true;
+    if( n->is_Lock() ) {
+      Node* obj = n->as_Lock()->obj_node()->uncast();
+      if( !(obj->is_Parm() || obj->is_Con()) )
+        return true;
+    }
+  }
+  return false;
+}
+
+void ConnectionGraph::do_analysis(Compile *C, PhaseIterGVN *igvn) {
+  Compile::TracePhase t2("escapeAnalysis", &Phase::_t_escapeAnalysis, true);
+  ResourceMark rm;
+
+  // Add ConP#NULL and ConN#NULL nodes before ConnectionGraph construction
+  // to create space for them in ConnectionGraph::_nodes[].
+  Node* oop_null = igvn->zerocon(T_OBJECT);
+  Node* noop_null = igvn->zerocon(T_NARROWOOP);
+  ConnectionGraph* congraph = new(C->comp_arena()) ConnectionGraph(C, igvn);
+  // Perform escape analysis
+  if (congraph->compute_escape()) {
+    // There are non escaping objects.
+    C->set_congraph(congraph);
+  }
+  // Cleanup.
+  if (oop_null->outcnt() == 0)
+    igvn->hash_delete(oop_null);
+  if (noop_null->outcnt() == 0)
+    igvn->hash_delete(noop_null);
+}
+
+bool ConnectionGraph::compute_escape() {
+  Compile* C = _compile;
+  PhaseGVN* igvn = _igvn;
+
+  // Worklists used by EA.
+  Unique_Node_List delayed_worklist;
+  GrowableArray<Node*> alloc_worklist;
+  GrowableArray<Node*> ptr_cmp_worklist;
+  GrowableArray<Node*> storestore_worklist;
+  GrowableArray<PointsToNode*>   ptnodes_worklist;
+  GrowableArray<JavaObjectNode*> java_objects_worklist;
+  GrowableArray<JavaObjectNode*> non_escaped_worklist;
+  GrowableArray<FieldNode*>      oop_fields_worklist;
+  DEBUG_ONLY( GrowableArray<Node*> addp_worklist; )
+
+  { Compile::TracePhase t3("connectionGraph", &Phase::_t_connectionGraph, true);
+
+  // 1. Populate Connection Graph (CG) with PointsTo nodes.
+  ideal_nodes.map(C->unique(), NULL);  // preallocate space
+  // Initialize worklist
+  if (C->root() != NULL) {
+    ideal_nodes.push(C->root());
+  }
+  for( uint next = 0; next < ideal_nodes.size(); ++next ) {
+    Node* n = ideal_nodes.at(next);
+    // Create PointsTo nodes and add them to Connection Graph. Called
+    // only once per ideal node since ideal_nodes is a Unique_Node list.
+    add_node_to_connection_graph(n, &delayed_worklist);
+    PointsToNode* ptn = ptnode_adr(n->_idx);
+    if (ptn != NULL) {
+      ptnodes_worklist.append(ptn);
+      if (ptn->is_JavaObject()) {
+        java_objects_worklist.append(ptn->as_JavaObject());
+        if ((n->is_Allocate() || n->is_CallStaticJava()) &&
+            (ptn->escape_state() < PointsToNode::GlobalEscape)) {
+          // Only the results of allocations and Java static calls are interesting.
+          non_escaped_worklist.append(ptn->as_JavaObject());
+        }
+      } else if (ptn->is_Field() && ptn->as_Field()->is_oop()) {
+        oop_fields_worklist.append(ptn->as_Field());
+      }
+    }
+    if (n->is_MergeMem()) {
+      // Collect all MergeMem nodes to add memory slices for
+      // scalar replaceable objects in split_unique_types().
+      _mergemem_worklist.append(n->as_MergeMem());
+    } else if (OptimizePtrCompare && n->is_Cmp() &&
+               (n->Opcode() == Op_CmpP || n->Opcode() == Op_CmpN)) {
+      // Collect pointer-compare nodes.
+      ptr_cmp_worklist.append(n);
+    } else if (n->is_MemBarStoreStore()) {
+      // Collect all MemBarStoreStore nodes so that depending on the
+      // escape status of the associated Allocate node some of them
+      // may be eliminated.
+      storestore_worklist.append(n);
+#ifdef ASSERT
+    } else if(n->is_AddP()) {
+      // Collect address nodes for graph verification.
+      addp_worklist.append(n);
+#endif
+    }
+    for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+      Node* m = n->fast_out(i);   // Get user
+      ideal_nodes.push(m);
+    }
+  }
+  if (non_escaped_worklist.length() == 0) {
+    _collecting = false;
+    return false; // Nothing to do.
+  }
+  // Add final simple edges to graph.
+  while(delayed_worklist.size() > 0) {
+    Node* n = delayed_worklist.pop();
+    add_final_edges(n);
+  }
+  int ptnodes_length = ptnodes_worklist.length();
+
+#ifdef ASSERT
+  if (VerifyConnectionGraph) {
+    // Verify that no new simple edges could be created and that all
+    // local vars have edges.
+    _verify = true;
+    for (int next = 0; next < ptnodes_length; ++next) {
+      PointsToNode* ptn = ptnodes_worklist.at(next);
+      add_final_edges(ptn->ideal_node());
+      if (ptn->is_LocalVar() && ptn->edge_count() == 0) {
+        ptn->dump();
+        assert(ptn->as_LocalVar()->edge_count() > 0, "sanity");
+      }
+    }
+    _verify = false;
+  }
+#endif
+
+  // 2. Finish Graph construction by propagating references to all
+  //    java objects through graph.
+  if (!complete_connection_graph(ptnodes_worklist, non_escaped_worklist,
+                                 java_objects_worklist, oop_fields_worklist)) {
+    // All objects escaped or hit time or iterations limits.
+    _collecting = false;
+    return false;
+  }
+
+  // 3. Adjust scalar_replaceable state of nonescaping objects and push
+  //    scalar replaceable allocations on alloc_worklist for processing
+  //    in split_unique_types().
+  int non_escaped_length = non_escaped_worklist.length();
+  for (int next = 0; next < non_escaped_length; next++) {
+    JavaObjectNode* ptn = non_escaped_worklist.at(next);
+    if (ptn->escape_state() == PointsToNode::NoEscape &&
+        ptn->scalar_replaceable()) {
+      adjust_scalar_replaceable_state(ptn);
+      if (ptn->scalar_replaceable()) {
+        alloc_worklist.append(ptn->ideal_node());
+      }
+    }
+  }
+
+#ifdef ASSERT
+  if (VerifyConnectionGraph) {
+    // Verify that graph is complete - no new edges could be added or needed.
+    verify_connection_graph(ptnodes_worklist, non_escaped_worklist,
+                            java_objects_worklist, addp_worklist);
+  }
+  assert(C->unique() == nodes_size(), "no new ideal nodes should be added during ConnectionGraph build");
+  assert(null_obj->escape_state() == PointsToNode::NoEscape &&
+         null_obj->edge_count() == 0 &&
+         !null_obj->arraycopy_src() &&
+         !null_obj->arraycopy_dst(), "sanity");
+#endif
+
+  _collecting = false;
+
+  } // TracePhase t3("connectionGraph")
+
+  // 4. Optimize ideal graph based on EA information.
+  bool has_non_escaping_obj = (non_escaped_worklist.length() > 0);
+  if (has_non_escaping_obj) {
+    optimize_ideal_graph(ptr_cmp_worklist, storestore_worklist);
+  }
+
+#ifndef PRODUCT
+  if (PrintEscapeAnalysis) {
+    dump(ptnodes_worklist); // Dump ConnectionGraph
+  }
+#endif
+
+  bool has_scalar_replaceable_candidates = (alloc_worklist.length() > 0);
+#ifdef ASSERT
+  if (VerifyConnectionGraph) {
+    int alloc_length = alloc_worklist.length();
+    for (int next = 0; next < alloc_length; ++next) {
+      Node* n = alloc_worklist.at(next);
+      PointsToNode* ptn = ptnode_adr(n->_idx);
+      assert(ptn->escape_state() == PointsToNode::NoEscape && ptn->scalar_replaceable(), "sanity");
+    }
+  }
+#endif
+
+  // 5. Separate memory graph for scalar replaceable allocations.
+  if (has_scalar_replaceable_candidates &&
+      C->AliasLevel() >= 3 && EliminateAllocations) {
+    // Now use the escape information to create unique types for
+    // scalar replaceable objects.
+    split_unique_types(alloc_worklist);
+    if (C->failing())  return false;
+    C->print_method("After Escape Analysis", 2);
+
+#ifdef ASSERT
+  } else if (Verbose && (PrintEscapeAnalysis || PrintEliminateAllocations)) {
+    tty->print("=== No allocations eliminated for ");
+    C->method()->print_short_name();
+    if(!EliminateAllocations) {
+      tty->print(" since EliminateAllocations is off ===");
+    } else if(!has_scalar_replaceable_candidates) {
+      tty->print(" since there are no scalar replaceable candidates ===");
+    } else if(C->AliasLevel() < 3) {
+      tty->print(" since AliasLevel < 3 ===");
+    }
+    tty->cr();
+#endif
+  }
+  return has_non_escaping_obj;
+}
+
+// Populate Connection Graph with PointsTo nodes and create simple
+// connection graph edges.
+void ConnectionGraph::add_node_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist) {
+  assert(!_verify, "this method should not be called for verification");
+  PhaseGVN* igvn = _igvn;
+  uint n_idx = n->_idx;
+  PointsToNode* n_ptn = ptnode_adr(n_idx);
+  if (n_ptn != NULL)
+    return; // No need to redefine PointsTo node during first iteration.
+
+  if (n->is_Call()) {
+    // Arguments to allocation and locking don't escape.
+    if (n->is_AbstractLock()) {
+      // Put Lock and Unlock nodes on IGVN worklist to process them during
+      // first IGVN optimization when escape information is still available.
+      record_for_optimizer(n);
+    } else if (n->is_Allocate()) {
+      add_call_node(n->as_Call());
+      record_for_optimizer(n);
+    } else {
+      if (n->is_CallStaticJava()) {
+        const char* name = n->as_CallStaticJava()->_name;
+        if (name != NULL && strcmp(name, "uncommon_trap") == 0)
+          return; // Skip uncommon traps
+      }
+      // Don't mark as processed since the call's arguments have to be processed.
+      delayed_worklist->push(n);
+      // Check if a call returns an object.
+      if (n->as_Call()->returns_pointer() &&
+          n->as_Call()->proj_out(TypeFunc::Parms) != NULL) {
+        add_call_node(n->as_Call());
+      }
+    }
+    return;
+  }
+  // Put this check here to process call arguments since some call nodes
+  // point to phantom_obj.
+  if (n_ptn == phantom_obj || n_ptn == null_obj)
+    return; // Skip predefined nodes.
 
-  assert(f->node_type() != PointsToNode::UnknownType && t->node_type() != PointsToNode::UnknownType, "node types must be set");
-  assert(f->node_type() == PointsToNode::LocalVar || f->node_type() == PointsToNode::Field, "invalid source of PointsTo edge");
-  assert(t->node_type() == PointsToNode::JavaObject, "invalid destination of PointsTo edge");
-  if (to_i == _phantom_object) { // Quick test for most common object
-    if (f->has_unknown_ptr()) {
-      return;
+  int opcode = n->Opcode();
+  switch (opcode) {
+    case Op_AddP: {
+      Node* base = get_addp_base(n);
+      PointsToNode* ptn_base = ptnode_adr(base->_idx);
+      // Field nodes are created for all field types. They are used in
+      // adjust_scalar_replaceable_state() and split_unique_types().
+      // Note, non-oop fields will have only base edges in Connection
+      // Graph because such fields are not used for oop loads and stores.
+      int offset = address_offset(n, igvn);
+      add_field(n, PointsToNode::NoEscape, offset);
+      if (ptn_base == NULL) {
+        delayed_worklist->push(n); // Process it later.
+      } else {
+        n_ptn = ptnode_adr(n_idx);
+        add_base(n_ptn->as_Field(), ptn_base);
+      }
+      break;
+    }
+    case Op_CastX2P: {
+      map_ideal_node(n, phantom_obj);
+      break;
+    }
+    case Op_CastPP:
+    case Op_CheckCastPP:
+    case Op_EncodeP:
+    case Op_DecodeN: {
+      add_local_var_and_edge(n, PointsToNode::NoEscape,
+                             n->in(1), delayed_worklist);
+      break;
+    }
+    case Op_CMoveP: {
+      add_local_var(n, PointsToNode::NoEscape);
+      // Do not add edges during the first iteration because some may
+      // not be defined yet.
+      delayed_worklist->push(n);
+      break;
+    }
+    case Op_ConP:
+    case Op_ConN: {
+      // assume all oop constants globally escape except for null
+      PointsToNode::EscapeState es;
+      if (igvn->type(n) == TypePtr::NULL_PTR ||
+          igvn->type(n) == TypeNarrowOop::NULL_PTR) {
+        es = PointsToNode::NoEscape;
+      } else {
+        es = PointsToNode::GlobalEscape;
+      }
+      add_java_object(n, es);
+      break;
+    }
+    case Op_CreateEx: {
+      // assume that all exception objects globally escape
+      add_java_object(n, PointsToNode::GlobalEscape);
+      break;
+    }
+    case Op_LoadKlass:
+    case Op_LoadNKlass: {
+      // Unknown class is loaded
+      map_ideal_node(n, phantom_obj);
+      break;
+    }
+    case Op_LoadP:
+    case Op_LoadN:
+    case Op_LoadPLocked: {
+      // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
+      // ThreadLocal has RawPtr type.
+      const Type* t = igvn->type(n);
+      if (t->make_ptr() != NULL) {
+        Node* adr = n->in(MemNode::Address);
+#ifdef ASSERT
+        if (!adr->is_AddP()) {
+          assert(igvn->type(adr)->isa_rawptr(), "sanity");
+        } else {
+          assert((ptnode_adr(adr->_idx) == NULL ||
+                  ptnode_adr(adr->_idx)->as_Field()->is_oop()), "sanity");
+        }
+#endif
+        add_local_var_and_edge(n, PointsToNode::NoEscape,
+                               adr, delayed_worklist);
+      }
+      break;
+    }
+    case Op_Parm: {
+      map_ideal_node(n, phantom_obj);
+      break;
+    }
+    case Op_PartialSubtypeCheck: {
+      // Produces Null or notNull and is used only in CmpP, so
+      // phantom_obj could be used.
+      map_ideal_node(n, phantom_obj); // Result is unknown
+      break;
+    }
+    case Op_Phi: {
+      // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
+      // ThreadLocal has RawPtr type.
+      const Type* t = n->as_Phi()->type();
+      if (t->make_ptr() != NULL) {
+        add_local_var(n, PointsToNode::NoEscape);
+        // Do not add edges during the first iteration because some may
+        // not be defined yet.
+        delayed_worklist->push(n);
+      }
+      break;
+    }
+    case Op_Proj: {
+      // we are only interested in the oop result projection from a call
+      if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() &&
+          n->in(0)->as_Call()->returns_pointer()) {
+        add_local_var_and_edge(n, PointsToNode::NoEscape,
+                               n->in(0), delayed_worklist);
+      }
+      break;
+    }
+    case Op_Rethrow: // Exception object escapes
+    case Op_Return: {
+      if (n->req() > TypeFunc::Parms &&
+          igvn->type(n->in(TypeFunc::Parms))->isa_oopptr()) {
+        // Treat Return value as LocalVar with GlobalEscape escape state.
+        add_local_var_and_edge(n, PointsToNode::GlobalEscape,
+                               n->in(TypeFunc::Parms), delayed_worklist);
+      }
+      break;
+    }
+    case Op_StoreP:
+    case Op_StoreN:
+    case Op_StorePConditional:
+    case Op_CompareAndSwapP:
+    case Op_CompareAndSwapN: {
+      Node* adr = n->in(MemNode::Address);
+      const Type *adr_type = igvn->type(adr);
+      adr_type = adr_type->make_ptr();
+      if (adr_type->isa_oopptr() ||
+          (opcode == Op_StoreP || opcode == Op_StoreN) &&
+                        (adr_type == TypeRawPtr::NOTNULL &&
+                         adr->in(AddPNode::Address)->is_Proj() &&
+                         adr->in(AddPNode::Address)->in(0)->is_Allocate())) {
+        delayed_worklist->push(n); // Process it later.
+#ifdef ASSERT
+        assert(adr->is_AddP(), "expecting an AddP");
+        if (adr_type == TypeRawPtr::NOTNULL) {
+          // Verify a raw address for a store captured by Initialize node.
+          int offs = (int)igvn->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot);
+          assert(offs != Type::OffsetBot, "offset must be a constant");
+        }
+#endif
+      } else {
+        // Ignore copying of the displaced header to the BoxNode (OSR compilation).
+        if (adr->is_BoxLock())
+          break;
+        // Stored value escapes in unsafe access.
+        if ((opcode == Op_StoreP) && (adr_type == TypeRawPtr::BOTTOM)) {
+          // Pointer stores in G1 barriers look like unsafe accesses.
+          // Ignore such stores so that non-escaping allocations can
+          // still be scalar replaced.
+          if (UseG1GC && adr->is_AddP()) {
+            Node* base = get_addp_base(adr);
+            if (base->Opcode() == Op_LoadP &&
+                base->in(MemNode::Address)->is_AddP()) {
+              adr = base->in(MemNode::Address);
+              Node* tls = get_addp_base(adr);
+              if (tls->Opcode() == Op_ThreadLocal) {
+                int offs = (int)igvn->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot);
+                if (offs == in_bytes(JavaThread::satb_mark_queue_offset() +
+                                     PtrQueue::byte_offset_of_buf())) {
+                  break; // G1 pre barrier previous oop value store.
+                }
+                if (offs == in_bytes(JavaThread::dirty_card_queue_offset() +
+                                     PtrQueue::byte_offset_of_buf())) {
+                  break; // G1 post barrier card address store.
+                }
+              }
+            }
+          }
+          delayed_worklist->push(n); // Process unsafe access later.
+          break;
+        }
+#ifdef ASSERT
+        n->dump(1);
+        assert(false, "not unsafe or G1 barrier raw StoreP");
+#endif
+      }
+      break;
+    }
+    case Op_AryEq:
+    case Op_StrComp:
+    case Op_StrEquals:
+    case Op_StrIndexOf: {
+      add_local_var(n, PointsToNode::ArgEscape);
+      delayed_worklist->push(n); // Process it later.
+      break;
+    }
+    case Op_ThreadLocal: {
+      add_java_object(n, PointsToNode::ArgEscape);
+      break;
+    }
+    default:
+      ; // Do nothing for nodes not related to EA.
+  }
+  return;
+}
+
+#ifdef ASSERT
+#define ELSE_FAIL(name)                               \
+      /* Should not be called for non-pointer types. */   \
+      n->dump(1);                                       \
+      assert(false, name);                              \
+      break;
+#else
+#define ELSE_FAIL(name) \
+      break;
+#endif
+
+// Add final simple edges to graph.
+void ConnectionGraph::add_final_edges(Node *n) {
+  PointsToNode* n_ptn = ptnode_adr(n->_idx);
+#ifdef ASSERT
+  if (_verify && n_ptn->is_JavaObject())
+    return; // This method does not change graph for JavaObject.
+#endif
+
+  if (n->is_Call()) {
+    process_call_arguments(n->as_Call());
+    return;
+  }
+  assert(n->is_Store() || n->is_LoadStore() ||
+         (n_ptn != NULL) && (n_ptn->ideal_node() != NULL),
+         "node should be registered already");
+  int opcode = n->Opcode();
+  switch (opcode) {
+    case Op_AddP: {
+      Node* base = get_addp_base(n);
+      PointsToNode* ptn_base = ptnode_adr(base->_idx);
+      assert(ptn_base != NULL, "field's base should be registered");
+      add_base(n_ptn->as_Field(), ptn_base);
+      break;
+    }
+    case Op_CastPP:
+    case Op_CheckCastPP:
+    case Op_EncodeP:
+    case Op_DecodeN: {
+      add_local_var_and_edge(n, PointsToNode::NoEscape,
+                             n->in(1), NULL);
+      break;
+    }
+    case Op_CMoveP: {
+      for (uint i = CMoveNode::IfFalse; i < n->req(); i++) {
+        Node* in = n->in(i);
+        if (in == NULL)
+          continue;  // ignore NULL
+        Node* uncast_in = in->uncast();
+        if (uncast_in->is_top() || uncast_in == n)
+          continue;  // ignore top or inputs which go back this node
+        PointsToNode* ptn = ptnode_adr(in->_idx);
+        assert(ptn != NULL, "node should be registered");
+        add_edge(n_ptn, ptn);
+      }
+      break;
+    }
+    case Op_LoadP:
+    case Op_LoadN:
+    case Op_LoadPLocked: {
+      // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
+      // ThreadLocal has RawPtr type.
+      const Type* t = _igvn->type(n);
+      if (t->make_ptr() != NULL) {
+        Node* adr = n->in(MemNode::Address);
+        add_local_var_and_edge(n, PointsToNode::NoEscape, adr, NULL);
+        break;
+      }
+      ELSE_FAIL("Op_LoadP");
+    }
+    case Op_Phi: {
+      // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
+      // ThreadLocal has RawPtr type.
+      const Type* t = n->as_Phi()->type();
+      if (t->make_ptr() != NULL) {
+        for (uint i = 1; i < n->req(); i++) {
+          Node* in = n->in(i);
+          if (in == NULL)
+            continue;  // ignore NULL
+          Node* uncast_in = in->uncast();
+          if (uncast_in->is_top() || uncast_in == n)
+            continue;  // ignore top or inputs which go back this node
+          PointsToNode* ptn = ptnode_adr(in->_idx);
+          assert(ptn != NULL, "node should be registered");
+          add_edge(n_ptn, ptn);
+        }
+        break;
+      }
+      ELSE_FAIL("Op_Phi");
+    }
+    case Op_Proj: {
+      // we are only interested in the oop result projection from a call
+      if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() &&
+          n->in(0)->as_Call()->returns_pointer()) {
+        add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(0), NULL);
+        break;
+      }
+      ELSE_FAIL("Op_Proj");
+    }
+    case Op_Rethrow: // Exception object escapes
+    case Op_Return: {
+      if (n->req() > TypeFunc::Parms &&
+          _igvn->type(n->in(TypeFunc::Parms))->isa_oopptr()) {
+        // Treat Return value as LocalVar with GlobalEscape escape state.
+        add_local_var_and_edge(n, PointsToNode::GlobalEscape,
+                               n->in(TypeFunc::Parms), NULL);
+        break;
+      }
+      ELSE_FAIL("Op_Return");
+    }
+    case Op_StoreP:
+    case Op_StoreN:
+    case Op_StorePConditional:
+    case Op_CompareAndSwapP:
+    case Op_CompareAndSwapN: {
+      Node* adr = n->in(MemNode::Address);
+      const Type *adr_type = _igvn->type(adr);
+      adr_type = adr_type->make_ptr();
+      if (adr_type->isa_oopptr() ||
+          (opcode == Op_StoreP || opcode == Op_StoreN) &&
+                        (adr_type == TypeRawPtr::NOTNULL &&
+                         adr->in(AddPNode::Address)->is_Proj() &&
+                         adr->in(AddPNode::Address)->in(0)->is_Allocate())) {
+        // Point Address to Value
+        PointsToNode* adr_ptn = ptnode_adr(adr->_idx);
+        assert(adr_ptn != NULL &&
+               adr_ptn->as_Field()->is_oop(), "node should be registered");
+        Node *val = n->in(MemNode::ValueIn);
+        PointsToNode* ptn = ptnode_adr(val->_idx);
+        assert(ptn != NULL, "node should be registered");
+        add_edge(adr_ptn, ptn);
+        break;
+      } else if ((opcode == Op_StoreP) && (adr_type == TypeRawPtr::BOTTOM)) {
+        // Stored value escapes in unsafe access.
+        Node *val = n->in(MemNode::ValueIn);
+        PointsToNode* ptn = ptnode_adr(val->_idx);
+        assert(ptn != NULL, "node should be registered");
+        ptn->set_escape_state(PointsToNode::GlobalEscape);
+        // Add edge to object for unsafe access with offset.
+        PointsToNode* adr_ptn = ptnode_adr(adr->_idx);
+        assert(adr_ptn != NULL, "node should be registered");
+        if (adr_ptn->is_Field()) {
+          assert(adr_ptn->as_Field()->is_oop(), "should be oop field");
+          add_edge(adr_ptn, ptn);
+        }
+        break;
+      }
+      ELSE_FAIL("Op_StoreP");
+    }
+    case Op_AryEq:
+    case Op_StrComp:
+    case Op_StrEquals:
+    case Op_StrIndexOf: {
+      // char[] arrays passed to string intrinsic do not escape but
+      // they are not scalar replaceable. Adjust escape state for them.
+      // Start from in(2) edge since in(1) is memory edge.
+      for (uint i = 2; i < n->req(); i++) {
+        Node* adr = n->in(i);
+        const Type* at = _igvn->type(adr);
+        if (!adr->is_top() && at->isa_ptr()) {
+          assert(at == Type::TOP || at == TypePtr::NULL_PTR ||
+                 at->isa_ptr() != NULL, "expecting a pointer");
+          if (adr->is_AddP()) {
+            adr = get_addp_base(adr);
+          }
+          PointsToNode* ptn = ptnode_adr(adr->_idx);
+          assert(ptn != NULL, "node should be registered");
+          add_edge(n_ptn, ptn);
+        }
+      }
+      break;
+    }
+    default: {
+      // This method should be called only for EA-specific nodes which may
+      // have missed some edges when they were created.
+#ifdef ASSERT
+      n->dump(1);
+#endif
+      guarantee(false, "unknown node");
+    }
+  }
+  return;
+}
+
+void ConnectionGraph::add_call_node(CallNode* call) {
+  assert(call->returns_pointer(), "only for call which returns pointer");
+  uint call_idx = call->_idx;
+  if (call->is_Allocate()) {
+    Node* k = call->in(AllocateNode::KlassNode);
+    const TypeKlassPtr* kt = k->bottom_type()->isa_klassptr();
+    assert(kt != NULL, "TypeKlassPtr required.");
+    ciKlass* cik = kt->klass();
+    PointsToNode::EscapeState es = PointsToNode::NoEscape;
+    bool scalar_replaceable = true;
+    if (call->is_AllocateArray()) {
+      if (!cik->is_array_klass()) { // StressReflectiveCode
+        es = PointsToNode::GlobalEscape;
+      } else {
+        int length = call->in(AllocateNode::ALength)->find_int_con(-1);
+        if (length < 0 || length > EliminateAllocationArraySizeLimit) {
+          // Not scalar replaceable if the length is not constant or too big.
+          scalar_replaceable = false;
+        }
+      }
+    } else {  // Allocate instance
+      if (cik->is_subclass_of(_compile->env()->Thread_klass()) ||
+         !cik->is_instance_klass() || // StressReflectiveCode
+          cik->as_instance_klass()->has_finalizer()) {
+        es = PointsToNode::GlobalEscape;
+      }
+    }
+    add_java_object(call, es);
+    PointsToNode* ptn = ptnode_adr(call_idx);
+    if (!scalar_replaceable && ptn->scalar_replaceable()) {
+      ptn->set_scalar_replaceable(false);
+    }
+  } else if (call->is_CallStaticJava()) {
+    // Call nodes could be different types:
+    //
+    // 1. CallDynamicJavaNode (what happened during call is unknown):
+    //
+    //    - mapped to GlobalEscape JavaObject node if oop is returned;
+    //
+    //    - all oop arguments are escaping globally;
+    //
+    // 2. CallStaticJavaNode (execute bytecode analysis if possible):
+    //
+    //    - the same as CallDynamicJavaNode if bytecode analysis is not possible;
+    //
+    //    - mapped to GlobalEscape JavaObject node if unknown oop is returned;
+    //    - mapped to NoEscape JavaObject node if non-escaping object allocated
+    //      during call is returned;
+    //    - mapped to ArgEscape LocalVar node pointing to object arguments
+    //      which are returned and do not escape during the call;
+    //
+    //    - oop arguments' escape status is defined by bytecode analysis;
+    //
+    // For a static call, we know exactly what method is being called.
+    // Use bytecode estimator to record whether the call's return value escapes.
+    ciMethod* meth = call->as_CallJava()->method();
+    if (meth == NULL) {
+      const char* name = call->as_CallStaticJava()->_name;
+      assert(strncmp(name, "_multianewarray", 15) == 0, "TODO: add failed case check");
+      // Returns a newly allocated unescaped object.
+      add_java_object(call, PointsToNode::NoEscape);
+      ptnode_adr(call_idx)->set_scalar_replaceable(false);
     } else {
-      f->set_has_unknown_ptr();
+      BCEscapeAnalyzer* call_analyzer = meth->get_bcea();
+      call_analyzer->copy_dependencies(_compile->dependencies());
+      if (call_analyzer->is_return_allocated()) {
+        // Returns a newly allocated unescaped object; simply
+        // update dependency information.
+        // Mark it as NoEscape so that objects referenced by
+        // its fields will be marked as NoEscape at least.
+        add_java_object(call, PointsToNode::NoEscape);
+        ptnode_adr(call_idx)->set_scalar_replaceable(false);
+      } else {
+        // Determine whether any arguments are returned.
+        const TypeTuple* d = call->tf()->domain();
+        bool ret_arg = false;
+        for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
+          if (d->field_at(i)->isa_ptr() != NULL &&
+              call_analyzer->is_arg_returned(i - TypeFunc::Parms)) {
+            ret_arg = true;
+            break;
+          }
+        }
+        if (ret_arg) {
+          add_local_var(call, PointsToNode::ArgEscape);
+        } else {
+          // Returns unknown object.
+          map_ideal_node(call, phantom_obj);
+        }
+      }
+    }
+  } else {
+    // Another type of call; assume the worst case:
+    // returned value is unknown and globally escapes.
+    assert(call->Opcode() == Op_CallDynamicJava, "add failed case check");
+    map_ideal_node(call, phantom_obj);
+  }
+}
+
+void ConnectionGraph::process_call_arguments(CallNode *call) {
+    bool is_arraycopy = false;
+    switch (call->Opcode()) {
+#ifdef ASSERT
+    case Op_Allocate:
+    case Op_AllocateArray:
+    case Op_Lock:
+    case Op_Unlock:
+      assert(false, "should be done already");
+      break;
+#endif
+    case Op_CallLeafNoFP:
+      is_arraycopy = (call->as_CallLeaf()->_name != NULL &&
+                      strstr(call->as_CallLeaf()->_name, "arraycopy") != 0);
+      // fall through
+    case Op_CallLeaf: {
+      // Stub calls: objects do not escape but they are not scalar replaceable.
+      // Adjust escape state for outgoing arguments.
+      const TypeTuple * d = call->tf()->domain();
+      bool src_has_oops = false;
+      for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
+        const Type* at = d->field_at(i);
+        Node *arg = call->in(i);
+        const Type *aat = _igvn->type(arg);
+        if (arg->is_top() || !at->isa_ptr() || !aat->isa_ptr())
+          continue;
+        if (arg->is_AddP()) {
+          //
+          // The inline_native_clone() case when the arraycopy stub is called
+          // after the allocation before Initialize and CheckCastPP nodes.
+          // Or normal arraycopy for object arrays case.
+          //
+          // Set AddP's base (Allocate) as not scalar replaceable since
+          // pointer to the base (with offset) is passed as argument.
+          //
+          arg = get_addp_base(arg);
+        }
+        PointsToNode* arg_ptn = ptnode_adr(arg->_idx);
+        assert(arg_ptn != NULL, "should be registered");
+        PointsToNode::EscapeState arg_esc = arg_ptn->escape_state();
+        if (is_arraycopy || arg_esc < PointsToNode::ArgEscape) {
+          assert(aat == Type::TOP || aat == TypePtr::NULL_PTR ||
+                 aat->isa_ptr() != NULL, "expecting a Ptr");
+          bool arg_has_oops = aat->isa_oopptr() &&
+                              (aat->isa_oopptr()->klass() == NULL || aat->isa_instptr() ||
+                               (aat->isa_aryptr() && aat->isa_aryptr()->klass()->is_obj_array_klass()));
+          if (i == TypeFunc::Parms) {
+            src_has_oops = arg_has_oops;
+          }
+          //
+          // src or dst could be j.l.Object when the other is a basic-type array:
+          //
+          //   arraycopy(char[],0,Object*,0,size);
+          //   arraycopy(Object*,0,char[],0,size);
+          //
+          // Don't add edges in such cases.
+          //
+          bool arg_is_arraycopy_dest = src_has_oops && is_arraycopy &&
+                                       arg_has_oops && (i > TypeFunc::Parms);
+#ifdef ASSERT
+          if (!(is_arraycopy ||
+                call->as_CallLeaf()->_name != NULL &&
+                (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre")  == 0 ||
+                 strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ))
+          ) {
+            call->dump();
+            assert(false, "EA: unexpected CallLeaf");
+          }
+#endif
+          // Always process arraycopy's destination object since
+          // we need to add all possible edges to references in
+          // the source object.
+          if (arg_esc >= PointsToNode::ArgEscape &&
+              !arg_is_arraycopy_dest) {
+            continue;
+          }
+          set_escape_state(arg_ptn, PointsToNode::ArgEscape);
+          if (arg_is_arraycopy_dest) {
+            Node* src = call->in(TypeFunc::Parms);
+            if (src->is_AddP()) {
+              src = get_addp_base(src);
+            }
+            PointsToNode* src_ptn = ptnode_adr(src->_idx);
+            assert(src_ptn != NULL, "should be registered");
+            if (arg_ptn != src_ptn) {
+              // Special arraycopy edge:
+              // A destination object's field can't have the source object
+              // as base since the objects' escape states are not related.
+              // Only escape state of destination object's fields affects
+              // escape state of fields in source object.
+              add_arraycopy(call, PointsToNode::ArgEscape, src_ptn, arg_ptn);
+            }
+          }
+        }
+      }
+      break;
+    }
+    case Op_CallStaticJava: {
+      // For a static call, we know exactly what method is being called.
+      // Use bytecode estimator to record the call's escape affects
+#ifdef ASSERT
+      const char* name = call->as_CallStaticJava()->_name;
+      assert((name == NULL || strcmp(name, "uncommon_trap") != 0), "normal calls only");
+#endif
+      ciMethod* meth = call->as_CallJava()->method();
+      BCEscapeAnalyzer* call_analyzer = (meth != NULL) ? meth->get_bcea() : NULL;
+      // fall-through if not a Java method or no analyzer information
+      if (call_analyzer != NULL) {
+        PointsToNode* call_ptn = ptnode_adr(call->_idx);
+        const TypeTuple* d = call->tf()->domain();
+        for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
+          const Type* at = d->field_at(i);
+          int k = i - TypeFunc::Parms;
+          Node* arg = call->in(i);
+          PointsToNode* arg_ptn = ptnode_adr(arg->_idx);
+          if (at->isa_ptr() != NULL &&
+              call_analyzer->is_arg_returned(k)) {
+            // The call returns arguments.
+            if (call_ptn != NULL) { // Is call's result used?
+              assert(call_ptn->is_LocalVar(), "node should be registered");
+              assert(arg_ptn != NULL, "node should be registered");
+              add_edge(call_ptn, arg_ptn);
+            }
+          }
+          if (at->isa_oopptr() != NULL &&
+              arg_ptn->escape_state() < PointsToNode::GlobalEscape) {
+            if (!call_analyzer->is_arg_stack(k)) {
+              // The argument global escapes
+              set_escape_state(arg_ptn, PointsToNode::GlobalEscape);
+            } else {
+              set_escape_state(arg_ptn, PointsToNode::ArgEscape);
+              if (!call_analyzer->is_arg_local(k)) {
+                // The argument itself doesn't escape, but any fields might
+                set_fields_escape_state(arg_ptn, PointsToNode::GlobalEscape);
+              }
+            }
+          }
+        }
+        if (call_ptn != NULL && call_ptn->is_LocalVar()) {
+          // The call returns arguments.
+          assert(call_ptn->edge_count() > 0, "sanity");
+          if (!call_analyzer->is_return_local()) {
+            // Returns also unknown object.
+            add_edge(call_ptn, phantom_obj);
+          }
+        }
+        break;
+      }
+    }
+    default: {
+      // We fall through to here if this is not a Java method, there is
+      // no analyzer information, or it is some other type of call. Assume
+      // the worst case: all arguments globally escape.
+      const TypeTuple* d = call->tf()->domain();
+      for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
+        const Type* at = d->field_at(i);
+        if (at->isa_oopptr() != NULL) {
+          Node* arg = call->in(i);
+          if (arg->is_AddP()) {
+            arg = get_addp_base(arg);
+          }
+          assert(ptnode_adr(arg->_idx) != NULL, "should be defined already");
+          set_escape_state(ptnode_adr(arg->_idx), PointsToNode::GlobalEscape);
+        }
+      }
     }
   }
-  add_edge(f, to_i, PointsToNode::PointsToEdge);
+}
+
+
+// Finish Graph construction.
+bool ConnectionGraph::complete_connection_graph(
+                         GrowableArray<PointsToNode*>&   ptnodes_worklist,
+                         GrowableArray<JavaObjectNode*>& non_escaped_worklist,
+                         GrowableArray<JavaObjectNode*>& java_objects_worklist,
+                         GrowableArray<FieldNode*>&      oop_fields_worklist) {
+  // Normally only 1-3 passes are needed to build the Connection Graph,
+  // depending on graph complexity. We have observed 8 passes in
+  // jvm2008 compiler.compiler. Set the limit to 20 to catch situations
+  // where something went wrong and bail out of Escape Analysis.
+  // Also limit the build time to 30 sec (60 in a debug VM).
+#define CG_BUILD_ITER_LIMIT 20
+#ifdef ASSERT
+#define CG_BUILD_TIME_LIMIT 60.0
+#else
+#define CG_BUILD_TIME_LIMIT 30.0
+#endif
+
+  // Propagate GlobalEscape and ArgEscape escape states and check that
+  // we still have non-escaping objects. The method pushes onto _worklist
+  // the Field nodes which reference phantom_object.
+  if (!find_non_escaped_objects(ptnodes_worklist, non_escaped_worklist)) {
+    return false; // Nothing to do.
+  }
+  // Now propagate references to all JavaObject nodes.
+  int java_objects_length = java_objects_worklist.length();
+  elapsedTimer time;
+  int new_edges = 1;
+  int iterations = 0;
+  do {
+    while ((new_edges > 0) &&
+          (iterations++   < CG_BUILD_ITER_LIMIT) &&
+          (time.seconds() < CG_BUILD_TIME_LIMIT)) {
+      time.start();
+      new_edges = 0;
+      // Propagate references to phantom_object for nodes pushed on _worklist
+      // by find_non_escaped_objects() and find_field_value().
+      new_edges += add_java_object_edges(phantom_obj, false);
+      for (int next = 0; next < java_objects_length; ++next) {
+        JavaObjectNode* ptn = java_objects_worklist.at(next);
+        new_edges += add_java_object_edges(ptn, true);
+      }
+      if (new_edges > 0) {
+        // Update escape states on each iteration if graph was updated.
+        if (!find_non_escaped_objects(ptnodes_worklist, non_escaped_worklist)) {
+          return false; // Nothing to do.
+        }
+      }
+      time.stop();
+    }
+    if ((iterations     < CG_BUILD_ITER_LIMIT) &&
+        (time.seconds() < CG_BUILD_TIME_LIMIT)) {
+      time.start();
+      // Find fields which have unknown value.
+      int fields_length = oop_fields_worklist.length();
+      for (int next = 0; next < fields_length; next++) {
+        FieldNode* field = oop_fields_worklist.at(next);
+        if (field->edge_count() == 0) {
+          new_edges += find_field_value(field);
+          // This code may have added new edges to phantom_object.
+          // Another cycle is needed to propagate references to phantom_object.
+        }
+      }
+      time.stop();
+    } else {
+      new_edges = 0; // Bailout
+    }
+  } while (new_edges > 0);
+
+  // Bailout if passed limits.
+  if ((iterations     >= CG_BUILD_ITER_LIMIT) ||
+      (time.seconds() >= CG_BUILD_TIME_LIMIT)) {
+    Compile* C = _compile;
+    if (C->log() != NULL) {
+      C->log()->begin_elem("connectionGraph_bailout reason='reached ");
+      C->log()->text("%s", (iterations >= CG_BUILD_ITER_LIMIT) ? "iterations" : "time");
+      C->log()->end_elem(" limit'");
+    }
+    assert(false, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d",
+           time.seconds(), iterations, nodes_size(), ptnodes_worklist.length()));
+    // Possible infinite build_connection_graph loop,
+    // bailout (no changes to ideal graph were made).
+    return false;
+  }
+#ifdef ASSERT
+  if (Verbose && PrintEscapeAnalysis) {
+    tty->print_cr("EA: %d iterations to build connection graph with %d nodes and worklist size %d",
+                  iterations, nodes_size(), ptnodes_worklist.length());
+  }
+#endif
+
+#undef CG_BUILD_ITER_LIMIT
+#undef CG_BUILD_TIME_LIMIT
+
+  // Find fields initialized by NULL for non-escaping Allocations.
+  int non_escaped_length = non_escaped_worklist.length();
+  for (int next = 0; next < non_escaped_length; next++) {
+    JavaObjectNode* ptn = non_escaped_worklist.at(next);
+    PointsToNode::EscapeState es = ptn->escape_state();
+    assert(es <= PointsToNode::ArgEscape, "sanity");
+    if (es == PointsToNode::NoEscape) {
+      if (find_init_values(ptn, null_obj, _igvn) > 0) {
+        // Adding references to NULL object does not change escape states
+        // since it does not escape. Also no fields are added to NULL object.
+        add_java_object_edges(null_obj, false);
+      }
+    }
+    Node* n = ptn->ideal_node();
+    if (n->is_Allocate()) {
+      // The object allocated by this Allocate node will never be
+      // seen by another thread. Mark it so that when it is
+      // expanded no MemBarStoreStore is added.
+      InitializeNode* ini = n->as_Allocate()->initialization();
+      if (ini != NULL)
+        ini->set_does_not_escape();
+    }
+  }
+  return true; // Finished graph construction.
+}
+
+// Propagate GlobalEscape and ArgEscape escape states to all nodes
+// and check that we still have non-escaping java objects.
+bool ConnectionGraph::find_non_escaped_objects(GrowableArray<PointsToNode*>& ptnodes_worklist,
+                                               GrowableArray<JavaObjectNode*>& non_escaped_worklist) {
+  GrowableArray<PointsToNode*> escape_worklist;
+  // First, put all nodes with GlobalEscape and ArgEscape states on worklist.
+  int ptnodes_length = ptnodes_worklist.length();
+  for (int next = 0; next < ptnodes_length; ++next) {
+    PointsToNode* ptn = ptnodes_worklist.at(next);
+    if (ptn->escape_state() >= PointsToNode::ArgEscape ||
+        ptn->fields_escape_state() >= PointsToNode::ArgEscape) {
+      escape_worklist.push(ptn);
+    }
+  }
+  // Set escape states to referenced nodes (edges list).
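+  // For illustration: an ArgEscape LocalVar which points to a JavaObject
+  // raises that object to ArgEscape as well; an Arraycopy edge propagates
+  // only the fields' escape state, since the copy exposes the source
+  // object's field values but not the source object itself.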
+  while (escape_worklist.length() > 0) {
+    PointsToNode* ptn = escape_worklist.pop();
+    PointsToNode::EscapeState es  = ptn->escape_state();
+    PointsToNode::EscapeState field_es = ptn->fields_escape_state();
+    if (ptn->is_Field() && ptn->as_Field()->is_oop() &&
+        es >= PointsToNode::ArgEscape) {
+      // A GlobalEscape or ArgEscape state of a field means it has an unknown value.
+      if (add_edge(ptn, phantom_obj)) {
+        // New edge was added
+        add_field_uses_to_worklist(ptn->as_Field());
+      }
+    }
+    for (EdgeIterator i(ptn); i.has_next(); i.next()) {
+      PointsToNode* e = i.get();
+      if (e->is_Arraycopy()) {
+        assert(ptn->arraycopy_dst(), "sanity");
+        // Propagate only fields escape state through arraycopy edge.
+        if (e->fields_escape_state() < field_es) {
+          set_fields_escape_state(e, field_es);
+          escape_worklist.push(e);
+        }
+      } else if (es >= field_es) {
+        // fields_escape_state is also set to 'es' if it is less than 'es'.
+        if (e->escape_state() < es) {
+          set_escape_state(e, es);
+          escape_worklist.push(e);
+        }
+      } else {
+        // Propagate field escape state.
+        bool es_changed = false;
+        if (e->fields_escape_state() < field_es) {
+          set_fields_escape_state(e, field_es);
+          es_changed = true;
+        }
+        if ((e->escape_state() < field_es) &&
+            e->is_Field() && ptn->is_JavaObject() &&
+            e->as_Field()->is_oop()) {
+          // Change the escape state of referenced fields.
+          set_escape_state(e, field_es);
+          es_changed = true;
+        } else if (e->escape_state() < es) {
+          set_escape_state(e, es);
+          es_changed = true;
+        }
+        if (es_changed) {
+          escape_worklist.push(e);
+        }
+      }
+    }
+  }
+  // Remove escaped objects from non_escaped list.
+  for (int next = non_escaped_worklist.length()-1; next >= 0 ; --next) {
+    JavaObjectNode* ptn = non_escaped_worklist.at(next);
+    if (ptn->escape_state() >= PointsToNode::GlobalEscape) {
+      non_escaped_worklist.delete_at(next);
+    }
+    if (ptn->escape_state() == PointsToNode::NoEscape) {
+      // Find fields in non-escaped allocations which have unknown value.
+      find_init_values(ptn, phantom_obj, NULL);
+    }
+  }
+  return (non_escaped_worklist.length() > 0);
+}
+
+// Add all references to JavaObject node by walking over all uses.
+int ConnectionGraph::add_java_object_edges(JavaObjectNode* jobj, bool populate_worklist) {
+  int new_edges = 0;
+  if (populate_worklist) {
+    // Populate _worklist with the uses of jobj's uses.
+    for (UseIterator i(jobj); i.has_next(); i.next()) {
+      PointsToNode* use = i.get();
+      if (use->is_Arraycopy())
+        continue;
+      add_uses_to_worklist(use);
+      if (use->is_Field() && use->as_Field()->is_oop()) {
+        // Put on worklist all field's uses (loads) and
+        // related field nodes (same base and offset).
+        add_field_uses_to_worklist(use->as_Field());
+      }
+    }
+  }
+  while(_worklist.length() > 0) {
+    PointsToNode* use = _worklist.pop();
+    if (PointsToNode::is_base_use(use)) {
+      // Add reference from jobj to field and from field to jobj (field's base).
+      use = PointsToNode::get_use_node(use)->as_Field();
+      if (add_base(use->as_Field(), jobj)) {
+        new_edges++;
+      }
+      continue;
+    }
+    assert(!use->is_JavaObject(), "sanity");
+    if (use->is_Arraycopy()) {
+      if (jobj == null_obj) // NULL object does not have field edges
+        continue;
+      // Add an edge from the Arraycopy node to arraycopy's source java object
+      if (add_edge(use, jobj)) {
+        jobj->set_arraycopy_src();
+        new_edges++;
+      }
+      // and stop here.
+      continue;
+    }
+    if (!add_edge(use, jobj))
+      continue; // No new edge added, there was such edge already.
+    new_edges++;
+    if (use->is_LocalVar()) {
+      add_uses_to_worklist(use);
+      if (use->arraycopy_dst()) {
+        for (EdgeIterator i(use); i.has_next(); i.next()) {
+          PointsToNode* e = i.get();
+          if (e->is_Arraycopy()) {
+            if (jobj == null_obj) // NULL object does not have field edges
+              continue;
+            // Add edge from arraycopy's destination java object to Arraycopy node.
+            if (add_edge(jobj, e)) {
+              new_edges++;
+              jobj->set_arraycopy_dst();
+            }
+          }
+        }
+      }
+    } else {
+      // Added a new edge to values stored in the field.
+      // Put on worklist all field's uses (loads) and
+      // related field nodes (same base and offset).
+      add_field_uses_to_worklist(use->as_Field());
+    }
+  }
+  return new_edges;
+}
+
+// Put on worklist all related field nodes.
+void ConnectionGraph::add_field_uses_to_worklist(FieldNode* field) {
+  assert(field->is_oop(), "sanity");
+  int offset = field->offset();
+  add_uses_to_worklist(field);
+  // Loop over all bases of this field and push on worklist Field nodes
+  // with the same offset and base (since they may reference the same field).
+  for (BaseIterator i(field); i.has_next(); i.next()) {
+    PointsToNode* base = i.get();
+    add_fields_to_worklist(field, base);
+    // Check if the base was the source object of an arraycopy and go over
+    // the arraycopy's destination objects since values stored to a field
+    // of the source object are accessible through loads of the destination
+    // objects' fields.
+    if (base->arraycopy_src()) {
+      for (UseIterator j(base); j.has_next(); j.next()) {
+        PointsToNode* arycp = j.get();
+        if (arycp->is_Arraycopy()) {
+          for (UseIterator k(arycp); k.has_next(); k.next()) {
+            PointsToNode* abase = k.get();
+            if (abase->arraycopy_dst() && abase != base) {
+              // Look for the same arraycopy reference.
+              add_fields_to_worklist(field, abase);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+// Put on worklist all related field nodes.
+void ConnectionGraph::add_fields_to_worklist(FieldNode* field, PointsToNode* base) {
+  int offset = field->offset();
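+  // Note (illustrative): a field whose offset is Type::OffsetBot models an
+  // access to an unknown array element, e.g.  a[i] = x  with a variable
+  // index, so it must be treated as aliasing every concrete offset.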
+  if (base->is_LocalVar()) {
+    for (UseIterator j(base); j.has_next(); j.next()) {
+      PointsToNode* f = j.get();
+      if (PointsToNode::is_base_use(f)) { // Field
+        f = PointsToNode::get_use_node(f);
+        if (f == field || !f->as_Field()->is_oop())
+          continue;
+        int offs = f->as_Field()->offset();
+        if (offs == offset || offset == Type::OffsetBot || offs == Type::OffsetBot) {
+          add_to_worklist(f);
+        }
+      }
+    }
+  } else {
+    assert(base->is_JavaObject(), "sanity");
+    if (// Skip phantom_object since it is only used to indicate that
+        // this field's content globally escapes.
+        (base != phantom_obj) &&
+        // NULL object node does not have fields.
+        (base != null_obj)) {
+      for (EdgeIterator i(base); i.has_next(); i.next()) {
+        PointsToNode* f = i.get();
+        // Skip arraycopy edge since store to destination object field
+        // does not update value in source object field.
+        if (f->is_Arraycopy()) {
+          assert(base->arraycopy_dst(), "sanity");
+          continue;
+        }
+        if (f == field || !f->as_Field()->is_oop())
+          continue;
+        int offs = f->as_Field()->offset();
+        if (offs == offset || offset == Type::OffsetBot || offs == Type::OffsetBot) {
+          add_to_worklist(f);
+        }
+      }
+    }
+  }
+}
+
+// Find fields which have unknown value.
+int ConnectionGraph::find_field_value(FieldNode* field) {
+  // Escaped fields should have init value already.
+  assert(field->escape_state() == PointsToNode::NoEscape, "sanity");
+  int new_edges = 0;
+  for (BaseIterator i(field); i.has_next(); i.next()) {
+    PointsToNode* base = i.get();
+    if (base->is_JavaObject()) {
+      // Skip Allocate's fields which will be processed later.
+      if (base->ideal_node()->is_Allocate())
+        return 0;
+      assert(base == null_obj, "only NULL ptr base expected here");
+    }
+  }
+  if (add_edge(field, phantom_obj)) {
+    // New edge was added
+    new_edges++;
+    add_field_uses_to_worklist(field);
+  }
+  return new_edges;
+}
+
+// Find fields initializing values for allocations.
+int ConnectionGraph::find_init_values(JavaObjectNode* pta, PointsToNode* init_val, PhaseTransform* phase) {
+  assert(pta->escape_state() == PointsToNode::NoEscape, "Not escaped Allocate nodes only");
+  int new_edges = 0;
+  Node* alloc = pta->ideal_node();
+  if (init_val == phantom_obj) {
+    // Do nothing for Allocate nodes since their field values are "known".
+    if (alloc->is_Allocate())
+      return 0;
+    assert(alloc->as_CallStaticJava(), "sanity");
+#ifdef ASSERT
+    if (alloc->as_CallStaticJava()->method() == NULL) {
+      const char* name = alloc->as_CallStaticJava()->_name;
+      assert(strncmp(name, "_multianewarray", 15) == 0, "sanity");
+    }
+#endif
+    // A non-escaped allocation returned from a Java or runtime call has
+    // unknown values in its fields.
+    for (EdgeIterator i(pta); i.has_next(); i.next()) {
+      PointsToNode* ptn = i.get();
+      if (ptn->is_Field() && ptn->as_Field()->is_oop()) {
+        if (add_edge(ptn, phantom_obj)) {
+          // New edge was added
+          new_edges++;
+          add_field_uses_to_worklist(ptn->as_Field());
+        }
+      }
+    }
+    return new_edges;
+  }
+  assert(init_val == null_obj, "sanity");
+  // Do nothing for Call nodes since their field values are unknown.
+  if (!alloc->is_Allocate())
+    return 0;
+
+  InitializeNode* ini = alloc->as_Allocate()->initialization();
+  Compile* C = _compile;
+  bool visited_bottom_offset = false;
+  GrowableArray<int> offsets_worklist;
+
+  // Check if an oop field's initializing value is recorded and add
+  // a corresponding NULL reference if it is not recorded.
+  // The Connection Graph does not record a default initialization by NULL
+  // captured by the Initialize node.
+  //
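+  // Illustrative (hypothetical) example: for
+  //   Node n = new Node(); // the oop field n.next is never stored to
+  // an edge  n.next -> null_obj  is added so that later loads of n.next
+  // are known to see NULL rather than an unknown value.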
+  for (EdgeIterator i(pta); i.has_next(); i.next()) {
+    PointsToNode* ptn = i.get(); // Field (AddP)
+    if (!ptn->is_Field() || !ptn->as_Field()->is_oop())
+      continue; // Not oop field
+    int offset = ptn->as_Field()->offset();
+    if (offset == Type::OffsetBot) {
+      if (!visited_bottom_offset) {
+        // OffsetBot is used to reference an array's element:
+        // always add a reference to NULL to all Field nodes since we don't
+        // know which element is referenced.
+        if (add_edge(ptn, null_obj)) {
+          // New edge was added
+          new_edges++;
+          add_field_uses_to_worklist(ptn->as_Field());
+          visited_bottom_offset = true;
+        }
+      }
+    } else {
+      // Check only oop fields.
+      const Type* adr_type = ptn->ideal_node()->as_AddP()->bottom_type();
+      if (adr_type->isa_rawptr()) {
+#ifdef ASSERT
+        // Raw pointers are used for initializing stores, so skip this field
+        // since its value should be recorded already.
+        Node* base = get_addp_base(ptn->ideal_node());
+        assert(adr_type->isa_rawptr() && base->is_Proj() &&
+               (base->in(0) == alloc),"unexpected pointer type");
+#endif
+        continue;
+      }
+      if (!offsets_worklist.contains(offset)) {
+        offsets_worklist.append(offset);
+        Node* value = NULL;
+        if (ini != NULL) {
+          BasicType ft = UseCompressedOops ? T_NARROWOOP : T_OBJECT;
+          Node* store = ini->find_captured_store(offset, type2aelembytes(ft), phase);
+          if (store != NULL && store->is_Store()) {
+            value = store->in(MemNode::ValueIn);
+          } else {
+            // There could be initializing stores which follow allocation.
+            // For example, a volatile field store is not collected
+            // by Initialize node.
+            //
+            // Need to check for dependent loads to separate such stores from
+            // stores which follow loads. For now, add the initial value NULL
+            // so that the pointer comparison optimization works correctly.
+          }
+        }
+        if (value == NULL) {
+          // A field's initializing value was not recorded. Add NULL.
+          if (add_edge(ptn, null_obj)) {
+            // New edge was added
+            new_edges++;
+            add_field_uses_to_worklist(ptn->as_Field());
+          }
+        }
+      }
+    }
+  }
+  return new_edges;
 }
 
-void ConnectionGraph::add_deferred_edge(uint from_i, uint to_i) {
-  PointsToNode *f = ptnode_adr(from_i);
-  PointsToNode *t = ptnode_adr(to_i);
+// Adjust scalar_replaceable state after Connection Graph is built.
+void ConnectionGraph::adjust_scalar_replaceable_state(JavaObjectNode* jobj) {
+  // Search for non-escaping objects which are not scalar replaceable
+  // and mark them to propagate the state to referenced objects.
+
+  // 1. An object is not scalar replaceable if the field into which it is
+  // stored has unknown offset (stored into unknown element of an array).
+  //
+  for (UseIterator i(jobj); i.has_next(); i.next()) {
+    PointsToNode* use = i.get();
+    assert(!use->is_Arraycopy(), "sanity");
+    if (use->is_Field()) {
+      FieldNode* field = use->as_Field();
+      assert(field->is_oop() && field->scalar_replaceable() &&
+             field->fields_escape_state() == PointsToNode::NoEscape, "sanity");
+      if (field->offset() == Type::OffsetBot) {
+        jobj->set_scalar_replaceable(false);
+        return;
+      }
+    }
+    assert(use->is_Field() || use->is_LocalVar(), "sanity");
+    // 2. An object is not scalar replaceable if it is merged with other objects.
+    for (EdgeIterator j(use); j.has_next(); j.next()) {
+      PointsToNode* ptn = j.get();
+      if (ptn->is_JavaObject() && ptn != jobj) {
+        // Mark all objects.
+        jobj->set_scalar_replaceable(false);
+        ptn->set_scalar_replaceable(false);
+      }
+    }
+    if (!jobj->scalar_replaceable()) {
+      return;
+    }
+  }
+
+  for (EdgeIterator j(jobj); j.has_next(); j.next()) {
+    // Non-escaping object node should point only to field nodes.
+    FieldNode* field = j.get()->as_Field();
+    int offset = field->as_Field()->offset();
+
+    // 3. An object is not scalar replaceable if it has a field with unknown
+    // offset (an array's element is accessed in a loop).
+    if (offset == Type::OffsetBot) {
+      jobj->set_scalar_replaceable(false);
+      return;
+    }
+    // 4. Currently an object is not scalar replaceable if a LoadStore node
+    // accesses its field since the field's value is unknown after it.
+    //
+    Node* n = field->ideal_node();
+    for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+      if (n->fast_out(i)->is_LoadStore()) {
+        jobj->set_scalar_replaceable(false);
+        return;
+      }
+    }
+
+    // 5. Or the address may point to more than one object. This may produce
+    // a false positive result (set not scalar replaceable)
+    // since the flow-insensitive escape analysis can't separate
+    // the case when stores overwrite the field's value from the case
+    // when stores happened on different control branches.
+    //
+    // Note: it will disable scalar replacement in some cases:
+    //
+    //    Point p[] = new Point[1];
+    //    p[0] = new Point(); // Will not be scalar replaced
+    //
+    // but it will save us from incorrect optimizations in cases like:
+    //
+    //    Point p[] = new Point[1];
+    //    if ( x ) p[0] = new Point(); // Will not be scalar replaced
+    //
+    if (field->base_count() > 1) {
+      for (BaseIterator i(field); i.has_next(); i.next()) {
+        PointsToNode* base = i.get();
+        // Don't take into account LocalVar nodes which
+        // may point to only one object which should also be
+        // this field's base by now.
+        if (base->is_JavaObject() && base != jobj) {
+          // Mark all bases.
+          jobj->set_scalar_replaceable(false);
+          base->set_scalar_replaceable(false);
+        }
+      }
+    }
+  }
+}
+
+#ifdef ASSERT
+void ConnectionGraph::verify_connection_graph(
+                         GrowableArray<PointsToNode*>&   ptnodes_worklist,
+                         GrowableArray<JavaObjectNode*>& non_escaped_worklist,
+                         GrowableArray<JavaObjectNode*>& java_objects_worklist,
+                         GrowableArray<Node*>& addp_worklist) {
+  // Verify that graph is complete - no new edges could be added.
+  int java_objects_length = java_objects_worklist.length();
+  int non_escaped_length  = non_escaped_worklist.length();
+  int new_edges = 0;
+  for (int next = 0; next < java_objects_length; ++next) {
+    JavaObjectNode* ptn = java_objects_worklist.at(next);
+    new_edges += add_java_object_edges(ptn, true);
+  }
+  assert(new_edges == 0, "graph was not complete");
+  // Verify that escape state is final.
+  int length = non_escaped_worklist.length();
+  find_non_escaped_objects(ptnodes_worklist, non_escaped_worklist);
+  assert((non_escaped_length == non_escaped_worklist.length()) &&
+         (non_escaped_length == length) &&
+         (_worklist.length() == 0), "escape state was not final");
+
+  // Verify fields information.
+  int addp_length = addp_worklist.length();
+  for (int next = 0; next < addp_length; ++next ) {
+    Node* n = addp_worklist.at(next);
+    FieldNode* field = ptnode_adr(n->_idx)->as_Field();
+    if (field->is_oop()) {
+      // Verify that field has all bases
+      Node* base = get_addp_base(n);
+      PointsToNode* ptn = ptnode_adr(base->_idx);
+      if (ptn->is_JavaObject()) {
+        assert(field->has_base(ptn->as_JavaObject()), "sanity");
+      } else {
+        assert(ptn->is_LocalVar(), "sanity");
+        for (EdgeIterator i(ptn); i.has_next(); i.next()) {
+          PointsToNode* e = i.get();
+          if (e->is_JavaObject()) {
+            assert(field->has_base(e->as_JavaObject()), "sanity");
+          }
+        }
+      }
+      // Verify that all fields have initializing values.
+      if (field->edge_count() == 0) {
+        field->dump();
+        assert(field->edge_count() > 0, "sanity");
+      }
+    }
+  }
+}
+#endif
+
+// Optimize ideal graph.
+void ConnectionGraph::optimize_ideal_graph(GrowableArray<Node*>& ptr_cmp_worklist,
+                                           GrowableArray<Node*>& storestore_worklist) {
+  Compile* C = _compile;
+  PhaseIterGVN* igvn = _igvn;
+  if (EliminateLocks) {
+    // Mark locks before changing ideal graph.
+    int cnt = C->macro_count();
+    for (int i = 0; i < cnt; i++) {
+      Node *n = C->macro_node(i);
+      if (n->is_AbstractLock()) { // Lock and Unlock nodes
+        AbstractLockNode* alock = n->as_AbstractLock();
+        if (!alock->is_non_esc_obj()) {
+          if (not_global_escape(alock->obj_node())) {
+            assert(!alock->is_eliminated() || alock->is_coarsened(), "sanity");
+            // The lock could have been marked eliminated by the lock
+            // coarsening code during the first IGVN pass before EA. Replace
+            // the coarsened flag to eliminate all associated locks/unlocks.
+            alock->set_non_esc_obj();
+          }
+        }
+      }
+    }
+  }
+
+  if (OptimizePtrCompare) {
+    // Add ConI(#CC_GT) and ConI(#CC_EQ).
+    _pcmp_neq = igvn->makecon(TypeInt::CC_GT);
+    _pcmp_eq = igvn->makecon(TypeInt::CC_EQ);
+    // Optimize objects compare.
+    while (ptr_cmp_worklist.length() != 0) {
+      Node *n = ptr_cmp_worklist.pop();
+      Node *res = optimize_ptr_compare(n);
+      if (res != NULL) {
+#ifndef PRODUCT
+        if (PrintOptimizePtrCompare) {
+          tty->print_cr("++++ Replaced: %d %s(%d,%d) --> %s", n->_idx, (n->Opcode() == Op_CmpP ? "CmpP" : "CmpN"), n->in(1)->_idx, n->in(2)->_idx, (res == _pcmp_eq ? "EQ" : "NotEQ"));
+          if (Verbose) {
+            n->dump(1);
+          }
+        }
+#endif
+        igvn->replace_node(n, res);
+      }
+    }
+    // cleanup
+    if (_pcmp_neq->outcnt() == 0)
+      igvn->hash_delete(_pcmp_neq);
+    if (_pcmp_eq->outcnt()  == 0)
+      igvn->hash_delete(_pcmp_eq);
+  }
+
+  // For MemBarStoreStore nodes added in library_call.cpp, check
+  // escape status of associated AllocateNode and optimize out
+  // MemBarStoreStore node if the allocated object never escapes.
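+  // Illustrative (hypothetical) example: for
+  //   Object o = new SomeClass(); // never escapes
+  // the MemBarStoreStore emitted to publish o's field stores can be
+  // replaced by a plain MemBarCPUOrder, since no other thread can
+  // observe o.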
+  while (storestore_worklist.length() != 0) {
+    Node *n = storestore_worklist.pop();
+    MemBarStoreStoreNode *storestore = n->as_MemBarStoreStore();
+    Node *alloc = storestore->in(MemBarNode::Precedent)->in(0);
+    assert(alloc->is_Allocate(), "storestore should point to AllocateNode");
+    if (not_global_escape(alloc)) {
+      MemBarNode* mb = MemBarNode::make(C, Op_MemBarCPUOrder, Compile::AliasIdxBot);
+      mb->init_req(TypeFunc::Memory, storestore->in(TypeFunc::Memory));
+      mb->init_req(TypeFunc::Control, storestore->in(TypeFunc::Control));
+      igvn->register_new_node_with_optimizer(mb);
+      igvn->replace_node(storestore, mb);
+    }
+  }
+}
+
+// Optimize objects compare.
+Node* ConnectionGraph::optimize_ptr_compare(Node* n) {
+  assert(OptimizePtrCompare, "sanity");
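+  // Illustrative (hypothetical) example:
+  //   Object a = new Object();   // does not escape
+  //   if (a == b) { ... }
+  // If b's points-to set cannot contain a's allocation, the CmpP/CmpN
+  // below is folded to a constant "not equal" result (_pcmp_neq).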
+  PointsToNode* ptn1 = ptnode_adr(n->in(1)->_idx);
+  PointsToNode* ptn2 = ptnode_adr(n->in(2)->_idx);
+  JavaObjectNode* jobj1 = unique_java_object(n->in(1));
+  JavaObjectNode* jobj2 = unique_java_object(n->in(2));
+  assert(ptn1->is_JavaObject() || ptn1->is_LocalVar(), "sanity");
+  assert(ptn2->is_JavaObject() || ptn2->is_LocalVar(), "sanity");
+
+  // Check simple cases first.
+  if (jobj1 != NULL) {
+    if (jobj1->escape_state() == PointsToNode::NoEscape) {
+      if (jobj1 == jobj2) {
+        // Comparing the same non-escaping object.
+        return _pcmp_eq;
+      }
+      Node* obj = jobj1->ideal_node();
+      // Comparing a non-escaping allocation.
+      if ((obj->is_Allocate() || obj->is_CallStaticJava()) &&
+          !ptn2->points_to(jobj1)) {
+        return _pcmp_neq; // This includes nullness check.
+      }
+    }
+  }
+  if (jobj2 != NULL) {
+    if (jobj2->escape_state() == PointsToNode::NoEscape) {
+      Node* obj = jobj2->ideal_node();
+      // Comparing a non-escaping allocation.
+      if ((obj->is_Allocate() || obj->is_CallStaticJava()) &&
+          !ptn1->points_to(jobj2)) {
+        return _pcmp_neq; // This includes nullness check.
+      }
+    }
+  }
+  if (jobj1 != NULL && jobj1 != phantom_obj &&
+      jobj2 != NULL && jobj2 != phantom_obj &&
+      jobj1->ideal_node()->is_Con() &&
+      jobj2->ideal_node()->is_Con()) {
+    // Comparing Klass or String constants. Need to be careful with
+    // compressed pointers: compare the types of ConN and ConP instead of
+    // the nodes themselves.
+    const Type* t1 = jobj1->ideal_node()->bottom_type()->make_ptr();
+    const Type* t2 = jobj2->ideal_node()->bottom_type()->make_ptr();
+    assert(t1 != NULL && t2 != NULL, "sanity");
+    if (t1->make_ptr() == t2->make_ptr()) {
+      return _pcmp_eq;
+    } else {
+      return _pcmp_neq;
+    }
+  }
+  if (ptn1->meet(ptn2)) {
+    return NULL; // Sets are not disjoint
+  }
 
-  assert(f->node_type() != PointsToNode::UnknownType && t->node_type() != PointsToNode::UnknownType, "node types must be set");
-  assert(f->node_type() == PointsToNode::LocalVar || f->node_type() == PointsToNode::Field, "invalid source of Deferred edge");
-  assert(t->node_type() == PointsToNode::LocalVar || t->node_type() == PointsToNode::Field, "invalid destination of Deferred edge");
-  // don't add a self-referential edge, this can occur during removal of
-  // deferred edges
-  if (from_i != to_i)
-    add_edge(f, to_i, PointsToNode::DeferredEdge);
+  // Sets are disjoint.
+  bool set1_has_unknown_ptr = ptn1->points_to(phantom_obj);
+  bool set2_has_unknown_ptr = ptn2->points_to(phantom_obj);
+  bool set1_has_null_ptr    = ptn1->points_to(null_obj);
+  bool set2_has_null_ptr    = ptn2->points_to(null_obj);
+  if ((set1_has_unknown_ptr && set2_has_null_ptr) ||
+      (set2_has_unknown_ptr && set1_has_null_ptr)) {
+    // Check nullness of unknown object.
+    return NULL;
+  }
+
+  // Disjointness by itself is not sufficient since
+  // alias analysis is not complete for escaped objects.
+  // Disjoint sets are definitely unrelated only when
+  // at least one set contains only non-escaping allocations.
+  if (!set1_has_unknown_ptr && !set1_has_null_ptr) {
+    if (ptn1->non_escaping_allocation()) {
+      return _pcmp_neq;
+    }
+  }
+  if (!set2_has_unknown_ptr && !set2_has_null_ptr) {
+    if (ptn2->non_escaping_allocation()) {
+      return _pcmp_neq;
+    }
+  }
+  return NULL;
+}
+
+// Connection Graph construction functions.
+
+void ConnectionGraph::add_local_var(Node *n, PointsToNode::EscapeState es) {
+  PointsToNode* ptadr = _nodes.at(n->_idx);
+  if (ptadr != NULL) {
+    assert(ptadr->is_LocalVar() && ptadr->ideal_node() == n, "sanity");
+    return;
+  }
+  Compile* C = _compile;
+  ptadr = new (C->comp_arena()) LocalVarNode(C, n, es);
+  _nodes.at_put(n->_idx, ptadr);
+}
+
+void ConnectionGraph::add_java_object(Node *n, PointsToNode::EscapeState es) {
+  PointsToNode* ptadr = _nodes.at(n->_idx);
+  if (ptadr != NULL) {
+    assert(ptadr->is_JavaObject() && ptadr->ideal_node() == n, "sanity");
+    return;
+  }
+  Compile* C = _compile;
+  ptadr = new (C->comp_arena()) JavaObjectNode(C, n, es);
+  _nodes.at_put(n->_idx, ptadr);
+}
+
+void ConnectionGraph::add_field(Node *n, PointsToNode::EscapeState es, int offset) {
+  PointsToNode* ptadr = _nodes.at(n->_idx);
+  if (ptadr != NULL) {
+    assert(ptadr->is_Field() && ptadr->ideal_node() == n, "sanity");
+    return;
+  }
+  bool unsafe = false;
+  bool is_oop = is_oop_field(n, offset, &unsafe);
+  if (unsafe) {
+    es = PointsToNode::GlobalEscape;
+  }
+  Compile* C = _compile;
+  FieldNode* field = new (C->comp_arena()) FieldNode(C, n, es, offset, is_oop);
+  _nodes.at_put(n->_idx, field);
+}
+
+void ConnectionGraph::add_arraycopy(Node *n, PointsToNode::EscapeState es,
+                                    PointsToNode* src, PointsToNode* dst) {
+  assert(!src->is_Field() && !dst->is_Field(), "only for JavaObject and LocalVar");
+  assert((src != null_obj) && (dst != null_obj), "not for ConP NULL");
+  PointsToNode* ptadr = _nodes.at(n->_idx);
+  if (ptadr != NULL) {
+    assert(ptadr->is_Arraycopy() && ptadr->ideal_node() == n, "sanity");
+    return;
+  }
+  Compile* C = _compile;
+  ptadr = new (C->comp_arena()) ArraycopyNode(C, n, es);
+  _nodes.at_put(n->_idx, ptadr);
+  // Add edge from arraycopy node to source object.
+  (void)add_edge(ptadr, src);
+  src->set_arraycopy_src();
+  // Add edge from destination object to arraycopy node.
+  (void)add_edge(dst, ptadr);
+  dst->set_arraycopy_dst();
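+  // Illustrative (hypothetical) example: for
+  //   System.arraycopy(src, 0, dst, 0, len);
+  // this records  dst -> Arraycopy -> src, so loads from dst's fields
+  // conservatively include the values stored in src's fields.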
 }
 
+bool ConnectionGraph::is_oop_field(Node* n, int offset, bool* unsafe) {
+  const Type* adr_type = n->as_AddP()->bottom_type();
+  BasicType bt = T_INT;
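+  // The checks below refine bt from this pessimistic non-oop default to
+  // T_OBJECT/T_NARROWOOP where the access is known (or, for unsafe and
+  // raw accesses, assumed) to load or store an oop.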
+  if (offset == Type::OffsetBot) {
+    // Check only oop fields.
+    if (!adr_type->isa_aryptr() ||
+        (adr_type->isa_aryptr()->klass() == NULL) ||
+         adr_type->isa_aryptr()->klass()->is_obj_array_klass()) {
+      // OffsetBot is used to reference array's element. Ignore first AddP.
+      if (find_second_addp(n, n->in(AddPNode::Base)) == NULL) {
+        bt = T_OBJECT;
+      }
+    }
+  } else if (offset != oopDesc::klass_offset_in_bytes()) {
+    if (adr_type->isa_instptr()) {
+      ciField* field = _compile->alias_type(adr_type->isa_instptr())->field();
+      if (field != NULL) {
+        bt = field->layout_type();
+      } else {
+        // Check for unsafe oop field access
+        for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+          int opcode = n->fast_out(i)->Opcode();
+          if (opcode == Op_StoreP || opcode == Op_LoadP ||
+              opcode == Op_StoreN || opcode == Op_LoadN) {
+            bt = T_OBJECT;
+            (*unsafe) = true;
+            break;
+          }
+        }
+      }
+    } else if (adr_type->isa_aryptr()) {
+      if (offset == arrayOopDesc::length_offset_in_bytes()) {
+        // Ignore array length load.
+      } else if (find_second_addp(n, n->in(AddPNode::Base)) != NULL) {
+        // Ignore first AddP.
+      } else {
+        const Type* elemtype = adr_type->isa_aryptr()->elem();
+        bt = elemtype->array_element_basic_type();
+      }
+    } else if (adr_type->isa_rawptr() || adr_type->isa_klassptr()) {
+      // Allocation initialization, ThreadLocal field access, unsafe access
+      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+        int opcode = n->fast_out(i)->Opcode();
+        if (opcode == Op_StoreP || opcode == Op_LoadP ||
+            opcode == Op_StoreN || opcode == Op_LoadN) {
+          bt = T_OBJECT;
+          break;
+        }
+      }
+    }
+  }
+  return (bt == T_OBJECT || bt == T_NARROWOOP || bt == T_ARRAY);
+}
+
+// Returns the unique java object pointed to, or NULL.
+JavaObjectNode* ConnectionGraph::unique_java_object(Node *n) {
+  assert(!_collecting, "should not call during graph construction");
+  // If the node was created after the escape computation we can't answer.
+  uint idx = n->_idx;
+  if (idx >= nodes_size()) {
+    return NULL;
+  }
+  PointsToNode* ptn = ptnode_adr(idx);
+  if (ptn->is_JavaObject()) {
+    return ptn->as_JavaObject();
+  }
+  assert(ptn->is_LocalVar(), "sanity");
+  // Check all java objects it points to.
+  JavaObjectNode* jobj = NULL;
+  for (EdgeIterator i(ptn); i.has_next(); i.next()) {
+    PointsToNode* e = i.get();
+    if (e->is_JavaObject()) {
+      if (jobj == NULL) {
+        jobj = e->as_JavaObject();
+      } else if (jobj != e) {
+        return NULL;
+      }
+    }
+  }
+  return jobj;
+}
+
+// Return true if this node points only to non-escaping allocations.
+bool PointsToNode::non_escaping_allocation() {
+  if (is_JavaObject()) {
+    Node* n = ideal_node();
+    if (n->is_Allocate() || n->is_CallStaticJava()) {
+      return (escape_state() == PointsToNode::NoEscape);
+    } else {
+      return false;
+    }
+  }
+  assert(is_LocalVar(), "sanity");
+  // Check all java objects it points to.
+  for (EdgeIterator i(this); i.has_next(); i.next()) {
+    PointsToNode* e = i.get();
+    if (e->is_JavaObject()) {
+      Node* n = e->ideal_node();
+      if ((e->escape_state() != PointsToNode::NoEscape) ||
+          !(n->is_Allocate() || n->is_CallStaticJava())) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+// Return true if we know the node does not escape globally.
+bool ConnectionGraph::not_global_escape(Node *n) {
+  assert(!_collecting, "should not call during graph construction");
+  // If the node was created after the escape computation we can't answer.
+  uint idx = n->_idx;
+  if (idx >= nodes_size()) {
+    return false;
+  }
+  PointsToNode* ptn = ptnode_adr(idx);
+  PointsToNode::EscapeState es = ptn->escape_state();
+  // If we have already computed a value, return it.
+  if (es >= PointsToNode::GlobalEscape)
+    return false;
+  if (ptn->is_JavaObject()) {
+    return true; // (es < PointsToNode::GlobalEscape);
+  }
+  assert(ptn->is_LocalVar(), "sanity");
+  // Check all java objects it points to.
+  for (EdgeIterator i(ptn); i.has_next(); i.next()) {
+    if (i.get()->escape_state() >= PointsToNode::GlobalEscape)
+      return false;
+  }
+  return true;
+}
+
+
+// Helper functions
+
+// Return true if this node is the specified node or points to it.
+bool PointsToNode::points_to(JavaObjectNode* ptn) const {
+  if (is_JavaObject()) {
+    return (this == ptn);
+  }
+  assert(is_LocalVar(), "sanity");
+  for (EdgeIterator i(this); i.has_next(); i.next()) {
+    if (i.get() == ptn)
+      return true;
+  }
+  return false;
+}
+
+// Return true if the points-to sets of the two nodes intersect.
+bool PointsToNode::meet(PointsToNode* ptn) {
+  if (this == ptn) {
+    return true;
+  } else if (ptn->is_JavaObject()) {
+    return this->points_to(ptn->as_JavaObject());
+  } else if (this->is_JavaObject()) {
+    return ptn->points_to(this->as_JavaObject());
+  }
+  assert(this->is_LocalVar() && ptn->is_LocalVar(), "sanity");
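+  // For two LocalVars the scan below checks for any common points-to
+  // edge, i.e. a non-empty intersection of the two points-to sets.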
+  int ptn_count = ptn->edge_count();
+  for (EdgeIterator i(this); i.has_next(); i.next()) {
+    PointsToNode* this_e = i.get();
+    for (int j = 0; j < ptn_count; j++) {
+      if (this_e == ptn->edge(j))
+        return true;
+    }
+  }
+  return false;
+}
+
+#ifdef ASSERT
+// Return true if this field has the given java object as one of its bases.
+bool FieldNode::has_base(JavaObjectNode* jobj) const {
+  for (BaseIterator i(this); i.has_next(); i.next()) {
+    if (i.get() == jobj)
+      return true;
+  }
+  return false;
+}
+#endif
+
 int ConnectionGraph::address_offset(Node* adr, PhaseTransform *phase) {
   const Type *adr_type = phase->type(adr);
   if (adr->is_AddP() && adr_type->isa_oopptr() == NULL &&
@@ -171,286 +1996,7 @@
   return t_ptr->offset();
 }
 
-void ConnectionGraph::add_field_edge(uint from_i, uint to_i, int offset) {
-  // Don't add fields to NULL pointer.
-  if (is_null_ptr(from_i))
-    return;
-  PointsToNode *f = ptnode_adr(from_i);
-  PointsToNode *t = ptnode_adr(to_i);
-
-  assert(f->node_type() != PointsToNode::UnknownType && t->node_type() != PointsToNode::UnknownType, "node types must be set");
-  assert(f->node_type() == PointsToNode::JavaObject, "invalid destination of Field edge");
-  assert(t->node_type() == PointsToNode::Field, "invalid destination of Field edge");
-  assert (t->offset() == -1 || t->offset() == offset, "conflicting field offsets");
-  t->set_offset(offset);
-
-  add_edge(f, to_i, PointsToNode::FieldEdge);
-}
-
-void ConnectionGraph::set_escape_state(uint ni, PointsToNode::EscapeState es) {
-  // Don't change non-escaping state of NULL pointer.
-  if (is_null_ptr(ni))
-    return;
-  PointsToNode *npt = ptnode_adr(ni);
-  PointsToNode::EscapeState old_es = npt->escape_state();
-  if (es > old_es)
-    npt->set_escape_state(es);
-}
-
-void ConnectionGraph::add_node(Node *n, PointsToNode::NodeType nt,
-                               PointsToNode::EscapeState es, bool done) {
-  PointsToNode* ptadr = ptnode_adr(n->_idx);
-  ptadr->_node = n;
-  ptadr->set_node_type(nt);
-
-  // inline set_escape_state(idx, es);
-  PointsToNode::EscapeState old_es = ptadr->escape_state();
-  if (es > old_es)
-    ptadr->set_escape_state(es);
-
-  if (done)
-    _processed.set(n->_idx);
-}
-
-PointsToNode::EscapeState ConnectionGraph::escape_state(Node *n) {
-  uint idx = n->_idx;
-  PointsToNode::EscapeState es;
-
-  // If we are still collecting or there were no non-escaping allocations
-  // we don't know the answer yet
-  if (_collecting)
-    return PointsToNode::UnknownEscape;
-
-  // if the node was created after the escape computation, return
-  // UnknownEscape
-  if (idx >= nodes_size())
-    return PointsToNode::UnknownEscape;
-
-  es = ptnode_adr(idx)->escape_state();
-
-  // if we have already computed a value, return it
-  if (es != PointsToNode::UnknownEscape &&
-      ptnode_adr(idx)->node_type() == PointsToNode::JavaObject)
-    return es;
-
-  // PointsTo() calls n->uncast() which can return a new ideal node.
-  if (n->uncast()->_idx >= nodes_size())
-    return PointsToNode::UnknownEscape;
-
-  PointsToNode::EscapeState orig_es = es;
-
-  // compute max escape state of anything this node could point to
-  for(VectorSetI i(PointsTo(n)); i.test() && es != PointsToNode::GlobalEscape; ++i) {
-    uint pt = i.elem;
-    PointsToNode::EscapeState pes = ptnode_adr(pt)->escape_state();
-    if (pes > es)
-      es = pes;
-  }
-  if (orig_es != es) {
-    // cache the computed escape state
-    assert(es > orig_es, "should have computed an escape state");
-    set_escape_state(idx, es);
-  } // orig_es could be PointsToNode::UnknownEscape
-  return es;
-}
-
-VectorSet* ConnectionGraph::PointsTo(Node * n) {
-  pt_ptset.Reset();
-  pt_visited.Reset();
-  pt_worklist.clear();
-
-#ifdef ASSERT
-  Node *orig_n = n;
-#endif
-
-  n = n->uncast();
-  PointsToNode* npt = ptnode_adr(n->_idx);
-
-  // If we have a JavaObject, return just that object
-  if (npt->node_type() == PointsToNode::JavaObject) {
-    pt_ptset.set(n->_idx);
-    return &pt_ptset;
-  }
-#ifdef ASSERT
-  if (npt->_node == NULL) {
-    if (orig_n != n)
-      orig_n->dump();
-    n->dump();
-    assert(npt->_node != NULL, "unregistered node");
-  }
-#endif
-  pt_worklist.push(n->_idx);
-  while(pt_worklist.length() > 0) {
-    int ni = pt_worklist.pop();
-    if (pt_visited.test_set(ni))
-      continue;
-
-    PointsToNode* pn = ptnode_adr(ni);
-    // ensure that all inputs of a Phi have been processed
-    assert(!_collecting || !pn->_node->is_Phi() || _processed.test(ni),"");
-
-    int edges_processed = 0;
-    uint e_cnt = pn->edge_count();
-    for (uint e = 0; e < e_cnt; e++) {
-      uint etgt = pn->edge_target(e);
-      PointsToNode::EdgeType et = pn->edge_type(e);
-      if (et == PointsToNode::PointsToEdge) {
-        pt_ptset.set(etgt);
-        edges_processed++;
-      } else if (et == PointsToNode::DeferredEdge) {
-        pt_worklist.push(etgt);
-        edges_processed++;
-      } else {
-        assert(false,"neither PointsToEdge or DeferredEdge");
-      }
-    }
-    if (edges_processed == 0) {
-      // no deferred or pointsto edges found.  Assume the value was set
-      // outside this method.  Add the phantom object to the pointsto set.
-      pt_ptset.set(_phantom_object);
-    }
-  }
-  return &pt_ptset;
-}
-
-void ConnectionGraph::remove_deferred(uint ni, GrowableArray<uint>* deferred_edges, VectorSet* visited) {
-  // This method is most expensive during ConnectionGraph construction.
-  // Reuse vectorSet and an additional growable array for deferred edges.
-  deferred_edges->clear();
-  visited->Reset();
-
-  visited->set(ni);
-  PointsToNode *ptn = ptnode_adr(ni);
-  assert(ptn->node_type() == PointsToNode::LocalVar ||
-         ptn->node_type() == PointsToNode::Field, "sanity");
-  assert(ptn->edge_count() != 0, "should have at least phantom_object");
-
-  // Mark current edges as visited and move deferred edges to separate array.
-  for (uint i = 0; i < ptn->edge_count(); ) {
-    uint t = ptn->edge_target(i);
-#ifdef ASSERT
-    assert(!visited->test_set(t), "expecting no duplications");
-#else
-    visited->set(t);
-#endif
-    if (ptn->edge_type(i) == PointsToNode::DeferredEdge) {
-      ptn->remove_edge(t, PointsToNode::DeferredEdge);
-      deferred_edges->append(t);
-    } else {
-      i++;
-    }
-  }
-  for (int next = 0; next < deferred_edges->length(); ++next) {
-    uint t = deferred_edges->at(next);
-    PointsToNode *ptt = ptnode_adr(t);
-    uint e_cnt = ptt->edge_count();
-    assert(e_cnt != 0, "should have at least phantom_object");
-    for (uint e = 0; e < e_cnt; e++) {
-      uint etgt = ptt->edge_target(e);
-      if (visited->test_set(etgt))
-        continue;
-
-      PointsToNode::EdgeType et = ptt->edge_type(e);
-      if (et == PointsToNode::PointsToEdge) {
-        add_pointsto_edge(ni, etgt);
-      } else if (et == PointsToNode::DeferredEdge) {
-        deferred_edges->append(etgt);
-      } else {
-        assert(false,"invalid connection graph");
-      }
-    }
-  }
-  if (ptn->edge_count() == 0) {
-    // No pointsto edges found after deferred edges are removed.
-    // For example, in the next case where call is replaced
-    // with uncommon trap and as result array's load references
-    // itself through deferred edges:
-    //
-    // A a = b[i];
-    // if (c!=null) a = c.foo();
-    // b[i] = a;
-    //
-    // Assume the value was set outside this method and
-    // add edge to phantom object.
-    add_pointsto_edge(ni, _phantom_object);
-  }
-}
-
-
-//  Add an edge to node given by "to_i" from any field of adr_i whose offset
-//  matches "offset"  A deferred edge is added if to_i is a LocalVar, and
-//  a pointsto edge is added if it is a JavaObject
-
-void ConnectionGraph::add_edge_from_fields(uint adr_i, uint to_i, int offs) {
-  // No fields for NULL pointer.
-  if (is_null_ptr(adr_i)) {
-    return;
-  }
-  PointsToNode* an = ptnode_adr(adr_i);
-  PointsToNode* to = ptnode_adr(to_i);
-  bool deferred = (to->node_type() == PointsToNode::LocalVar);
-  bool escaped  = (to_i == _phantom_object) && (offs == Type::OffsetTop);
-  if (escaped) {
-    // Values in fields escaped during call.
-    assert(an->escape_state() >= PointsToNode::ArgEscape, "sanity");
-    offs = Type::OffsetBot;
-  }
-  for (uint fe = 0; fe < an->edge_count(); fe++) {
-    assert(an->edge_type(fe) == PointsToNode::FieldEdge, "expecting a field edge");
-    int fi = an->edge_target(fe);
-    if (escaped) {
-      set_escape_state(fi, PointsToNode::GlobalEscape);
-    }
-    PointsToNode* pf = ptnode_adr(fi);
-    int po = pf->offset();
-    if (po == offs || po == Type::OffsetBot || offs == Type::OffsetBot) {
-      if (deferred)
-        add_deferred_edge(fi, to_i);
-      else
-        add_pointsto_edge(fi, to_i);
-    }
-  }
-}
-
-// Add a deferred  edge from node given by "from_i" to any field of adr_i
-// whose offset matches "offset".
-void ConnectionGraph::add_deferred_edge_to_fields(uint from_i, uint adr_i, int offs) {
-  // No fields for NULL pointer.
-  if (is_null_ptr(adr_i)) {
-    return;
-  }
-  if (adr_i == _phantom_object) {
-    // Add only one edge for unknown object.
-    add_pointsto_edge(from_i, _phantom_object);
-    return;
-  }
-  PointsToNode* an = ptnode_adr(adr_i);
-  bool is_alloc = an->_node->is_Allocate();
-  for (uint fe = 0; fe < an->edge_count(); fe++) {
-    assert(an->edge_type(fe) == PointsToNode::FieldEdge, "expecting a field edge");
-    int fi = an->edge_target(fe);
-    PointsToNode* pf = ptnode_adr(fi);
-    int offset = pf->offset();
-    if (!is_alloc) {
-      // Assume the field was set outside this method if it is not Allocation
-      add_pointsto_edge(fi, _phantom_object);
-    }
-    if (offset == offs || offset == Type::OffsetBot || offs == Type::OffsetBot) {
-      add_deferred_edge(from_i, fi);
-    }
-  }
-  // Some fields references (AddP) may still be missing
-  // until Connection Graph construction is complete.
-  // For example, loads from RAW pointers with offset 0
-  // which don't have AddP.
-  // A reference to phantom_object will be added if
-  // a field reference is still missing after completing
-  // Connection Graph (see remove_deferred()).
-}
-
-// Helper functions
-
-static Node* get_addp_base(Node *addp) {
+Node* ConnectionGraph::get_addp_base(Node *addp) {
   assert(addp->is_AddP(), "must be AddP");
   //
   // AddP cases for Base and Address inputs:
@@ -513,30 +2059,30 @@
   //       | |
   //       AddP  ( base == address )
   //
-  Node *base = addp->in(AddPNode::Base)->uncast();
-  if (base->is_top()) { // The AddP case #3 and #6.
-    base = addp->in(AddPNode::Address)->uncast();
+  Node *base = addp->in(AddPNode::Base);
+  if (base->uncast()->is_top()) { // The AddP case #3 and #6.
+    base = addp->in(AddPNode::Address);
     while (base->is_AddP()) {
       // Case #6 (unsafe access) may have several chained AddP nodes.
-      assert(base->in(AddPNode::Base)->is_top(), "expected unsafe access address only");
-      base = base->in(AddPNode::Address)->uncast();
+      assert(base->in(AddPNode::Base)->uncast()->is_top(), "expected unsafe access address only");
+      base = base->in(AddPNode::Address);
     }
-    assert(base->Opcode() == Op_ConP || base->Opcode() == Op_ThreadLocal ||
-           base->Opcode() == Op_CastX2P || base->is_DecodeN() ||
-           (base->is_Mem() && base->bottom_type() == TypeRawPtr::NOTNULL) ||
-           (base->is_Proj() && base->in(0)->is_Allocate()), "sanity");
+    Node* uncast_base = base->uncast();
+    int opcode = uncast_base->Opcode();
+    assert(opcode == Op_ConP || opcode == Op_ThreadLocal ||
+           opcode == Op_CastX2P || uncast_base->is_DecodeN() ||
+           (uncast_base->is_Mem() && uncast_base->bottom_type() == TypeRawPtr::NOTNULL) ||
+           (uncast_base->is_Proj() && uncast_base->in(0)->is_Allocate()), "sanity");
   }
   return base;
 }
 
-static Node* find_second_addp(Node* addp, Node* n) {
+Node* ConnectionGraph::find_second_addp(Node* addp, Node* n) {
   assert(addp->is_AddP() && addp->outcnt() > 0, "Don't process dead nodes");
-
   Node* addp2 = addp->raw_out(0);
   if (addp->outcnt() == 1 && addp2->is_AddP() &&
       addp2->in(AddPNode::Base) == n &&
       addp2->in(AddPNode::Address) == addp) {
-
     assert(addp->in(AddPNode::Base) == n, "expecting the same base");
     //
     // Find array's offset to push it on worklist first and
@@ -575,7 +2121,8 @@
 // Adjust the type and inputs of an AddP which computes the
 // address of a field of an instance
 //
-bool ConnectionGraph::split_AddP(Node *addp, Node *base,  PhaseGVN  *igvn) {
+bool ConnectionGraph::split_AddP(Node *addp, Node *base) {
+  PhaseGVN* igvn = _igvn;
   const TypeOopPtr *base_t = igvn->type(base)->isa_oopptr();
   assert(base_t != NULL && base_t->is_known_instance(), "expecting instance oopptr");
   const TypeOopPtr *t = igvn->type(addp)->isa_oopptr();
@@ -612,7 +2159,6 @@
       !base_t->klass()->is_subtype_of(t->klass())) {
      return false; // bail out
   }
-
   const TypeOopPtr *tinst = base_t->add_offset(t->offset())->is_oopptr();
   // Do NOT remove the next line: ensure a new alias index is allocated
   // for the instance type. Note: C++ will not remove it since the call
@@ -620,9 +2166,7 @@
   int alias_idx = _compile->get_alias_index(tinst);
   igvn->set_type(addp, tinst);
   // record the allocation in the node map
-  assert(ptnode_adr(addp->_idx)->_node != NULL, "should be registered");
-  set_map(addp->_idx, get_map(base->_idx));
-
+  set_map(addp, get_map(base->_idx));
   // Set addp's Base and Address to 'base'.
   Node *abase = addp->in(AddPNode::Base);
   Node *adr   = addp->in(AddPNode::Address);
@@ -657,8 +2201,9 @@
 // created phi or an existing phi.  Sets create_new to indicate whether a new
 // phi was created.  Cache the last newly created phi in the node map.
 //
-PhiNode *ConnectionGraph::create_split_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *>  &orig_phi_worklist, PhaseGVN  *igvn, bool &new_created) {
+PhiNode *ConnectionGraph::create_split_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *>  &orig_phi_worklist, bool &new_created) {
   Compile *C = _compile;
+  PhaseGVN* igvn = _igvn;
   new_created = false;
   int phi_alias_idx = C->get_alias_index(orig_phi->adr_type());
   // nothing to do if orig_phi is bottom memory or matches alias_idx
@@ -698,12 +2243,7 @@
   C->copy_node_notes_to(result, orig_phi);
   igvn->set_type(result, result->bottom_type());
   record_for_optimizer(result);
-
-  debug_only(Node* pn = ptnode_adr(orig_phi->_idx)->_node;)
-  assert(pn == NULL || pn == orig_phi, "wrong node");
-  set_map(orig_phi->_idx, result);
-  ptnode_adr(orig_phi->_idx)->_node = orig_phi;
-
+  set_map(orig_phi, result);
   new_created = true;
   return result;
 }
@@ -712,27 +2252,25 @@
 // Return a new version of Memory Phi "orig_phi" with the inputs having the
 // specified alias index.
 //
-PhiNode *ConnectionGraph::split_memory_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *>  &orig_phi_worklist, PhaseGVN  *igvn) {
-
+PhiNode *ConnectionGraph::split_memory_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *>  &orig_phi_worklist) {
   assert(alias_idx != Compile::AliasIdxBot, "can't split out bottom memory");
   Compile *C = _compile;
+  PhaseGVN* igvn = _igvn;
   bool new_phi_created;
-  PhiNode *result = create_split_phi(orig_phi, alias_idx, orig_phi_worklist, igvn, new_phi_created);
+  PhiNode *result = create_split_phi(orig_phi, alias_idx, orig_phi_worklist, new_phi_created);
   if (!new_phi_created) {
     return result;
   }
-
   GrowableArray<PhiNode *>  phi_list;
   GrowableArray<uint>  cur_input;
-
   PhiNode *phi = orig_phi;
   uint idx = 1;
   bool finished = false;
   while(!finished) {
     while (idx < phi->req()) {
-      Node *mem = find_inst_mem(phi->in(idx), alias_idx, orig_phi_worklist, igvn);
+      Node *mem = find_inst_mem(phi->in(idx), alias_idx, orig_phi_worklist);
       if (mem != NULL && mem->is_Phi()) {
-        PhiNode *newphi = create_split_phi(mem->as_Phi(), alias_idx, orig_phi_worklist, igvn, new_phi_created);
+        PhiNode *newphi = create_split_phi(mem->as_Phi(), alias_idx, orig_phi_worklist, new_phi_created);
         if (new_phi_created) {
           // found an phi for which we created a new split, push current one on worklist and begin
           // processing new one
@@ -775,19 +2313,18 @@
   return result;
 }
 
-
 //
 // The next methods are derived from methods in MemNode.
 //
-static Node *step_through_mergemem(MergeMemNode *mmem, int alias_idx, const TypeOopPtr *toop) {
+Node* ConnectionGraph::step_through_mergemem(MergeMemNode *mmem, int alias_idx, const TypeOopPtr *toop) {
   Node *mem = mmem;
   // TypeOopPtr::NOTNULL+any is an OOP with unknown offset - generally
   // means an array I have not precisely typed yet.  Do not do any
   // alias stuff with it any time soon.
-  if( toop->base() != Type::AnyPtr &&
+  if (toop->base() != Type::AnyPtr &&
       !(toop->klass() != NULL &&
         toop->klass()->is_java_lang_Object() &&
-        toop->offset() == Type::OffsetBot) ) {
+        toop->offset() == Type::OffsetBot)) {
     mem = mmem->memory_at(alias_idx);
     // Update input if it is progress over what we have now
   }
@@ -797,9 +2334,9 @@
 //
 // Move memory users to their memory slices.
 //
-void ConnectionGraph::move_inst_mem(Node* n, GrowableArray<PhiNode *>  &orig_phis, PhaseGVN *igvn) {
+void ConnectionGraph::move_inst_mem(Node* n, GrowableArray<PhiNode *>  &orig_phis) {
   Compile* C = _compile;
-
+  PhaseGVN* igvn = _igvn;
   const TypePtr* tp = igvn->type(n->in(MemNode::Address))->isa_ptr();
   assert(tp != NULL, "ptr type");
   int alias_idx = C->get_alias_index(tp);
@@ -816,7 +2353,7 @@
       }
       // Replace previous general reference to mem node.
       uint orig_uniq = C->unique();
-      Node* m = find_inst_mem(n, general_idx, orig_phis, igvn);
+      Node* m = find_inst_mem(n, general_idx, orig_phis);
       assert(orig_uniq == C->unique(), "no new nodes");
       mmem->set_memory_at(general_idx, m);
       --imax;
@@ -836,7 +2373,7 @@
       }
       // Move to general memory slice.
       uint orig_uniq = C->unique();
-      Node* m = find_inst_mem(n, general_idx, orig_phis, igvn);
+      Node* m = find_inst_mem(n, general_idx, orig_phis);
       assert(orig_uniq == C->unique(), "no new nodes");
       igvn->hash_delete(use);
       imax -= use->replace_edge(n, m);
@@ -873,10 +2410,11 @@
 // Search memory chain of "mem" to find a MemNode whose address
 // is the specified alias index.
 //
-Node* ConnectionGraph::find_inst_mem(Node *orig_mem, int alias_idx, GrowableArray<PhiNode *>  &orig_phis, PhaseGVN *phase) {
+Node* ConnectionGraph::find_inst_mem(Node *orig_mem, int alias_idx, GrowableArray<PhiNode *>  &orig_phis) {
   if (orig_mem == NULL)
     return orig_mem;
-  Compile* C = phase->C;
+  Compile* C = _compile;
+  PhaseGVN* igvn = _igvn;
   const TypeOopPtr *toop = C->get_adr_type(alias_idx)->isa_oopptr();
   bool is_instance = (toop != NULL) && toop->is_known_instance();
   Node *start_mem = C->start()->proj_out(TypeFunc::Memory);
@@ -887,7 +2425,7 @@
     if (result == start_mem)
       break;  // hit one of our sentinels
     if (result->is_Mem()) {
-      const Type *at = phase->type(result->in(MemNode::Address));
+      const Type *at = igvn->type(result->in(MemNode::Address));
       if (at == Type::TOP)
         break; // Dead
       assert (at->isa_ptr() != NULL, "pointer type required.");
@@ -909,7 +2447,7 @@
         break;  // hit one of our sentinels
       } else if (proj_in->is_Call()) {
         CallNode *call = proj_in->as_Call();
-        if (!call->may_modify(toop, phase)) {
+        if (!call->may_modify(toop, igvn)) {
           result = call->in(TypeFunc::Memory);
         }
       } else if (proj_in->is_Initialize()) {
@@ -928,7 +2466,7 @@
       if (result == mmem->base_memory()) {
         // Didn't find instance memory, search through general slice recursively.
         result = mmem->memory_at(C->get_general_index(alias_idx));
-        result = find_inst_mem(result, alias_idx, orig_phis, phase);
+        result = find_inst_mem(result, alias_idx, orig_phis);
         if (C->failing()) {
           return NULL;
         }
@@ -936,7 +2474,7 @@
       }
     } else if (result->is_Phi() &&
                C->get_alias_index(result->as_Phi()->adr_type()) != alias_idx) {
-      Node *un = result->as_Phi()->unique_input(phase);
+      Node *un = result->as_Phi()->unique_input(igvn);
       if (un != NULL) {
         orig_phis.append_if_missing(result->as_Phi());
         result = un;
@@ -944,7 +2482,7 @@
         break;
       }
     } else if (result->is_ClearArray()) {
-      if (!ClearArrayNode::step_through(&result, (uint)toop->instance_id(), phase)) {
+      if (!ClearArrayNode::step_through(&result, (uint)toop->instance_id(), igvn)) {
         // Can not bypass initialization of the instance
         // we are looking for.
         break;
@@ -952,7 +2490,7 @@
       // Otherwise skip it (the call updated 'result' value).
     } else if (result->Opcode() == Op_SCMemProj) {
       assert(result->in(0)->is_LoadStore(), "sanity");
-      const Type *at = phase->type(result->in(0)->in(MemNode::Address));
+      const Type *at = igvn->type(result->in(0)->in(MemNode::Address));
       if (at != Type::TOP) {
         assert (at->isa_ptr() != NULL, "pointer type required.");
         int idx = C->get_alias_index(at->is_ptr());
@@ -972,7 +2510,7 @@
       orig_phis.append_if_missing(mphi);
     } else if (C->get_alias_index(t) != alias_idx) {
       // Create a new Phi with the specified alias index type.
-      result = split_memory_phi(mphi, alias_idx, orig_phis, phase);
+      result = split_memory_phi(mphi, alias_idx, orig_phis);
     }
   }
   // the result is either MemNode, PhiNode, InitializeNode.
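Conceptually, find_inst_mem walks memory edges backward, skipping every node that cannot affect the requested alias index, until it reaches a producer on that slice or the start of the graph. A reduced sketch of that walk on an invented chain of stores (MemNode here is a toy struct, not the C2 class; the Phi, Call, and ClearArray cases above are elided):

    #include <cstdio>

    // Toy memory node: each store carries the alias index it writes to and a
    // link to the previous memory state; alias_idx < 0 marks the start node.
    struct MemNode {
      int alias_idx;
      MemNode* prev;
    };

    // Walk the chain backwards until we hit a store on 'alias_idx' or the start
    // node: stores to other slices cannot affect this slice's memory state.
    MemNode* find_inst_mem(MemNode* mem, int alias_idx) {
      while (mem->alias_idx >= 0 && mem->alias_idx != alias_idx) {
        mem = mem->prev;               // bypass stores to unrelated slices
      }
      return mem;                      // matching store, or the start sentinel
    }

    int main() {
      MemNode start{ -1, nullptr };
      MemNode s1{ 1, &start };         // store to slice 1
      MemNode s2{ 2, &s1 };            // store to slice 2
      MemNode s3{ 1, &s2 };            // store to slice 1 (latest memory state)
      std::printf("slice 2 memory is s2: %d\n", find_inst_mem(&s3, 2) == &s2);
      std::printf("slice 3 memory is start: %d\n",
                  find_inst_mem(&s3, 3) == &start);
      return 0;
    }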
@@ -1071,12 +2609,12 @@
 void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist) {
   GrowableArray<Node *>  memnode_worklist;
   GrowableArray<PhiNode *>  orig_phis;
-
   PhaseIterGVN  *igvn = _igvn;
   uint new_index_start = (uint) _compile->num_alias_types();
   Arena* arena = Thread::current()->resource_area();
   VectorSet visited(arena);
-
+  ideal_nodes.clear(); // Reset for use with set_map/get_map.
+  uint unique_old = _compile->unique();
 
   //  Phase 1:  Process possible allocations from alloc_worklist.
   //  Create instance types for the CheckCastPP for allocations where possible.
@@ -1088,17 +2626,15 @@
   while (alloc_worklist.length() != 0) {
     Node *n = alloc_worklist.pop();
     uint ni = n->_idx;
-    const TypeOopPtr* tinst = NULL;
     if (n->is_Call()) {
       CallNode *alloc = n->as_Call();
       // copy escape information to call node
       PointsToNode* ptn = ptnode_adr(alloc->_idx);
-      PointsToNode::EscapeState es = escape_state(alloc);
+      PointsToNode::EscapeState es = ptn->escape_state();
       // We have an allocation or call which returns a Java object,
       // see if it is unescaped.
       if (es != PointsToNode::NoEscape || !ptn->scalar_replaceable())
         continue;
-
       // Find CheckCastPP for the allocate or for the return value of a call
       n = alloc->result_cast();
       if (n == NULL) {            // No uses except Initialize node
@@ -1145,20 +2681,18 @@
         // so it could be eliminated.
         alloc->as_Allocate()->_is_scalar_replaceable = true;
       }
-      set_escape_state(n->_idx, es); // CheckCastPP escape state
+      set_escape_state(ptnode_adr(n->_idx), es); // CheckCastPP escape state
       // in order for an object to be scalar-replaceable, it must be:
       //   - a direct allocation (not a call returning an object)
       //   - non-escaping
       //   - eligible to be a unique type
       //   - not determined to be ineligible by escape analysis
-      assert(ptnode_adr(alloc->_idx)->_node != NULL &&
-             ptnode_adr(n->_idx)->_node != NULL, "should be registered");
-      set_map(alloc->_idx, n);
-      set_map(n->_idx, alloc);
+      set_map(alloc, n);
+      set_map(n, alloc);
       const TypeOopPtr *t = igvn->type(n)->isa_oopptr();
       if (t == NULL)
         continue;  // not a TypeOopPtr
-      tinst = t->cast_to_exactness(true)->is_oopptr()->cast_to_instance_id(ni);
+      const TypeOopPtr* tinst = t->cast_to_exactness(true)->is_oopptr()->cast_to_instance_id(ni);
       igvn->hash_delete(n);
       igvn->set_type(n,  tinst);
       n->raise_bottom_type(tinst);
@@ -1168,9 +2702,10 @@
 
         // First, put on the worklist all Field edges from Connection Graph
        // which is more accurate than putting immediate users from the Ideal Graph.
-        for (uint e = 0; e < ptn->edge_count(); e++) {
-          Node *use = ptnode_adr(ptn->edge_target(e))->_node;
-          assert(ptn->edge_type(e) == PointsToNode::FieldEdge && use->is_AddP(),
+        for (EdgeIterator e(ptn); e.has_next(); e.next()) {
+          PointsToNode* tgt = e.get();
+          Node* use = tgt->ideal_node();
+          assert(tgt->is_Field() && use->is_AddP(),
                  "only AddP nodes are Field edges in CG");
           if (use->outcnt() > 0) { // Don't process dead nodes
             Node* addp2 = find_second_addp(use, use->in(AddPNode::Base));
@@ -1202,16 +2737,18 @@
         }
       }
     } else if (n->is_AddP()) {
-      VectorSet* ptset = PointsTo(get_addp_base(n));
-      assert(ptset->Size() == 1, "AddP address is unique");
-      uint elem = ptset->getelem(); // Allocation node's index
-      if (elem == _phantom_object) {
-        assert(false, "escaped allocation");
-        continue; // Assume the value was set outside this method.
+      JavaObjectNode* jobj = unique_java_object(get_addp_base(n));
+      if (jobj == NULL || jobj == phantom_obj) {
+#ifdef ASSERT
+        ptnode_adr(get_addp_base(n)->_idx)->dump();
+        ptnode_adr(n->_idx)->dump();
+        assert(jobj != NULL && jobj != phantom_obj, "escaped allocation");
+#endif
+        _compile->record_failure(C2Compiler::retry_no_escape_analysis());
+        return;
       }
-      Node *base = get_map(elem);  // CheckCastPP node
-      if (!split_AddP(n, base, igvn)) continue; // wrong type from dead path
-      tinst = igvn->type(base)->isa_oopptr();
+      Node *base = get_map(jobj->idx());  // CheckCastPP node
+      if (!split_AddP(n, base)) continue; // wrong type from dead path
     } else if (n->is_Phi() ||
                n->is_CheckCastPP() ||
                n->is_EncodeP() ||
@@ -1221,18 +2758,20 @@
         assert(n->is_Phi(), "loops only through Phi's");
         continue;  // already processed
       }
-      VectorSet* ptset = PointsTo(n);
-      if (ptset->Size() == 1) {
-        uint elem = ptset->getelem(); // Allocation node's index
-        if (elem == _phantom_object) {
-          assert(false, "escaped allocation");
-          continue; // Assume the value was set outside this method.
-        }
-        Node *val = get_map(elem);   // CheckCastPP node
+      JavaObjectNode* jobj = unique_java_object(n);
+      if (jobj == NULL || jobj == phantom_obj) {
+#ifdef ASSERT
+        ptnode_adr(n->_idx)->dump();
+        assert(jobj != NULL && jobj != phantom_obj, "escaped allocation");
+#endif
+        _compile->record_failure(C2Compiler::retry_no_escape_analysis());
+        return;
+      } else {
+        Node *val = get_map(jobj->idx());   // CheckCastPP node
         TypeNode *tn = n->as_Type();
-        tinst = igvn->type(val)->isa_oopptr();
+        const TypeOopPtr* tinst = igvn->type(val)->isa_oopptr();
         assert(tinst != NULL && tinst->is_known_instance() &&
-               (uint)tinst->instance_id() == elem , "instance type expected.");
+               tinst->instance_id() == jobj->idx() , "instance type expected.");
 
         const Type *tn_type = igvn->type(tn);
         const TypeOopPtr *tn_t;
@@ -1241,7 +2780,6 @@
         } else {
           tn_t = tn_type->isa_oopptr();
         }
-
         if (tn_t != NULL && tinst->klass()->is_subtype_of(tn_t->klass())) {
           if (tn_type->isa_narrowoop()) {
             tn_type = tinst->make_narrowoop();
@@ -1314,13 +2852,13 @@
   }
   // New alias types were created in split_AddP().
   uint new_index_end = (uint) _compile->num_alias_types();
+  assert(unique_old == _compile->unique(), "there should be no new ideal nodes after Phase 1");
 
   //  Phase 2:  Process MemNode's from memnode_worklist, compute the new address
   //            type and new values for the Memory inputs (the Memory inputs are
   //            not actually updated until Phase 4.)
   if (memnode_worklist.length() == 0)
     return;  // nothing to do
-
   while (memnode_worklist.length() != 0) {
     Node *n = memnode_worklist.pop();
     if (visited.test_set(n->_idx))
@@ -1341,17 +2879,14 @@
       assert (addr_t->isa_ptr() != NULL, "pointer type required.");
       int alias_idx = _compile->get_alias_index(addr_t->is_ptr());
       assert ((uint)alias_idx < new_index_end, "wrong alias index");
-      Node *mem = find_inst_mem(n->in(MemNode::Memory), alias_idx, orig_phis, igvn);
+      Node *mem = find_inst_mem(n->in(MemNode::Memory), alias_idx, orig_phis);
       if (_compile->failing()) {
         return;
       }
       if (mem != n->in(MemNode::Memory)) {
        // We delay the memory edge update since we need the old one in
        // the MergeMem code below when instance memory slices are separated.
-        debug_only(Node* pn = ptnode_adr(n->_idx)->_node;)
-        assert(pn == NULL || pn == n, "wrong node");
-        set_map(n->_idx, mem);
-        ptnode_adr(n->_idx)->_node = n;
+        set_map(n, mem);
       }
       if (n->is_Load()) {
         continue;  // don't push users
@@ -1442,7 +2977,7 @@
         if((uint)_compile->get_general_index(ni) == i) {
           Node *m = (ni >= nmm->req()) ? nmm->empty_memory() : nmm->in(ni);
           if (nmm->is_empty_memory(m)) {
-            Node* result = find_inst_mem(mem, ni, orig_phis, igvn);
+            Node* result = find_inst_mem(mem, ni, orig_phis);
             if (_compile->failing()) {
               return;
             }
@@ -1458,7 +2993,7 @@
       if (result == nmm->base_memory()) {
         // Didn't find instance memory, search through general slice recursively.
         result = nmm->memory_at(_compile->get_general_index(ni));
-        result = find_inst_mem(result, ni, orig_phis, igvn);
+        result = find_inst_mem(result, ni, orig_phis);
         if (_compile->failing()) {
           return;
         }
@@ -1482,7 +3017,7 @@
     igvn->hash_delete(phi);
     for (uint i = 1; i < phi->req(); i++) {
       Node *mem = phi->in(i);
-      Node *new_mem = find_inst_mem(mem, alias_idx, orig_phis, igvn);
+      Node *new_mem = find_inst_mem(mem, alias_idx, orig_phis);
       if (_compile->failing()) {
         return;
       }
@@ -1496,39 +3031,36 @@
 
   // Update the memory inputs of MemNodes with the value we computed
   // in Phase 2 and move stores memory users to corresponding memory slices.
-
   // Disable memory split verification code until the fix for 6984348.
   // Currently it produces false negative results since it does not cover all cases.
 #if 0 // ifdef ASSERT
   visited.Reset();
   Node_Stack old_mems(arena, _compile->unique() >> 2);
 #endif
-  for (uint i = 0; i < nodes_size(); i++) {
-    Node *nmem = get_map(i);
-    if (nmem != NULL) {
-      Node *n = ptnode_adr(i)->_node;
-      assert(n != NULL, "sanity");
-      if (n->is_Mem()) {
+  for (uint i = 0; i < ideal_nodes.size(); i++) {
+    Node*    n = ideal_nodes.at(i);
+    Node* nmem = get_map(n->_idx);
+    assert(nmem != NULL, "sanity");
+    if (n->is_Mem()) {
 #if 0 // ifdef ASSERT
-        Node* old_mem = n->in(MemNode::Memory);
-        if (!visited.test_set(old_mem->_idx)) {
-          old_mems.push(old_mem, old_mem->outcnt());
-        }
+      Node* old_mem = n->in(MemNode::Memory);
+      if (!visited.test_set(old_mem->_idx)) {
+        old_mems.push(old_mem, old_mem->outcnt());
+      }
 #endif
-        assert(n->in(MemNode::Memory) != nmem, "sanity");
-        if (!n->is_Load()) {
-          // Move memory users of a store first.
-          move_inst_mem(n, orig_phis, igvn);
-        }
-        // Now update memory input
-        igvn->hash_delete(n);
-        n->set_req(MemNode::Memory, nmem);
-        igvn->hash_insert(n);
-        record_for_optimizer(n);
-      } else {
-        assert(n->is_Allocate() || n->is_CheckCastPP() ||
-               n->is_AddP() || n->is_Phi(), "unknown node used for set_map()");
+      assert(n->in(MemNode::Memory) != nmem, "sanity");
+      if (!n->is_Load()) {
+        // Move memory users of a store first.
+        move_inst_mem(n, orig_phis);
       }
+      // Now update memory input
+      igvn->hash_delete(n);
+      n->set_req(MemNode::Memory, nmem);
+      igvn->hash_insert(n);
+      record_for_optimizer(n);
+    } else {
+      assert(n->is_Allocate() || n->is_CheckCastPP() ||
+             n->is_AddP() || n->is_Phi(), "unknown node used for set_map()");
     }
   }
 #if 0 // ifdef ASSERT
@@ -1542,1571 +3074,72 @@
 #endif
 }
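In short, split_unique_types renames each qualifying allocation to a fresh exact instance type (Phase 1), recomputes memory inputs on the new alias indices (Phase 2), widens every MergeMem with the new slices (Phase 3), and finally rewrites the memory edges (Phase 4). A compressed illustration of the Phase 1 renaming step, under an invented Alloc record (not the real node classes):

    #include <cstdio>
    #include <vector>

    // Toy allocation: escape analysis verdict plus a type id. Phase 1 of
    // split_unique_types gives every non-escaping, scalar-replaceable
    // allocation a fresh, unique instance type id (and hence its own
    // alias slice).
    enum EscapeState { NoEscape, ArgEscape, GlobalEscape };

    struct Alloc {
      EscapeState es;
      bool scalar_replaceable;
      int type_id;                     // shared class type before renaming
    };

    void assign_unique_instance_types(std::vector<Alloc>& allocs,
                                      int first_new_id) {
      int next_id = first_new_id;
      for (Alloc& a : allocs) {
        if (a.es == NoEscape && a.scalar_replaceable) {
          a.type_id = next_id++;       // unique instance type
        }
      }
    }

    int main() {
      std::vector<Alloc> allocs = {
        { NoEscape,     true,  7 },    // gets its own instance type
        { GlobalEscape, true,  7 },    // escapes: keeps the shared type
        { NoEscape,     false, 7 },    // not scalar replaceable: keeps it too
      };
      assign_unique_instance_types(allocs, 100);
      for (const Alloc& a : allocs) std::printf("type %d\n", a.type_id);
      return 0;
    }

The payoff is that a per-instance alias slice lets later passes treat each non-escaping object's fields independently of all other memory.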
 
-bool ConnectionGraph::has_candidates(Compile *C) {
-  // EA brings benefits only when the code has allocations and/or locks which
-  // are represented by ideal Macro nodes.
-  int cnt = C->macro_count();
-  for( int i=0; i < cnt; i++ ) {
-    Node *n = C->macro_node(i);
-    if ( n->is_Allocate() )
-      return true;
-    if( n->is_Lock() ) {
-      Node* obj = n->as_Lock()->obj_node()->uncast();
-      if( !(obj->is_Parm() || obj->is_Con()) )
-        return true;
-    }
-  }
-  return false;
-}
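has_candidates is a cheap pre-filter: escape analysis only pays off when there is an Allocate macro node, or a Lock whose object is not already a parameter or constant. The same filter over an invented macro-node list:

    #include <cstdio>
    #include <vector>

    // Toy macro node: just enough shape to express the filter above.
    enum MacroKind { AllocateK, LockK, OtherK };

    struct MacroNode {
      MacroKind kind;
      bool obj_is_parm_or_con;         // only meaningful for LockK
    };

    // EA is worthwhile only if there is an allocation, or a lock on an object
    // that is not trivially a parameter/constant (those can't be eliminated).
    bool has_candidates(const std::vector<MacroNode>& macros) {
      for (const MacroNode& n : macros) {
        if (n.kind == AllocateK) return true;
        if (n.kind == LockK && !n.obj_is_parm_or_con) return true;
      }
      return false;
    }

    int main() {
      std::vector<MacroNode> ms = { { LockK, true }, { OtherK, false } };
      std::printf("%d\n", has_candidates(ms));          // 0: nothing to gain
      ms.push_back({ AllocateK, false });
      std::printf("%d\n", has_candidates(ms));          // 1: run EA
      return 0;
    }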
-
-void ConnectionGraph::do_analysis(Compile *C, PhaseIterGVN *igvn) {
-  // Add ConP#NULL and ConN#NULL nodes before ConnectionGraph construction
-  // to create space for them in ConnectionGraph::_nodes[].
-  Node* oop_null = igvn->zerocon(T_OBJECT);
-  Node* noop_null = igvn->zerocon(T_NARROWOOP);
-
-  ConnectionGraph* congraph = new(C->comp_arena()) ConnectionGraph(C, igvn);
-  // Perform escape analysis
-  if (congraph->compute_escape()) {
-    // There are non escaping objects.
-    C->set_congraph(congraph);
-  }
-
-  // Cleanup.
-  if (oop_null->outcnt() == 0)
-    igvn->hash_delete(oop_null);
-  if (noop_null->outcnt() == 0)
-    igvn->hash_delete(noop_null);
-}
-
-bool ConnectionGraph::compute_escape() {
-  Compile* C = _compile;
-
-  // 1. Populate Connection Graph (CG) with Ideal nodes.
-
-  Unique_Node_List worklist_init;
-  worklist_init.map(C->unique(), NULL);  // preallocate space
-
-  // Initialize worklist
-  if (C->root() != NULL) {
-    worklist_init.push(C->root());
-  }
-
-  GrowableArray<Node*> alloc_worklist;
-  GrowableArray<Node*> addp_worklist;
-  GrowableArray<Node*> ptr_cmp_worklist;
-  GrowableArray<Node*> storestore_worklist;
-  PhaseGVN* igvn = _igvn;
-
-  // Push all useful nodes onto CG list and set their type.
-  for( uint next = 0; next < worklist_init.size(); ++next ) {
-    Node* n = worklist_init.at(next);
-    record_for_escape_analysis(n, igvn);
-    // Only the results of allocations and java static calls are checked
-    // for an escape status. See process_call_result() below.
-    if (n->is_Allocate() || n->is_CallStaticJava() &&
-        ptnode_adr(n->_idx)->node_type() == PointsToNode::JavaObject) {
-      alloc_worklist.append(n);
-    } else if(n->is_AddP()) {
-      // Collect address nodes. Use them during stage 3 below
-      // to build initial connection graph field edges.
-      addp_worklist.append(n);
-    } else if (n->is_MergeMem()) {
-      // Collect all MergeMem nodes to add memory slices for
-      // scalar replaceable objects in split_unique_types().
-      _mergemem_worklist.append(n->as_MergeMem());
-    } else if (OptimizePtrCompare && n->is_Cmp() &&
-               (n->Opcode() == Op_CmpP || n->Opcode() == Op_CmpN)) {
-      // Pointer compare nodes
-      ptr_cmp_worklist.append(n);
-    } else if (n->is_MemBarStoreStore()) {
-      // Collect all MemBarStoreStore nodes so that depending on the
-      // escape status of the associated Allocate node some of them
-      // may be eliminated.
-      storestore_worklist.append(n);
-    }
-    for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
-      Node* m = n->fast_out(i);   // Get user
-      worklist_init.push(m);
-    }
-  }
-
-  if (alloc_worklist.length() == 0) {
-    _collecting = false;
-    return false; // Nothing to do.
-  }
-
-  // 2. First pass to create simple CG edges (doesn't require to walk CG).
-  uint delayed_size = _delayed_worklist.size();
-  for( uint next = 0; next < delayed_size; ++next ) {
-    Node* n = _delayed_worklist.at(next);
-    build_connection_graph(n, igvn);
-  }
-
-  // 3. Pass to create initial fields edges (JavaObject -F-> AddP)
-  //    to reduce number of iterations during stage 4 below.
-  uint addp_length = addp_worklist.length();
-  for( uint next = 0; next < addp_length; ++next ) {
-    Node* n = addp_worklist.at(next);
-    Node* base = get_addp_base(n);
-    if (base->is_Proj() && base->in(0)->is_Call())
-      base = base->in(0);
-    PointsToNode::NodeType nt = ptnode_adr(base->_idx)->node_type();
-    if (nt == PointsToNode::JavaObject) {
-      build_connection_graph(n, igvn);
-    }
-  }
-
-  GrowableArray<int> cg_worklist;
-  cg_worklist.append(_phantom_object);
-  GrowableArray<uint>  worklist;
-
-  // 4. Build Connection Graph which need
-  //    to walk the connection graph.
-  _progress = false;
-  for (uint ni = 0; ni < nodes_size(); ni++) {
-    PointsToNode* ptn = ptnode_adr(ni);
-    Node *n = ptn->_node;
-    if (n != NULL) { // Call, AddP, LoadP, StoreP
-      build_connection_graph(n, igvn);
-      if (ptn->node_type() != PointsToNode::UnknownType)
-        cg_worklist.append(n->_idx); // Collect CG nodes
-      if (!_processed.test(n->_idx))
-        worklist.append(n->_idx); // Collect C/A/L/S nodes
-    }
-  }
-
-  // After IGVN, user nodes may have a smaller _idx than
-  // their inputs, so they are processed first in the
-  // loop above. Because of that, not all graph edges
-  // are created on the first pass. Walk over the
-  // interesting nodes again until no new edges are created.
-  //
-  // Normally only 1-3 passes are needed to build the
-  // Connection Graph, depending on graph complexity.
-  // Observed 8 passes in jvm2008 compiler.compiler.
-  // Set the limit to 20 to catch the situation when something
-  // went wrong and recompile the method without EA.
-  // Also limit the build time to 30 sec (60 in a debug VM).
-
-#define CG_BUILD_ITER_LIMIT 20
-
-#ifdef ASSERT
-#define CG_BUILD_TIME_LIMIT 60.0
-#else
-#define CG_BUILD_TIME_LIMIT 30.0
-#endif
+#ifndef PRODUCT
+static const char *node_type_names[] = {
+  "UnknownType",
+  "JavaObject",
+  "LocalVar",
+  "Field",
+  "Arraycopy"
+};
 
-  uint length = worklist.length();
-  int iterations = 0;
-  elapsedTimer time;
-  while(_progress &&
-        (iterations++   < CG_BUILD_ITER_LIMIT) &&
-        (time.seconds() < CG_BUILD_TIME_LIMIT)) {
-    time.start();
-    _progress = false;
-    for( uint next = 0; next < length; ++next ) {
-      int ni = worklist.at(next);
-      PointsToNode* ptn = ptnode_adr(ni);
-      Node* n = ptn->_node;
-      assert(n != NULL, "should be known node");
-      build_connection_graph(n, igvn);
-    }
-    time.stop();
-  }
-  if ((iterations     >= CG_BUILD_ITER_LIMIT) ||
-      (time.seconds() >= CG_BUILD_TIME_LIMIT)) {
-    assert(false, err_msg("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d",
-           time.seconds(), iterations, nodes_size(), length));
-    // Possible infinite build_connection_graph loop,
-    // bailout (no changes to ideal graph were made).
-    _collecting = false;
-    return false;
-  }
-#undef CG_BUILD_ITER_LIMIT
-#undef CG_BUILD_TIME_LIMIT
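The loop above is a bounded fixpoint: keep rebuilding connection-graph edges while progress is made, but give up after a fixed iteration count or wall-clock budget so a pathological graph cannot hang the compiler. A generic sketch of that driver using std::chrono (run_to_fixpoint is invented; the limits match the constants quoted above):

    #include <chrono>
    #include <cstdio>

    // Generic bounded fixpoint driver: 'step' returns true while it still makes
    // progress. Gives up (returns false) after iter_limit passes or after
    // time_limit_sec seconds, mirroring CG_BUILD_ITER_LIMIT / CG_BUILD_TIME_LIMIT.
    template <typename Step>
    bool run_to_fixpoint(Step step, int iter_limit, double time_limit_sec) {
      using clock = std::chrono::steady_clock;
      const auto start = clock::now();
      for (int i = 0; i < iter_limit; i++) {
        if (!step()) return true;      // fixpoint reached: success
        std::chrono::duration<double> elapsed = clock::now() - start;
        if (elapsed.count() >= time_limit_sec) break;
      }
      return false;                    // out of budget: caller should bail out
    }

    int main() {
      int remaining = 5;
      bool ok = run_to_fixpoint([&] { return --remaining > 0; },
                                /*iter_limit=*/20, /*time_limit_sec=*/30.0);
      std::printf("converged: %d\n", ok);
      return 0;
    }

steady_clock is used because the budget is a real-time safety net, not a profiling measurement.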
-
-  // 5. Propagate escaped states.
-  worklist.clear();
-
-  // mark all nodes reachable from GlobalEscape nodes
-  (void)propagate_escape_state(&cg_worklist, &worklist, PointsToNode::GlobalEscape);
-
-  // mark all nodes reachable from ArgEscape nodes
-  bool has_non_escaping_obj = propagate_escape_state(&cg_worklist, &worklist, PointsToNode::ArgEscape);
-
-  Arena* arena = Thread::current()->resource_area();
-  VectorSet visited(arena);
-
-  // 6. Find fields initializing values for not escaped allocations
-  uint alloc_length = alloc_worklist.length();
-  for (uint next = 0; next < alloc_length; ++next) {
-    Node* n = alloc_worklist.at(next);
-    PointsToNode::EscapeState es = ptnode_adr(n->_idx)->escape_state();
-    if (es == PointsToNode::NoEscape) {
-      has_non_escaping_obj = true;
-      if (n->is_Allocate()) {
-        find_init_values(n, &visited, igvn);
-        // The object allocated by this Allocate node will never be
-        // seen by another thread. Mark it so that when it is
-        // expanded no MemBarStoreStore is added.
-        n->as_Allocate()->initialization()->set_does_not_escape();
-      }
-    } else if ((es == PointsToNode::ArgEscape) && n->is_Allocate()) {
-      // Same as above. Mark this Allocate node so that when it is
-      // expanded no MemBarStoreStore is added.
-      n->as_Allocate()->initialization()->set_does_not_escape();
-    }
-  }
-
-  uint cg_length = cg_worklist.length();
-
-  // Skip the rest of code if all objects escaped.
-  if (!has_non_escaping_obj) {
-    cg_length = 0;
-    addp_length = 0;
-  }
-
-  for (uint next = 0; next < cg_length; ++next) {
-    int ni = cg_worklist.at(next);
-    PointsToNode* ptn = ptnode_adr(ni);
-    PointsToNode::NodeType nt = ptn->node_type();
-    if (nt == PointsToNode::LocalVar || nt == PointsToNode::Field) {
-      if (ptn->edge_count() == 0) {
-        // No values were found. Assume the value was set
-        // outside this method - add edge to phantom object.
-        add_pointsto_edge(ni, _phantom_object);
-      }
-    }
-  }
-
-  // 7. Remove deferred edges from the graph.
-  for (uint next = 0; next < cg_length; ++next) {
-    int ni = cg_worklist.at(next);
-    PointsToNode* ptn = ptnode_adr(ni);
-    PointsToNode::NodeType nt = ptn->node_type();
-    if (nt == PointsToNode::LocalVar || nt == PointsToNode::Field) {
-      remove_deferred(ni, &worklist, &visited);
-    }
-  }
-
-  // 8. Adjust escape state of nonescaping objects.
-  for (uint next = 0; next < addp_length; ++next) {
-    Node* n = addp_worklist.at(next);
-    adjust_escape_state(n);
-  }
+static const char *esc_names[] = {
+  "UnknownEscape",
+  "NoEscape",
+  "ArgEscape",
+  "GlobalEscape"
+};
 
-  // push all NoEscape nodes on the worklist
-  worklist.clear();
-  for( uint next = 0; next < cg_length; ++next ) {
-    int nk = cg_worklist.at(next);
-    if (ptnode_adr(nk)->escape_state() == PointsToNode::NoEscape &&
-        !is_null_ptr(nk))
-      worklist.push(nk);
-  }
-
-  alloc_worklist.clear();
-  // Propagate scalar_replaceable value.
-  while(worklist.length() > 0) {
-    uint nk = worklist.pop();
-    PointsToNode* ptn = ptnode_adr(nk);
-    Node* n = ptn->_node;
-    bool scalar_replaceable = ptn->scalar_replaceable();
-    if (n->is_Allocate() && scalar_replaceable) {
-      // Push scalar replaceable allocations on alloc_worklist
-      // for processing in split_unique_types(). Note,
-      // following code may change scalar_replaceable value.
-      alloc_worklist.append(n);
-    }
-    uint e_cnt = ptn->edge_count();
-    for (uint ei = 0; ei < e_cnt; ei++) {
-      uint npi = ptn->edge_target(ei);
-      if (is_null_ptr(npi))
-        continue;
-      PointsToNode *np = ptnode_adr(npi);
-      if (np->escape_state() < PointsToNode::NoEscape) {
-        set_escape_state(npi, PointsToNode::NoEscape);
-        if (!scalar_replaceable) {
-          np->set_scalar_replaceable(false);
-        }
-        worklist.push(npi);
-      } else if (np->scalar_replaceable() && !scalar_replaceable) {
-        np->set_scalar_replaceable(false);
-        worklist.push(npi);
-      }
-    }
-  }
-
-  _collecting = false;
-  assert(C->unique() == nodes_size(), "there should be no new ideal nodes during ConnectionGraph build");
-
-  assert(ptnode_adr(_oop_null)->escape_state() == PointsToNode::NoEscape &&
-         ptnode_adr(_oop_null)->edge_count() == 0, "sanity");
-  if (UseCompressedOops) {
-    assert(ptnode_adr(_noop_null)->escape_state() == PointsToNode::NoEscape &&
-           ptnode_adr(_noop_null)->edge_count() == 0, "sanity");
-  }
-
-  if (EliminateLocks && has_non_escaping_obj) {
-    // Mark locks before changing ideal graph.
-    int cnt = C->macro_count();
-    for( int i=0; i < cnt; i++ ) {
-      Node *n = C->macro_node(i);
-      if (n->is_AbstractLock()) { // Lock and Unlock nodes
-        AbstractLockNode* alock = n->as_AbstractLock();
-        if (!alock->is_non_esc_obj()) {
-          PointsToNode::EscapeState es = escape_state(alock->obj_node());
-          assert(es != PointsToNode::UnknownEscape, "should know");
-          if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-            assert(!alock->is_eliminated() || alock->is_coarsened(), "sanity");
-            // The lock could be marked eliminated by the lock coarsening
-            // code during the first IGVN pass, before EA. Replace the
-            // coarsened flag to eliminate all associated locks/unlocks.
-            alock->set_non_esc_obj();
-          }
-        }
-      }
-    }
-  }
-
-  if (OptimizePtrCompare && has_non_escaping_obj) {
-    // Add ConI(#CC_GT) and ConI(#CC_EQ).
-    _pcmp_neq = igvn->makecon(TypeInt::CC_GT);
-    _pcmp_eq = igvn->makecon(TypeInt::CC_EQ);
-    // Optimize objects compare.
-    while (ptr_cmp_worklist.length() != 0) {
-      Node *n = ptr_cmp_worklist.pop();
-      Node *res = optimize_ptr_compare(n);
-      if (res != NULL) {
-#ifndef PRODUCT
-        if (PrintOptimizePtrCompare) {
-          tty->print_cr("++++ Replaced: %d %s(%d,%d) --> %s", n->_idx, (n->Opcode() == Op_CmpP ? "CmpP" : "CmpN"), n->in(1)->_idx, n->in(2)->_idx, (res == _pcmp_eq ? "EQ" : "NotEQ"));
-          if (Verbose) {
-            n->dump(1);
-          }
-        }
-#endif
-        _igvn->replace_node(n, res);
-      }
-    }
-    // cleanup
-    if (_pcmp_neq->outcnt() == 0)
-      igvn->hash_delete(_pcmp_neq);
-    if (_pcmp_eq->outcnt()  == 0)
-      igvn->hash_delete(_pcmp_eq);
+void PointsToNode::dump(bool print_state) const {
+  NodeType nt = node_type();
+  tty->print("%s ", node_type_names[(int) nt]);
+  if (print_state) {
+    EscapeState es = escape_state();
+    EscapeState fields_es = fields_escape_state();
+    tty->print("%s(%s) ", esc_names[(int)es], esc_names[(int)fields_es]);
+    if (nt == PointsToNode::JavaObject && !this->scalar_replaceable())
+      tty->print("NSR");
   }
-
-  // For MemBarStoreStore nodes added in library_call.cpp, check
-  // escape status of associated AllocateNode and optimize out
-  // MemBarStoreStore node if the allocated object never escapes.
-  while (storestore_worklist.length() != 0) {
-    Node *n = storestore_worklist.pop();
-    MemBarStoreStoreNode *storestore = n->as_MemBarStoreStore();
-    Node *alloc = storestore->in(MemBarNode::Precedent)->in(0);
-    assert (alloc->is_Allocate(), "storestore should point to AllocateNode");
-    PointsToNode::EscapeState es = ptnode_adr(alloc->_idx)->escape_state();
-    if (es == PointsToNode::NoEscape || es == PointsToNode::ArgEscape) {
-      MemBarNode* mb = MemBarNode::make(C, Op_MemBarCPUOrder, Compile::AliasIdxBot);
-      mb->init_req(TypeFunc::Memory, storestore->in(TypeFunc::Memory));
-      mb->init_req(TypeFunc::Control, storestore->in(TypeFunc::Control));
-
-      _igvn->register_new_node_with_optimizer(mb);
-      _igvn->replace_node(storestore, mb);
+  if (is_Field()) {
+    FieldNode* f = (FieldNode*)this;
+    tty->print("(");
+    for (BaseIterator i(f); i.has_next(); i.next()) {
+      PointsToNode* b = i.get();
+      tty->print(" %d%s", b->idx(),(b->is_JavaObject() ? "P" : ""));
     }
-  }
-
-#ifndef PRODUCT
-  if (PrintEscapeAnalysis) {
-    dump(); // Dump ConnectionGraph
-  }
-#endif
-
-  bool has_scalar_replaceable_candidates = false;
-  alloc_length = alloc_worklist.length();
-  for (uint next = 0; next < alloc_length; ++next) {
-    Node* n = alloc_worklist.at(next);
-    PointsToNode* ptn = ptnode_adr(n->_idx);
-    assert(ptn->escape_state() == PointsToNode::NoEscape, "sanity");
-    if (ptn->scalar_replaceable()) {
-      has_scalar_replaceable_candidates = true;
-      break;
-    }
-  }
-
-  if ( has_scalar_replaceable_candidates &&
-       C->AliasLevel() >= 3 && EliminateAllocations ) {
-
-    // Now use the escape information to create unique types for
-    // scalar replaceable objects.
-    split_unique_types(alloc_worklist);
-
-    if (C->failing())  return false;
-
-    C->print_method("After Escape Analysis", 2);
-
-#ifdef ASSERT
-  } else if (Verbose && (PrintEscapeAnalysis || PrintEliminateAllocations)) {
-    tty->print("=== No allocations eliminated for ");
-    C->method()->print_short_name();
-    if(!EliminateAllocations) {
-      tty->print(" since EliminateAllocations is off ===");
-    } else if(!has_scalar_replaceable_candidates) {
-      tty->print(" since there are no scalar replaceable candidates ===");
-    } else if(C->AliasLevel() < 3) {
-      tty->print(" since AliasLevel < 3 ===");
-    }
-    tty->cr();
-#endif
+    tty->print(" )");
   }
-  return has_non_escaping_obj;
-}
-
-// Find fields initializing values for allocations.
-void ConnectionGraph::find_init_values(Node* alloc, VectorSet* visited, PhaseTransform* phase) {
-  assert(alloc->is_Allocate(), "Should be called for Allocate nodes only");
-  PointsToNode* pta = ptnode_adr(alloc->_idx);
-  assert(pta->escape_state() == PointsToNode::NoEscape, "Not escaped Allocate nodes only");
-  InitializeNode* ini = alloc->as_Allocate()->initialization();
-
-  Compile* C = _compile;
-  visited->Reset();
-  // Check if an oop field's initializing value is recorded and add
-  // a corresponding NULL value for the field if it is not recorded.
-  // The Connection Graph does not record a default initialization by NULL
-  // captured by the Initialize node.
-  //
-  uint null_idx = UseCompressedOops ? _noop_null : _oop_null;
-  uint ae_cnt = pta->edge_count();
-  bool visited_bottom_offset = false;
-  for (uint ei = 0; ei < ae_cnt; ei++) {
-    uint nidx = pta->edge_target(ei); // Field (AddP)
-    PointsToNode* ptn = ptnode_adr(nidx);
-    assert(ptn->_node->is_AddP(), "Should be AddP nodes only");
-    int offset = ptn->offset();
-    if (offset == Type::OffsetBot) {
-      if (!visited_bottom_offset) {
-        visited_bottom_offset = true;
-        // Check only oop fields.
-        const Type* adr_type = ptn->_node->as_AddP()->bottom_type();
-        if (!adr_type->isa_aryptr() ||
-            (adr_type->isa_aryptr()->klass() == NULL) ||
-             adr_type->isa_aryptr()->klass()->is_obj_array_klass()) {
-          // OffsetBot is used to reference an array's elements;
-          // always add a reference to NULL since we don't
-          // know which element is referenced.
-          add_edge_from_fields(alloc->_idx, null_idx, offset);
-        }
-      }
-    } else if (offset != oopDesc::klass_offset_in_bytes() &&
-               !visited->test_set(offset)) {
-
-      // Check only oop fields.
-      const Type* adr_type = ptn->_node->as_AddP()->bottom_type();
-      BasicType basic_field_type = T_INT;
-      if (adr_type->isa_instptr()) {
-        ciField* field = C->alias_type(adr_type->isa_instptr())->field();
-        if (field != NULL) {
-          basic_field_type = field->layout_type();
-        } else {
-          // Ignore non-field loads (for example, a klass load)
-        }
-      } else if (adr_type->isa_aryptr()) {
-        if (offset != arrayOopDesc::length_offset_in_bytes()) {
-          const Type* elemtype = adr_type->isa_aryptr()->elem();
-          basic_field_type = elemtype->array_element_basic_type();
-        } else {
-          // Ignore array length load
-        }
-#ifdef ASSERT
-      } else {
-        // Raw pointers are used for initializing stores, so skip this one
-        // since it should already be recorded
-        Node* base = get_addp_base(ptn->_node);
-        assert(adr_type->isa_rawptr() && base->is_Proj() &&
-               (base->in(0) == alloc),"unexpected pointer type");
-#endif
-      }
-      if (basic_field_type == T_OBJECT ||
-          basic_field_type == T_NARROWOOP ||
-          basic_field_type == T_ARRAY) {
-        Node* value = NULL;
-        if (ini != NULL) {
-          BasicType ft = UseCompressedOops ? T_NARROWOOP : T_OBJECT;
-          Node* store = ini->find_captured_store(offset, type2aelembytes(ft), phase);
-          if (store != NULL && store->is_Store()) {
-            value = store->in(MemNode::ValueIn);
-          } else {
-            // There could be initializing stores which follow the allocation.
-            // For example, a volatile field store is not collected
-            // by the Initialize node.
-            //
-            // Need to check for dependent loads to separate such stores from
-            // stores which follow loads. For now, add the initial value NULL so
-            // that the pointer-compare optimization works correctly.
-          }
-        }
-        if (value == NULL || value != ptnode_adr(value->_idx)->_node) {
-          // A field's initializing value was not recorded. Add NULL.
-          add_edge_from_fields(alloc->_idx, null_idx, offset);
-        }
-      }
-    }
+  tty->print("[");
+  for (EdgeIterator i(this); i.has_next(); i.next()) {
+    PointsToNode* e = i.get();
+    tty->print(" %d%s%s", e->idx(),(e->is_JavaObject() ? "P" : (e->is_Field() ? "F" : "")), e->is_Arraycopy() ? "cp" : "");
   }
-}
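find_init_values exists because the Initialize node does not capture default stores of null: for any oop field of a non-escaping allocation without a captured initializing store, an edge to the null object is added so the pointer-compare optimization stays sound. A toy version over an offset-to-value map (the representation is invented):

    #include <cstdio>
    #include <map>
    #include <vector>

    // Toy field table: captured initializing stores, keyed by field offset.
    // Fields with no captured store are implicitly initialized to null, and
    // the connection graph must record that explicitly.
    const int NULL_OBJ = 0;            // points-to id of the null object

    std::map<int, int> complete_init_values(const std::vector<int>& oop_offsets,
                                            const std::map<int, int>& captured) {
      std::map<int, int> values = captured;
      for (int off : oop_offsets) {
        if (values.find(off) == values.end()) {
          values[off] = NULL_OBJ;      // default initialization by null
        }
      }
      return values;
    }

    int main() {
      // Object with oop fields at offsets 12 and 16; only offset 12 has a
      // captured store (to points-to node 42).
      std::map<int, int> captured = { { 12, 42 } };
      std::map<int, int> v = complete_init_values({ 12, 16 }, captured);
      for (const auto& kv : v)
        std::printf("offset %d -> node %d\n", kv.first, kv.second);
      return 0;
    }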
-
-// Adjust escape state after Connection Graph is built.
-void ConnectionGraph::adjust_escape_state(Node* n) {
-  PointsToNode* ptn = ptnode_adr(n->_idx);
-  assert(n->is_AddP(), "Should be called for AddP nodes only");
-  // Search for objects which are not scalar replaceable
-  // and mark them to propagate the state to referenced objects.
-  //
-
-  int offset = ptn->offset();
-  Node* base = get_addp_base(n);
-  VectorSet* ptset = PointsTo(base);
-  int ptset_size = ptset->Size();
-
-  // An object is not scalar replaceable if the field which may point
-  // to it has unknown offset (unknown element of an array of objects).
-  //
-
-  if (offset == Type::OffsetBot) {
-    uint e_cnt = ptn->edge_count();
-    for (uint ei = 0; ei < e_cnt; ei++) {
-      uint npi = ptn->edge_target(ei);
-      ptnode_adr(npi)->set_scalar_replaceable(false);
-    }
-  }
-
-  // Currently an object is not scalar replaceable if a LoadStore node
-  // accesses its field, since the field value is unknown after it.
-  //
-  bool has_LoadStore = false;
-  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
-    Node *use = n->fast_out(i);
-    if (use->is_LoadStore()) {
-      has_LoadStore = true;
-      break;
-    }
-  }
-  // An object is not scalar replaceable if the address points
-  // to an unknown field (unknown element for arrays, offset is OffsetBot).
-  //
-  // Or the address may point to more than one object. This may produce
-  // a false-positive result (marking the object not scalar replaceable)
-  // since the flow-insensitive escape analysis can't separate
-  // the case when stores overwrite the field's value from the case
-  // when stores happen on different control branches.
-  //
-  // Note: it will disable scalar replacement in some cases:
-  //
-  //    Point p[] = new Point[1];
-  //    p[0] = new Point(); // Will not be scalar replaced
-  //
-  // but it will save us from incorrect optimizations in cases like this:
-  //
-  //    Point p[] = new Point[1];
-  //    if ( x ) p[0] = new Point(); // Will not be scalar replaced
-  //
-  if (ptset_size > 1 || ptset_size != 0 &&
-      (has_LoadStore || offset == Type::OffsetBot)) {
-    for( VectorSetI j(ptset); j.test(); ++j ) {
-      ptnode_adr(j.elem)->set_scalar_replaceable(false);
-    }
-  }
-}
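The demotion rules above reduce to three triggers: the base may resolve to more than one object, the offset is unknown (OffsetBot, i.e. an array element), or a LoadStore touches the field. A condensed restatement with invented flags (not the real node API):

    #include <cstdio>

    // Condensed version of the scalar-replaceability demotion rules for one
    // AddP address: any of these conditions forces the pointed-to objects to
    // stay as real heap allocations.
    bool must_demote(int pointsto_set_size,   // how many objects the base may be
                     bool offset_is_bottom,   // unknown field/element offset
                     bool has_load_store) {   // CAS-like access to the field
      if (pointsto_set_size == 0) return false;        // nothing to demote
      return pointsto_set_size > 1 || offset_is_bottom || has_load_store;
    }

    int main() {
      std::printf("%d\n", must_demote(1, false, false)); // 0: still replaceable
      std::printf("%d\n", must_demote(2, false, false)); // 1: ambiguous base
      std::printf("%d\n", must_demote(1, true,  false)); // 1: unknown element
      std::printf("%d\n", must_demote(1, false, true));  // 1: LoadStore access
      return 0;
    }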
-
-// Propagate escape states to referenced nodes.
-bool ConnectionGraph::propagate_escape_state(GrowableArray<int>* cg_worklist,
-                                             GrowableArray<uint>* worklist,
-                                             PointsToNode::EscapeState esc_state) {
-  bool has_java_obj = false;
-
-  // push all nodes with the same escape state on the worklist
-  uint cg_length = cg_worklist->length();
-  for (uint next = 0; next < cg_length; ++next) {
-    int nk = cg_worklist->at(next);
-    if (ptnode_adr(nk)->escape_state() == esc_state)
-      worklist->push(nk);
-  }
-  // mark all reachable nodes
-  while (worklist->length() > 0) {
-    int pt = worklist->pop();
-    PointsToNode* ptn = ptnode_adr(pt);
-    if (ptn->node_type() == PointsToNode::JavaObject &&
-        !is_null_ptr(pt)) {
-      has_java_obj = true;
-      if (esc_state > PointsToNode::NoEscape) {
-        // fields values are unknown if object escapes
-        add_edge_from_fields(pt, _phantom_object, Type::OffsetBot);
-      }
+  tty->print(" [");
+  for (UseIterator i(this); i.has_next(); i.next()) {
+    PointsToNode* u = i.get();
+    bool is_base = false;
+    if (PointsToNode::is_base_use(u)) {
+      is_base = true;
+      u = PointsToNode::get_use_node(u)->as_Field();
     }
-    uint e_cnt = ptn->edge_count();
-    for (uint ei = 0; ei < e_cnt; ei++) {
-      uint npi = ptn->edge_target(ei);
-      if (is_null_ptr(npi))
-        continue;
-      PointsToNode *np = ptnode_adr(npi);
-      if (np->escape_state() < esc_state) {
-        set_escape_state(npi, esc_state);
-        worklist->push(npi);
-      }
-    }
-  }
-  // Returns true if there are non-escaping Java objects.
-  return has_java_obj && (esc_state < PointsToNode::GlobalEscape);
-}
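propagate_escape_state is a plain worklist propagation: seed it with every node already at the given level, then raise everything reachable through points-to edges to at least that level (states only ever go up). A standalone sketch on an adjacency list (toy graph, not PointsToNode):

    #include <cstdio>
    #include <vector>

    enum Esc { NoEsc = 0, ArgEsc = 1, GlobalEsc = 2 };

    // Raise the escape state of everything reachable from nodes already at
    // 'level' to at least 'level'. 'edges[i]' lists the points-to targets of i.
    void propagate(std::vector<Esc>& state,
                   const std::vector<std::vector<int>>& edges, Esc level) {
      std::vector<int> worklist;
      for (int i = 0; i < (int)state.size(); i++)
        if (state[(size_t)i] == level) worklist.push_back(i);
      while (!worklist.empty()) {
        int n = worklist.back(); worklist.pop_back();
        for (int t : edges[(size_t)n]) {
          if (state[(size_t)t] < level) {     // only ever raise states
            state[(size_t)t] = level;
            worklist.push_back(t);
          }
        }
      }
    }

    int main() {
      // 0 -> 1 -> 2, node 0 is ArgEscape; 3 is unreachable and stays NoEscape.
      std::vector<Esc> state = { ArgEsc, NoEsc, NoEsc, NoEsc };
      std::vector<std::vector<int>> edges = { {1}, {2}, {}, {} };
      propagate(state, edges, ArgEsc);
      for (Esc e : state) std::printf("%d ", (int)e);
      std::printf("\n");                      // prints: 1 1 1 0
      return 0;
    }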
-
-// Optimize objects compare.
-Node* ConnectionGraph::optimize_ptr_compare(Node* n) {
-  assert(OptimizePtrCompare, "sanity");
-  // Clone returned Set since PointsTo() returns pointer
-  // to the same structure ConnectionGraph.pt_ptset.
-  VectorSet ptset1 = *PointsTo(n->in(1));
-  VectorSet ptset2 = *PointsTo(n->in(2));
-
-  // Check simple cases first.
-  if (ptset1.Size() == 1) {
-    uint pt1 = ptset1.getelem();
-    PointsToNode* ptn1 = ptnode_adr(pt1);
-    if (ptn1->escape_state() == PointsToNode::NoEscape) {
-      if (ptset2.Size() == 1 && ptset2.getelem() == pt1) {
-        // Comparing the same non-escaping object.
-        return _pcmp_eq;
-      }
-      Node* obj = ptn1->_node;
-      // Comparing a non-escaping allocation.
-      if ((obj->is_Allocate() || obj->is_CallStaticJava()) &&
-          !ptset2.test(pt1)) {
-        return _pcmp_neq; // This includes nullness check.
-      }
-    }
-  } else if (ptset2.Size() == 1) {
-    uint pt2 = ptset2.getelem();
-    PointsToNode* ptn2 = ptnode_adr(pt2);
-    if (ptn2->escape_state() == PointsToNode::NoEscape) {
-      Node* obj = ptn2->_node;
-      // Comparing a non-escaping allocation.
-      if ((obj->is_Allocate() || obj->is_CallStaticJava()) &&
-          !ptset1.test(pt2)) {
-        return _pcmp_neq; // This includes nullness check.
-      }
-    }
+    tty->print(" %d%s%s", u->idx(), is_base ? "b" : "", u->is_Arraycopy() ? "cp" : "");
   }
-
-  if (!ptset1.disjoint(ptset2)) {
-    return NULL; // Sets are not disjoint
-  }
-
-  // Sets are disjoint.
-  bool set1_has_unknown_ptr = ptset1.test(_phantom_object) != 0;
-  bool set2_has_unknown_ptr = ptset2.test(_phantom_object) != 0;
-  bool set1_has_null_ptr   = (ptset1.test(_oop_null) | ptset1.test(_noop_null)) != 0;
-  bool set2_has_null_ptr   = (ptset2.test(_oop_null) | ptset2.test(_noop_null)) != 0;
-
-  if (set1_has_unknown_ptr && set2_has_null_ptr ||
-      set2_has_unknown_ptr && set1_has_null_ptr) {
-    // Check nullness of unknown object.
-    return NULL;
-  }
-
-  // Disjointness by itself is not sufficient since
-  // alias analysis is not complete for escaped objects.
-  // Disjoint sets are definitely unrelated only when
-  // at least one set contains only non-escaping objects.
-  if (!set1_has_unknown_ptr && !set1_has_null_ptr) {
-    bool has_only_non_escaping_alloc = true;
-    for (VectorSetI i(&ptset1); i.test(); ++i) {
-      uint pt = i.elem;
-      PointsToNode* ptn = ptnode_adr(pt);
-      Node* obj = ptn->_node;
-      if (ptn->escape_state() != PointsToNode::NoEscape ||
-          !(obj->is_Allocate() || obj->is_CallStaticJava())) {
-        has_only_non_escaping_alloc = false;
-        break;
-      }
-    }
-    if (has_only_non_escaping_alloc) {
-      return _pcmp_neq;
-    }
-  }
-  if (!set2_has_unknown_ptr && !set2_has_null_ptr) {
-    bool has_only_non_escaping_alloc = true;
-    for (VectorSetI i(&ptset2); i.test(); ++i) {
-      uint pt = i.elem;
-      PointsToNode* ptn = ptnode_adr(pt);
-      Node* obj = ptn->_node;
-      if (ptn->escape_state() != PointsToNode::NoEscape ||
-          !(obj->is_Allocate() || obj->is_CallStaticJava())) {
-        has_only_non_escaping_alloc = false;
-        break;
-      }
-    }
-    if (has_only_non_escaping_alloc) {
-      return _pcmp_neq;
-    }
-  }
-  return NULL;
+  tty->print(" ]]  ");
+  if (_node == NULL)
+    tty->print_cr("<null>");
+  else
+    _node->dump();
 }
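optimize_ptr_compare folds a pointer comparison when the two points-to sets are provably the same single non-escaping object, or provably disjoint; disjointness alone is conclusive only when at least one side consists entirely of non-escaping allocations. A reduced model with std::set (the null/unknown-pointer handling above is elided; fold_ptr_cmp is invented):

    #include <algorithm>
    #include <cstdio>
    #include <iterator>
    #include <set>

    enum CmpResult { CMP_EQ, CMP_NE, CMP_UNKNOWN };

    // Fold a pointer compare from points-to sets: equal singleton non-escaping
    // sets compare equal; disjoint sets compare unequal, but only when at least
    // one side is known exactly (all members are non-escaping allocations).
    CmpResult fold_ptr_cmp(const std::set<int>& s1, const std::set<int>& s2,
                           bool s1_all_nonescaping, bool s2_all_nonescaping) {
      if (s1.size() == 1 && s1 == s2 && s1_all_nonescaping) {
        return CMP_EQ;                 // same single non-escaping object
      }
      std::set<int> inter;
      std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(),
                            std::inserter(inter, inter.begin()));
      if (!inter.empty()) {
        return CMP_UNKNOWN;            // may alias
      }
      // Disjoint: conclusive only if one side is fully known.
      if (s1_all_nonescaping || s2_all_nonescaping) {
        return CMP_NE;
      }
      return CMP_UNKNOWN;              // escaped objects: alias info incomplete
    }

    int main() {
      std::set<int> a = { 1 }, b = { 1 }, c = { 2, 3 };
      std::printf("%d\n", fold_ptr_cmp(a, b, true,  true));  // 0: CMP_EQ
      std::printf("%d\n", fold_ptr_cmp(a, c, true,  false)); // 1: CMP_NE
      std::printf("%d\n", fold_ptr_cmp(a, c, false, false)); // 2: CMP_UNKNOWN
      return 0;
    }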
 
-void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *phase) {
-    bool is_arraycopy = false;
-    switch (call->Opcode()) {
-#ifdef ASSERT
-    case Op_Allocate:
-    case Op_AllocateArray:
-    case Op_Lock:
-    case Op_Unlock:
-      assert(false, "should be done already");
-      break;
-#endif
-    case Op_CallLeafNoFP:
-      is_arraycopy = (call->as_CallLeaf()->_name != NULL &&
-                      strstr(call->as_CallLeaf()->_name, "arraycopy") != 0);
-      // fall through
-    case Op_CallLeaf:
-    {
-      // Stub calls: objects do not escape, but they are not scalar replaceable.
-      // Adjust escape state for outgoing arguments.
-      const TypeTuple * d = call->tf()->domain();
-      bool src_has_oops = false;
-      for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
-        const Type* at = d->field_at(i);
-        Node *arg = call->in(i)->uncast();
-        const Type *aat = phase->type(arg);
-        PointsToNode::EscapeState arg_esc = ptnode_adr(arg->_idx)->escape_state();
-        if (!arg->is_top() && at->isa_ptr() && aat->isa_ptr() &&
-            (is_arraycopy || arg_esc < PointsToNode::ArgEscape)) {
-#ifdef ASSERT
-          assert(aat == Type::TOP || aat == TypePtr::NULL_PTR ||
-                 aat->isa_ptr() != NULL, "expecting a Ptr");
-          if (!(is_arraycopy ||
-                call->as_CallLeaf()->_name != NULL &&
-                (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre")  == 0 ||
-                 strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ))
-          ) {
-            call->dump();
-            assert(false, "EA: unexpected CallLeaf");
-          }
-#endif
-          if (arg_esc < PointsToNode::ArgEscape) {
-            set_escape_state(arg->_idx, PointsToNode::ArgEscape);
-            Node* arg_base = arg;
-            if (arg->is_AddP()) {
-              //
-              // The inline_native_clone() case when the arraycopy stub is called
-              // after the allocation before Initialize and CheckCastPP nodes.
-              // Or normal arraycopy for object arrays case.
-              //
-              // Set AddP's base (Allocate) as not scalar replaceable since
-              // pointer to the base (with offset) is passed as argument.
-              //
-              arg_base = get_addp_base(arg);
-              set_escape_state(arg_base->_idx, PointsToNode::ArgEscape);
-            }
-          }
-
-          bool arg_has_oops = aat->isa_oopptr() &&
-                              (aat->isa_oopptr()->klass() == NULL || aat->isa_instptr() ||
-                               (aat->isa_aryptr() && aat->isa_aryptr()->klass()->is_obj_array_klass()));
-          if (i == TypeFunc::Parms) {
-            src_has_oops = arg_has_oops;
-          }
-          //
-          // src or dst could be j.l.Object when the other is a basic-type array:
-          //
-          //   arraycopy(char[],0,Object*,0,size);
-          //   arraycopy(Object*,0,char[],0,size);
-          //
-          // Do nothing special in such cases.
-          //
-          if (is_arraycopy && (i > TypeFunc::Parms) &&
-              src_has_oops && arg_has_oops) {
-            // Destination object's fields reference an unknown object.
-            Node* arg_base = arg;
-            if (arg->is_AddP()) {
-              arg_base = get_addp_base(arg);
-            }
-            for (VectorSetI s(PointsTo(arg_base)); s.test(); ++s) {
-              uint ps = s.elem;
-              set_escape_state(ps, PointsToNode::ArgEscape);
-              add_edge_from_fields(ps, _phantom_object, Type::OffsetBot);
-            }
-            // Conservatively all values in source object fields globally escape
-            // since we don't know if values in destination object fields
-            // escape (it could be traced but it is too expensive).
-            Node* src = call->in(TypeFunc::Parms)->uncast();
-            Node* src_base = src;
-            if (src->is_AddP()) {
-              src_base  = get_addp_base(src);
-            }
-            for (VectorSetI s(PointsTo(src_base)); s.test(); ++s) {
-              uint ps = s.elem;
-              set_escape_state(ps, PointsToNode::ArgEscape);
-              // Use OffsetTop to indicate fields global escape.
-              add_edge_from_fields(ps, _phantom_object, Type::OffsetTop);
-            }
-          }
-        }
-      }
-      break;
-    }
-
-    case Op_CallStaticJava:
-    // For a static call, we know exactly what method is being called.
-    // Use bytecode estimator to record the call's escape effects
-    {
-      ciMethod *meth = call->as_CallJava()->method();
-      BCEscapeAnalyzer *call_analyzer = (meth !=NULL) ? meth->get_bcea() : NULL;
-      // fall-through if not a Java method or no analyzer information
-      if (call_analyzer != NULL) {
-        const TypeTuple * d = call->tf()->domain();
-        bool copy_dependencies = false;
-        for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
-          const Type* at = d->field_at(i);
-          int k = i - TypeFunc::Parms;
-          Node *arg = call->in(i)->uncast();
-
-          if (at->isa_oopptr() != NULL &&
-              ptnode_adr(arg->_idx)->escape_state() < PointsToNode::GlobalEscape) {
-
-            bool global_escapes = false;
-            bool fields_escapes = false;
-            if (!call_analyzer->is_arg_stack(k)) {
-              // The argument global escapes, mark everything it could point to
-              set_escape_state(arg->_idx, PointsToNode::GlobalEscape);
-              global_escapes = true;
-            } else {
-              if (!call_analyzer->is_arg_local(k)) {
-                // The argument itself doesn't escape, but any fields might
-                fields_escapes = true;
-              }
-              set_escape_state(arg->_idx, PointsToNode::ArgEscape);
-              copy_dependencies = true;
-            }
-
-            for( VectorSetI j(PointsTo(arg)); j.test(); ++j ) {
-              uint pt = j.elem;
-              if (global_escapes) {
-                // The argument global escapes, mark everything it could point to
-                set_escape_state(pt, PointsToNode::GlobalEscape);
-                add_edge_from_fields(pt, _phantom_object, Type::OffsetBot);
-              } else {
-                set_escape_state(pt, PointsToNode::ArgEscape);
-                if (fields_escapes) {
-                  // The argument itself doesn't escape, but any fields might.
-                  // Use OffsetTop to indicate such case.
-                  add_edge_from_fields(pt, _phantom_object, Type::OffsetTop);
-                }
-              }
-            }
-          }
-        }
-        if (copy_dependencies)
-          call_analyzer->copy_dependencies(_compile->dependencies());
-        break;
-      }
-    }
-
-    default:
-    // We fall through to here if this is not a Java method, there is no
-    // analyzer information, or it is some other type of call. Assume the
-    // worst case: all arguments globally escape.
-    {
-      // adjust escape state for  outgoing arguments
-      const TypeTuple * d = call->tf()->domain();
-      for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
-        const Type* at = d->field_at(i);
-        if (at->isa_oopptr() != NULL) {
-          Node *arg = call->in(i)->uncast();
-          set_escape_state(arg->_idx, PointsToNode::GlobalEscape);
-          for( VectorSetI j(PointsTo(arg)); j.test(); ++j ) {
-            uint pt = j.elem;
-            set_escape_state(pt, PointsToNode::GlobalEscape);
-            add_edge_from_fields(pt, _phantom_object, Type::OffsetBot);
-          }
-        }
-      }
-    }
-  }
-}
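The switch above amounts to a three-tier policy for call arguments: known leaf stubs (arraycopy, the g1_wb_pre/g1_wb_post write barriers) make their arguments ArgEscape; Java calls with bytecode escape information use the analyzer's per-argument verdict; anything else conservatively makes every oop argument GlobalEscape. A compact restatement with invented enums (no real BCEscapeAnalyzer involved):

    #include <cstdio>

    enum Esc { UnknownEsc, NoEsc, ArgEsc, GlobalEsc };
    enum CallKind { LeafStub, JavaWithBCEA, UnknownCall };

    // Per-argument escape verdict, following the tiers in the switch above.
    // For JavaWithBCEA the analyzer answers: arg_stack (the argument does not
    // escape the callee) and arg_local (its fields do not escape either).
    Esc classify_arg(CallKind kind, bool arg_stack, bool arg_local) {
      switch (kind) {
        case LeafStub:
          return ArgEsc;             // stub reads/writes it, never publishes it
        case JavaWithBCEA:
          if (!arg_stack) return GlobalEsc;  // callee may store it globally
          (void)arg_local;           // arg_local only refines field escape state
          return ArgEsc;
        default:
          return GlobalEsc;          // no callee knowledge: worst case
      }
    }

    int main() {
      std::printf("%d\n", classify_arg(LeafStub,     false, false)); // 2
      std::printf("%d\n", classify_arg(JavaWithBCEA, true,  true));  // 2
      std::printf("%d\n", classify_arg(JavaWithBCEA, false, false)); // 3
      std::printf("%d\n", classify_arg(UnknownCall,  false, false)); // 3
      return 0;
    }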
-void ConnectionGraph::process_call_result(ProjNode *resproj, PhaseTransform *phase) {
-  CallNode   *call = resproj->in(0)->as_Call();
-  uint    call_idx = call->_idx;
-  uint resproj_idx = resproj->_idx;
-
-  switch (call->Opcode()) {
-    case Op_Allocate:
-    {
-      Node *k = call->in(AllocateNode::KlassNode);
-      const TypeKlassPtr *kt = k->bottom_type()->isa_klassptr();
-      assert(kt != NULL, "TypeKlassPtr  required.");
-      ciKlass* cik = kt->klass();
-
-      PointsToNode::EscapeState es;
-      uint edge_to;
-      if (cik->is_subclass_of(_compile->env()->Thread_klass()) ||
-         !cik->is_instance_klass() || // StressReflectiveCode
-          cik->as_instance_klass()->has_finalizer()) {
-        es = PointsToNode::GlobalEscape;
-        edge_to = _phantom_object; // Could not be worse
-      } else {
-        es = PointsToNode::NoEscape;
-        edge_to = call_idx;
-        assert(ptnode_adr(call_idx)->scalar_replaceable(), "sanity");
-      }
-      set_escape_state(call_idx, es);
-      add_pointsto_edge(resproj_idx, edge_to);
-      _processed.set(resproj_idx);
-      break;
-    }
-
-    case Op_AllocateArray:
-    {
-
-      Node *k = call->in(AllocateNode::KlassNode);
-      const TypeKlassPtr *kt = k->bottom_type()->isa_klassptr();
-      assert(kt != NULL, "TypeKlassPtr  required.");
-      ciKlass* cik = kt->klass();
-
-      PointsToNode::EscapeState es;
-      uint edge_to;
-      if (!cik->is_array_klass()) { // StressReflectiveCode
-        es = PointsToNode::GlobalEscape;
-        edge_to = _phantom_object;
-      } else {
-        es = PointsToNode::NoEscape;
-        edge_to = call_idx;
-        assert(ptnode_adr(call_idx)->scalar_replaceable(), "sanity");
-        int length = call->in(AllocateNode::ALength)->find_int_con(-1);
-        if (length < 0 || length > EliminateAllocationArraySizeLimit) {
-          // Not scalar replaceable if the length is not constant or too big.
-          ptnode_adr(call_idx)->set_scalar_replaceable(false);
-        }
-      }
-      set_escape_state(call_idx, es);
-      add_pointsto_edge(resproj_idx, edge_to);
-      _processed.set(resproj_idx);
-      break;
-    }
-
-    case Op_CallStaticJava:
-    // For a static call, we know exactly what method is being called.
-    // Use bytecode estimator to record whether the call's return value escapes
-    {
-      bool done = true;
-      const TypeTuple *r = call->tf()->range();
-      const Type* ret_type = NULL;
-
-      if (r->cnt() > TypeFunc::Parms)
-        ret_type = r->field_at(TypeFunc::Parms);
-
-      // Note:  we use isa_ptr() instead of isa_oopptr()  here because the
-      //        _multianewarray functions return a TypeRawPtr.
-      if (ret_type == NULL || ret_type->isa_ptr() == NULL) {
-        _processed.set(resproj_idx);
-        break;  // doesn't return a pointer type
-      }
-      ciMethod *meth = call->as_CallJava()->method();
-      const TypeTuple * d = call->tf()->domain();
-      if (meth == NULL) {
-        // not a Java method, assume global escape
-        set_escape_state(call_idx, PointsToNode::GlobalEscape);
-        add_pointsto_edge(resproj_idx, _phantom_object);
-      } else {
-        BCEscapeAnalyzer *call_analyzer = meth->get_bcea();
-        bool copy_dependencies = false;
-
-        if (call_analyzer->is_return_allocated()) {
-          // Returns a newly allocated unescaped object; simply
-          // update dependency information.
-          // Mark it as NoEscape so that objects referenced by
-          // its fields will be marked as NoEscape at least.
-          set_escape_state(call_idx, PointsToNode::NoEscape);
-          ptnode_adr(call_idx)->set_scalar_replaceable(false);
-          // Fields values are unknown
-          add_edge_from_fields(call_idx, _phantom_object, Type::OffsetBot);
-          add_pointsto_edge(resproj_idx, call_idx);
-          copy_dependencies = true;
-        } else {
-          // determine whether any arguments are returned
-          set_escape_state(call_idx, PointsToNode::ArgEscape);
-          bool ret_arg = false;
-          for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
-            const Type* at = d->field_at(i);
-            if (at->isa_oopptr() != NULL) {
-              Node *arg = call->in(i)->uncast();
-
-              if (call_analyzer->is_arg_returned(i - TypeFunc::Parms)) {
-                ret_arg = true;
-                PointsToNode *arg_esp = ptnode_adr(arg->_idx);
-                if (arg_esp->node_type() == PointsToNode::UnknownType)
-                  done = false;
-                else if (arg_esp->node_type() == PointsToNode::JavaObject)
-                  add_pointsto_edge(resproj_idx, arg->_idx);
-                else
-                  add_deferred_edge(resproj_idx, arg->_idx);
-              }
-            }
-          }
-          if (done) {
-            copy_dependencies = true;
-            // is_return_local() is true when only arguments are returned.
-            if (!ret_arg || !call_analyzer->is_return_local()) {
-              // Returns unknown object.
-              add_pointsto_edge(resproj_idx, _phantom_object);
-            }
-          }
-        }
-        if (copy_dependencies)
-          call_analyzer->copy_dependencies(_compile->dependencies());
-      }
-      if (done)
-        _processed.set(resproj_idx);
-      break;
-    }
-
-    default:
-    // Some other type of call, assume the worst case that the
-    // returned value, if any, globally escapes.
-    {
-      const TypeTuple *r = call->tf()->range();
-      if (r->cnt() > TypeFunc::Parms) {
-        const Type* ret_type = r->field_at(TypeFunc::Parms);
-
-        // Note:  we use isa_ptr() instead of isa_oopptr()  here because the
-        //        _multianewarray functions return a TypeRawPtr.
-        if (ret_type->isa_ptr() != NULL) {
-          set_escape_state(call_idx, PointsToNode::GlobalEscape);
-          add_pointsto_edge(resproj_idx, _phantom_object);
-        }
-      }
-      _processed.set(resproj_idx);
-    }
-  }
-}
-
-// Populate Connection Graph with Ideal nodes and create simple
-// connection graph edges (do not need to check the node_type of inputs
-// or to call PointsTo() to walk the connection graph).
-void ConnectionGraph::record_for_escape_analysis(Node *n, PhaseTransform *phase) {
-  if (_processed.test(n->_idx))
-    return; // No need to redefine node's state.
-
-  if (n->is_Call()) {
-    // Arguments to allocation and locking don't escape.
-    if (n->is_Allocate()) {
-      add_node(n, PointsToNode::JavaObject, PointsToNode::UnknownEscape, true);
-      record_for_optimizer(n);
-    } else if (n->is_Lock() || n->is_Unlock()) {
-      // Put Lock and Unlock nodes on IGVN worklist to process them during
-      // the first IGVN optimization when escape information is still available.
-      record_for_optimizer(n);
-      _processed.set(n->_idx);
-    } else {
-      // Don't mark as processed since call's arguments have to be processed.
-      PointsToNode::NodeType nt = PointsToNode::UnknownType;
-      PointsToNode::EscapeState es = PointsToNode::UnknownEscape;
-
-      // Check if a call returns an object.
-      const TypeTuple *r = n->as_Call()->tf()->range();
-      if (r->cnt() > TypeFunc::Parms &&
-          r->field_at(TypeFunc::Parms)->isa_ptr() &&
-          n->as_Call()->proj_out(TypeFunc::Parms) != NULL) {
-        nt = PointsToNode::JavaObject;
-        if (!n->is_CallStaticJava()) {
-          // Since the called method is statically unknown, assume
-          // the worst case that the returned value globally escapes.
-          es = PointsToNode::GlobalEscape;
-        }
-      }
-      add_node(n, nt, es, false);
-    }
-    return;
-  }
-
-  // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
-  // ThreadLocal has RawPtr type.
-  switch (n->Opcode()) {
-    case Op_AddP:
-    {
-      add_node(n, PointsToNode::Field, PointsToNode::UnknownEscape, false);
-      break;
-    }
-    case Op_CastX2P:
-    { // "Unsafe" memory access.
-      add_node(n, PointsToNode::JavaObject, PointsToNode::GlobalEscape, true);
-      break;
-    }
-    case Op_CastPP:
-    case Op_CheckCastPP:
-    case Op_EncodeP:
-    case Op_DecodeN:
-    {
-      add_node(n, PointsToNode::LocalVar, PointsToNode::UnknownEscape, false);
-      int ti = n->in(1)->_idx;
-      PointsToNode::NodeType nt = ptnode_adr(ti)->node_type();
-      if (nt == PointsToNode::UnknownType) {
-        _delayed_worklist.push(n); // Process it later.
-        break;
-      } else if (nt == PointsToNode::JavaObject) {
-        add_pointsto_edge(n->_idx, ti);
-      } else {
-        add_deferred_edge(n->_idx, ti);
-      }
-      _processed.set(n->_idx);
-      break;
-    }
-    case Op_ConP:
-    {
-      // assume all pointer constants globally escape except for null
-      PointsToNode::EscapeState es;
-      if (phase->type(n) == TypePtr::NULL_PTR)
-        es = PointsToNode::NoEscape;
-      else
-        es = PointsToNode::GlobalEscape;
-
-      add_node(n, PointsToNode::JavaObject, es, true);
-      break;
-    }
-    case Op_ConN:
-    {
-      // assume all narrow oop constants globally escape except for null
-      PointsToNode::EscapeState es;
-      if (phase->type(n) == TypeNarrowOop::NULL_PTR)
-        es = PointsToNode::NoEscape;
-      else
-        es = PointsToNode::GlobalEscape;
-
-      add_node(n, PointsToNode::JavaObject, es, true);
-      break;
-    }
-    case Op_CreateEx:
-    {
-      // assume that all exception objects globally escape
-      add_node(n, PointsToNode::JavaObject, PointsToNode::GlobalEscape, true);
-      break;
-    }
-    case Op_LoadKlass:
-    case Op_LoadNKlass:
-    {
-      add_node(n, PointsToNode::JavaObject, PointsToNode::GlobalEscape, true);
-      break;
-    }
-    case Op_LoadP:
-    case Op_LoadN:
-    {
-      const Type *t = phase->type(n);
-      if (t->make_ptr() == NULL) {
-        _processed.set(n->_idx);
-        return;
-      }
-      add_node(n, PointsToNode::LocalVar, PointsToNode::UnknownEscape, false);
-      break;
-    }
-    case Op_Parm:
-    {
-      _processed.set(n->_idx); // No need to redefine its state.
-      uint con = n->as_Proj()->_con;
-      if (con < TypeFunc::Parms)
-        return;
-      const Type *t = n->in(0)->as_Start()->_domain->field_at(con);
-      if (t->isa_ptr() == NULL)
-        return;
-      // We have to assume all input parameters globally escape
-      // (Note: passing 'false' since _processed is already set).
-      add_node(n, PointsToNode::JavaObject, PointsToNode::GlobalEscape, false);
-      break;
-    }
-    case Op_PartialSubtypeCheck:
-    { // Produces Null or notNull and is used in CmpP.
-      add_node(n, PointsToNode::JavaObject, PointsToNode::ArgEscape, true);
-      break;
-    }
-    case Op_Phi:
-    {
-      const Type *t = n->as_Phi()->type();
-      if (t->make_ptr() == NULL) {
-        // nothing to do if not an oop or narrow oop
-        _processed.set(n->_idx);
-        return;
-      }
-      add_node(n, PointsToNode::LocalVar, PointsToNode::UnknownEscape, false);
-      uint i;
-      for (i = 1; i < n->req() ; i++) {
-        Node* in = n->in(i);
-        if (in == NULL)
-          continue;  // ignore NULL
-        in = in->uncast();
-        if (in->is_top() || in == n)
-          continue;  // ignore top or inputs which go back to this node
-        int ti = in->_idx;
-        PointsToNode::NodeType nt = ptnode_adr(ti)->node_type();
-        if (nt == PointsToNode::UnknownType) {
-          break;
-        } else if (nt == PointsToNode::JavaObject) {
-          add_pointsto_edge(n->_idx, ti);
-        } else {
-          add_deferred_edge(n->_idx, ti);
-        }
-      }
-      if (i >= n->req())
-        _processed.set(n->_idx);
-      else
-        _delayed_worklist.push(n);
-      break;
-    }
-    case Op_Proj:
-    {
-      // we are only interested in the oop result projection from a call
-      if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() ) {
-        const TypeTuple *r = n->in(0)->as_Call()->tf()->range();
-        assert(r->cnt() > TypeFunc::Parms, "sanity");
-        if (r->field_at(TypeFunc::Parms)->isa_ptr() != NULL) {
-          add_node(n, PointsToNode::LocalVar, PointsToNode::UnknownEscape, false);
-          int ti = n->in(0)->_idx;
-          // The call may not be registered yet (since not all its inputs are registered)
-          // if this is the projection from the backbranch edge of a Phi.
-          if (ptnode_adr(ti)->node_type() != PointsToNode::UnknownType) {
-            process_call_result(n->as_Proj(), phase);
-          }
-          if (!_processed.test(n->_idx)) {
-            // The call's result may need to be processed later if the call
-            // returns its argument and the argument is not processed yet.
-            _delayed_worklist.push(n);
-          }
-          break;
-        }
-      }
-      _processed.set(n->_idx);
-      break;
-    }
-    case Op_Return:
-    {
-      if( n->req() > TypeFunc::Parms &&
-          phase->type(n->in(TypeFunc::Parms))->isa_oopptr() ) {
-        // Treat Return value as LocalVar with GlobalEscape escape state.
-        add_node(n, PointsToNode::LocalVar, PointsToNode::GlobalEscape, false);
-        int ti = n->in(TypeFunc::Parms)->_idx;
-        PointsToNode::NodeType nt = ptnode_adr(ti)->node_type();
-        if (nt == PointsToNode::UnknownType) {
-          _delayed_worklist.push(n); // Process it later.
-          break;
-        } else if (nt == PointsToNode::JavaObject) {
-          add_pointsto_edge(n->_idx, ti);
-        } else {
-          add_deferred_edge(n->_idx, ti);
-        }
-      }
-      _processed.set(n->_idx);
-      break;
-    }
-    case Op_StoreP:
-    case Op_StoreN:
-    {
-      const Type *adr_type = phase->type(n->in(MemNode::Address));
-      adr_type = adr_type->make_ptr();
-      if (adr_type->isa_oopptr()) {
-        add_node(n, PointsToNode::UnknownType, PointsToNode::UnknownEscape, false);
-      } else {
-        Node* adr = n->in(MemNode::Address);
-        if (adr->is_AddP() && phase->type(adr) == TypeRawPtr::NOTNULL &&
-            adr->in(AddPNode::Address)->is_Proj() &&
-            adr->in(AddPNode::Address)->in(0)->is_Allocate()) {
-          add_node(n, PointsToNode::UnknownType, PointsToNode::UnknownEscape, false);
-          // We are computing a raw address for a store captured
-          // by an Initialize; compute an appropriate address type.
-          int offs = (int)phase->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot);
-          assert(offs != Type::OffsetBot, "offset must be a constant");
-        } else {
-          _processed.set(n->_idx);
-          return;
-        }
-      }
-      break;
-    }
-    case Op_StorePConditional:
-    case Op_CompareAndSwapP:
-    case Op_CompareAndSwapN:
-    {
-      const Type *adr_type = phase->type(n->in(MemNode::Address));
-      adr_type = adr_type->make_ptr();
-      if (adr_type->isa_oopptr()) {
-        add_node(n, PointsToNode::UnknownType, PointsToNode::UnknownEscape, false);
-      } else {
-        _processed.set(n->_idx);
-        return;
-      }
-      break;
-    }
-    case Op_AryEq:
-    case Op_StrComp:
-    case Op_StrEquals:
-    case Op_StrIndexOf:
-    {
-      // char[] arrays passed to string intrinsics are not scalar replaceable.
-      add_node(n, PointsToNode::UnknownType, PointsToNode::UnknownEscape, false);
-      break;
-    }
-    case Op_ThreadLocal:
-    {
-      add_node(n, PointsToNode::JavaObject, PointsToNode::ArgEscape, true);
-      break;
-    }
-    default:
-      ;
-      // nothing to do
-  }
-  return;
-}
-
-void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) {
-  uint n_idx = n->_idx;
-  assert(ptnode_adr(n_idx)->_node != NULL, "node should be registered");
-
-  // Don't set the processed bit for AddP, LoadP and StoreP nodes since
-  // they may need more than one pass to process.
-  // Also don't mark Call nodes as processed since their
-  // arguments may need more than one pass to process.
-  if (_processed.test(n_idx))
-    return; // No need to redefine node's state.
-
-  if (n->is_Call()) {
-    CallNode *call = n->as_Call();
-    process_call_arguments(call, phase);
-    return;
-  }
-
-  switch (n->Opcode()) {
-    case Op_AddP:
-    {
-      Node *base = get_addp_base(n);
-      int offset = address_offset(n, phase);
-      // Create a field edge to this node from everything base could point to.
-      for( VectorSetI i(PointsTo(base)); i.test(); ++i ) {
-        uint pt = i.elem;
-        add_field_edge(pt, n_idx, offset);
-      }
-      break;
-    }
-    case Op_CastX2P:
-    {
-      assert(false, "Op_CastX2P");
-      break;
-    }
-    case Op_CastPP:
-    case Op_CheckCastPP:
-    case Op_EncodeP:
-    case Op_DecodeN:
-    {
-      int ti = n->in(1)->_idx;
-      assert(ptnode_adr(ti)->node_type() != PointsToNode::UnknownType, "all nodes should be registered");
-      if (ptnode_adr(ti)->node_type() == PointsToNode::JavaObject) {
-        add_pointsto_edge(n_idx, ti);
-      } else {
-        add_deferred_edge(n_idx, ti);
-      }
-      _processed.set(n_idx);
-      break;
-    }
-    case Op_ConP:
-    {
-      assert(false, "Op_ConP");
-      break;
-    }
-    case Op_ConN:
-    {
-      assert(false, "Op_ConN");
-      break;
-    }
-    case Op_CreateEx:
-    {
-      assert(false, "Op_CreateEx");
-      break;
-    }
-    case Op_LoadKlass:
-    case Op_LoadNKlass:
-    {
-      assert(false, "Op_LoadKlass");
-      break;
-    }
-    case Op_LoadP:
-    case Op_LoadN:
-    {
-      const Type *t = phase->type(n);
-#ifdef ASSERT
-      if (t->make_ptr() == NULL)
-        assert(false, "Op_LoadP");
-#endif
-
-      Node* adr = n->in(MemNode::Address)->uncast();
-      Node* adr_base;
-      if (adr->is_AddP()) {
-        adr_base = get_addp_base(adr);
-      } else {
-        adr_base = adr;
-      }
-
-      // For everything "adr_base" could point to, create a deferred edge from
-      // this node to each field with the same offset.
-      int offset = address_offset(adr, phase);
-      for( VectorSetI i(PointsTo(adr_base)); i.test(); ++i ) {
-        uint pt = i.elem;
-        if (adr->is_AddP()) {
-          // Add field edge if it is missing.
-          add_field_edge(pt, adr->_idx, offset);
-        }
-        add_deferred_edge_to_fields(n_idx, pt, offset);
-      }
-      break;
-    }
-    case Op_Parm:
-    {
-      assert(false, "Op_Parm");
-      break;
-    }
-    case Op_PartialSubtypeCheck:
-    {
-      assert(false, "Op_PartialSubtypeCheck");
-      break;
-    }
-    case Op_Phi:
-    {
-#ifdef ASSERT
-      const Type *t = n->as_Phi()->type();
-      if (t->make_ptr() == NULL)
-        assert(false, "Op_Phi");
-#endif
-      for (uint i = 1; i < n->req() ; i++) {
-        Node* in = n->in(i);
-        if (in == NULL)
-          continue;  // ignore NULL
-        in = in->uncast();
-        if (in->is_top() || in == n)
-          continue;  // ignore top or inputs which go back to this node
-        int ti = in->_idx;
-        PointsToNode::NodeType nt = ptnode_adr(ti)->node_type();
-        assert(nt != PointsToNode::UnknownType, "all nodes should be known");
-        if (nt == PointsToNode::JavaObject) {
-          add_pointsto_edge(n_idx, ti);
-        } else {
-          add_deferred_edge(n_idx, ti);
-        }
-      }
-      _processed.set(n_idx);
-      break;
-    }
-    case Op_Proj:
-    {
-      // we are only interested in the oop result projection from a call
-      if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() ) {
-        assert(ptnode_adr(n->in(0)->_idx)->node_type() != PointsToNode::UnknownType,
-               "all nodes should be registered");
-        const TypeTuple *r = n->in(0)->as_Call()->tf()->range();
-        assert(r->cnt() > TypeFunc::Parms, "sanity");
-        if (r->field_at(TypeFunc::Parms)->isa_ptr() != NULL) {
-          process_call_result(n->as_Proj(), phase);
-          assert(_processed.test(n_idx), "all call results should be processed");
-          break;
-        }
-      }
-      assert(false, "Op_Proj");
-      break;
-    }
-    case Op_Return:
-    {
-#ifdef ASSERT
-      if( n->req() <= TypeFunc::Parms ||
-          !phase->type(n->in(TypeFunc::Parms))->isa_oopptr() ) {
-        assert(false, "Op_Return");
-      }
-#endif
-      int ti = n->in(TypeFunc::Parms)->_idx;
-      assert(ptnode_adr(ti)->node_type() != PointsToNode::UnknownType, "node should be registered");
-      if (ptnode_adr(ti)->node_type() == PointsToNode::JavaObject) {
-        add_pointsto_edge(n_idx, ti);
-      } else {
-        add_deferred_edge(n_idx, ti);
-      }
-      _processed.set(n_idx);
-      break;
-    }
-    case Op_StoreP:
-    case Op_StoreN:
-    case Op_StorePConditional:
-    case Op_CompareAndSwapP:
-    case Op_CompareAndSwapN:
-    {
-      Node *adr = n->in(MemNode::Address);
-      const Type *adr_type = phase->type(adr)->make_ptr();
-#ifdef ASSERT
-      if (!adr_type->isa_oopptr())
-        assert(phase->type(adr) == TypeRawPtr::NOTNULL, "Op_StoreP");
-#endif
-
-      assert(adr->is_AddP(), "expecting an AddP");
-      Node *adr_base = get_addp_base(adr);
-      Node *val = n->in(MemNode::ValueIn)->uncast();
-      int offset = address_offset(adr, phase);
-      // For everything "adr_base" could point to, create a deferred edge
-      // to "val" from each field with the same offset.
-      for( VectorSetI i(PointsTo(adr_base)); i.test(); ++i ) {
-        uint pt = i.elem;
-        // Add field edge if it is missing.
-        add_field_edge(pt, adr->_idx, offset);
-        add_edge_from_fields(pt, val->_idx, offset);
-      }
-      break;
-    }
-    case Op_AryEq:
-    case Op_StrComp:
-    case Op_StrEquals:
-    case Op_StrIndexOf:
-    {
-      // char[] arrays passed to string intrinsics do not escape but
-      // they are not scalar replaceable. Adjust escape state for them.
-      // Start from in(2) edge since in(1) is memory edge.
-      for (uint i = 2; i < n->req(); i++) {
-        Node* adr = n->in(i)->uncast();
-        const Type *at = phase->type(adr);
-        if (!adr->is_top() && at->isa_ptr()) {
-          assert(at == Type::TOP || at == TypePtr::NULL_PTR ||
-                 at->isa_ptr() != NULL, "expecting a Ptr");
-          if (adr->is_AddP()) {
-            adr = get_addp_base(adr);
-          }
-          // Mark as ArgEscape everything "adr" could point to.
-          set_escape_state(adr->_idx, PointsToNode::ArgEscape);
-        }
-      }
-      _processed.set(n_idx);
-      break;
-    }
-    case Op_ThreadLocal:
-    {
-      assert(false, "Op_ThreadLocal");
-      break;
-    }
-    default:
-      // This method should be called only for EA specific nodes.
-      ShouldNotReachHere();
-  }
-}
-
-#ifndef PRODUCT
-void ConnectionGraph::dump() {
+void ConnectionGraph::dump(GrowableArray<PointsToNode*>& ptnodes_worklist) {
   bool first = true;
-
-  uint size = nodes_size();
-  for (uint ni = 0; ni < size; ni++) {
-    PointsToNode *ptn = ptnode_adr(ni);
-    PointsToNode::NodeType ptn_type = ptn->node_type();
-
-    if (ptn_type != PointsToNode::JavaObject || ptn->_node == NULL)
+  int ptnodes_length = ptnodes_worklist.length();
+  for (int i = 0; i < ptnodes_length; i++) {
+    PointsToNode *ptn = ptnodes_worklist.at(i);
+    if (ptn == NULL || !ptn->is_JavaObject())
       continue;
-    PointsToNode::EscapeState es = escape_state(ptn->_node);
-    if (ptn->_node->is_Allocate() && (es == PointsToNode::NoEscape || Verbose)) {
+    PointsToNode::EscapeState es = ptn->escape_state();
+    if (ptn->ideal_node()->is_Allocate() && (es == PointsToNode::NoEscape || Verbose)) {
       if (first) {
         tty->cr();
         tty->print("======== Connection graph for ");
@@ -3114,22 +3147,14 @@
         tty->cr();
         first = false;
       }
-      tty->print("%6d ", ni);
       ptn->dump();
-      // Print all locals which reference this allocation
-      for (uint li = ni; li < size; li++) {
-        PointsToNode *ptn_loc = ptnode_adr(li);
-        PointsToNode::NodeType ptn_loc_type = ptn_loc->node_type();
-        if ( ptn_loc_type == PointsToNode::LocalVar && ptn_loc->_node != NULL &&
-             ptn_loc->edge_count() == 1 && ptn_loc->edge_target(0) == ni ) {
-          ptnode_adr(li)->dump(false);
-        }
-      }
-      if (Verbose) {
-        // Print all fields which reference this allocation
-        for (uint i = 0; i < ptn->edge_count(); i++) {
-          uint ei = ptn->edge_target(i);
-          ptnode_adr(ei)->dump(false);
+      // Print all locals and fields which reference this allocation
+      for (UseIterator j(ptn); j.has_next(); j.next()) {
+        PointsToNode* use = j.get();
+        if (use->is_LocalVar()) {
+          use->dump(Verbose);
+        } else if (Verbose) {
+          use->dump();
         }
       }
       tty->cr();
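The rewritten dump() above no longer scans the whole _nodes table by index; it walks an explicit worklist of PointsToNode* and follows use edges with UseIterator. A minimal standalone sketch of that iteration idiom, using simplified stand-in types rather than the HotSpot classes:

#include <cstdio>
#include <vector>

// Simplified stand-ins for PointsToNode and UseIterator, for illustration only.
struct PtNode {
    const char*          name;
    bool                 is_java_object;
    bool                 is_local_var;
    std::vector<PtNode*> uses;   // nodes that point to this node
};

// Mirrors the has_next()/next()/get() shape of HotSpot's UseIterator.
class UseIter {
    const PtNode* _n;
    size_t        _i;
public:
    explicit UseIter(const PtNode* n) : _n(n), _i(0) {}
    bool    has_next() const { return _i < _n->uses.size(); }
    void    next()           { ++_i; }
    PtNode* get() const      { return _n->uses[_i]; }
};

int main() {
    PtNode alloc = {"alloc", true, false, {}};
    PtNode var1  = {"var1",  false, true, {}};
    PtNode var2  = {"var2",  false, true, {}};
    alloc.uses.push_back(&var1);
    alloc.uses.push_back(&var2);

    std::vector<PtNode*> worklist = {&alloc, &var1, &var2};
    for (PtNode* ptn : worklist) {
        if (!ptn->is_java_object) continue;     // dump only JavaObject nodes
        printf("%s is referenced by:\n", ptn->name);
        for (UseIter j(ptn); j.has_next(); j.next()) {
            if (j.get()->is_local_var)
                printf("  %s\n", j.get()->name);
        }
    }
    return 0;
}

The has_next()/next()/get() triple mirrors the shape of the PointsToIterator subclasses declared in the escape.hpp hunks below.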
--- a/src/share/vm/opto/escape.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/escape.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -115,18 +115,36 @@
 class  CallNode;
 class  PhiNode;
 class  PhaseTransform;
+class  PointsToNode;
 class  Type;
 class  TypePtr;
 class  VectorSet;
 
-class PointsToNode {
-friend class ConnectionGraph;
+class JavaObjectNode;
+class LocalVarNode;
+class FieldNode;
+class ArraycopyNode;
+
+// ConnectionGraph nodes
+class PointsToNode : public ResourceObj {
+  GrowableArray<PointsToNode*> _edges; // List of nodes this node points to
+  GrowableArray<PointsToNode*> _uses;  // List of nodes which point to this node
+
+  const u1           _type;  // NodeType
+  u1                _flags;  // NodeFlags
+  u1               _escape;  // EscapeState of object
+  u1        _fields_escape;  // EscapeState of object's fields
+
+  Node* const        _node;  // Ideal node corresponding to this PointsTo node.
+  const int           _idx;  // Cached ideal node's _idx
+
 public:
   typedef enum {
     UnknownType = 0,
     JavaObject  = 1,
     LocalVar    = 2,
-    Field       = 3
+    Field       = 3,
+    Arraycopy   = 4
   } NodeType;
 
   typedef enum {
@@ -140,178 +158,385 @@
   } EscapeState;
 
   typedef enum {
-    UnknownEdge   = 0,
-    PointsToEdge  = 1,
-    DeferredEdge  = 2,
-    FieldEdge     = 3
-  } EdgeType;
-
-private:
-  enum {
-    EdgeMask = 3,
-    EdgeShift = 2,
-
-    INITIAL_EDGE_COUNT = 4
-  };
-
-  NodeType             _type;
-  EscapeState          _escape;
-  GrowableArray<uint>* _edges; // outgoing edges
-  Node* _node;                 // Ideal node corresponding to this PointsTo node.
-  int   _offset;               // Object fields offsets.
-  bool  _scalar_replaceable;   // A non-escaped object can be replaced with scalars
-  bool  _has_unknown_ptr;      // Has edge to phantom_object
-
-public:
-  PointsToNode():
-    _type(UnknownType),
-    _escape(UnknownEscape),
-    _edges(NULL),
-    _node(NULL),
-    _offset(-1),
-    _has_unknown_ptr(false),
-    _scalar_replaceable(true) {}
+    ScalarReplaceable = 1,  // A non-escaped object can be replaced with scalars
+    PointsToUnknown   = 2,  // Has edge to phantom_object
+    ArraycopySrc      = 4,  // Has edge from Arraycopy node
+    ArraycopyDst      = 8   // Has edge to Arraycopy node
+  } NodeFlags;
 
 
-  EscapeState escape_state() const { return _escape; }
-  NodeType node_type() const { return _type;}
-  int offset() { return _offset;}
-  bool scalar_replaceable() { return _scalar_replaceable;}
-  bool has_unknown_ptr()    { return _has_unknown_ptr;}
-
-  void set_offset(int offs) { _offset = offs;}
-  void set_escape_state(EscapeState state) { _escape = state; }
-  void set_node_type(NodeType ntype) {
-    assert(_type == UnknownType || _type == ntype, "Can't change node type");
-    _type = ntype;
-  }
-  void set_scalar_replaceable(bool v) { _scalar_replaceable = v; }
-  void set_has_unknown_ptr()          { _has_unknown_ptr = true; }
-
-  // count of outgoing edges
-  uint edge_count() const { return (_edges == NULL) ? 0 : _edges->length(); }
-
-  // node index of target of outgoing edge "e"
-  uint edge_target(uint e) const {
-    assert(_edges != NULL, "valid edge index");
-    return (_edges->at(e) >> EdgeShift);
-  }
-  // type of outgoing edge "e"
-  EdgeType edge_type(uint e) const {
-    assert(_edges != NULL, "valid edge index");
-    return (EdgeType) (_edges->at(e) & EdgeMask);
+  PointsToNode(Compile *C, Node* n, EscapeState es, NodeType type):
+    _edges(C->comp_arena(), 2, 0, NULL),
+    _uses (C->comp_arena(), 2, 0, NULL),
+    _node(n),
+    _idx(n->_idx),
+    _type((u1)type),
+    _escape((u1)es),
+    _fields_escape((u1)es),
+    _flags(ScalarReplaceable) {
+    assert(n != NULL && es != UnknownEscape, "sanity");
   }
 
-  // add a edge of the specified type pointing to the specified target
-  void add_edge(uint targIdx, EdgeType et);
+  Node* ideal_node()   const { return _node; }
+  int          idx()   const { return _idx; }
+
+  bool is_JavaObject() const { return _type == (u1)JavaObject; }
+  bool is_LocalVar()   const { return _type == (u1)LocalVar; }
+  bool is_Field()      const { return _type == (u1)Field; }
+  bool is_Arraycopy()  const { return _type == (u1)Arraycopy; }
+
+  JavaObjectNode* as_JavaObject() { assert(is_JavaObject(),""); return (JavaObjectNode*)this; }
+  LocalVarNode*   as_LocalVar()   { assert(is_LocalVar(),"");   return (LocalVarNode*)this; }
+  FieldNode*      as_Field()      { assert(is_Field(),"");      return (FieldNode*)this; }
+  ArraycopyNode*  as_Arraycopy()  { assert(is_Arraycopy(),"");  return (ArraycopyNode*)this; }
+
+  EscapeState escape_state() const { return (EscapeState)_escape; }
+  void    set_escape_state(EscapeState state) { _escape = (u1)state; }
+
+  EscapeState fields_escape_state() const { return (EscapeState)_fields_escape; }
+  void    set_fields_escape_state(EscapeState state) { _fields_escape = (u1)state; }
+
+  bool     has_unknown_ptr() const { return (_flags & PointsToUnknown) != 0; }
+  void set_has_unknown_ptr()       { _flags |= PointsToUnknown; }
+
+  bool     arraycopy_src() const { return (_flags & ArraycopySrc) != 0; }
+  void set_arraycopy_src()       { _flags |= ArraycopySrc; }
+  bool     arraycopy_dst() const { return (_flags & ArraycopyDst) != 0; }
+  void set_arraycopy_dst()       { _flags |= ArraycopyDst; }
 
-  // remove an edge of the specified type pointing to the specified target
-  void remove_edge(uint targIdx, EdgeType et);
+  bool     scalar_replaceable() const { return (_flags & ScalarReplaceable) != 0;}
+  void set_scalar_replaceable(bool v) {
+    if (v)
+      _flags |= ScalarReplaceable;
+    else
+      _flags &= ~ScalarReplaceable;
+  }
+
+  int edge_count()              const { return _edges.length(); }
+  PointsToNode* edge(int e)     const { return _edges.at(e); }
+  bool add_edge(PointsToNode* edge)    { return _edges.append_if_missing(edge); }
+
+  int use_count()             const { return _uses.length(); }
+  PointsToNode* use(int e)    const { return _uses.at(e); }
+  bool add_use(PointsToNode* use)    { return _uses.append_if_missing(use); }
+
+  // Mark base edge use to distinguish from stored value edge.
+  bool add_base_use(FieldNode* use) { return _uses.append_if_missing((PointsToNode*)((intptr_t)use + 1)); }
+  static bool is_base_use(PointsToNode* use) { return (((intptr_t)use) & 1); }
+  static PointsToNode* get_use_node(PointsToNode* use) { return (PointsToNode*)(((intptr_t)use) & ~1); }
+
+  // Return true if this node points to the specified node or to nodes it points to.
+  bool points_to(JavaObjectNode* ptn) const;
+
+  // Return true if this node points only to non-escaping allocations.
+  bool non_escaping_allocation();
+
+  // Return true if one of the nodes points to the other.
+  bool meet(PointsToNode* ptn);
 
 #ifndef PRODUCT
+  NodeType node_type() const { return (NodeType)_type;}
   void dump(bool print_state=true) const;
 #endif
 
 };
 
+class LocalVarNode: public PointsToNode {
+public:
+  LocalVarNode(Compile *C, Node* n, EscapeState es):
+    PointsToNode(C, n, es, LocalVar) {}
+};
+
+class JavaObjectNode: public PointsToNode {
+public:
+  JavaObjectNode(Compile *C, Node* n, EscapeState es):
+    PointsToNode(C, n, es, JavaObject) {
+      if (es > NoEscape)
+        set_scalar_replaceable(false);
+    }
+};
+
+class FieldNode: public PointsToNode {
+  GrowableArray<PointsToNode*> _bases; // List of JavaObject nodes which point to this node
+  const int   _offset; // Field's offset.
+  const bool  _is_oop; // Field points to object
+        bool  _has_unknown_base; // Has phantom_object base
+public:
+  FieldNode(Compile *C, Node* n, EscapeState es, int offs, bool is_oop):
+    PointsToNode(C, n, es, Field),
+    _offset(offs), _is_oop(is_oop),
+    _has_unknown_base(false) {}
+
+  int      offset()              const { return _offset;}
+  bool     is_oop()              const { return _is_oop;}
+  bool     has_unknown_base()    const { return _has_unknown_base; }
+  void set_has_unknown_base()          { _has_unknown_base = true; }
+
+  int base_count()              const { return _bases.length(); }
+  PointsToNode* base(int e)     const { return _bases.at(e); }
+  bool add_base(PointsToNode* base)    { return _bases.append_if_missing(base); }
+#ifdef ASSERT
+  // Return true if one of the bases points to this java object.
+  bool has_base(JavaObjectNode* ptn) const;
+#endif
+
+};
+
+class ArraycopyNode: public PointsToNode {
+public:
+  ArraycopyNode(Compile *C, Node* n, EscapeState es):
+    PointsToNode(C, n, es, Arraycopy) {}
+};
+
+// Iterators for PointsTo node's edges:
+//   for (EdgeIterator i(n); i.has_next(); i.next()) {
+//     PointsToNode* u = i.get();
+//   }
+class PointsToIterator: public StackObj {
+protected:
+  const PointsToNode* node;
+  const int cnt;
+  int i;
+public:
+  inline PointsToIterator(const PointsToNode* n, int cnt) : node(n), cnt(cnt), i(0) { }
+  inline bool has_next() const { return i < cnt; }
+  inline void next() { i++; }
+  PointsToNode* get() const { ShouldNotCallThis(); return NULL; }
+};
+
+class EdgeIterator: public PointsToIterator {
+public:
+  inline EdgeIterator(const PointsToNode* n) : PointsToIterator(n, n->edge_count()) { }
+  inline PointsToNode* get() const { return node->edge(i); }
+};
+
+class UseIterator: public PointsToIterator {
+public:
+  inline UseIterator(const PointsToNode* n) : PointsToIterator(n, n->use_count()) { }
+  inline PointsToNode* get() const { return node->use(i); }
+};
+
+class BaseIterator: public PointsToIterator {
+public:
+  inline BaseIterator(const FieldNode* n) : PointsToIterator(n, n->base_count()) { }
+  inline PointsToNode* get() const { return ((PointsToNode*)node)->as_Field()->base(i); }
+};
+
+
 class ConnectionGraph: public ResourceObj {
 private:
-  GrowableArray<PointsToNode>  _nodes; // Connection graph nodes indexed
-                                       // by ideal node index.
-
-  Unique_Node_List  _delayed_worklist; // Nodes to be processed before
-                                       // the call build_connection_graph().
+  GrowableArray<PointsToNode*>  _nodes; // Map from ideal nodes to
+                                        // ConnectionGraph nodes.
 
-  GrowableArray<MergeMemNode *>  _mergemem_worklist; // List of all MergeMem nodes
+  GrowableArray<PointsToNode*>  _worklist; // Nodes to be processed
 
-  VectorSet                _processed; // Records which nodes have been
-                                       // processed.
-
-  bool                    _collecting; // Indicates whether escape information
-                                       // is still being collected. If false,
-                                       // no new nodes will be processed.
+  bool            _collecting; // Indicates whether escape information
+                               // is still being collected. If false,
+                               // no new nodes will be processed.
 
-  bool                    _progress;   // Indicates whether new Graph's edges
-                                       // were created.
+  bool               _verify;  // verify graph
 
-  uint                _phantom_object; // Index of globally escaping object
-                                       // that pointer values loaded from
-                                       // a field which has not been set
-                                       // are assumed to point to.
-  uint                      _oop_null; // ConP(#NULL)->_idx
-  uint                     _noop_null; // ConN(#NULL)->_idx
-  Node*                     _pcmp_neq; // ConI(#CC_GT)
-  Node*                      _pcmp_eq; // ConI(#CC_EQ)
+  JavaObjectNode* phantom_obj; // Unknown object
+  JavaObjectNode*    null_obj;
+  Node*             _pcmp_neq; // ConI(#CC_GT)
+  Node*              _pcmp_eq; // ConI(#CC_EQ)
 
-  Compile *                  _compile; // Compile object for current compilation
-  PhaseIterGVN *                _igvn; // Value numbering
+  Compile*           _compile; // Compile object for current compilation
+  PhaseIterGVN*         _igvn; // Value numbering
+
+  Unique_Node_List ideal_nodes; // Used by CG construction and type splitting.
 
   // Address of an element in _nodes.  Used when the element is to be modified
-  PointsToNode *ptnode_adr(uint idx) const {
+  PointsToNode* ptnode_adr(int idx) const {
     // There should be no new ideal nodes during ConnectionGraph build,
-    // growableArray::adr_at() will throw assert otherwise.
-    return _nodes.adr_at(idx);
+    // growableArray::at() will assert otherwise.
+    return _nodes.at(idx);
   }
   uint nodes_size() const { return _nodes.length(); }
 
-  bool is_null_ptr(uint idx) const { return (idx == _noop_null || idx == _oop_null); }
+  // Add nodes to ConnectionGraph.
+  void add_local_var(Node* n, PointsToNode::EscapeState es);
+  void add_java_object(Node* n, PointsToNode::EscapeState es);
+  void add_field(Node* n, PointsToNode::EscapeState es, int offset);
+  void add_arraycopy(Node* n, PointsToNode::EscapeState es, PointsToNode* src, PointsToNode* dst);
+
+  // Compute the escape state for arguments to a call.
+  void process_call_arguments(CallNode *call);
+
+  // Add PointsToNode node corresponding to a call
+  void add_call_node(CallNode* call);
+
+  // Map ideal node to existing PointsTo node (usually phantom_object).
+  void map_ideal_node(Node *n, PointsToNode* ptn) {
+    assert(ptn != NULL, "only existing PointsTo node");
+    _nodes.at_put(n->_idx, ptn);
+  }
+
+  // Create PointsToNode node and add it to Connection Graph.
+  void add_node_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist);
+
+  // Add final simple edges to graph.
+  void add_final_edges(Node *n);
+
+  // Finish Graph construction.
+  bool complete_connection_graph(GrowableArray<PointsToNode*>&   ptnodes_worklist,
+                                 GrowableArray<JavaObjectNode*>& non_escaped_worklist,
+                                 GrowableArray<JavaObjectNode*>& java_objects_worklist,
+                                 GrowableArray<FieldNode*>&      oop_fields_worklist);
+
+#ifdef ASSERT
+  void verify_connection_graph(GrowableArray<PointsToNode*>&   ptnodes_worklist,
+                               GrowableArray<JavaObjectNode*>& non_escaped_worklist,
+                               GrowableArray<JavaObjectNode*>& java_objects_worklist,
+                               GrowableArray<Node*>& addp_worklist);
+#endif
+
+  // Add all references to this JavaObject node.
+  int add_java_object_edges(JavaObjectNode* jobj, bool populate_worklist);
+
+  // Put the node on the worklist.
+  void add_to_worklist(PointsToNode* pt) {
+    _worklist.push(pt);
+    return;
+  }
+
+  // Put on worklist all uses of this node.
+  void add_uses_to_worklist(PointsToNode* pt) {
+    for (UseIterator i(pt); i.has_next(); i.next())
+      _worklist.push(i.get());
+  }
+
+  // Put all of the field's uses and related field nodes on the worklist.
+  void add_field_uses_to_worklist(FieldNode* field);
+
+  // Put on worklist all related field nodes.
+  void add_fields_to_worklist(FieldNode* field, PointsToNode* base);
+
+  // Find fields which have unknown value.
+  int find_field_value(FieldNode* field);
+
+  // Find fields initializing values for allocations.
+  int find_init_values(JavaObjectNode* ptn, PointsToNode* init_val, PhaseTransform* phase);
+
+  // Set the escape state of an object and its fields.
+  void set_escape_state(PointsToNode* ptn, PointsToNode::EscapeState esc) {
+    // Don't change non-escaping state of NULL pointer.
+    if (ptn != null_obj) {
+      if (ptn->escape_state() < esc)
+        ptn->set_escape_state(esc);
+      if (ptn->fields_escape_state() < esc)
+        ptn->set_fields_escape_state(esc);
+    }
+  }
+  void set_fields_escape_state(PointsToNode* ptn, PointsToNode::EscapeState esc) {
+    // Don't change non-escaping state of NULL pointer.
+    if (ptn != null_obj) {
+      if (ptn->fields_escape_state() < esc)
+        ptn->set_fields_escape_state(esc);
+    }
+  }
 
-  // Add node to ConnectionGraph.
-  void add_node(Node *n, PointsToNode::NodeType nt, PointsToNode::EscapeState es, bool done);
+  // Propagate GlobalEscape and ArgEscape escape states to all nodes
+  // and check that we still have non-escaping java objects.
+  bool find_non_escaped_objects(GrowableArray<PointsToNode*>& ptnodes_worklist,
+                                GrowableArray<JavaObjectNode*>& non_escaped_worklist);
+
+  // Adjust scalar_replaceable state after Connection Graph is built.
+  void adjust_scalar_replaceable_state(JavaObjectNode* jobj);
+
+  // Optimize ideal graph.
+  void optimize_ideal_graph(GrowableArray<Node*>& ptr_cmp_worklist,
+                            GrowableArray<Node*>& storestore_worklist);
+  // Optimize object comparisons.
+  Node* optimize_ptr_compare(Node* n);
+
+  // Returns unique corresponding java object or NULL.
+  JavaObjectNode* unique_java_object(Node *n);
+
+  // Add an edge of the specified type pointing to the specified target.
+  bool add_edge(PointsToNode* from, PointsToNode* to) {
+    assert(!from->is_Field() || from->as_Field()->is_oop(), "sanity");
+
+    if (to == phantom_obj) {
+      if (from->has_unknown_ptr()) {
+        return false; // already points to phantom_obj
+      }
+      from->set_has_unknown_ptr();
+    }
+
+    bool is_new = from->add_edge(to);
+    assert(to != phantom_obj || is_new, "sanity");
+    if (is_new) { // New edge?
+      assert(!_verify, "graph is incomplete");
+      is_new = to->add_use(from);
+      assert(is_new, "use should also be new");
+    }
+    return is_new;
+  }
 
+  // Add an edge from Field node to its base and back.
+  bool add_base(FieldNode* from, PointsToNode* to) {
+    assert(!to->is_Arraycopy(), "sanity");
+    if (to == phantom_obj) {
+      if (from->has_unknown_base()) {
+        return false; // already has phantom_obj base
+      }
+      from->set_has_unknown_base();
+    }
+    bool is_new = from->add_base(to);
+    assert(to != phantom_obj || is_new, "sanity");
+    if (is_new) {      // New edge?
+      assert(!_verify, "graph is incomplete");
+      if (to == null_obj)
+        return is_new; // Don't add fields to NULL pointer.
+      if (to->is_JavaObject()) {
+        is_new = to->add_edge(from);
+      } else {
+        is_new = to->add_base_use(from);
+      }
+      assert(is_new, "use should also be new");
+    }
+    return is_new;
+  }
+
+  // Add LocalVar node and edge if possible
+  void add_local_var_and_edge(Node* n, PointsToNode::EscapeState es, Node* to,
+                              Unique_Node_List *delayed_worklist) {
+    PointsToNode* ptn = ptnode_adr(to->_idx);
+    if (delayed_worklist != NULL) { // First iteration of CG construction
+      add_local_var(n, es);
+      if (ptn == NULL) {
+        delayed_worklist->push(n);
+        return; // Process it later.
+      }
+    } else {
+      assert(ptn != NULL, "node should be registered");
+    }
+    add_edge(ptnode_adr(n->_idx), ptn);
+  }
+  // Helper functions
+  bool   is_oop_field(Node* n, int offset, bool* unsafe);
+  static Node* get_addp_base(Node *addp);
+  static Node* find_second_addp(Node* addp, Node* n);
   // offset of a field reference
   int address_offset(Node* adr, PhaseTransform *phase);
 
-  // compute the escape state for arguments to a call
-  void process_call_arguments(CallNode *call, PhaseTransform *phase);
 
-  // compute the escape state for the return value of a call
-  void process_call_result(ProjNode *resproj, PhaseTransform *phase);
-
-  // Populate Connection Graph with Ideal nodes.
-  void record_for_escape_analysis(Node *n, PhaseTransform *phase);
-
-  // Build Connection Graph and set nodes escape state.
-  void build_connection_graph(Node *n, PhaseTransform *phase);
-
-  // walk the connection graph starting at the node corresponding to "n" and
-  // add the index of everything it could point to, to "ptset".  This may cause
-  // Phi's encountered to get (re)processed  (which requires "phase".)
-  VectorSet* PointsTo(Node * n);
-
-  // Reused structures for PointsTo().
-  VectorSet            pt_ptset;
-  VectorSet            pt_visited;
-  GrowableArray<uint>  pt_worklist;
+  // Propagate unique types created for unescaped allocated objects
+  // through the graph
+  void split_unique_types(GrowableArray<Node *>  &alloc_worklist);
 
-  //  Edge manipulation.  The "from_i" and "to_i" arguments are the
-  //  node indices of the source and destination of the edge
-  void add_pointsto_edge(uint from_i, uint to_i);
-  void add_deferred_edge(uint from_i, uint to_i);
-  void add_field_edge(uint from_i, uint to_i, int offs);
+  // Helper methods for unique types split.
+  bool split_AddP(Node *addp, Node *base);
 
-  // Add an edge of the specified type pointing to the specified target.
-  // Set _progress if new edge is added.
-  void add_edge(PointsToNode *f, uint to_i, PointsToNode::EdgeType et) {
-    uint e_cnt = f->edge_count();
-    f->add_edge(to_i, et);
-    _progress |= (f->edge_count() != e_cnt);
-  }
+  PhiNode *create_split_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *>  &orig_phi_worklist, bool &new_created);
+  PhiNode *split_memory_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *>  &orig_phi_worklist);
 
-  // Add an edge to node given by "to_i" from any field of adr_i whose offset
-  // matches "offset"  A deferred edge is added if to_i is a LocalVar, and
-  // a pointsto edge is added if it is a JavaObject
-  void add_edge_from_fields(uint adr, uint to_i, int offs);
-
-  // Add a deferred  edge from node given by "from_i" to any field
-  // of adr_i whose offset matches "offset"
-  void add_deferred_edge_to_fields(uint from_i, uint adr, int offs);
+  void  move_inst_mem(Node* n, GrowableArray<PhiNode *>  &orig_phis);
+  Node* find_inst_mem(Node* mem, int alias_idx,GrowableArray<PhiNode *>  &orig_phi_worklist);
+  Node* step_through_mergemem(MergeMemNode *mmem, int alias_idx, const TypeOopPtr *toop);
 
 
-  // Remove outgoing deferred edges from the node referenced by "ni".
-  // Any outgoing edges from the target of the deferred edge are copied
-  // to "ni".
-  void remove_deferred(uint ni, GrowableArray<uint>* deferred_edges, VectorSet* visited);
+  GrowableArray<MergeMemNode*>  _mergemem_worklist; // List of all MergeMem nodes
 
   Node_Array _node_map; // used for bookkeeping during type splitting
                         // Used for the following purposes:
@@ -320,21 +545,18 @@
                         // MemNode       - new memory input for this node
                        // CheckCastPP   - allocation that this is a cast of
                         // allocation    - CheckCastPP of the allocation
-  bool split_AddP(Node *addp, Node *base,  PhaseGVN  *igvn);
-  PhiNode *create_split_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *>  &orig_phi_worklist, PhaseGVN  *igvn, bool &new_created);
-  PhiNode *split_memory_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *>  &orig_phi_worklist, PhaseGVN  *igvn);
-  void  move_inst_mem(Node* n, GrowableArray<PhiNode *>  &orig_phis, PhaseGVN *igvn);
-  Node *find_inst_mem(Node *mem, int alias_idx,GrowableArray<PhiNode *>  &orig_phi_worklist,  PhaseGVN  *igvn);
-
-  // Propagate unique types created for unescaped allocated objects
-  // through the graph
-  void split_unique_types(GrowableArray<Node *>  &alloc_worklist);
 
   // manage entries in _node_map
-  void  set_map(int idx, Node *n)        { _node_map.map(idx, n); }
-  Node *get_map(int idx)                 { return _node_map[idx]; }
-  PhiNode *get_map_phi(int idx) {
-    Node *phi = _node_map[idx];
+
+  void  set_map(Node* from, Node* to)  {
+    ideal_nodes.push(from);
+    _node_map.map(from->_idx, to);
+  }
+
+  Node* get_map(int idx) { return _node_map[idx]; }
+
+  PhiNode* get_map_phi(int idx) {
+    Node* phi = _node_map[idx];
     return (phi == NULL) ? NULL : phi->as_Phi();
   }
 
@@ -344,23 +566,6 @@
     _igvn->add_users_to_worklist(n);
   }
 
-  // Set the escape state of a node
-  void set_escape_state(uint ni, PointsToNode::EscapeState es);
-
-  // Find fields initializing values for allocations.
-  void find_init_values(Node* n, VectorSet* visited, PhaseTransform* phase);
-
-  // Adjust escape state after Connection Graph is built.
-  void adjust_escape_state(Node* n);
-
-  // Propagate escape states to referenced nodes.
-  bool propagate_escape_state(GrowableArray<int>* cg_worklist,
-                              GrowableArray<uint>* worklist,
-                              PointsToNode::EscapeState esc_state);
-
-  // Optimize objects compare.
-  Node* optimize_ptr_compare(Node* n);
-
   // Compute the escape information
   bool compute_escape();
 
@@ -373,11 +578,10 @@
   // Perform escape analysis
   static void do_analysis(Compile *C, PhaseIterGVN *igvn);
 
-  // escape state of a node
-  PointsToNode::EscapeState escape_state(Node *n);
+  bool not_global_escape(Node *n);
 
 #ifndef PRODUCT
-  void dump();
+  void dump(GrowableArray<PointsToNode*>& ptnodes_worklist);
 #endif
 };
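add_base_use() above appends a pointer with its low bit set, and is_base_use()/get_use_node() strip it back off, so base edges and stored-value edges can share one _uses list; this works only while PointsToNode objects are at least 2-byte aligned. A self-contained sketch of the low-bit tagging trick, with a stand-in Node type rather than the HotSpot classes:

#include <cassert>
#include <cstdint>
#include <cstdio>

struct Node { int id; };  // stand-in; the real code tags PointsToNode*

// Tag/untag helpers mirroring add_base_use()/is_base_use()/get_use_node().
// Correct only because Node objects are at least 2-byte aligned, so the
// low address bit is always zero for a genuine pointer.
static Node* tag_as_base(Node* n) { return (Node*)((intptr_t)n + 1); }
static bool  is_base_use(Node* n) { return ((intptr_t)n & 1) != 0; }
static Node* untag(Node* n)       { return (Node*)((intptr_t)n & ~(intptr_t)1); }

int main() {
    Node field = {42};
    assert(((intptr_t)&field & 1) == 0);  // alignment precondition

    Node* plain  = &field;
    Node* tagged = tag_as_base(&field);

    printf("plain  is_base_use: %d\n", is_base_use(plain));            // 0
    printf("tagged is_base_use: %d\n", is_base_use(tagged));           // 1
    printf("untag(tagged) == &field: %d\n", untag(tagged) == &field);  // 1
    return 0;
}

The design choice saves a second parallel list per node at the cost of an untag on every use-edge traversal.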
 
--- a/src/share/vm/opto/graphKit.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/graphKit.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -965,7 +965,7 @@
   assert(call->jvms()->debug_depth() == call->req() - non_debug_edges, "");
 }
 
-bool GraphKit::compute_stack_effects(int& inputs, int& depth) {
+bool GraphKit::compute_stack_effects(int& inputs, int& depth, bool for_parse) {
   Bytecodes::Code code = java_bc();
   if (code == Bytecodes::_wide) {
     code = method()->java_code_at_bci(bci() + 1);
@@ -1006,11 +1006,11 @@
   case Bytecodes::_putfield:
     {
       bool is_get = (depth >= 0), is_static = (depth & 1);
-      bool ignore;
       ciBytecodeStream iter(method());
       iter.reset_to_bci(bci());
       iter.next();
-      ciField* field = iter.get_field(ignore);
+      bool ignored_will_link;
+      ciField* field = iter.get_field(ignored_will_link);
       int      size  = field->type()->size();
       inputs  = (is_static ? 0 : 1);
       if (is_get) {
@@ -1028,16 +1028,27 @@
   case Bytecodes::_invokedynamic:
   case Bytecodes::_invokeinterface:
     {
-      bool ignore;
       ciBytecodeStream iter(method());
       iter.reset_to_bci(bci());
       iter.next();
-      ciMethod* method = iter.get_method(ignore);
+      bool ignored_will_link;
+      ciSignature* declared_signature = NULL;
+      ciMethod* callee = iter.get_method(ignored_will_link, &declared_signature);
+      assert(declared_signature != NULL, "cannot be null");
       // (Do not use ciMethod::arg_size(), because
       // it might be an unloaded method, which doesn't
       // know whether it is static or not.)
-      inputs = method->invoke_arg_size(code);
-      int size = method->return_type()->size();
+      if (for_parse) {
+        // Case 1: When called from parse we are *before* the invoke (in the
+        //         caller) and need to adjust the inputs by an appendix
+        //         argument that will be pushed implicitly.
+        inputs = callee->invoke_arg_size(code) - (iter.has_appendix() ? 1 : 0);
+      } else {
+        // Case 2: Here we are *after* the invoke (in the callee) and need to
+        //         remove any appendix arguments that were popped.
+        inputs = callee->invoke_arg_size(code) - (callee->has_member_arg() ? 1 : 0);
+      }
+      int size = declared_signature->return_type()->size();
       depth = size - inputs;
     }
     break;
@@ -1373,7 +1384,6 @@
 }
 
 
-
 //=============================================================================
 //--------------------------------memory---------------------------------------
 Node* GraphKit::memory(uint alias_idx) {
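The new for_parse flag above selects between two stack-effect computations around an invoke: before the call (parsing the caller) an implicitly pushed appendix argument is subtracted, while after the call (inside the callee) a trailing MemberName argument is subtracted instead. A toy arithmetic sketch of the two cases; the struct fields and values here are hypothetical stand-ins, not the ciMethod API:

#include <cstdio>

// Toy model of compute_stack_effects() for invokes: the net stack depth
// change is return-size minus consumed inputs, and the inputs may need
// an adjustment for an implicit extra argument.
struct Invoke {
    int  declared_arg_size;  // receiver + declared parameters, in slots
    int  return_size;        // 0, 1 or 2 slots
    bool has_appendix;       // appendix pushed implicitly before the call
    bool has_member_arg;     // trailing MemberName argument in the callee
};

// Case 1: before the invoke (parsing the caller).
static int inputs_for_parse(const Invoke& iv) {
    return iv.declared_arg_size - (iv.has_appendix ? 1 : 0);
}

// Case 2: after the invoke (inside the callee).
static int inputs_for_callee(const Invoke& iv) {
    return iv.declared_arg_size - (iv.has_member_arg ? 1 : 0);
}

int main() {
    Invoke iv = {3, 1, true, false};
    int inputs = inputs_for_parse(iv);
    printf("inputs=%d depth=%d (callee view: %d)\n",
           inputs, iv.return_size - inputs, inputs_for_callee(iv));
    return 0;
}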
--- a/src/share/vm/opto/graphKit.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/graphKit.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -145,6 +145,7 @@
   void clean_stack(int from_sp); // clear garbage beyond from_sp to top
 
   void inc_sp(int i)                  { set_sp(sp() + i); }
+  void dec_sp(int i)                  { set_sp(sp() - i); }
   void set_bci(int bci)               { _bci = bci; }
 
   // Make sure jvms has current bci & sp.
@@ -285,7 +286,7 @@
   // How many stack inputs does the current BC consume?
   // And, how does the stack change after the bytecode?
   // Returns false if unknown.
-  bool compute_stack_effects(int& inputs, int& depth);
+  bool compute_stack_effects(int& inputs, int& depth, bool for_parse = false);
 
   // Add a fixed offset to a pointer
   Node* basic_plus_adr(Node* base, Node* ptr, intptr_t offset) {
@@ -370,9 +371,9 @@
   // Replace all occurrences of one node by another.
   void replace_in_map(Node* old, Node* neww);
 
-  void push(Node* n)    { map_not_null(); _map->set_stack(_map->_jvms,_sp++,n); }
-  Node* pop()           { map_not_null(); return _map->stack(_map->_jvms,--_sp); }
-  Node* peek(int off=0) { map_not_null(); return _map->stack(_map->_jvms, _sp - off - 1); }
+  void  push(Node* n)     { map_not_null();        _map->set_stack(_map->_jvms,   _sp++, n); }
+  Node* pop()             { map_not_null(); return _map->stack(    _map->_jvms, --_sp); }
+  Node* peek(int off = 0) { map_not_null(); return _map->stack(    _map->_jvms,   _sp - off - 1); }
 
   void push_pair(Node* ldval) {
     push(ldval);
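The realigned push/pop/peek bodies above make the indexing discipline easy to check: _sp points one past the top of the expression stack, and the newly added dec_sp(i) is the inverse of inc_sp(i). A minimal stack sketch with the same indexing, assuming a fixed-size slot array:

#include <cassert>

// Minimal expression-stack sketch with GraphKit's indexing discipline:
// _sp points one past the top-of-stack slot.
class MiniStack {
    int _slots[16];
    int _sp = 0;
public:
    void inc_sp(int i) { _sp += i; }   // raw stack-pointer adjustment
    void dec_sp(int i) { _sp -= i; }   // the newly added inverse helper
    void push(int v)             { _slots[_sp++] = v; }
    int  pop()                   { return _slots[--_sp]; }
    int  peek(int off = 0) const { return _slots[_sp - off - 1]; }
};

int main() {
    MiniStack s;
    s.push(10);
    s.push(20);
    assert(s.peek()  == 20);  // top of stack
    assert(s.peek(1) == 10);  // one below top
    assert(s.pop()   == 20);
    s.dec_sp(1);              // drop the remaining slot without reading it
    return 0;
}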
--- a/src/share/vm/opto/idealGraphPrinter.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/idealGraphPrinter.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -130,15 +130,15 @@
       } else {
         st.print("%s%d", PrintIdealGraphFile, _file_count);
       }
-      fileStream *stream = new (ResourceObj::C_HEAP) fileStream(st.as_string());
+      fileStream *stream = new (ResourceObj::C_HEAP, mtCompiler) fileStream(st.as_string());
       _output = stream;
     } else {
-      fileStream *stream = new (ResourceObj::C_HEAP) fileStream(PrintIdealGraphFile);
+      fileStream *stream = new (ResourceObj::C_HEAP, mtCompiler) fileStream(PrintIdealGraphFile);
       _output = stream;
     }
     _file_count++;
   } else {
-    _stream = new (ResourceObj::C_HEAP) networkStream();
+    _stream = new (ResourceObj::C_HEAP, mtCompiler) networkStream();
 
     // Try to connect to visualizer
     if (_stream->connect(PrintIdealGraphAddress, PrintIdealGraphPort)) {
@@ -155,12 +155,12 @@
     } else {
       // It would be nice if we could shut down cleanly but it should
       // be an error if we can't connect to the visualizer.
-      fatal(err_msg("Couldn't connect to visualizer at %s:%d",
-                    PrintIdealGraphAddress, PrintIdealGraphPort));
+      fatal(err_msg_res("Couldn't connect to visualizer at %s:%d",
+                        PrintIdealGraphAddress, PrintIdealGraphPort));
     }
   }
 
-  _xml = new (ResourceObj::C_HEAP) xmlStream(_output);
+  _xml = new (ResourceObj::C_HEAP, mtCompiler) xmlStream(_output);
 
   head(TOP_ELEMENT);
 }
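Each C_HEAP allocation above now carries a memory-type tag (mtCompiler), which suggests the streams are being attributed to a subsystem for native-memory accounting. A simplified sketch of the tagged-allocation pattern; the tag names and byte counters below are invented for illustration and are not the HotSpot allocator:

#include <cstdio>
#include <cstdlib>
#include <new>

// Simplified memory-type tags, loosely modeled on HotSpot's mtCompiler etc.
enum MemTag { mtCompilerSim, mtOtherSim, mtTagCount };
static size_t g_bytes[mtTagCount] = {0};

// Placement-style operator new that attributes each allocation to a tag,
// so per-subsystem native memory usage can be reported later.
struct Tagged {
    void* operator new(size_t size, MemTag tag) {
        g_bytes[tag] += size;
        return std::malloc(size);
    }
    void operator delete(void* p) { std::free(p); }
    // matching delete for the exception-unwind path of the tagged new
    void operator delete(void* p, MemTag) { std::free(p); }
};

struct Stream : Tagged { char buf[128]; };

int main() {
    Stream* s = new (mtCompilerSim) Stream();
    printf("compiler-tagged bytes: %zu\n", g_bytes[mtCompilerSim]);
    delete s;
    return 0;
}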
--- a/src/share/vm/opto/idealKit.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/idealKit.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -295,7 +295,11 @@
   if (_delay_all_transforms) {
     return delay_transform(n);
   } else {
-    return gvn().transform(n);
+    n = gvn().transform(n);
+    if (!gvn().is_IterGVN()) {
+      C->record_for_igvn(n);
+    }
+    return n;
   }
 }
 
--- a/src/share/vm/opto/ifg.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/ifg.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -416,6 +416,7 @@
     if( lrgs(lidx).mask().is_UP() &&
         lrgs(lidx).mask_size() &&
         !lrgs(lidx)._is_float &&
+        !lrgs(lidx)._is_vector &&
         lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) )
       cnt += lrgs(lidx).reg_pressure();
   }
@@ -430,7 +431,7 @@
   while ((lidx = elements.next()) != 0) {
     if( lrgs(lidx).mask().is_UP() &&
         lrgs(lidx).mask_size() &&
-        lrgs(lidx)._is_float )
+        (lrgs(lidx)._is_float || lrgs(lidx)._is_vector))
       cnt += lrgs(lidx).reg_pressure();
   }
   return cnt;
@@ -439,8 +440,8 @@
 //------------------------------lower_pressure---------------------------------
 // Adjust register pressure down by 1.  Capture last hi-to-low transition,
 static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) {
-  if( lrg->mask().is_UP() && lrg->mask_size() ) {
-    if( lrg->_is_float ) {
+  if (lrg->mask().is_UP() && lrg->mask_size()) {
+    if (lrg->_is_float || lrg->_is_vector) {
       pressure[1] -= lrg->reg_pressure();
       if( pressure[1] == (uint)FLOATPRESSURE ) {
         hrp_index[1] = where;
@@ -522,8 +523,8 @@
       LRG &lrg = lrgs(lidx);
       lrg._area += cost;
       // Compute initial register pressure
-      if( lrg.mask().is_UP() && lrg.mask_size() ) {
-        if( lrg._is_float ) {   // Count float pressure
+      if (lrg.mask().is_UP() && lrg.mask_size()) {
+        if (lrg._is_float || lrg._is_vector) {   // Count float pressure
           pressure[1] += lrg.reg_pressure();
 #ifdef EXACT_PRESSURE
           if( pressure[1] > b->_freg_pressure )
@@ -681,13 +682,10 @@
         // according to its bindings.
         const RegMask &rmask = lrgs(r).mask();
         if( lrgs(r).is_bound() && !(n->rematerialize()) && rmask.is_NotEmpty() ) {
-          // Smear odd bits; leave only aligned pairs of bits.
-          RegMask r2mask = rmask;
-          r2mask.SmearToPairs();
           // Check for common case
           int r_size = lrgs(r).num_regs();
           OptoReg::Name r_reg = (r_size == 1) ? rmask.find_first_elem() : OptoReg::Physical;
-
+          // Smear odd bits
           IndexSetIterator elements(&liveout);
           uint l;
           while ((l = elements.next()) != 0) {
@@ -701,10 +699,15 @@
             // Remove the bits from LRG 'r' from LRG 'l' so 'l' no
             // longer interferes with 'r'.  If 'l' requires aligned
             // adjacent pairs, subtract out bit pairs.
-            if( lrg.num_regs() == 2 && !lrg._fat_proj ) {
+            assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
+            if (lrg.num_regs() > 1 && !lrg._fat_proj) {
+              RegMask r2mask = rmask;
+              // Leave only aligned set of bits.
+              r2mask.smear_to_sets(lrg.num_regs());
+              // This includes the vector case.
               lrg.SUBTRACT( r2mask );
               lrg.compute_set_mask_size();
-            } else if( r_size != 1 ) {
+            } else if( r_size != 1 ) { // fat proj
               lrg.SUBTRACT( rmask );
               lrg.compute_set_mask_size();
             } else {            // Common case: size 1 bound removal
@@ -763,8 +766,8 @@
             // Newly live things assumed live from here to top of block
             lrg._area += cost;
             // Adjust register pressure
-            if( lrg.mask().is_UP() && lrg.mask_size() ) {
-              if( lrg._is_float ) {
+            if (lrg.mask().is_UP() && lrg.mask_size()) {
+              if (lrg._is_float || lrg._is_vector) {
                 pressure[1] += lrg.reg_pressure();
 #ifdef EXACT_PRESSURE
                 if( pressure[1] > b->_freg_pressure )
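smear_to_sets(num_regs) above generalizes the old SmearToPairs from aligned pairs to aligned register sets of any size, so vector live ranges subtract whole aligned sets during interference pruning. One plausible bit-level reading of the operation, sketched on a flat 64-bit mask; this is an illustration of the idea, not the RegMask implementation:

#include <cstdint>
#include <cstdio>

// Plausible reading of smear_to_sets(k) on a 64-bit mask: widen every
// aligned k-bit group with at least one bit set to the full group, so a
// later SUBTRACT removes whole aligned register sets.
static uint64_t smear_to_sets(uint64_t mask, int k) {
    uint64_t out = 0;
    for (int base = 0; base < 64; base += k) {
        uint64_t group =
            ((k < 64) ? ((uint64_t)1 << k) - 1 : ~UINT64_C(0)) << base;
        if (mask & group)
            out |= group;  // any bit set -> claim the whole aligned set
    }
    return out;
}

int main() {
    // Bit 5 set: with k=4 the whole aligned group [4..7] is claimed.
    printf("%#llx\n", (unsigned long long)smear_to_sets(1u << 5, 4)); // 0xf0
    return 0;
}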
--- a/src/share/vm/opto/ifnode.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/ifnode.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -338,8 +338,7 @@
   Node *phi_f = NULL;     // do not construct unless needed
   for (DUIterator_Last i2min, i2 = phi->last_outs(i2min); i2 >= i2min; --i2) {
     Node* v = phi->last_out(i2);// User of the phi
-    igvn->hash_delete(v);       // Have to fixup other Phi users
-    igvn->_worklist.push(v);
+    igvn->rehash_node_delayed(v); // Have to fixup other Phi users
     uint vop = v->Opcode();
     Node *proj = NULL;
     if( vop == Op_Phi ) {       // Remote merge point
@@ -552,9 +551,8 @@
   if( new_cmp == cmp ) return;
   // Else, adjust existing check
   Node *new_bol = gvn->transform( new (gvn->C, 2) BoolNode( new_cmp, bol->as_Bool()->_test._test ) );
-  igvn->hash_delete( iff );
+  igvn->rehash_node_delayed( iff );
   iff->set_req_X( 1, new_bol, igvn );
-  igvn->_worklist.push( iff );
 }
 
 //------------------------------up_one_dom-------------------------------------
@@ -732,9 +730,7 @@
               Node* adjusted = phase->transform(new (phase->C, 3) SubINode(n, phase->intcon(failtype->_lo)));
               Node* newcmp = phase->transform(new (phase->C, 3) CmpUNode(adjusted, phase->intcon(bound)));
               Node* newbool = phase->transform(new (phase->C, 2) BoolNode(newcmp, cond));
-              phase->hash_delete(dom_iff);
-              dom_iff->set_req(1, phase->intcon(ctrl->as_Proj()->_con));
-              phase->is_IterGVN()->_worklist.push(dom_iff);
+              phase->is_IterGVN()->replace_input_of(dom_iff, 1, phase->intcon(ctrl->as_Proj()->_con));
               phase->hash_delete(this);
               set_req(1, newbool);
               return this;
@@ -1042,17 +1038,15 @@
     // Loop ends when projection has no more uses.
     for (DUIterator_Last jmin, j = ifp->last_outs(jmin); j >= jmin; --j) {
       Node* s = ifp->last_out(j);   // Get child of IfTrue/IfFalse
-      igvn->hash_delete(s);         // Yank from hash table before edge hacking
       if( !s->depends_only_on_test() ) {
         // Find the control input matching this def-use edge.
         // For Regions it may not be in slot 0.
         uint l;
         for( l = 0; s->in(l) != ifp; l++ ) { }
-        s->set_req(l, ctrl_target);
+        igvn->replace_input_of(s, l, ctrl_target);
       } else {                      // Else, for control producers,
-        s->set_req(0, data_target); // Move child to data-target
+        igvn->replace_input_of(s, 0, data_target); // Move child to data-target
       }
-      igvn->_worklist.push(s);  // Revisit collapsed Phis
     } // End for each child of a projection
 
     igvn->remove_dead_node(ifp);
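The edits in this file follow the pattern used throughout the changeset: the three-step idiom hash_delete / set_req / _worklist.push is folded into the helpers rehash_node_delayed and replace_input_of, so no call site can forget a step. A minimal sketch of the assumed shape (names from the patch; the bodies are invented for illustration and stand in for PhaseIterGVN):

    #include <unordered_set>
    #include <vector>

    struct Node {
      std::vector<Node*> in;
      void set_req(unsigned i, Node* n) { in[i] = n; }
    };

    struct IterGVN {
      std::unordered_set<Node*> table;     // value-numbering hash table
      std::vector<Node*>        worklist;  // nodes to revisit

      // Yank from the table now; the node is re-hashed lazily when
      // the worklist is next processed.
      void rehash_node_delayed(Node* n) {
        table.erase(n);
        worklist.push_back(n);
      }
      // The common case: exactly one input edge changes.
      void replace_input_of(Node* n, unsigned i, Node* input) {
        rehash_node_delayed(n);
        n->set_req(i, input);
      }
    };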
--- a/src/share/vm/opto/lcm.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/lcm.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -139,6 +139,7 @@
     int iop = mach->ideal_Opcode();
     switch( iop ) {
     case Op_LoadB:
+    case Op_LoadUB:
     case Op_LoadUS:
     case Op_LoadD:
     case Op_LoadF:
@@ -445,6 +446,11 @@
     if( e->is_MachNullCheck() && e->in(1) == n )
       continue;
 
+    // Schedule IV increment last.
+    if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd &&
+        e->in(1)->in(1) == n && n->is_iteratively_computed())
+      continue;
+
     uint n_choice  = 2;
 
     // See if this instruction is consumed by a branch. If so, then (as the
--- a/src/share/vm/opto/library_call.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/library_call.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -160,6 +160,7 @@
   bool inline_trans(vmIntrinsics::ID id);
   bool inline_abs(vmIntrinsics::ID id);
   bool inline_sqrt(vmIntrinsics::ID id);
+  void finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
   bool inline_pow(vmIntrinsics::ID id);
   bool inline_exp(vmIntrinsics::ID id);
   bool inline_min_max(vmIntrinsics::ID id);
@@ -170,13 +171,17 @@
   // Helper for inline_unsafe_access.
   // Generates the guards that check whether the result of
   // Unsafe.getObject should be recorded in an SATB log buffer.
-  void insert_g1_pre_barrier(Node* base_oop, Node* offset, Node* pre_val);
+  void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, int nargs, bool need_mem_bar);
   bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile);
   bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static);
   bool inline_unsafe_allocate();
   bool inline_unsafe_copyMemory();
   bool inline_native_currentThread();
-  bool inline_native_time_funcs(bool isNano);
+#ifdef TRACE_HAVE_INTRINSICS
+  bool inline_native_classID();
+  bool inline_native_threadID();
+#endif
+  bool inline_native_time_funcs(address method, const char* funcName);
   bool inline_native_isInterrupted();
   bool inline_native_Class_query(vmIntrinsics::ID id);
   bool inline_native_subtype_check();
@@ -188,8 +193,6 @@
   void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark);
   bool inline_native_clone(bool is_virtual);
   bool inline_native_Reflection_getCallerClass();
-  bool inline_native_AtomicLong_get();
-  bool inline_native_AtomicLong_attemptUpdate();
   bool is_method_invoke_or_aux_frame(JVMState* jvms);
   // Helper function for inlining native object hash method
   bool inline_native_hashcode(bool is_virtual, bool is_static);
@@ -288,6 +291,8 @@
     case vmIntrinsics::_equals:
     case vmIntrinsics::_equalsC:
       break;  // InlineNatives does not control String.compareTo
+    case vmIntrinsics::_Reference_get:
+      break;  // InlineNatives does not control Reference.get
     default:
       return NULL;
     }
@@ -327,11 +332,6 @@
     // We do not intrinsify this.  The optimizer does fine with it.
     return NULL;
 
-  case vmIntrinsics::_get_AtomicLong:
-  case vmIntrinsics::_attemptUpdate:
-    if (!InlineAtomicLong)  return NULL;
-    break;
-
   case vmIntrinsics::_getCallerClass:
     if (!UseNewReflection)  return NULL;
     if (!InlineReflectionGetCallerClass)  return NULL;
@@ -339,16 +339,34 @@
     break;
 
   case vmIntrinsics::_bitCount_i:
+    if (!Matcher::match_rule_supported(Op_PopCountI)) return NULL;
+    break;
+
   case vmIntrinsics::_bitCount_l:
-    if (!UsePopCountInstruction)  return NULL;
+    if (!Matcher::match_rule_supported(Op_PopCountL)) return NULL;
+    break;
+
+  case vmIntrinsics::_numberOfLeadingZeros_i:
+    if (!Matcher::match_rule_supported(Op_CountLeadingZerosI)) return NULL;
+    break;
+
+  case vmIntrinsics::_numberOfLeadingZeros_l:
+    if (!Matcher::match_rule_supported(Op_CountLeadingZerosL)) return NULL;
+    break;
+
+  case vmIntrinsics::_numberOfTrailingZeros_i:
+    if (!Matcher::match_rule_supported(Op_CountTrailingZerosI)) return NULL;
+    break;
+
+  case vmIntrinsics::_numberOfTrailingZeros_l:
+    if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return NULL;
     break;
 
   case vmIntrinsics::_Reference_get:
-    // It is only when G1 is enabled that we absolutely
-    // need to use the intrinsic version of Reference.get()
-    // so that the value in the referent field, if necessary,
-    // can be registered by the pre-barrier code.
-    if (!UseG1GC) return NULL;
+    // Use the intrinsic version of Reference.get() so that the value in
+    // the referent field can be registered by the G1 pre-barrier code.
+    // Also add a memory barrier to prevent commoning reads from this field
+    // across a safepoint, since the GC can change its value.
     break;
 
  default:
@@ -417,14 +435,12 @@
     return kit.transfer_exceptions_into_jvms();
   }
 
-  if (PrintIntrinsics) {
+  // The intrinsic bailed out
+  if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
     if (jvms->has_method()) {
       // Not a root compile.
-      tty->print("Did not inline intrinsic %s%s at bci:%d in",
-                 vmIntrinsics::name_at(intrinsic_id()),
-                 (is_virtual() ? " (virtual)" : ""), kit.bci());
-      kit.caller()->print_short_name(tty);
-      tty->print_cr(" (%d bytes)", kit.caller()->code_size());
+      const char* msg = is_virtual() ? "failed to inline (intrinsic, virtual)" : "failed to inline (intrinsic)";
+      CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), msg);
     } else {
       // Root compile
       tty->print("Did not generate intrinsic %s%s at bci:%d in",
@@ -622,10 +638,18 @@
   case vmIntrinsics::_isInterrupted:
     return inline_native_isInterrupted();
 
+#ifdef TRACE_HAVE_INTRINSICS
+  case vmIntrinsics::_classID:
+    return inline_native_classID();
+  case vmIntrinsics::_threadID:
+    return inline_native_threadID();
+  case vmIntrinsics::_counterTime:
+    return inline_native_time_funcs(CAST_FROM_FN_PTR(address, TRACE_TIME_METHOD), "counterTime");
+#endif
   case vmIntrinsics::_currentTimeMillis:
-    return inline_native_time_funcs(false);
+    return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeMillis), "currentTimeMillis");
   case vmIntrinsics::_nanoTime:
-    return inline_native_time_funcs(true);
+    return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime");
   case vmIntrinsics::_allocateInstance:
     return inline_unsafe_allocate();
   case vmIntrinsics::_copyMemory:
@@ -682,11 +706,6 @@
   case vmIntrinsics::_reverseBytes_c:
     return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id());
 
-  case vmIntrinsics::_get_AtomicLong:
-    return inline_native_AtomicLong_get();
-  case vmIntrinsics::_attemptUpdate:
-    return inline_native_AtomicLong_attemptUpdate();
-
   case vmIntrinsics::_getCallerClass:
     return inline_native_Reflection_getCallerClass();
 
@@ -1518,43 +1537,79 @@
   return true;
 }
 
+void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) {
+  //-------------------
+  //result=(result.isNaN())? funcAddr():result;
+  // Check for NaN by testing result != result; if so, either trap
+  // or go to the runtime
+  Node* cmpisnan = _gvn.transform(new (C, 3) CmpDNode(result,result));
+  // Build the boolean node
+  Node* bolisnum = _gvn.transform( new (C, 2) BoolNode(cmpisnan, BoolTest::eq) );
+
+  if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
+    {
+      BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
+      // End the current control-flow path
+      push_pair(x);
+      if (y != NULL) {
+        push_pair(y);
+      }
+      // The pow or exp intrinsic returned a NaN, which requires a call
+      // to the runtime.  Recompile with the runtime call.
+      uncommon_trap(Deoptimization::Reason_intrinsic,
+                    Deoptimization::Action_make_not_entrant);
+    }
+    push_pair(result);
+  } else {
+    // If this inlining ever returned NaN in the past, we compile a call
+    // to the runtime to properly handle corner cases
+
+    IfNode* iff = create_and_xform_if(control(), bolisnum, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
+    Node* if_slow = _gvn.transform( new (C, 1) IfFalseNode(iff) );
+    Node* if_fast = _gvn.transform( new (C, 1) IfTrueNode(iff) );
+
+    if (!if_slow->is_top()) {
+      RegionNode* result_region = new(C, 3) RegionNode(3);
+      PhiNode*    result_val = new (C, 3) PhiNode(result_region, Type::DOUBLE);
+
+      result_region->init_req(1, if_fast);
+      result_val->init_req(1, result);
+
+      set_control(if_slow);
+
+      const TypePtr* no_memory_effects = NULL;
+      Node* rt = make_runtime_call(RC_LEAF, call_type, funcAddr, funcName,
+                                   no_memory_effects,
+                                   x, top(), y, y ? top() : NULL);
+      Node* value = _gvn.transform(new (C, 1) ProjNode(rt, TypeFunc::Parms+0));
+#ifdef ASSERT
+      Node* value_top = _gvn.transform(new (C, 1) ProjNode(rt, TypeFunc::Parms+1));
+      assert(value_top == top(), "second value must be top");
+#endif
+
+      result_region->init_req(2, control());
+      result_val->init_req(2, value);
+      push_result(result_region, result_val);
+    } else {
+      push_pair(result);
+    }
+  }
+}
+
 //------------------------------inline_exp-------------------------------------
 // Inline exp instructions, if possible.  The Intel hardware only misses
 // really odd corner cases (+/- Infinity).  Just uncommon-trap them.
 bool LibraryCallKit::inline_exp(vmIntrinsics::ID id) {
   assert(id == vmIntrinsics::_dexp, "Not exp");
 
-  // If this inlining ever returned NaN in the past, we do not intrinsify it
-  // every again.  NaN results requires StrictMath.exp handling.
-  if (too_many_traps(Deoptimization::Reason_intrinsic))  return false;
-
-  // Do not intrinsify on older platforms which lack cmove.
-  if (ConditionalMoveLimit == 0)  return false;
-
   _sp += arg_size();        // restore stack pointer
   Node *x = pop_math_arg();
   Node *result = _gvn.transform(new (C, 2) ExpDNode(0,x));
 
-  //-------------------
-  //result=(result.isNaN())? StrictMath::exp():result;
-  // Check: If isNaN() by checking result!=result? then go to Strict Math
-  Node* cmpisnan = _gvn.transform(new (C, 3) CmpDNode(result,result));
-  // Build the boolean node
-  Node* bolisnum = _gvn.transform( new (C, 2) BoolNode(cmpisnan, BoolTest::eq) );
-
-  { BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
-    // End the current control-flow path
-    push_pair(x);
-    // Math.exp intrinsic returned a NaN, which requires StrictMath.exp
-    // to handle.  Recompile without intrinsifying Math.exp
-    uncommon_trap(Deoptimization::Reason_intrinsic,
-                  Deoptimization::Action_make_not_entrant);
-  }
+  finish_pow_exp(result, x, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
 
   C->set_has_split_ifs(true); // Has chance for split-if optimization
 
-  push_pair(result);
-
   return true;
 }
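The NaN check above leans on the one portable NaN property: it is the only value that compares unequal to itself, so result == result selects the fast numeric path. A quick runnable illustration:

    #include <cmath>
    #include <cstdio>

    int main() {
      double ok  = std::pow(2.0, 10.0);   // 1024.0
      double bad = std::pow(-2.0, 0.5);   // NaN: negative base, fractional exponent
      printf("%d %d\n", ok == ok, bad == bad);  // prints: 1 0
      return 0;
    }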
 
@@ -1563,17 +1618,12 @@
 bool LibraryCallKit::inline_pow(vmIntrinsics::ID id) {
   assert(id == vmIntrinsics::_dpow, "Not pow");
 
-  // If this inlining ever returned NaN in the past, we do not intrinsify it
-  // every again.  NaN results requires StrictMath.pow handling.
-  if (too_many_traps(Deoptimization::Reason_intrinsic))  return false;
-
-  // Do not intrinsify on older platforms which lack cmove.
-  if (ConditionalMoveLimit == 0)  return false;
-
   // Pseudocode for pow
   // if (x <= 0.0) {
-  //   if ((double)((int)y)==y) { // if y is int
-  //     result = ((1&(int)y)==0)?-DPow(abs(x), y):DPow(abs(x), y)
+  //   long longy = (long)y;
+  //   if ((double)longy == y) { // if y is long
+  //     if (y + 1 == y) longy = 0; // huge number: even
+  //     result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y);
   //   } else {
   //     result = NaN;
   //   }
@@ -1581,7 +1631,7 @@
   //   result = DPow(x,y);
   // }
   // if (result != result)?  {
-  //   uncommon_trap();
+  //   result = uncommon_trap() or runtime_call();
   // }
   // return result;
 
@@ -1589,15 +1639,14 @@
   Node* y = pop_math_arg();
   Node* x = pop_math_arg();
 
-  Node *fast_result = _gvn.transform( new (C, 3) PowDNode(0, x, y) );
-
-  // Short form: if not top-level (i.e., Math.pow but inlining Math.pow
-  // inside of something) then skip the fancy tests and just check for
-  // NaN result.
-  Node *result = NULL;
-  if( jvms()->depth() >= 1 ) {
-    result = fast_result;
+  Node* result = NULL;
+
+  if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
+    // Short form: skip the fancy tests and just check for NaN result.
+    result = _gvn.transform( new (C, 3) PowDNode(0, x, y) );
   } else {
+    // If this inlining ever returned NaN in the past, include all
+    // checks + call to the runtime.
 
     // Set the merge point for If node with condition of (x <= 0.0)
     // There are four possible paths to region node and phi node
@@ -1613,55 +1662,95 @@
     Node *bol1 = _gvn.transform( new (C, 2) BoolNode( cmp, BoolTest::le ) );
     // Branch either way
     IfNode *if1 = create_and_xform_if(control(),bol1, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
-    Node *opt_test = _gvn.transform(if1);
-    //assert( opt_test->is_If(), "Expect an IfNode");
-    IfNode *opt_if1 = (IfNode*)opt_test;
     // Fast path taken; set region slot 3
-    Node *fast_taken = _gvn.transform( new (C, 1) IfFalseNode(opt_if1) );
+    Node *fast_taken = _gvn.transform( new (C, 1) IfFalseNode(if1) );
     r->init_req(3,fast_taken); // Capture fast-control
 
     // Fast path not-taken, i.e. slow path
-    Node *complex_path = _gvn.transform( new (C, 1) IfTrueNode(opt_if1) );
+    Node *complex_path = _gvn.transform( new (C, 1) IfTrueNode(if1) );
 
     // Set fast path result
-    Node *fast_result = _gvn.transform( new (C, 3) PowDNode(0, y, x) );
+    Node *fast_result = _gvn.transform( new (C, 3) PowDNode(0, x, y) );
     phi->init_req(3, fast_result);
 
     // Complex path
-    // Build the second if node (if y is int)
-    // Node for (int)y
-    Node *inty = _gvn.transform( new (C, 2) ConvD2INode(y));
-    // Node for (double)((int) y)
-    Node *doubleinty= _gvn.transform( new (C, 2) ConvI2DNode(inty));
-    // Check (double)((int) y) : y
-    Node *cmpinty= _gvn.transform(new (C, 3) CmpDNode(doubleinty, y));
-    // Check if (y isn't int) then go to slow path
-
-    Node *bol2 = _gvn.transform( new (C, 2) BoolNode( cmpinty, BoolTest::ne ) );
+    // Build the second if node (if y is long)
+    // Node for (long)y
+    Node *longy = _gvn.transform( new (C, 2) ConvD2LNode(y));
+    // Node for (double)((long) y)
+    Node *doublelongy= _gvn.transform( new (C, 2) ConvL2DNode(longy));
+    // Check (double)((long) y) : y
+    Node *cmplongy= _gvn.transform(new (C, 3) CmpDNode(doublelongy, y));
+    // If y isn't a long, go to the slow path
+
+    Node *bol2 = _gvn.transform( new (C, 2) BoolNode( cmplongy, BoolTest::ne ) );
     // Branch either way
     IfNode *if2 = create_and_xform_if(complex_path,bol2, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
-    Node *slow_path = opt_iff(r,if2); // Set region path 2
-
-    // Calculate DPow(abs(x), y)*(1 & (int)y)
+    Node* ylong_path = _gvn.transform( new (C, 1) IfFalseNode(if2));
+
+    Node *slow_path = _gvn.transform( new (C, 1) IfTrueNode(if2) );
+
+    // Calculate DPow(abs(x), y)*(1 & (long)y)
     // Node for constant 1
-    Node *conone = intcon(1);
-    // 1& (int)y
-    Node *signnode= _gvn.transform( new (C, 3) AndINode(conone, inty) );
+    Node *conone = longcon(1);
+    // 1& (long)y
+    Node *signnode= _gvn.transform( new (C, 3) AndLNode(conone, longy) );
+
+    // A huge number is always even. Detect a huge number by checking
+    // if y + 1 == y and, if so, set the integer tested for parity to 0.
+    // Required for corner case:
+    // (long)9.223372036854776E18 = max_jlong
+    // (double)(long)9.223372036854776E18 = 9.223372036854776E18
+    // max_jlong is odd but 9.223372036854776E18 is even
+    Node* yplus1 = _gvn.transform( new (C, 3) AddDNode(y, makecon(TypeD::make(1))));
+    Node *cmpyplus1= _gvn.transform(new (C, 3) CmpDNode(yplus1, y));
+    Node *bolyplus1 = _gvn.transform( new (C, 2) BoolNode( cmpyplus1, BoolTest::eq ) );
+    Node* correctedsign = NULL;
+    if (ConditionalMoveLimit != 0) {
+      correctedsign = _gvn.transform( CMoveNode::make(C, NULL, bolyplus1, signnode, longcon(0), TypeLong::LONG));
+    } else {
+      IfNode *ifyplus1 = create_and_xform_if(ylong_path,bolyplus1, PROB_FAIR, COUNT_UNKNOWN);
+      RegionNode *r = new (C, 3) RegionNode(3);
+      Node *phi = new (C, 3) PhiNode(r, TypeLong::LONG);
+      r->init_req(1, _gvn.transform( new (C, 1) IfFalseNode(ifyplus1)));
+      r->init_req(2, _gvn.transform( new (C, 1) IfTrueNode(ifyplus1)));
+      phi->init_req(1, signnode);
+      phi->init_req(2, longcon(0));
+      correctedsign = _gvn.transform(phi);
+      ylong_path = _gvn.transform(r);
+      record_for_igvn(r);
+    }
+
     // zero node
-    Node *conzero = intcon(0);
-    // Check (1&(int)y)==0?
-    Node *cmpeq1 = _gvn.transform(new (C, 3) CmpINode(signnode, conzero));
-    // Check if (1&(int)y)!=0?, if so the result is negative
+    Node *conzero = longcon(0);
+    // Check (1&(long)y)==0?
+    Node *cmpeq1 = _gvn.transform(new (C, 3) CmpLNode(correctedsign, conzero));
+    // Check if (1&(long)y)!=0?, if so the result is negative
     Node *bol3 = _gvn.transform( new (C, 2) BoolNode( cmpeq1, BoolTest::ne ) );
     // abs(x)
     Node *absx=_gvn.transform( new (C, 2) AbsDNode(x));
     // abs(x)^y
-    Node *absxpowy = _gvn.transform( new (C, 3) PowDNode(0, y, absx) );
+    Node *absxpowy = _gvn.transform( new (C, 3) PowDNode(0, absx, y) );
     // -abs(x)^y
     Node *negabsxpowy = _gvn.transform(new (C, 2) NegDNode (absxpowy));
-    // (1&(int)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
-    Node *signresult = _gvn.transform( CMoveNode::make(C, NULL, bol3, absxpowy, negabsxpowy, Type::DOUBLE));
+    // (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
+    Node *signresult = NULL;
+    if (ConditionalMoveLimit != 0) {
+      signresult = _gvn.transform( CMoveNode::make(C, NULL, bol3, absxpowy, negabsxpowy, Type::DOUBLE));
+    } else {
+      IfNode *ifyeven = create_and_xform_if(ylong_path,bol3, PROB_FAIR, COUNT_UNKNOWN);
+      RegionNode *r = new (C, 3) RegionNode(3);
+      Node *phi = new (C, 3) PhiNode(r, Type::DOUBLE);
+      r->init_req(1, _gvn.transform( new (C, 1) IfFalseNode(ifyeven)));
+      r->init_req(2, _gvn.transform( new (C, 1) IfTrueNode(ifyeven)));
+      phi->init_req(1, absxpowy);
+      phi->init_req(2, negabsxpowy);
+      signresult = _gvn.transform(phi);
+      ylong_path = _gvn.transform(r);
+      record_for_igvn(r);
+    }
     // Set complex path fast result
+    r->init_req(2, ylong_path);
     phi->init_req(2, signresult);
 
     static const jlong nan_bits = CONST64(0x7ff8000000000000);
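The corner case behind the y + 1 == y guard is easy to reproduce. In the sketch below, java_d2l mimics Java's saturating (long) cast (what ConvD2L produces; a raw C++ cast would be undefined for this value), and all three printed conditions come out true, which is exactly the bad combination the guard neutralizes:

    #include <cstdint>
    #include <cstdio>

    // Java semantics for (long)d: NaN -> 0, out-of-range saturates.
    long long java_d2l(double d) {
      if (d != d) return 0;
      if (d >= 9223372036854775808.0)  return INT64_MAX;   // >= 2^63
      if (d <= -9223372036854775808.0) return INT64_MIN;   // <= -2^63
      return (long long)d;
    }

    int main() {
      double y = 9.223372036854776E18;  // an even integer (2^63 as a double)
      long long longy = java_d2l(y);    // saturates to max_jlong, which is odd
      printf("longy & 1          : %lld\n", longy & 1);         // 1: wrong parity
      printf("(double)longy == y : %d\n", (double)longy == y);  // 1: "y is long" passes
      printf("y + 1 == y         : %d\n", y + 1 == y);          // 1: the guard fires
      return 0;
    }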
@@ -1675,27 +1764,10 @@
     result=_gvn.transform(phi);
   }
 
-  //-------------------
-  //result=(result.isNaN())? uncommon_trap():result;
-  // Check: If isNaN() by checking result!=result? then go to Strict Math
-  Node* cmpisnan = _gvn.transform(new (C, 3) CmpDNode(result,result));
-  // Build the boolean node
-  Node* bolisnum = _gvn.transform( new (C, 2) BoolNode(cmpisnan, BoolTest::eq) );
-
-  { BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
-    // End the current control-flow path
-    push_pair(x);
-    push_pair(y);
-    // Math.pow intrinsic returned a NaN, which requires StrictMath.pow
-    // to handle.  Recompile without intrinsifying Math.pow.
-    uncommon_trap(Deoptimization::Reason_intrinsic,
-                  Deoptimization::Action_make_not_entrant);
-  }
+  finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
 
   C->set_has_split_ifs(true); // Has chance for split-if optimization
 
-  push_pair(result);
-
   return true;
 }
 
@@ -1773,15 +1845,11 @@
   case vmIntrinsics::_dsqrt: return Matcher::has_match_rule(Op_SqrtD) ? inline_sqrt(id) : false;
   case vmIntrinsics::_dabs:  return Matcher::has_match_rule(Op_AbsD)  ? inline_abs(id)  : false;
 
-    // These intrinsics don't work on X86.  The ad implementation doesn't
-    // handle NaN's properly.  Instead of returning infinity, the ad
-    // implementation returns a NaN on overflow. See bug: 6304089
-    // Once the ad implementations are fixed, change the code below
-    // to match the intrinsics above
-
   case vmIntrinsics::_dexp:  return
+    Matcher::has_match_rule(Op_ExpD) ? inline_exp(id) :
     runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
   case vmIntrinsics::_dpow:  return
+    Matcher::has_match_rule(Op_PowD) ? inline_pow(id) :
     runtime_math(OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
 
    // These intrinsics are not yet correctly implemented
@@ -2104,7 +2172,7 @@
   if (id == vmIntrinsics::_reverseBytes_l && !Matcher::has_match_rule(Op_ReverseBytesL))  return false;
   if (id == vmIntrinsics::_reverseBytes_c && !Matcher::has_match_rule(Op_ReverseBytesUS)) return false;
   if (id == vmIntrinsics::_reverseBytes_s && !Matcher::has_match_rule(Op_ReverseBytesS))  return false;
-  _sp += arg_size();        // restore stack pointer
+  _sp += arg_size();  // restore stack pointer
   switch (id) {
   case vmIntrinsics::_reverseBytes_i:
     push(_gvn.transform(new (C, 2) ReverseBytesINode(0, pop())));
@@ -2128,14 +2196,17 @@
 
 const static BasicType T_ADDRESS_HOLDER = T_LONG;
 
-// Helper that guards and inserts a G1 pre-barrier.
-void LibraryCallKit::insert_g1_pre_barrier(Node* base_oop, Node* offset, Node* pre_val) {
-  assert(UseG1GC, "should not call this otherwise");
-
+// Helper that guards and inserts a pre-barrier.
+void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset,
+                                        Node* pre_val, int nargs, bool need_mem_bar) {
   // We could be accessing the referent field of a reference object. If so, when G1
   // is enabled, we need to log the value in the referent field in an SATB buffer.
   // This routine performs some compile time filters and generates suitable
   // runtime filters that guard the pre-barrier code.
+  // Also add a memory barrier for a non-volatile load from the referent
+  // field to prevent commoning of loads across a safepoint.
+  if (!UseG1GC && !need_mem_bar)
+    return;
 
   // Some compile time checks.
 
@@ -2157,11 +2228,12 @@
 
     const TypeInstPtr* itype = btype->isa_instptr();
     if (itype != NULL) {
-      // Can the klass of base_oop be statically determined
-      // to be _not_ a sub-class of Reference?
+      // Can the klass of base_oop be statically determined to be
+      // _not_ a sub-class of Reference and _not_ Object?
       ciKlass* klass = itype->klass();
-      if (klass->is_subtype_of(env()->Reference_klass()) &&
-          !env()->Reference_klass()->is_subtype_of(klass)) {
+      if ( klass->is_loaded() &&
+          !klass->is_subtype_of(env()->Reference_klass()) &&
+          !env()->Object_klass()->is_subtype_of(klass)) {
         return;
       }
     }
@@ -2171,10 +2243,8 @@
   // we need to generate the following runtime filters
   //
   // if (offset == java_lang_ref_Reference::_reference_offset) {
-  //   if (base != null) {
-  //     if (instance_of(base, java.lang.ref.Reference)) {
-  //       pre_barrier(_, pre_val, ...);
-  //     }
+  //   if (instance_of(base, java.lang.ref.Reference)) {
+  //     pre_barrier(_, pre_val, ...);
   //   }
   // }
 
@@ -2187,19 +2257,19 @@
   Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);
 
   __ if_then(offset, BoolTest::eq, referent_off, unlikely); {
-    __ if_then(base_oop, BoolTest::ne, null(), likely); {
-
       // Update graphKit memory and control from IdealKit.
       sync_kit(ideal);
 
       Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass()));
+      _sp += nargs;  // gen_instanceof might do an uncommon trap
       Node* is_instof = gen_instanceof(base_oop, ref_klass_con);
+      _sp -= nargs;
 
       // Update IdealKit memory and control from graphKit.
       __ sync_kit(this);
 
       Node* one = __ ConI(1);
-
+      // is_instof == 0 if base_oop == NULL
       __ if_then(is_instof, BoolTest::eq, one, unlikely); {
 
         // Update graphKit from IdeakKit.
@@ -2211,12 +2281,15 @@
                     NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
                     pre_val /* pre_val */,
                     T_OBJECT);
-
+        if (need_mem_bar) {
+          // Add a memory barrier to prevent commoning reads from this field
+          // across a safepoint, since the GC can change its value.
+          insert_mem_bar(Op_MemBarCPUOrder);
+        }
         // Update IdealKit from graphKit.
         __ sync_kit(this);
 
       } __ end_if(); // _ref_type != ref_none
-    } __ end_if(); // base  != NULL
   } __ end_if(); // offset == referent_offset
 
   // Final sync IdealKit and GraphKit.
@@ -2277,6 +2350,7 @@
 
   // Argument words:  "this" plus (oop/offset) or (lo/hi) args plus maybe 1 or 2 value words
   int nargs = 1 + (is_native_ptr ? 2 : 3) + (is_store ? type_words : 0);
+  assert(callee()->arg_size() == nargs, "must be");
 
   debug_only(int saved_sp = _sp);
   _sp += nargs;
@@ -2350,7 +2424,9 @@
   // object (either by using Unsafe directly or through reflection)
   // then, if G1 is enabled, we need to record the referent in an
   // SATB log buffer using the pre-barrier mechanism.
-  bool need_read_barrier = UseG1GC && !is_native_ptr && !is_store &&
+  // We also need a memory barrier to prevent commoning reads from
+  // this field across a safepoint, since the GC can change its value.
+  bool need_read_barrier = !is_native_ptr && !is_store &&
                            offset != top() && heap_base_oop != top();
 
   if (!is_store && type == T_OBJECT) {
@@ -2440,7 +2516,7 @@
       break;
     case T_OBJECT:
       if (need_read_barrier) {
-        insert_g1_pre_barrier(heap_base_oop, offset, p);
+        insert_pre_barrier(heap_base_oop, offset, p, nargs, !(is_volatile || need_mem_bar));
       }
       push(p);
       break;
@@ -2842,14 +2918,63 @@
   return true;
 }
 
+#ifdef TRACE_HAVE_INTRINSICS
+/*
+ * oop -> myklass
+ * myklass->trace_id |= USED
+ * return myklass->trace_id & ~0x3
+ */
+bool LibraryCallKit::inline_native_classID() {
+  int nargs = 1 + 1;
+  null_check_receiver(callee());  // check then ignore argument(0)
+  _sp += nargs;
+  Node* cls = do_null_check(argument(1), T_OBJECT);
+  _sp -= nargs;
+  Node* kls = load_klass_from_mirror(cls, false, nargs, NULL, 0);
+  _sp += nargs;
+  kls = do_null_check(kls, T_OBJECT);
+  _sp -= nargs;
+  ByteSize offset = TRACE_ID_OFFSET;
+  Node* insp = basic_plus_adr(kls, in_bytes(offset));
+  Node* tvalue = make_load(NULL, insp, TypeLong::LONG, T_LONG);
+  Node* bits = longcon(~0x03l); // ignore bits 0 and 1
+  Node* andl = _gvn.transform(new (C, 3) AndLNode(tvalue, bits));
+  Node* clsused = longcon(0x01l); // set the class bit
+  Node* orl = _gvn.transform(new (C, 3) OrLNode(tvalue, clsused));
+
+  const TypePtr *adr_type = _gvn.type(insp)->isa_ptr();
+  store_to_memory(control(), insp, orl, T_LONG, adr_type);
+  push_pair(andl);
+  return true;
+}
+
+bool LibraryCallKit::inline_native_threadID() {
+  Node* tls_ptr = NULL;
+  Node* cur_thr = generate_current_thread(tls_ptr);
+  Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
+  Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS);
+  p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::thread_id_offset()));
+
+  Node* threadid = NULL;
+  size_t thread_id_size = OSThread::thread_id_size();
+  if (thread_id_size == (size_t) BytesPerLong) {
+    threadid = ConvL2I(make_load(control(), p, TypeLong::LONG, T_LONG));
+    push(threadid);
+  } else if (thread_id_size == (size_t) BytesPerInt) {
+    threadid = make_load(control(), p, TypeInt::INT, T_INT);
+    push(threadid);
+  } else {
+    ShouldNotReachHere();
+  }
+  return true;
+}
+#endif
+
 //------------------------inline_native_time_funcs--------------
 // inline code for System.currentTimeMillis() and System.nanoTime()
 // these have the same type and signature
-bool LibraryCallKit::inline_native_time_funcs(bool isNano) {
-  address funcAddr = isNano ? CAST_FROM_FN_PTR(address, os::javaTimeNanos) :
-                              CAST_FROM_FN_PTR(address, os::javaTimeMillis);
-  const char * funcName = isNano ? "nanoTime" : "currentTimeMillis";
-  const TypeFunc *tf = OptoRuntime::current_time_millis_Type();
+bool LibraryCallKit::inline_native_time_funcs(address funcAddr, const char* funcName) {
+  const TypeFunc *tf = OptoRuntime::void_long_Type();
   const TypePtr* no_memory_effects = NULL;
   Node* time = make_runtime_call(RC_LEAF, tf, funcAddr, funcName, no_memory_effects);
   Node* value = _gvn.transform(new (C, 1) ProjNode(time, TypeFunc::Parms+0));
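What inline_native_classID above builds in IR, expressed as plain C++ (the constants mirror the longcon values in the hunk; TRACE_ID_OFFSET and the klass layout are VM internals and are not modeled here):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t trace_id = 0x1230;                      // klass->trace_id as loaded
      uint64_t stored   = trace_id | 0x01;             // OrL: set the "class used" bit
      uint64_t returned = trace_id & ~UINT64_C(0x03);  // AndL: drop tag bits 0 and 1
      // 'stored' goes back into the klass; 'returned' is pushed as the long result.
      printf("stored=0x%llx returned=0x%llx\n",
             (unsigned long long)stored, (unsigned long long)returned);
      return 0;
    }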
@@ -3931,7 +4056,8 @@
       }
     }
   }
-  else if (method->is_method_handle_adapter()) {
+  else if (method->is_method_handle_intrinsic() ||
+           method->is_compiled_lambda_form()) {
     // This is an internal adapter frame from the MethodHandleCompiler -- skip it
     return true;
   }
@@ -3939,113 +4065,6 @@
   return false;
 }
 
-static int value_field_offset = -1;  // offset of the "value" field of AtomicLongCSImpl.  This is needed by
-                                     // inline_native_AtomicLong_attemptUpdate() but it has no way of
-                                     // computing it since there is no lookup field by name function in the
-                                     // CI interface.  This is computed and set by inline_native_AtomicLong_get().
-                                     // Using a static variable here is safe even if we have multiple compilation
-                                     // threads because the offset is constant.  At worst the same offset will be
-                                     // computed and  stored multiple
-
-bool LibraryCallKit::inline_native_AtomicLong_get() {
-  // Restore the stack and pop off the argument
-  _sp+=1;
-  Node *obj = pop();
-
-  // get the offset of the "value" field. Since the CI interfaces
-  // does not provide a way to look up a field by name, we scan the bytecodes
-  // to get the field index.  We expect the first 2 instructions of the method
-  // to be:
-  //    0 aload_0
-  //    1 getfield "value"
-  ciMethod* method = callee();
-  if (value_field_offset == -1)
-  {
-    ciField* value_field;
-    ciBytecodeStream iter(method);
-    Bytecodes::Code bc = iter.next();
-
-    if ((bc != Bytecodes::_aload_0) &&
-              ((bc != Bytecodes::_aload) || (iter.get_index() != 0)))
-      return false;
-    bc = iter.next();
-    if (bc != Bytecodes::_getfield)
-      return false;
-    bool ignore;
-    value_field = iter.get_field(ignore);
-    value_field_offset = value_field->offset_in_bytes();
-  }
-
-  // Null check without removing any arguments.
-  _sp++;
-  obj = do_null_check(obj, T_OBJECT);
-  _sp--;
-  // Check for locking null object
-  if (stopped()) return true;
-
-  Node *adr = basic_plus_adr(obj, obj, value_field_offset);
-  const TypePtr *adr_type = _gvn.type(adr)->is_ptr();
-  int alias_idx = C->get_alias_index(adr_type);
-
-  Node *result = _gvn.transform(new (C, 3) LoadLLockedNode(control(), memory(alias_idx), adr));
-
-  push_pair(result);
-
-  return true;
-}
-
-bool LibraryCallKit::inline_native_AtomicLong_attemptUpdate() {
-  // Restore the stack and pop off the arguments
-  _sp+=5;
-  Node *newVal = pop_pair();
-  Node *oldVal = pop_pair();
-  Node *obj = pop();
-
-  // we need the offset of the "value" field which was computed when
-  // inlining the get() method.  Give up if we don't have it.
-  if (value_field_offset == -1)
-    return false;
-
-  // Null check without removing any arguments.
-  _sp+=5;
-  obj = do_null_check(obj, T_OBJECT);
-  _sp-=5;
-  // Check for locking null object
-  if (stopped()) return true;
-
-  Node *adr = basic_plus_adr(obj, obj, value_field_offset);
-  const TypePtr *adr_type = _gvn.type(adr)->is_ptr();
-  int alias_idx = C->get_alias_index(adr_type);
-
-  Node *cas = _gvn.transform(new (C, 5) StoreLConditionalNode(control(), memory(alias_idx), adr, newVal, oldVal));
-  Node *store_proj = _gvn.transform( new (C, 1) SCMemProjNode(cas));
-  set_memory(store_proj, alias_idx);
-  Node *bol = _gvn.transform( new (C, 2) BoolNode( cas, BoolTest::eq ) );
-
-  Node *result;
-  // CMove node is not used to be able fold a possible check code
-  // after attemptUpdate() call. This code could be transformed
-  // into CMove node by loop optimizations.
-  {
-    RegionNode *r = new (C, 3) RegionNode(3);
-    result = new (C, 3) PhiNode(r, TypeInt::BOOL);
-
-    Node *iff = create_and_xform_if(control(), bol, PROB_FAIR, COUNT_UNKNOWN);
-    Node *iftrue = opt_iff(r, iff);
-    r->init_req(1, iftrue);
-    result->init_req(1, intcon(1));
-    result->init_req(2, intcon(0));
-
-    set_control(_gvn.transform(r));
-    record_for_igvn(r);
-
-    C->set_has_split_ifs(true); // Has chance for split-if optimization
-  }
-
-  push(_gvn.transform(result));
-  return true;
-}
-
 bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) {
   // restore the arguments
   _sp += arg_size();
@@ -5473,7 +5492,10 @@
               result /* pre_val */,
               T_OBJECT);
 
+  // Add a memory barrier to prevent commoning reads from this field
+  // across a safepoint, since the GC can change its value.
+  insert_mem_bar(Op_MemBarCPUOrder);
+
   push(result);
   return true;
 }
-
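Why the MemBarCPUOrder after loading Reference.referent matters: without an ordering constraint the optimizer may fold two loads of the same field into one even when a safepoint, at which the GC can clear a weak referent, sits between them. A plain C++ analogy (illustrative only; the VM emits its own barrier node, and std::atomic_signal_fence merely plays the role of a compiler-reordering fence here):

    #include <atomic>

    int* g_referent;  // stands in for Reference.referent (hypothetical)

    int use_twice() {
      int* a = g_referent;  // load before a (conceptual) safepoint
      // A safepoint here could let the GC clear the referent, so the
      // second load must stay a fresh load rather than reuse 'a'.
      std::atomic_signal_fence(std::memory_order_seq_cst);
      int* b = g_referent;
      return (a != nullptr) + (b != nullptr);
    }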
--- a/src/share/vm/opto/loopPredicate.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/loopPredicate.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -212,9 +212,8 @@
     Node* use = rgn->fast_out(i);
     if (use->is_Phi() && use->outcnt() > 0) {
       assert(use->in(0) == rgn, "");
-      _igvn.hash_delete(use);
+      _igvn.rehash_node_delayed(use);
       use->add_req(use->in(proj_index));
-      _igvn._worklist.push(use);
       has_phi = true;
     }
   }
@@ -284,9 +283,8 @@
   for (DUIterator_Fast imax, i = rgn->fast_outs(imax); i < imax; i++) {
     Node* use = rgn->fast_out(i);
     if (use->is_Phi() && use->outcnt() > 0) {
-      hash_delete(use);
+      rehash_node_delayed(use);
       use->add_req(use->in(proj_index));
-      _worklist.push(use);
       has_phi = true;
     }
   }
--- a/src/share/vm/opto/loopTransform.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/loopTransform.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -547,11 +547,6 @@
     Node *nnn = old_new[old->_idx];
     if (!has_ctrl(nnn))
       set_idom(nnn, idom(nnn), dd-1);
-    // While we're at it, remove any SafePoints from the peeled code
-    if (old->Opcode() == Op_SafePoint) {
-      Node *nnn = old_new[old->_idx];
-      lazy_replace(nnn,nnn->in(TypeFunc::Control));
-    }
   }
 
   // Now force out all loop-invariant dominating tests.  The optimizer
@@ -961,9 +956,7 @@
   set_loop(zer_iff, loop->_parent);
 
   // Plug in the false-path, taken if we need to skip post-loop
-  _igvn.hash_delete( main_exit );
-  main_exit->set_req(0, zer_iff);
-  _igvn._worklist.push(main_exit);
+  _igvn.replace_input_of(main_exit, 0, zer_iff);
   set_idom(main_exit, zer_iff, dd_main_exit);
   set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
   // Make the true-path, must enter the post loop
@@ -1956,9 +1949,7 @@
       C->set_major_progress();
       Node *kill_con = _igvn.intcon( 1-flip );
       set_ctrl(kill_con, C->root());
-      _igvn.hash_delete(iff);
-      iff->set_req(1, kill_con);
-      _igvn._worklist.push(iff);
+      _igvn.replace_input_of(iff, 1, kill_con);
       // Find surviving projection
       assert(iff->is_If(), "");
       ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
@@ -1966,11 +1957,9 @@
       for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
         Node* cd = dp->fast_out(i); // Control-dependent node
         if( cd->is_Load() ) {   // Loads can now float around in the loop
-          _igvn.hash_delete(cd);
           // Allow the load to float around in the loop, or before it
           // but NOT before the pre-loop.
-          cd->set_req(0, ctrl);   // ctrl, not NULL
-          _igvn._worklist.push(cd);
+          _igvn.replace_input_of(cd, 0, ctrl); // ctrl, not NULL
           --i;
           --imax;
         }
@@ -2029,14 +2018,10 @@
     main_bol->set_req(1,main_cmp);
   }
   // Hack the now-private loop bounds
-  _igvn.hash_delete(main_cmp);
-  main_cmp->set_req(2, main_limit);
-  _igvn._worklist.push(main_cmp);
+  _igvn.replace_input_of(main_cmp, 2, main_limit);
   // The OpaqueNode is unshared by design
-  _igvn.hash_delete(opqzm);
   assert( opqzm->outcnt() == 1, "cannot hack shared node" );
-  opqzm->set_req(1,main_limit);
-  _igvn._worklist.push(opqzm);
+  _igvn.replace_input_of(opqzm, 1, main_limit);
 }
 
 //------------------------------DCE_loop_body----------------------------------
@@ -2178,9 +2163,7 @@
     Node* cmp = cl->loopexit()->cmp_node();
     assert(cl->limit() == cmp->in(2), "sanity");
     phase->_igvn._worklist.push(cmp->in(2)); // put limit on worklist
-    phase->_igvn.hash_delete(cmp);
-    cmp->set_req(2, exact_limit);
-    phase->_igvn._worklist.push(cmp);        // put cmp on worklist
+    phase->_igvn.replace_input_of(cmp, 2, exact_limit); // put cmp on worklist
   }
   // Note: the final value after increment should not overflow since
   // counted loop has limit check predicate.
--- a/src/share/vm/opto/loopUnswitch.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/loopUnswitch.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -174,27 +174,21 @@
       Node* use = worklist.pop();
       Node* nuse = use->clone();
       nuse->set_req(0, invar_proj);
-      _igvn.hash_delete(use);
-      use->set_req(1, nuse);
-      _igvn._worklist.push(use);
+      _igvn.replace_input_of(use, 1, nuse);
       register_new_node(nuse, invar_proj);
       // Same for the clone
       Node* use_clone = old_new[use->_idx];
-      _igvn.hash_delete(use_clone);
-      use_clone->set_req(1, nuse);
-      _igvn._worklist.push(use_clone);
+      _igvn.replace_input_of(use_clone, 1, nuse);
     }
   }
 
   // Hardwire the control paths in the loops into if(true) and if(false)
-  _igvn.hash_delete(unswitch_iff);
+  _igvn.rehash_node_delayed(unswitch_iff);
   short_circuit_if(unswitch_iff, proj_true);
-  _igvn._worklist.push(unswitch_iff);
 
   IfNode* unswitch_iff_clone = old_new[unswitch_iff->_idx]->as_If();
-  _igvn.hash_delete(unswitch_iff_clone);
+  _igvn.rehash_node_delayed(unswitch_iff_clone);
   short_circuit_if(unswitch_iff_clone, proj_false);
-  _igvn._worklist.push(unswitch_iff_clone);
 
   // Reoptimize loops
   loop->record_for_igvn();
@@ -224,8 +218,7 @@
   LoopNode* head  = loop->_head->as_Loop();
   bool counted_loop = head->is_CountedLoop();
   Node*     entry = head->in(LoopNode::EntryControl);
-  _igvn.hash_delete(entry);
-  _igvn._worklist.push(entry);
+  _igvn.rehash_node_delayed(entry);
   IdealLoopTree* outer_loop = loop->_parent;
 
   Node *cont      = _igvn.intcon(1);
@@ -249,18 +242,14 @@
 
   // Fast (true) control
   Node* iffast_pred = clone_loop_predicates(entry, iffast, !counted_loop);
-  _igvn.hash_delete(head);
-  head->set_req(LoopNode::EntryControl, iffast_pred);
+  _igvn.replace_input_of(head, LoopNode::EntryControl, iffast_pred);
   set_idom(head, iffast_pred, dom_depth(head));
-  _igvn._worklist.push(head);
 
   // Slow (false) control
   Node* ifslow_pred = clone_loop_predicates(entry, ifslow, !counted_loop);
   LoopNode* slow_head = old_new[head->_idx]->as_Loop();
-  _igvn.hash_delete(slow_head);
-  slow_head->set_req(LoopNode::EntryControl, ifslow_pred);
+  _igvn.replace_input_of(slow_head, LoopNode::EntryControl, ifslow_pred);
   set_idom(slow_head, ifslow_pred, dom_depth(slow_head));
-  _igvn._worklist.push(slow_head);
 
   recompute_dom_depth();
 
--- a/src/share/vm/opto/loopnode.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/loopnode.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -577,6 +577,9 @@
   Node *sfpt = x->in(LoopNode::LoopBackControl);
   if (sfpt->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt)) {
     lazy_replace( sfpt, iftrue );
+    if (loop->_safepts != NULL) {
+      loop->_safepts->yank(sfpt);
+    }
     loop->_tail = iftrue;
   }
 
@@ -668,8 +671,12 @@
 
   // Check for immediately preceding SafePoint and remove
   Node *sfpt2 = le->in(0);
-  if (sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2))
+  if (sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2)) {
     lazy_replace( sfpt2, sfpt2->in(TypeFunc::Control));
+    if (loop->_safepts != NULL) {
+      loop->_safepts->yank(sfpt2);
+    }
+  }
 
   // Free up intermediate goo
   _igvn.remove_dead_node(hook);
@@ -1129,8 +1136,7 @@
         // I'm mid-iteration over the Region's uses.
         for (DUIterator_Last imin, i = old_phi->last_outs(imin); i >= imin; ) {
           Node* use = old_phi->last_out(i);
-          igvn.hash_delete(use);
-          igvn._worklist.push(use);
+          igvn.rehash_node_delayed(use);
           uint uses_found = 0;
           for (uint j = 0; j < use->len(); j++) {
             if (use->in(j) == old_phi) {
@@ -1186,10 +1192,8 @@
       phi->init_req(LoopNode::LoopBackControl, old_phi->in(outer_idx));
       phi = igvn.register_new_node_with_optimizer(phi, old_phi);
       // Make old Phi point to new Phi on the fall-in path
-      igvn.hash_delete(old_phi);
-      old_phi->set_req(LoopNode::EntryControl, phi);
+      igvn.replace_input_of(old_phi, LoopNode::EntryControl, phi);
       old_phi->del_req(outer_idx);
-      igvn._worklist.push(old_phi);
     }
   }
 
@@ -1529,10 +1533,8 @@
 void IdealLoopTree::check_safepts(VectorSet &visited, Node_List &stack) {
   // Bottom up traversal
   IdealLoopTree* ch = _child;
-  while (ch != NULL) {
-    ch->check_safepts(visited, stack);
-    ch = ch->_next;
-  }
+  if (_child) _child->check_safepts(visited, stack);
+  if (_next)  _next ->check_safepts(visited, stack);
 
   if (!_head->is_CountedLoop() && !_has_sfpt && _parent != NULL && !_irreducible) {
     bool  has_call         = false; // call on dom-path
@@ -1705,29 +1707,39 @@
       phase->is_counted_loop(_head, this)) {
     _has_sfpt = 1;              // Indicate we do not need a safepoint here
 
-    // Look for a safepoint to remove
-    for (Node* n = tail(); n != _head; n = phase->idom(n))
-      if (n->Opcode() == Op_SafePoint && phase->get_loop(n) == this &&
-          phase->is_deleteable_safept(n))
-        phase->lazy_replace(n,n->in(TypeFunc::Control));
+    // Look for safepoints to remove.
+    Node_List* sfpts = _safepts;
+    if (sfpts != NULL) {
+      for (uint i = 0; i < sfpts->size(); i++) {
+        Node* n = sfpts->at(i);
+        assert(phase->get_loop(n) == this, "");
+        if (phase->is_deleteable_safept(n)) {
+          phase->lazy_replace(n, n->in(TypeFunc::Control));
+        }
+      }
+    }
 
     // Look for induction variables
     phase->replace_parallel_iv(this);
 
   } else if (_parent != NULL && !_irreducible) {
     // Not a counted loop.
-    // Look for a safepoint on the idom-path to remove, preserving the first one
-    bool found = false;
-    Node* n = tail();
-    for (; n != _head && !found; n = phase->idom(n)) {
-      if (n->Opcode() == Op_SafePoint && phase->get_loop(n) == this)
-        found = true; // Found one
+    // Look for a safepoint on the idom-path.
+    Node* sfpt = tail();
+    for (; sfpt != _head; sfpt = phase->idom(sfpt)) {
+      if (sfpt->Opcode() == Op_SafePoint && phase->get_loop(sfpt) == this)
+        break; // Found one
     }
-    // Skip past it and delete the others
-    for (; n != _head; n = phase->idom(n)) {
-      if (n->Opcode() == Op_SafePoint && phase->get_loop(n) == this &&
-          phase->is_deleteable_safept(n))
-        phase->lazy_replace(n,n->in(TypeFunc::Control));
+    // Delete other safepoints in this loop.
+    Node_List* sfpts = _safepts;
+    if (sfpts != NULL && sfpt != _head && sfpt->Opcode() == Op_SafePoint) {
+      for (uint i = 0; i < sfpts->size(); i++) {
+        Node* n = sfpts->at(i);
+        assert(phase->get_loop(n) == this, "");
+        if (n != sfpt && phase->is_deleteable_safept(n)) {
+          phase->lazy_replace(n, n->in(TypeFunc::Control));
+        }
+      }
     }
   }
 
@@ -1776,6 +1788,8 @@
     if (stride_con > 0) tty->print("+");
     tty->print("%d", stride_con);
 
+    tty->print(" (%d iters) ", (int)cl->profile_trip_cnt());
+
     if (cl->is_pre_loop ()) tty->print(" pre" );
     if (cl->is_main_loop()) tty->print(" main");
     if (cl->is_post_loop()) tty->print(" post");
@@ -1992,9 +2006,7 @@
     // we do it here.
     for( uint i = 1; i < C->root()->req(); i++ ) {
       if( !_nodes[C->root()->in(i)->_idx] ) {    // Dead path into Root?
-        _igvn.hash_delete(C->root());
-        C->root()->del_req(i);
-        _igvn._worklist.push(C->root());
+        _igvn.delete_input_of(C->root(), i);
         i--;                      // Rerun same iteration on compressed edges
       }
     }
@@ -2756,7 +2768,8 @@
         // Do not count uncommon calls
         if( !n->is_CallStaticJava() || !n->as_CallStaticJava()->_name ) {
           Node *iff = n->in(0)->in(0);
-          if( !iff->is_If() ||
+          // Do not allow any calls in vectorized loops.
+          if( UseSuperWord || !iff->is_If() ||
               (n->in(0)->Opcode() == Op_IfFalse &&
                (1.0 - iff->as_If()->_prob) >= 0.01) ||
               (iff->as_If()->_prob >= 0.01) )
@@ -2768,6 +2781,10 @@
         // if the allocation is not eliminated for some reason.
         innermost->_allow_optimizations = false;
         innermost->_has_call = 1; // = true
+      } else if (n->Opcode() == Op_SafePoint) {
+        // Record all safepoints in this loop.
+        if (innermost->_safepts == NULL) innermost->_safepts = new Node_List();
+        innermost->_safepts->push(n);
       }
     }
   }
@@ -2818,6 +2835,9 @@
               is_deleteable_safept(n)) {
             Node *in = n->in(TypeFunc::Control);
             lazy_replace(n,in);       // Pull safepoint now
+            if (ilt->_safepts != NULL) {
+              ilt->_safepts->yank(n);
+            }
             // Carry on with the recursion "as if" we are walking
             // only the control input
             if( !visited.test_set( in->_idx ) ) {
@@ -3221,7 +3241,8 @@
     case Op_ModF:
     case Op_ModD:
     case Op_LoadB:              // Same with Loads; they can sink
-    case Op_LoadUS:             // during loop optimizations.
+    case Op_LoadUB:             // during loop optimizations.
+    case Op_LoadUS:
     case Op_LoadD:
     case Op_LoadF:
     case Op_LoadI:
--- a/src/share/vm/opto/loopnode.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/loopnode.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -336,6 +336,7 @@
         _has_sfpt:1,            // True if has non-call safepoint
         _rce_candidate:1;       // True if candidate for range check elimination
 
+  Node_List* _safepts;          // List of safepoints in this loop
   Node_List* _required_safept;  // A inner loop cannot delete these safepts;
   bool  _allow_optimizations;   // Allow loop optimizations
 
@@ -343,6 +344,7 @@
     : _parent(0), _next(0), _child(0),
       _head(head), _tail(tail),
       _phase(phase),
+      _safepts(NULL),
       _required_safept(NULL),
       _allow_optimizations(true),
       _nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0)
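The contract behind the new _safepts field, sketched with std::vector standing in for Node_List: loop-tree construction records every SafePoint into its innermost loop's list, later passes walk that list instead of re-walking the idom chain, and any pass that lazy_replace()s a safepoint must yank it so the list stays in sync (a sketch of the assumed invariant, not HotSpot code):

    #include <algorithm>
    #include <vector>

    struct Node;

    struct LoopSketch {
      std::vector<Node*> safepts;          // stands in for Node_List* _safepts

      void record_safepoint(Node* sfpt) {  // during loop-tree construction
        safepts.push_back(sfpt);
      }
      void yank(Node* sfpt) {              // on lazy_replace of a safepoint
        safepts.erase(std::remove(safepts.begin(), safepts.end(), sfpt),
                      safepts.end());
      }
    };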
--- a/src/share/vm/opto/loopopts.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/loopopts.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -216,9 +216,7 @@
   Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
   set_ctrl(con, C->root()); // Constant gets a new use
   // Hack the dominated test
-  _igvn.hash_delete(iff);
-  iff->set_req(1, con);
-  _igvn._worklist.push(iff);
+  _igvn.replace_input_of(iff, 1, con);
 
   // If I dont have a reachable TRUE and FALSE path following the IfNode then
   // I can assume this path reaches an infinite loop.  In this case it's not
@@ -245,10 +243,8 @@
     Node* cd = dp->fast_out(i); // Control-dependent node
     if (cd->depends_only_on_test()) {
       assert(cd->in(0) == dp, "");
-      _igvn.hash_delete(cd);
-      cd->set_req(0, prevdom);
+      _igvn.replace_input_of(cd, 0, prevdom);
       set_early_ctrl(cd);
-      _igvn._worklist.push(cd);
       IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
       if (old_loop != new_loop) {
         if (!old_loop->_child) old_loop->_body.yank(cd);
@@ -952,8 +948,7 @@
         if (!n->is_Load() || late_load_ctrl != n_ctrl) {
           for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
             Node *u = n->last_out(j); // Clone private computation per use
-            _igvn.hash_delete(u);
-            _igvn._worklist.push(u);
+            _igvn.rehash_node_delayed(u);
             Node *x = n->clone(); // Clone computation
             Node *x_ctrl = NULL;
             if( u->is_Phi() ) {
@@ -1089,9 +1084,7 @@
   for( i = 1; i < phi->req(); i++ ) {
     Node *b = phi->in(i);
     if( b->is_Phi() ) {
-      _igvn.hash_delete(phi);
-      _igvn._worklist.push(phi);
-      phi->set_req(i, clone_iff( b->as_Phi(), loop ));
+      _igvn.replace_input_of(phi, i, clone_iff( b->as_Phi(), loop ));
     } else {
       assert( b->is_Bool(), "" );
     }
@@ -1161,9 +1154,7 @@
   for( i = 1; i < phi->req(); i++ ) {
     Node *b = phi->in(i);
     if( b->is_Phi() ) {
-      _igvn.hash_delete(phi);
-      _igvn._worklist.push(phi);
-      phi->set_req(i, clone_bool( b->as_Phi(), loop ));
+      _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
     } else {
       assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
     }
@@ -1347,8 +1338,7 @@
         // The original user of 'use' uses 'r' instead.
         for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
           Node* useuse = use->last_out(l);
-          _igvn.hash_delete(useuse);
-          _igvn._worklist.push(useuse);
+          _igvn.rehash_node_delayed(useuse);
           uint uses_found = 0;
           if( useuse->in(0) == use ) {
             useuse->set_req(0, r);
@@ -1435,9 +1425,7 @@
         if( use->is_Phi() )     // Phi use is in prior block
           cfg = prev->in(idx);  // NOT in block of Phi itself
         if (cfg->is_top()) {    // Use is dead?
-          _igvn.hash_delete(use);
-          _igvn._worklist.push(use);
-          use->set_req(idx, C->top());
+          _igvn.replace_input_of(use, idx, C->top());
           continue;
         }
 
@@ -1487,9 +1475,7 @@
           set_ctrl(phi, prev);
         }
         // Make 'use' use the Phi instead of the old loop body exit value
-        _igvn.hash_delete(use);
-        _igvn._worklist.push(use);
-        use->set_req(idx, phi);
+        _igvn.replace_input_of(use, idx, phi);
         if( use->_idx >= new_counter ) { // If updating new phis
           // Not needed for correctness, but prevents a weak assert
           // in AddPNode from tripping (when we end up with different
@@ -1517,9 +1503,7 @@
       Node *iff = split_if_set->pop();
       if( iff->in(1)->is_Phi() ) {
         BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
-        _igvn.hash_delete(iff);
-        _igvn._worklist.push(iff);
-        iff->set_req(1, b);
+        _igvn.replace_input_of(iff, 1, b);
       }
     }
   }
@@ -1529,9 +1513,7 @@
       Node *phi = b->in(1);
       assert( phi->is_Phi(), "" );
       CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
-      _igvn.hash_delete(b);
-      _igvn._worklist.push(b);
-      b->set_req(1, cmp);
+      _igvn.replace_input_of(b, 1, cmp);
     }
   }
   if( split_cex_set ) {
@@ -1686,10 +1668,8 @@
   ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
   int ddepth = dom_depth(proj);
 
-  _igvn.hash_delete(iff);
-  _igvn._worklist.push(iff);
-  _igvn.hash_delete(proj);
-  _igvn._worklist.push(proj);
+  _igvn.rehash_node_delayed(iff);
+  _igvn.rehash_node_delayed(proj);
 
   proj->set_req(0, NULL);  // temporary disconnect
   ProjNode* proj2 = proj_clone(proj, iff);
@@ -1745,10 +1725,8 @@
   ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
   int ddepth = dom_depth(proj);
 
-  _igvn.hash_delete(iff);
-  _igvn._worklist.push(iff);
-  _igvn.hash_delete(proj);
-  _igvn._worklist.push(proj);
+  _igvn.rehash_node_delayed(iff);
+  _igvn.rehash_node_delayed(proj);
 
   proj->set_req(0, NULL);  // temporary disconnect
   ProjNode* proj2 = proj_clone(proj, iff);
@@ -1970,9 +1948,7 @@
 
     // clone "n" and insert it between the inputs of "n" and the use outside the loop
     Node* n_clone = n->clone();
-    _igvn.hash_delete(use);
-    use->set_req(j, n_clone);
-    _igvn._worklist.push(use);
+    _igvn.replace_input_of(use, j, n_clone);
     Node* use_c;
     if (!use->is_Phi()) {
       use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
@@ -2028,8 +2004,7 @@
 #endif
     while( worklist.size() ) {
       Node *use = worklist.pop();
-      _igvn.hash_delete(use);
-      _igvn._worklist.push(use);
+      _igvn.rehash_node_delayed(use);
       for (uint j = 1; j < use->req(); j++) {
         if (use->in(j) == n) {
           use->set_req(j, n_clone);
@@ -2055,9 +2030,7 @@
     _igvn.remove_dead_node(phi);
     phi = hit;
   }
-  _igvn.hash_delete(use);
-  _igvn._worklist.push(use);
-  use->set_req(idx, phi);
+  _igvn.replace_input_of(use, idx, phi);
 }
 
 #ifdef ASSERT
@@ -2630,9 +2603,7 @@
               // use is in loop
               if (old_new[use->_idx] != NULL) { // null for dead code
                 Node* use_clone = old_new[use->_idx];
-                _igvn.hash_delete(use);
-                use->set_req(j, C->top());
-                _igvn._worklist.push(use);
+                _igvn.replace_input_of(use, j, C->top());
                 insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
               }
             } else {
@@ -2667,46 +2638,35 @@
     if (!n->is_CFG()           && n->in(0) != NULL        &&
         not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
       Node* n_clone = old_new[n->_idx];
-      _igvn.hash_delete(n_clone);
-      n_clone->set_req(0, new_head_clone);
-      _igvn._worklist.push(n_clone);
+      _igvn.replace_input_of(n_clone, 0, new_head_clone);
     }
   }
 
   // Backedge of the surviving new_head (the clone) is original last_peel
-  _igvn.hash_delete(new_head_clone);
-  new_head_clone->set_req(LoopNode::LoopBackControl, last_peel);
-  _igvn._worklist.push(new_head_clone);
+  _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
 
   // Cut first node in original not_peel set
-  _igvn.hash_delete(new_head);
-  new_head->set_req(LoopNode::EntryControl, C->top());
-  new_head->set_req(LoopNode::LoopBackControl, C->top());
-  _igvn._worklist.push(new_head);
+  _igvn.rehash_node_delayed(new_head);                     // Multiple edge updates:
+  new_head->set_req(LoopNode::EntryControl,    C->top());  //   use rehash_node_delayed / set_req instead of
+  new_head->set_req(LoopNode::LoopBackControl, C->top());  //   multiple replace_input_of calls
 
   // Copy head_clone back-branch info to original head
   // and remove original head's loop entry and
   // clone head's back-branch
-  _igvn.hash_delete(head);
-  _igvn.hash_delete(head_clone);
-  head->set_req(LoopNode::EntryControl, head_clone->in(LoopNode::LoopBackControl));
+  _igvn.rehash_node_delayed(head); // Multiple edge updates
+  head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
   head->set_req(LoopNode::LoopBackControl, C->top());
-  head_clone->set_req(LoopNode::LoopBackControl, C->top());
-  _igvn._worklist.push(head);
-  _igvn._worklist.push(head_clone);
+  _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
 
   // Similarly modify the phis
   for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
     Node* use = head->fast_out(k);
     if (use->is_Phi() && use->outcnt() > 0) {
       Node* use_clone = old_new[use->_idx];
-      _igvn.hash_delete(use);
-      _igvn.hash_delete(use_clone);
-      use->set_req(LoopNode::EntryControl, use_clone->in(LoopNode::LoopBackControl));
+      _igvn.rehash_node_delayed(use); // Multiple edge updates
+      use->set_req(LoopNode::EntryControl,    use_clone->in(LoopNode::LoopBackControl));
       use->set_req(LoopNode::LoopBackControl, C->top());
-      use_clone->set_req(LoopNode::LoopBackControl, C->top());
-      _igvn._worklist.push(use);
-      _igvn._worklist.push(use_clone);
+      _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
     }
   }
 
@@ -2792,8 +2752,7 @@
       set_ctrl(neg_stride, C->root());
       Node *post = new (C, 3) AddINode( opaq, neg_stride);
       register_new_node( post, u_ctrl );
-      _igvn.hash_delete(use);
-      _igvn._worklist.push(use);
+      _igvn.rehash_node_delayed(use);
       for (uint j = 1; j < use->req(); j++) {
         if (use->in(j) == phi)
           use->set_req(j, post);
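
The loopopts.cpp hunks above are a mechanical refactoring: each hash_delete / set_req / _worklist.push triple collapses into the new PhaseIterGVN helpers (their definitions appear in the phaseX.hpp hunk later in this changeset). A minimal sketch of the before/after pattern:

  // Before: three separate steps that must stay together.
  _igvn.hash_delete(use);        // unhash before mutating the node
  use->set_req(j, n_clone);      // rewire the j'th input
  _igvn._worklist.push(use);     // re-enqueue so the node is rehashed

  // After: one call with identical semantics.
  _igvn.replace_input_of(use, j, n_clone);

  // When several edges of one node change, a single delayed rehash
  // brackets all the set_req() calls, as the new_head comments note:
  _igvn.rehash_node_delayed(head);
  head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
  head->set_req(LoopNode::LoopBackControl, C->top());
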
--- a/src/share/vm/opto/machnode.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/machnode.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -439,9 +439,9 @@
   // Don't rematerialize somebody with bound inputs - it stretches a
   // fixed register lifetime.
   uint idx = oper_input_base();
-  if( req() > idx ) {
+  if (req() > idx) {
     const RegMask &rm = in_RegMask(idx);
-    if( rm.is_bound1() || rm.is_bound2() )
+    if (rm.is_bound(ideal_reg()))
       return false;
   }
 
--- a/src/share/vm/opto/machnode.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/machnode.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -319,6 +319,7 @@
 class MachTypeNode : public MachNode {
   virtual uint size_of() const { return sizeof(*this); } // Size is bigger
 public:
+  MachTypeNode( ) {}
   const Type *_bottom_type;
 
   virtual const class Type *bottom_type() const { return _bottom_type; }
@@ -370,12 +371,12 @@
 
 //------------------------------MachConstantNode-------------------------------
 // Machine node that holds a constant which is stored in the constant table.
-class MachConstantNode : public MachNode {
+class MachConstantNode : public MachTypeNode {
 protected:
   Compile::Constant _constant;  // This node's constant.
 
 public:
-  MachConstantNode() : MachNode() {
+  MachConstantNode() : MachTypeNode() {
     init_class_id(Class_MachConstant);
   }
 
--- a/src/share/vm/opto/macro.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/macro.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -409,7 +409,7 @@
   Node *alloc_mem = alloc->in(TypeFunc::Memory);
 
   uint length = mem->req();
-  GrowableArray <Node *> values(length, length, NULL);
+  GrowableArray <Node *> values(length, length, NULL, false);
 
   // create a new Phi for the value
   PhiNode *phi = new (C, length) PhiNode(mem->in(0), phi_type, NULL, instance_id, alias_idx, offset);
@@ -1447,9 +1447,8 @@
   if (!always_slow && _memproj_fallthrough != NULL) {
     for (DUIterator_Fast imax, i = _memproj_fallthrough->fast_outs(imax); i < imax; i++) {
       Node *use = _memproj_fallthrough->fast_out(i);
-      _igvn.hash_delete(use);
+      _igvn.rehash_node_delayed(use);
       imax -= replace_input(use, _memproj_fallthrough, result_phi_rawmem);
-      _igvn._worklist.push(use);
       // back up iterator
       --i;
     }
@@ -1463,9 +1462,8 @@
     }
     for (DUIterator_Fast imax, i = _memproj_catchall->fast_outs(imax); i < imax; i++) {
       Node *use = _memproj_catchall->fast_out(i);
-      _igvn.hash_delete(use);
+      _igvn.rehash_node_delayed(use);
       imax -= replace_input(use, _memproj_catchall, _memproj_fallthrough);
-      _igvn._worklist.push(use);
       // back up iterator
       --i;
     }
@@ -1481,9 +1479,8 @@
   if (_ioproj_fallthrough != NULL) {
     for (DUIterator_Fast imax, i = _ioproj_fallthrough->fast_outs(imax); i < imax; i++) {
       Node *use = _ioproj_fallthrough->fast_out(i);
-      _igvn.hash_delete(use);
+      _igvn.rehash_node_delayed(use);
       imax -= replace_input(use, _ioproj_fallthrough, result_phi_i_o);
-      _igvn._worklist.push(use);
       // back up iterator
       --i;
     }
@@ -1497,9 +1494,8 @@
     }
     for (DUIterator_Fast imax, i = _ioproj_catchall->fast_outs(imax); i < imax; i++) {
       Node *use = _ioproj_catchall->fast_out(i);
-      _igvn.hash_delete(use);
+      _igvn.rehash_node_delayed(use);
       imax -= replace_input(use, _ioproj_catchall, _ioproj_fallthrough);
-      _igvn._worklist.push(use);
       // back up iterator
       --i;
     }
@@ -1857,18 +1853,16 @@
       if (alock->box_node() == oldbox && alock->obj_node()->eqv_uncast(obj)) {
         // Replace Box and mark eliminated all related locks and unlocks.
         alock->set_non_esc_obj();
-        _igvn.hash_delete(alock);
+        _igvn.rehash_node_delayed(alock);
         alock->set_box_node(newbox);
-        _igvn._worklist.push(alock);
         next_edge = false;
       }
     }
     if (u->is_FastLock() && u->as_FastLock()->obj_node()->eqv_uncast(obj)) {
       FastLockNode* flock = u->as_FastLock();
       assert(flock->box_node() == oldbox, "sanity");
-      _igvn.hash_delete(flock);
+      _igvn.rehash_node_delayed(flock);
       flock->set_box_node(newbox);
-      _igvn._worklist.push(flock);
       next_edge = false;
     }
 
@@ -1886,9 +1880,7 @@
           Node* box_node = sfn->monitor_box(jvms, idx);
           if (box_node == oldbox && obj_node->eqv_uncast(obj)) {
             int j = jvms->monitor_box_offset(idx);
-            _igvn.hash_delete(u);
-            u->set_req(j, newbox);
-            _igvn._worklist.push(u);
+            _igvn.replace_input_of(u, j, newbox);
             next_edge = false;
           }
         }
--- a/src/share/vm/opto/matcher.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/matcher.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,6 +35,7 @@
 #include "opto/rootnode.hpp"
 #include "opto/runtime.hpp"
 #include "opto/type.hpp"
+#include "opto/vectornode.hpp"
 #include "runtime/atomic.hpp"
 #include "runtime/os.hpp"
 #ifdef TARGET_ARCH_MODEL_x86_32
@@ -58,18 +59,6 @@
 
 OptoReg::Name OptoReg::c_frame_pointer;
 
-
-
-const int Matcher::base2reg[Type::lastype] = {
-  Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
-  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
-  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
-  0, 0/*abio*/,
-  Op_RegP /* Return address */, 0, /* the memories */
-  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
-  0  /*bottom*/
-};
-
 const RegMask *Matcher::idealreg2regmask[_last_machine_leaf];
 RegMask Matcher::mreg2regmask[_last_Mach_Reg];
 RegMask Matcher::STACK_ONLY_mask;
@@ -107,6 +96,10 @@
   idealreg2spillmask  [Op_RegF] = NULL;
   idealreg2spillmask  [Op_RegD] = NULL;
   idealreg2spillmask  [Op_RegP] = NULL;
+  idealreg2spillmask  [Op_VecS] = NULL;
+  idealreg2spillmask  [Op_VecD] = NULL;
+  idealreg2spillmask  [Op_VecX] = NULL;
+  idealreg2spillmask  [Op_VecY] = NULL;
 
   idealreg2debugmask  [Op_RegI] = NULL;
   idealreg2debugmask  [Op_RegN] = NULL;
@@ -114,6 +107,10 @@
   idealreg2debugmask  [Op_RegF] = NULL;
   idealreg2debugmask  [Op_RegD] = NULL;
   idealreg2debugmask  [Op_RegP] = NULL;
+  idealreg2debugmask  [Op_VecS] = NULL;
+  idealreg2debugmask  [Op_VecD] = NULL;
+  idealreg2debugmask  [Op_VecX] = NULL;
+  idealreg2debugmask  [Op_VecY] = NULL;
 
   idealreg2mhdebugmask[Op_RegI] = NULL;
   idealreg2mhdebugmask[Op_RegN] = NULL;
@@ -121,6 +118,10 @@
   idealreg2mhdebugmask[Op_RegF] = NULL;
   idealreg2mhdebugmask[Op_RegD] = NULL;
   idealreg2mhdebugmask[Op_RegP] = NULL;
+  idealreg2mhdebugmask[Op_VecS] = NULL;
+  idealreg2mhdebugmask[Op_VecD] = NULL;
+  idealreg2mhdebugmask[Op_VecX] = NULL;
+  idealreg2mhdebugmask[Op_VecY] = NULL;
 
   debug_only(_mem_node = NULL;)   // Ideal memory node consumed by mach node
 }
@@ -134,7 +135,7 @@
     warped = OptoReg::add(warped, C->out_preserve_stack_slots());
     if( warped >= _in_arg_limit )
       _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen
-    if (!RegMask::can_represent(warped)) {
+    if (!RegMask::can_represent_arg(warped)) {
       // the compiler cannot represent this method's calling sequence
       C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence");
       return OptoReg::Bad;
@@ -302,7 +303,7 @@
   _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
   assert( is_even(_out_arg_limit), "out_preserve must be even" );
 
-  if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) {
+  if (!RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1))) {
     // the compiler cannot represent this method's calling sequence
     C->record_method_not_compilable("must be able to represent all call arguments in reg mask");
   }
@@ -428,7 +429,7 @@
 void Matcher::init_first_stack_mask() {
 
   // Allocate storage for spill masks as masks for the appropriate load type.
-  RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * 3*6);
+  RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+4));
 
   idealreg2spillmask  [Op_RegN] = &rms[0];
   idealreg2spillmask  [Op_RegI] = &rms[1];
@@ -451,6 +452,11 @@
   idealreg2mhdebugmask[Op_RegD] = &rms[16];
   idealreg2mhdebugmask[Op_RegP] = &rms[17];
 
+  idealreg2spillmask  [Op_VecS] = &rms[18];
+  idealreg2spillmask  [Op_VecD] = &rms[19];
+  idealreg2spillmask  [Op_VecX] = &rms[20];
+  idealreg2spillmask  [Op_VecY] = &rms[21];
+
   OptoReg::Name i;
 
   // At first, start with the empty mask
@@ -462,7 +468,7 @@
     C->FIRST_STACK_mask().Insert(i);
 
   // Add in all bits past the outgoing argument area
-  guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)),
+  guarantee(RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1)),
             "must be able to represent all call arguments in reg mask");
   init = _out_arg_limit;
   for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
@@ -472,21 +478,48 @@
   C->FIRST_STACK_mask().set_AllStack();
 
   // Make spill masks.  Registers for their class, plus FIRST_STACK_mask.
+  RegMask aligned_stack_mask = C->FIRST_STACK_mask();
+  // Keep spill masks aligned.
+  aligned_stack_mask.clear_to_pairs();
+  assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+
+  *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
 #ifdef _LP64
   *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN];
    idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask());
+   idealreg2spillmask[Op_RegP]->OR(aligned_stack_mask);
+#else
+   idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
 #endif
   *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI];
    idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask());
   *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL];
-   idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask());
+   idealreg2spillmask[Op_RegL]->OR(aligned_stack_mask);
   *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF];
    idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask());
   *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
-   idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask());
-  *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
-   idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
+   idealreg2spillmask[Op_RegD]->OR(aligned_stack_mask);
 
+  if (Matcher::vector_size_supported(T_BYTE,4)) {
+    *idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS];
+     idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask());
+  }
+  if (Matcher::vector_size_supported(T_FLOAT,2)) {
+    *idealreg2spillmask[Op_VecD] = *idealreg2regmask[Op_VecD];
+     idealreg2spillmask[Op_VecD]->OR(aligned_stack_mask);
+  }
+  if (Matcher::vector_size_supported(T_FLOAT,4)) {
+     aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecX);
+     assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+    *idealreg2spillmask[Op_VecX] = *idealreg2regmask[Op_VecX];
+     idealreg2spillmask[Op_VecX]->OR(aligned_stack_mask);
+  }
+  if (Matcher::vector_size_supported(T_FLOAT,8)) {
+     aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecY);
+     assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+    *idealreg2spillmask[Op_VecY] = *idealreg2regmask[Op_VecY];
+     idealreg2spillmask[Op_VecY]->OR(aligned_stack_mask);
+  }
    if (UseFPUForSpilling) {
      // This mask logic assumes that the spill operations are
      // symmetric and that the registers involved are the same size.
@@ -807,6 +840,25 @@
   idealreg2regmask[Op_RegF] = &spillF->out_RegMask();
   idealreg2regmask[Op_RegD] = &spillD->out_RegMask();
   idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
+
+  // Vector regmasks.
+  if (Matcher::vector_size_supported(T_BYTE,4)) {
+    TypeVect::VECTS = TypeVect::make(T_BYTE, 4);
+    MachNode *spillVectS = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS));
+    idealreg2regmask[Op_VecS] = &spillVectS->out_RegMask();
+  }
+  if (Matcher::vector_size_supported(T_FLOAT,2)) {
+    MachNode *spillVectD = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTD));
+    idealreg2regmask[Op_VecD] = &spillVectD->out_RegMask();
+  }
+  if (Matcher::vector_size_supported(T_FLOAT,4)) {
+    MachNode *spillVectX = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTX));
+    idealreg2regmask[Op_VecX] = &spillVectX->out_RegMask();
+  }
+  if (Matcher::vector_size_supported(T_FLOAT,8)) {
+    MachNode *spillVectY = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTY));
+    idealreg2regmask[Op_VecY] = &spillVectY->out_RegMask();
+  }
 }
 
 #ifdef ASSERT
@@ -1063,7 +1115,7 @@
     // that is killed by the call.
     if( warped >= out_arg_limit_per_call )
       out_arg_limit_per_call = OptoReg::add(warped,1);
-    if (!RegMask::can_represent(warped)) {
+    if (!RegMask::can_represent_arg(warped)) {
       C->record_method_not_compilable_all_tiers("unsupported calling sequence");
       return OptoReg::Bad;
     }
@@ -1231,8 +1283,9 @@
   if (is_method_handle_invoke) {
     // Kill some extra stack space in case method handles want to do
     // a little in-place argument insertion.
+    // FIXME: Is this still necessary?
     int regs_per_word  = NOT_LP64(1) LP64_ONLY(2); // %%% make a global const!
-    out_arg_limit_per_call += MethodHandlePushLimit * regs_per_word;
+    out_arg_limit_per_call += methodOopDesc::extra_stack_entries() * regs_per_word;
     // Do not update mcall->_argsize because (a) the extra space is not
     // pushed as arguments and (b) _argsize is dead (not used anywhere).
   }
@@ -1251,7 +1304,7 @@
     // this killed area.
     uint r_cnt = mcall->tf()->range()->cnt();
     MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
-    if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) {
+    if (!RegMask::can_represent_arg(OptoReg::Name(out_arg_limit_per_call-1))) {
       C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence");
     } else {
       for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
--- a/src/share/vm/opto/matcher.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/matcher.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -250,10 +250,21 @@
   static const bool convL2FSupported(void);
 
   // Vector width in bytes
-  static const uint vector_width_in_bytes(void);
+  static const int vector_width_in_bytes(BasicType bt);
+
+  // Limits on vector size (number of elements).
+  static const int max_vector_size(const BasicType bt);
+  static const int min_vector_size(const BasicType bt);
+  static const bool vector_size_supported(const BasicType bt, int size) {
+    return (Matcher::max_vector_size(bt) >= size &&
+            Matcher::min_vector_size(bt) <= size);
+  }
 
   // Vector ideal reg
-  static const uint vector_ideal_reg(void);
+  static const int vector_ideal_reg(int len);
+
+  // CPU supports misaligned vector stores/loads.
+  static const bool misaligned_vectors_ok();
 
   // Used to determine a "low complexity" 64-bit constant.  (Zero is simple.)
   // The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI).
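
The Matcher API above replaces the single global vector_width_in_bytes() with per-type size queries. A hedged sketch of how a caller might compose them; can_vectorize, bt and num_elems are illustrative names, and the element-size helper type2aelembytes() is assumed from elsewhere in HotSpot:

  static bool can_vectorize(BasicType bt, int num_elems) {
    // Element count within [min_vector_size(bt), max_vector_size(bt)]?
    if (!Matcher::vector_size_supported(bt, num_elems))  return false;
    // Map the byte width to an ideal register class (VecS/D/X/Y).
    int ireg = Matcher::vector_ideal_reg(num_elems * type2aelembytes(bt));
    return RegMask::is_vector(ireg);
  }
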
--- a/src/share/vm/opto/memnode.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/memnode.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1543,6 +1543,7 @@
     // had an original form like p1:(AddP x x (LShiftL quux 3)), where the
     // expression (LShiftL quux 3) independently optimized to the constant 8.
     if ((t->isa_int() == NULL) && (t->isa_long() == NULL)
+        && (_type->isa_vect() == NULL)
         && Opcode() != Op_LoadKlass && Opcode() != Op_LoadNKlass) {
       // t might actually be lower than _type, if _type is a unique
       // concrete subclass of abstract class t.
--- a/src/share/vm/opto/memnode.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/memnode.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -636,17 +636,6 @@
   virtual bool depends_only_on_test() const { return true; }
 };
 
-//------------------------------LoadLLockedNode---------------------------------
-// Load-locked a pointer from memory (either object or array).
-// On Sparc & Intel this is implemented as a normal long load.
-class LoadLLockedNode : public LoadLNode {
-public:
-  LoadLLockedNode( Node *c, Node *mem, Node *adr )
-    : LoadLNode(c,mem,adr,TypeRawPtr::BOTTOM, TypeLong::LONG) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_StoreLConditional; }
-};
-
 //------------------------------SCMemProjNode---------------------------------------
 // This class defines a projection of the memory state of a store conditional node.
 // These nodes return a value, but also update memory.
--- a/src/share/vm/opto/mulnode.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/mulnode.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,7 +41,9 @@
 class MulNode : public Node {
   virtual uint hash() const;
 public:
-  MulNode( Node *in1, Node *in2 ): Node(0,in1,in2) {}
+  MulNode( Node *in1, Node *in2 ): Node(0,in1,in2) {
+    init_class_id(Class_Mul);
+  }
 
   // Handle algebraic identities here.  If we have an identity, return the Node
   // we are equivalent to.  We look for "add of zero" as an identity.
--- a/src/share/vm/opto/node.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/node.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1576,6 +1576,9 @@
     } else {
       tty->print("no type");
     }
+  } else if (t->isa_vect() && this->is_MachSpillCopy()) {
+    // Dump the MachSpillCopy vector type.
+    t->dump();
   }
   if (is_new) {
     debug_only(dump_orig(debug_orig()));
--- a/src/share/vm/opto/node.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/node.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -100,6 +100,7 @@
 class MemBarStoreStoreNode;
 class MemNode;
 class MergeMemNode;
+class MulNode;
 class MultiNode;
 class MultiBranchNode;
 class NeverBranchNode;
@@ -133,8 +134,8 @@
 class TypeNode;
 class UnlockNode;
 class VectorNode;
-class VectorLoadNode;
-class VectorStoreNode;
+class LoadVectorNode;
+class StoreVectorNode;
 class VectorSet;
 typedef void (*NFunc)(Node&,void*);
 extern "C" {
@@ -362,7 +363,7 @@
 #endif
 
   // Reference to the i'th input Node.  Error if out of bounds.
-  Node* in(uint i) const { assert(i < _max,"oob"); return _in[i]; }
+  Node* in(uint i) const { assert(i < _max, err_msg_res("oob: i=%d, _max=%d", i, _max)); return _in[i]; }
   // Reference to the i'th output Node.  Error if out of bounds.
   // Use this accessor sparingly.  We are trying to use iterators instead.
   Node* raw_out(uint i) const { assert(i < _outcnt,"oob"); return _out[i]; }
@@ -393,7 +394,7 @@
   void ins_req( uint i, Node *n ); // Insert a NEW required input
   void set_req( uint i, Node *n ) {
     assert( is_not_dead(n), "can not use dead node");
-    assert( i < _cnt, "oob");
+    assert( i < _cnt, err_msg_res("oob: i=%d, _cnt=%d", i, _cnt));
     assert( !VerifyHashTableKeys || _hash_lock == 0,
             "remove node from hash table before modifying it");
     Node** p = &_in[i];    // cache this._in, across the del_out call
@@ -609,9 +610,9 @@
 
     DEFINE_CLASS_ID(Mem,   Node, 4)
       DEFINE_CLASS_ID(Load,  Mem, 0)
-        DEFINE_CLASS_ID(VectorLoad,  Load, 0)
+        DEFINE_CLASS_ID(LoadVector,  Load, 0)
       DEFINE_CLASS_ID(Store, Mem, 1)
-        DEFINE_CLASS_ID(VectorStore, Store, 0)
+        DEFINE_CLASS_ID(StoreVector, Store, 0)
       DEFINE_CLASS_ID(LoadStore, Mem, 2)
 
     DEFINE_CLASS_ID(Region, Node, 5)
@@ -629,8 +630,9 @@
     DEFINE_CLASS_ID(AddP,     Node, 9)
     DEFINE_CLASS_ID(BoxLock,  Node, 10)
     DEFINE_CLASS_ID(Add,      Node, 11)
-    DEFINE_CLASS_ID(Vector,   Node, 12)
-    DEFINE_CLASS_ID(ClearArray, Node, 13)
+    DEFINE_CLASS_ID(Mul,      Node, 12)
+    DEFINE_CLASS_ID(Vector,   Node, 13)
+    DEFINE_CLASS_ID(ClearArray, Node, 14)
 
     _max_classes  = ClassMask_ClearArray
   };
@@ -752,6 +754,7 @@
   DEFINE_CLASS_QUERY(MemBar)
   DEFINE_CLASS_QUERY(MemBarStoreStore)
   DEFINE_CLASS_QUERY(MergeMem)
+  DEFINE_CLASS_QUERY(Mul)
   DEFINE_CLASS_QUERY(Multi)
   DEFINE_CLASS_QUERY(MultiBranch)
   DEFINE_CLASS_QUERY(Parm)
@@ -767,8 +770,8 @@
   DEFINE_CLASS_QUERY(Sub)
   DEFINE_CLASS_QUERY(Type)
   DEFINE_CLASS_QUERY(Vector)
-  DEFINE_CLASS_QUERY(VectorLoad)
-  DEFINE_CLASS_QUERY(VectorStore)
+  DEFINE_CLASS_QUERY(LoadVector)
+  DEFINE_CLASS_QUERY(StoreVector)
   DEFINE_CLASS_QUERY(Unlock)
 
   #undef DEFINE_CLASS_QUERY
--- a/src/share/vm/opto/opcodes.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/opcodes.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -38,6 +38,10 @@
   "RegD",
   "RegL",
   "RegFlags",
+  "VecS",
+  "VecD",
+  "VecX",
+  "VecY",
   "_last_machine_leaf",
 #include "classes.hpp"
   "_last_class_name",
--- a/src/share/vm/opto/opcodes.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/opcodes.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,6 +36,10 @@
   macro(RegF)                   // Machine float   register
   macro(RegD)                   // Machine double  register
   macro(RegL)                   // Machine long    register
+  macro(VecS)                   // Machine vectors register
+  macro(VecD)                   // Machine vectord register
+  macro(VecX)                   // Machine vectorx register
+  macro(VecY)                   // Machine vectory register
   macro(RegFlags)               // Machine flags   register
   _last_machine_leaf,           // Split between regular opcodes and machine
 #include "classes.hpp"
--- a/src/share/vm/opto/output.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/output.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1871,6 +1871,8 @@
   if (!do_scheduling())
     return;
 
+  assert(MaxVectorSize <= 8, "scheduling code works only with pairs");
+
   NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
 
   // Create a data structure for all the scheduling information
--- a/src/share/vm/opto/parse.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/parse.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -84,7 +84,7 @@
   static const char* check_can_parse(ciMethod* callee);
 
   static InlineTree* build_inline_tree_root();
-  static InlineTree* find_subtree_from_root(InlineTree* root, JVMState* jvms, ciMethod* callee, bool create_if_not_found = false);
+  static InlineTree* find_subtree_from_root(InlineTree* root, JVMState* jvms, ciMethod* callee);
 
   // For temporary (stack-allocated, stateless) ilts:
   InlineTree(Compile* c, ciMethod* callee_method, JVMState* caller_jvms, float site_invoke_ratio, int max_inline_level);
@@ -527,6 +527,9 @@
   int     repush_if_args();
   void    adjust_map_after_if(BoolTest::mask btest, Node* c, float prob,
                               Block* path, Block* other_path);
+  void    sharpen_type_after_if(BoolTest::mask btest,
+                                Node* con, const Type* tcon,
+                                Node* val, const Type* tval);
   IfNode* jump_if_fork_int(Node* a, Node* b, BoolTest::mask mask);
   Node*   jump_if_join(Node* iffalse, Node* iftrue);
   void    jump_if_true_fork(IfNode *ifNode, int dest_bci_if_true, int prof_table_index);
--- a/src/share/vm/opto/parse1.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/parse1.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -398,7 +398,7 @@
   if (PrintCompilation || PrintOpto) {
     // Make sure I have an inline tree, so I can print messages about it.
     JVMState* ilt_caller = is_osr_parse() ? caller->caller() : caller;
-    InlineTree::find_subtree_from_root(C->ilt(), ilt_caller, parse_method, true);
+    InlineTree::find_subtree_from_root(C->ilt(), ilt_caller, parse_method);
   }
   _max_switch_depth = 0;
   _est_switch_depth = 0;
@@ -1398,8 +1398,8 @@
 #ifdef ASSERT
     int pre_bc_sp = sp();
     int inputs, depth;
-    bool have_se = !stopped() && compute_stack_effects(inputs, depth);
-    assert(!have_se || pre_bc_sp >= inputs, "have enough stack to execute this BC");
+    bool have_se = !stopped() && compute_stack_effects(inputs, depth, /*for_parse*/ true);
+    assert(!have_se || pre_bc_sp >= inputs, err_msg_res("have enough stack to execute this BC: pre_bc_sp=%d, inputs=%d", pre_bc_sp, inputs));
 #endif //ASSERT
 
     do_one_bytecode();
--- a/src/share/vm/opto/parse2.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/parse2.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1233,6 +1233,71 @@
   if (!have_con)                        // remaining adjustments need a con
     return;
 
+  sharpen_type_after_if(btest, con, tcon, val, tval);
+}
+
+
+static Node* extract_obj_from_klass_load(PhaseGVN* gvn, Node* n) {
+  Node* ldk;
+  if (n->is_DecodeN()) {
+    if (n->in(1)->Opcode() != Op_LoadNKlass) {
+      return NULL;
+    } else {
+      ldk = n->in(1);
+    }
+  } else if (n->Opcode() != Op_LoadKlass) {
+    return NULL;
+  } else {
+    ldk = n;
+  }
+  assert(ldk != NULL && ldk->is_Load(), "should have found a LoadKlass or LoadNKlass node");
+
+  Node* adr = ldk->in(MemNode::Address);
+  intptr_t off = 0;
+  Node* obj = AddPNode::Ideal_base_and_offset(adr, gvn, off);
+  if (obj == NULL || off != oopDesc::klass_offset_in_bytes()) // loading oopDesc::_klass?
+    return NULL;
+  const TypePtr* tp = gvn->type(obj)->is_ptr();
+  if (tp == NULL || !(tp->isa_instptr() || tp->isa_aryptr())) // is obj a Java object ptr?
+    return NULL;
+
+  return obj;
+}
+
+void Parse::sharpen_type_after_if(BoolTest::mask btest,
+                                  Node* con, const Type* tcon,
+                                  Node* val, const Type* tval) {
+  // Look for opportunities to sharpen the type of a node
+  // whose klass is compared with a constant klass.
+  if (btest == BoolTest::eq && tcon->isa_klassptr()) {
+    Node* obj = extract_obj_from_klass_load(&_gvn, val);
+    const TypeOopPtr* con_type = tcon->isa_klassptr()->as_instance_type();
+    if (obj != NULL && (con_type->isa_instptr() || con_type->isa_aryptr())) {
+       // Found:
+       //   Bool(CmpP(LoadKlass(obj._klass), ConP(Foo.klass)), [eq])
+       // or the narrowOop equivalent.
+       const Type* obj_type = _gvn.type(obj);
+       const TypeOopPtr* tboth = obj_type->join(con_type)->isa_oopptr();
+       if (tboth != NULL && tboth->klass_is_exact() && tboth != obj_type &&
+           tboth->higher_equal(obj_type)) {
+          // obj has to be of the exact type Foo if the CmpP succeeds.
+          int obj_in_map = map()->find_edge(obj);
+          JVMState* jvms = this->jvms();
+          if (obj_in_map >= 0 &&
+              (jvms->is_loc(obj_in_map) || jvms->is_stk(obj_in_map))) {
+            TypeNode* ccast = new (C, 2) CheckCastPPNode(control(), obj, tboth);
+            const Type* tcc = ccast->as_Type()->type();
+            assert(tcc != obj_type && tcc->higher_equal(obj_type), "must improve");
+            // Delay transform() call to allow recovery of pre-cast value
+            // at the control merge.
+            _gvn.set_type_bottom(ccast);
+            record_for_igvn(ccast);
+            // Here's the payoff.
+            replace_in_map(obj, ccast);
+          }
+       }
+    }
+  }
 
   int val_in_map = map()->find_edge(val);
   if (val_in_map < 0)  return;          // replace_in_map would be useless
@@ -1265,6 +1330,7 @@
         // Exclude tests vs float/double 0 as these could be
         // either +0 or -0.  Just because you are equal to +0
         // doesn't mean you ARE +0!
+        // Note: the following code also replaces Long and Oop values.
         if ((!tf || tf->_f != 0.0) &&
             (!td || td->_d != 0.0))
           cast = con;                   // Replace non-constant val by con.
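
The new Parse::sharpen_type_after_if() above exploits an exact klass-compare guard (the bytecode shape produced by, e.g., x.getClass() == Foo.class) to strengthen the object's type on the taken branch. Its lattice test, restated as a sketch with explanatory comments:

  const Type*       obj_type = _gvn.type(obj);
  const TypeOopPtr* tboth    = obj_type->join(con_type)->isa_oopptr();
  bool sharpen = tboth != NULL
              && tboth->klass_is_exact()        // the guard pins one exact klass
              && tboth != obj_type              // strictly more precise than before
              && tboth->higher_equal(obj_type); // yet still consistent with it
  // When this holds, a CheckCastPPNode to tboth replaces obj in the map,
  // so code dominated by the taken branch sees the exact type.
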
--- a/src/share/vm/opto/phase.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/phase.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,8 +39,9 @@
 
 // The next timers used for LogCompilation
 elapsedTimer Phase::_t_parser;
-elapsedTimer Phase::_t_escapeAnalysis;
 elapsedTimer Phase::_t_optimizer;
+elapsedTimer   Phase::_t_escapeAnalysis;
+elapsedTimer     Phase::_t_connectionGraph;
 elapsedTimer   Phase::_t_idealLoop;
 elapsedTimer   Phase::_t_ccp;
 elapsedTimer Phase::_t_matcher;
@@ -51,6 +52,7 @@
 elapsedTimer Phase::_t_graphReshaping;
 elapsedTimer Phase::_t_scheduler;
 elapsedTimer Phase::_t_blockOrdering;
+elapsedTimer Phase::_t_macroEliminate;
 elapsedTimer Phase::_t_macroExpand;
 elapsedTimer Phase::_t_peephole;
 elapsedTimer Phase::_t_codeGeneration;
@@ -104,6 +106,8 @@
     if (DoEscapeAnalysis) {
       // EA is part of Optimizer.
       tty->print_cr ("      escape analysis: %3.3f sec", Phase::_t_escapeAnalysis.seconds());
+      tty->print_cr ("        connection graph: %3.3f sec", Phase::_t_connectionGraph.seconds());
+      tty->print_cr ("      macroEliminate : %3.3f sec", Phase::_t_macroEliminate.seconds());
     }
     tty->print_cr ("      iterGVN        : %3.3f sec", Phase::_t_iterGVN.seconds());
     tty->print_cr ("      idealLoop      : %3.3f sec", Phase::_t_idealLoop.seconds());
@@ -112,9 +116,10 @@
     tty->print_cr ("      iterGVN2       : %3.3f sec", Phase::_t_iterGVN2.seconds());
     tty->print_cr ("      macroExpand    : %3.3f sec", Phase::_t_macroExpand.seconds());
     tty->print_cr ("      graphReshape   : %3.3f sec", Phase::_t_graphReshaping.seconds());
-    double optimizer_subtotal = Phase::_t_iterGVN.seconds() +
+    double optimizer_subtotal = Phase::_t_iterGVN.seconds() + Phase::_t_iterGVN2.seconds() +
+      Phase::_t_escapeAnalysis.seconds() + Phase::_t_macroEliminate.seconds() +
       Phase::_t_idealLoop.seconds() + Phase::_t_ccp.seconds() +
-      Phase::_t_graphReshaping.seconds();
+      Phase::_t_macroExpand.seconds() + Phase::_t_graphReshaping.seconds();
     double percent_of_optimizer = ((optimizer_subtotal == 0.0) ? 0.0 : (optimizer_subtotal / Phase::_t_optimizer.seconds() * 100.0));
     tty->print_cr ("      subtotal       : %3.3f sec,  %3.2f %%", optimizer_subtotal, percent_of_optimizer);
   }
--- a/src/share/vm/opto/phase.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/phase.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -72,8 +72,12 @@
 
 // The next timers used for LogCompilation
   static elapsedTimer _t_parser;
-  static elapsedTimer _t_escapeAnalysis;
   static elapsedTimer _t_optimizer;
+public:
+  // ConnectionGraph can't be a Phase since it is used after EA is done.
+  static elapsedTimer   _t_escapeAnalysis;
+  static elapsedTimer     _t_connectionGraph;
+protected:
   static elapsedTimer   _t_idealLoop;
   static elapsedTimer   _t_ccp;
   static elapsedTimer _t_matcher;
@@ -84,6 +88,7 @@
   static elapsedTimer _t_graphReshaping;
   static elapsedTimer _t_scheduler;
   static elapsedTimer _t_blockOrdering;
+  static elapsedTimer _t_macroEliminate;
   static elapsedTimer _t_macroExpand;
   static elapsedTimer _t_peephole;
   static elapsedTimer _t_codeGeneration;
--- a/src/share/vm/opto/phaseX.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/phaseX.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -757,6 +757,7 @@
 //------------------------------PhaseIterGVN-----------------------------------
 // Initialize hash table to fresh and clean for +VerifyOpto
 PhaseIterGVN::PhaseIterGVN( PhaseIterGVN *igvn, const char *dummy ) : PhaseGVN(igvn,dummy), _worklist( ),
+                                                                      _stack(C->unique() >> 1),
                                                                       _delay_transform(false) {
 }
 
@@ -764,6 +765,7 @@
 // Initialize with previous PhaseIterGVN info; used by PhaseCCP
 PhaseIterGVN::PhaseIterGVN( PhaseIterGVN *igvn ) : PhaseGVN(igvn),
                                                    _worklist( igvn->_worklist ),
+                                                   _stack( igvn->_stack ),
                                                    _delay_transform(igvn->_delay_transform)
 {
 }
@@ -772,6 +774,7 @@
 // Initialize with previous PhaseGVN info from Parser
 PhaseIterGVN::PhaseIterGVN( PhaseGVN *gvn ) : PhaseGVN(gvn),
                                               _worklist(*C->for_igvn()),
+                                              _stack(C->unique() >> 1),
                                               _delay_transform(false)
 {
   uint max;
@@ -1138,51 +1141,77 @@
 // Kill a globally dead Node.  All uses are also globally dead and are
 // aggressively trimmed.
 void PhaseIterGVN::remove_globally_dead_node( Node *dead ) {
-  assert(dead != C->root(), "killing root, eh?");
-  if (dead->is_top())  return;
-  NOT_PRODUCT( set_progress(); )
-  // Remove from iterative worklist
-  _worklist.remove(dead);
-  if (!dead->is_Con()) { // Don't kill cons but uses
-    // Remove from hash table
-    _table.hash_delete( dead );
-    // Smash all inputs to 'dead', isolating him completely
-    for( uint i = 0; i < dead->req(); i++ ) {
-      Node *in = dead->in(i);
-      if( in ) {                 // Points to something?
-        dead->set_req(i,NULL);  // Kill the edge
-        if (in->outcnt() == 0 && in != C->top()) {// Made input go dead?
-          remove_dead_node(in); // Recursively remove
-        } else if (in->outcnt() == 1 &&
-                   in->has_special_unique_user()) {
-          _worklist.push(in->unique_out());
-        } else if (in->outcnt() <= 2 && dead->is_Phi()) {
-          if( in->Opcode() == Op_Region )
-            _worklist.push(in);
-          else if( in->is_Store() ) {
-            DUIterator_Fast imax, i = in->fast_outs(imax);
-            _worklist.push(in->fast_out(i));
-            i++;
-            if(in->outcnt() == 2) {
-              _worklist.push(in->fast_out(i));
-              i++;
+  enum DeleteProgress {
+    PROCESS_INPUTS,
+    PROCESS_OUTPUTS
+  };
+  assert(_stack.is_empty(), "not empty");
+  _stack.push(dead, PROCESS_INPUTS);
+
+  while (_stack.is_nonempty()) {
+    dead = _stack.node();
+    uint progress_state = _stack.index();
+    assert(dead != C->root(), "killing root, eh?");
+    assert(!dead->is_top(), "add check for top when pushing");
+    NOT_PRODUCT( set_progress(); )
+    if (progress_state == PROCESS_INPUTS) {
+      // After following inputs, continue to outputs
+      _stack.set_index(PROCESS_OUTPUTS);
+      // Remove from iterative worklist
+      _worklist.remove(dead);
+      if (!dead->is_Con()) { // Don't kill cons but uses
+        bool recurse = false;
+        // Remove from hash table
+        _table.hash_delete( dead );
+        // Smash all inputs to 'dead', isolating him completely
+        for( uint i = 0; i < dead->req(); i++ ) {
+          Node *in = dead->in(i);
+          if( in ) {                 // Points to something?
+            dead->set_req(i,NULL);  // Kill the edge
+            if (in->outcnt() == 0 && in != C->top()) {// Made input go dead?
+              _stack.push(in, PROCESS_INPUTS); // Recursively remove
+              recurse = true;
+            } else if (in->outcnt() == 1 &&
+                       in->has_special_unique_user()) {
+              _worklist.push(in->unique_out());
+            } else if (in->outcnt() <= 2 && dead->is_Phi()) {
+              if( in->Opcode() == Op_Region )
+                _worklist.push(in);
+              else if( in->is_Store() ) {
+                DUIterator_Fast imax, i = in->fast_outs(imax);
+                _worklist.push(in->fast_out(i));
+                i++;
+                if(in->outcnt() == 2) {
+                  _worklist.push(in->fast_out(i));
+                  i++;
+                }
+                assert(!(i < imax), "sanity");
+              }
             }
-            assert(!(i < imax), "sanity");
           }
         }
+
+        if (dead->is_macro()) {
+          C->remove_macro_node(dead);
+        }
+
+        if (recurse) {
+          continue;
+        }
       }
     }
 
-    if (dead->is_macro()) {
-      C->remove_macro_node(dead);
+    // Aggressively kill globally dead uses
+    // (Rather than pushing all the outs at once, we push one at a time,
+    // plus the parent to resume later, because of the indefinite number
+    // of edge deletions per loop trip.)
+    if (dead->outcnt() > 0) {
+      // Recursively remove
+      _stack.push(dead->raw_out(0), PROCESS_INPUTS);
+    } else {
+      _stack.pop();
     }
   }
-  // Aggressively kill globally dead uses
-  // (Cannot use DUIterator_Last because of the indefinite number
-  // of edge deletions per loop trip.)
-  while (dead->outcnt() > 0) {
-    remove_globally_dead_node(dead->raw_out(0));
-  }
 }
 
 //------------------------------subsume_node-----------------------------------
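
The remove_globally_dead_node() rewrite above trades unbounded recursion for an explicit Node_Stack, so long chains of dead nodes cannot overflow the native stack. The shape of that conversion, as a condensed sketch (kill_iteratively is an illustrative name):

  enum VisitState { PROCESS_INPUTS, PROCESS_OUTPUTS };

  static void kill_iteratively(Node_Stack& stack, Node* dead) {
    stack.push(dead, PROCESS_INPUTS);
    while (stack.is_nonempty()) {
      Node* n     = stack.node();          // current entry, not yet popped
      uint  state = stack.index();
      if (state == PROCESS_INPUTS) {
        stack.set_index(PROCESS_OUTPUTS);  // resume point for n
        // ... detach n's inputs here; push any input whose outcnt()
        //     dropped to zero with PROCESS_INPUTS, then continue ...
      }
      if (n->outcnt() > 0) {
        // One dead use at a time; n stays on the stack to resume later.
        stack.push(n->raw_out(0), PROCESS_INPUTS);
      } else {
        stack.pop();                       // n is fully processed
      }
    }
  }
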
--- a/src/share/vm/opto/phaseX.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/phaseX.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -193,6 +193,7 @@
   // If you want the type of a very new (untransformed) node,
   // you must use type_or_null, and test the result for NULL.
   const Type* type(const Node* n) const {
+    assert(n != NULL, "must not be null");
     const Type* t = _types.fast_lookup(n->_idx);
     assert(t != NULL, "must set before get");
     return t;
@@ -403,6 +404,8 @@
   // Subsume users of node 'old' into node 'nn'
   void subsume_node( Node *old, Node *nn );
 
+  Node_Stack _stack;      // Stack used to avoid recursion
+
 protected:
 
   // Idealize new Node 'n' with respect to its inputs and its value
@@ -438,8 +441,8 @@
   // It is significant only for debugging and profiling.
   Node* register_new_node_with_optimizer(Node* n, Node* orig = NULL);
 
-  // Kill a globally dead Node.   It is allowed to have uses which are
-  // assumed dead and left 'in limbo'.
+  // Kill a globally dead Node.  All uses are also globally dead and are
+  // aggressively trimmed.
   void remove_globally_dead_node( Node *dead );
 
   // Kill all inputs to a dead node, recursively making more dead nodes.
@@ -460,6 +463,25 @@
     subsume_node(old, nn);
   }
 
+  // Delayed node rehash: remove a node from the hash table and rehash it during
+  // the next optimizing pass.
+  void rehash_node_delayed(Node* n) {
+    hash_delete(n);
+    _worklist.push(n);
+  }
+
+  // Replace ith edge of "n" with "in"
+  void replace_input_of(Node* n, int i, Node* in) {
+    rehash_node_delayed(n);
+    n->set_req(i, in);
+  }
+
+  // Delete ith edge of "n"
+  void delete_input_of(Node* n, int i) {
+    rehash_node_delayed(n);
+    n->del_req(i);
+  }
+
   bool delay_transform() const { return _delay_transform; }
 
   void set_delay_transform(bool delay) {
--- a/src/share/vm/opto/postaloc.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/postaloc.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,13 +27,15 @@
 #include "opto/chaitin.hpp"
 #include "opto/machnode.hpp"
 
-// see if this register kind does not requires two registers
-static bool is_single_register(uint x) {
-#ifdef _LP64
-  return (x != Op_RegD && x != Op_RegL && x != Op_RegP);
-#else
-  return (x != Op_RegD && x != Op_RegL);
-#endif
+// See if this register (or register pair, or vector set) already contains the value.
+static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs,
+                                    Node_List& value) {
+  for (int i = 0; i < n_regs; i++) {
+    OptoReg::Name nreg = OptoReg::add(reg,-i);
+    if (value[nreg] != val)
+      return false;
+  }
+  return true;
 }
 
 //---------------------------may_be_copy_of_callee-----------------------------
@@ -167,9 +169,11 @@
   const RegMask &use_mask = n->in_RegMask(idx);
   bool can_use = ( RegMask::can_represent(def_reg) ? (use_mask.Member(def_reg) != 0)
                                                    : (use_mask.is_AllStack() != 0));
-  // Check for a copy to or from a misaligned pair.
-  can_use = can_use && !use_mask.is_misaligned_Pair() && !def_lrg.mask().is_misaligned_Pair();
-
+  if (!RegMask::is_vector(def->ideal_reg())) {
+    // Check for a copy to or from a misaligned pair.
+    // This is a workaround for misaligned pairs on SPARC.
+    can_use = can_use && !use_mask.is_misaligned_pair() && !def_lrg.mask().is_misaligned_pair();
+  }
   if (!can_use)
     return 0;
 
@@ -263,18 +267,16 @@
     val = skip_copies(n->in(k));
   }
 
-  if( val == x ) return blk_adjust; // No progress?
+  if (val == x) return blk_adjust; // No progress?
 
-  bool single = is_single_register(val->ideal_reg());
+  int n_regs = RegMask::num_registers(val->ideal_reg());
   uint val_idx = n2lidx(val);
   OptoReg::Name val_reg = lrgs(val_idx).reg();
 
   // See if it happens to already be in the correct register!
   // (either Phi's direct register, or the common case of the name
   // never-clobbered original-def register)
-  if( value[val_reg] == val &&
-      // Doubles check both halves
-      ( single || value[val_reg-1] == val ) ) {
+  if (register_contains_value(val, val_reg, n_regs, value)) {
     blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd);
     if( n->in(k) == regnd[val_reg] ) // Success!  Quit trying
       return blk_adjust;
@@ -306,9 +308,10 @@
     }
 
     Node *vv = value[reg];
-    if( !single ) {             // Doubles check for aligned-adjacent pair
-      if( (reg&1)==0 ) continue;  // Wrong half of a pair
-      if( vv != value[reg-1] ) continue; // Not a complete pair
+    if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set
+      uint last = (n_regs-1); // Looking for the last part of a set
+      if ((reg&last) != last) continue; // Wrong part of a set
+      if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value
     }
     if( vv == val ||            // Got a direct hit?
         (t && vv && vv->bottom_type() == t && vv->is_Mach() &&
@@ -526,8 +529,9 @@
       if( pidx ) {
         value.map(preg,phi);
         regnd.map(preg,phi);
-        OptoReg::Name preg_lo = OptoReg::add(preg,-1);
-        if( !is_single_register(phi->ideal_reg()) ) {
+        int n_regs = RegMask::num_registers(phi->ideal_reg());
+        for (int l = 1; l < n_regs; l++) {
+          OptoReg::Name preg_lo = OptoReg::add(preg,-l);
           value.map(preg_lo,phi);
           regnd.map(preg_lo,phi);
         }
@@ -568,13 +572,16 @@
             value.map(ureg,valdef); // record improved reaching-def info
             regnd.map(ureg,   def);
             // Record other half of doubles
-            OptoReg::Name ureg_lo = OptoReg::add(ureg,-1);
-            if( !is_single_register(def->ideal_reg()) &&
-                ( !RegMask::can_represent(ureg_lo) ||
-                  lrgs(useidx).mask().Member(ureg_lo) ) && // Nearly always adjacent
-                !value[ureg_lo] ) {
-              value.map(ureg_lo,valdef); // record improved reaching-def info
-              regnd.map(ureg_lo,   def);
+            uint def_ideal_reg = def->ideal_reg();
+            int n_regs = RegMask::num_registers(def_ideal_reg);
+            for (int l = 1; l < n_regs; l++) {
+              OptoReg::Name ureg_lo = OptoReg::add(ureg,-l);
+              if (!value[ureg_lo] &&
+                  (!RegMask::can_represent(ureg_lo) ||
+                   lrgs(useidx).mask().Member(ureg_lo))) { // Nearly always adjacent
+                value.map(ureg_lo,valdef); // record improved reaching-def info
+                regnd.map(ureg_lo,   def);
+              }
             }
           }
         }
@@ -607,7 +614,8 @@
       }
 
       uint n_ideal_reg = n->ideal_reg();
-      if( is_single_register(n_ideal_reg) ) {
+      int n_regs = RegMask::num_registers(n_ideal_reg);
+      if (n_regs == 1) {
         // If Node 'n' does not change the value mapped by the register,
         // then 'n' is a useless copy.  Do not update the register->node
         // mapping so 'n' will go dead.
@@ -625,6 +633,25 @@
           assert( n->is_Copy(), "" );
           j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
         }
+      } else if (RegMask::is_vector(n_ideal_reg)) {
+        // If Node 'n' does not change the value mapped by the register,
+        // then 'n' is a useless copy.  Do not update the register->node
+        // mapping so 'n' will go dead.
+        if (!register_contains_value(val, nreg, n_regs, value)) {
+          // Update the mapping: record new Node defined by the register
+          regnd.map(nreg,n);
+          // Update mapping for defined *value*, which is the defined
+          // Node after skipping all copies.
+          value.map(nreg,val);
+          for (int l = 1; l < n_regs; l++) {
+            OptoReg::Name nreg_lo = OptoReg::add(nreg,-l);
+            regnd.map(nreg_lo, n );
+            value.map(nreg_lo,val);
+          }
+        } else if (n->is_Copy()) {
+          // Note: a vector can't be a constant and can't be a copy of a callee-save register.
+          j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
+        }
       } else {
         // If the value occupies a register pair, record same info
         // in both registers.
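
The postaloc.cpp changes above generalize the old single-vs-pair boolean into a slot count, so longs, doubles, LP64 pointers and vectors all share one loop. A condensed sketch of the pattern, using the names from the file:

  // 1 slot for int/float, 2 for long/double (and LP64 pointers),
  // 4 or 8 for VecX/VecY; replaces the old is_single_register().
  int n_regs = RegMask::num_registers(val->ideal_reg());
  OptoReg::Name val_reg = lrgs(n2lidx(val)).reg();
  // A value is "in" a register only if all n_regs adjacent slots
  // [val_reg - n_regs + 1 .. val_reg] still map to it:
  if (register_contains_value(val, val_reg, n_regs, value)) {
    // reuse the existing copy instead of inserting a new one
  }
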
--- a/src/share/vm/opto/reg_split.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/reg_split.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -74,12 +74,13 @@
   const RegMask *w_i_mask = w_mask->overlap( *i_mask ) ? w_mask : i_mask;
   const RegMask *w_o_mask;
 
+  int num_regs = RegMask::num_registers(ireg);
+  bool is_vect = RegMask::is_vector(ireg);
   if( w_mask->overlap( *o_mask ) && // Overlap AND
-      ((ireg != Op_RegL && ireg != Op_RegD // Single use or aligned
-#ifdef _LP64
-        && ireg != Op_RegP
-#endif
-         ) || o_mask->is_aligned_Pairs()) ) {
+      ((num_regs == 1) // Single use or aligned
+        ||  is_vect    // or vector
+        || !is_vect && o_mask->is_aligned_pairs()) ) {
+    assert(!is_vect || o_mask->is_aligned_sets(num_regs), "vectors are aligned");
     // Don't come here for mis-aligned doubles
     w_o_mask = w_mask;
   } else {                      // wide ideal mask does not overlap with o_mask
@@ -400,15 +401,17 @@
   // CNC - Turned off 7/8/99, causes too much spilling
   // if( lrg->_is_bound ) return false;
 
+  // Use float pressure numbers for vectors.
+  bool is_float_or_vector = lrg->_is_float || lrg->_is_vector;
   // Not yet reached the high-pressure cutoff point, so low pressure
-  uint hrp_idx = lrg->_is_float ? b->_fhrp_index : b->_ihrp_index;
+  uint hrp_idx = is_float_or_vector ? b->_fhrp_index : b->_ihrp_index;
   if( insidx < hrp_idx ) return false;
   // Register pressure for the block as a whole depends on reg class
-  int block_pres = lrg->_is_float ? b->_freg_pressure : b->_reg_pressure;
+  int block_pres = is_float_or_vector ? b->_freg_pressure : b->_reg_pressure;
   // Bound live ranges will split at the binding points first;
   // Intermediate splits should assume the live range's register set
   // got "freed up" and that num_regs will become INT_PRESSURE.
-  int bound_pres = lrg->_is_float ? FLOATPRESSURE : INTPRESSURE;
+  int bound_pres = is_float_or_vector ? FLOATPRESSURE : INTPRESSURE;
   // Effective register pressure limit.
   int lrg_pres = (lrg->get_invalid_mask_size() > lrg->num_regs())
     ? (lrg->get_invalid_mask_size() >> (lrg->num_regs()-1)) : bound_pres;
@@ -446,9 +449,12 @@
 // USES: If USE is in HRP, split at use to leave main LRG on stack.
 //       Else, hoist LRG back up to register only (ie - split is also DEF)
 // We will compute a new maxlrg as we go
-uint PhaseChaitin::Split( uint maxlrg ) {
+uint PhaseChaitin::Split(uint maxlrg, ResourceArea* split_arena) {
   NOT_PRODUCT( Compile::TracePhase t3("regAllocSplit", &_t_regAllocSplit, TimeCompiler); )
 
+  // Free thread local resources used by this method on exit.
+  ResourceMark rm(split_arena);
+
   uint                 bidx, pidx, slidx, insidx, inpidx, twoidx;
   uint                 non_phi = 1, spill_cnt = 0;
   Node               **Reachblock;
@@ -458,14 +464,17 @@
   bool                 u1, u2, u3;
   Block               *b, *pred;
   PhiNode             *phi;
-  GrowableArray<uint>  lidxs;
+  GrowableArray<uint>  lidxs(split_arena, _maxlrg, 0, 0);
 
   // Array of counters to count splits per live range
-  GrowableArray<uint>  splits;
+  GrowableArray<uint>  splits(split_arena, _maxlrg, 0, 0);
+
+#define NEW_SPLIT_ARRAY(type, size)\
+  (type*) split_arena->allocate_bytes((size) * sizeof(type))
 
   //----------Setup Code----------
   // Create a convenient mapping from lrg numbers to reaches/leaves indices
-  uint *lrg2reach = NEW_RESOURCE_ARRAY( uint, _maxlrg );
+  uint *lrg2reach = NEW_SPLIT_ARRAY( uint, _maxlrg );
   // Keep track of DEFS & Phis for later passes
   defs = new Node_List();
   phis = new Node_List();
@@ -497,15 +506,15 @@
   // a Def is UP or DOWN.  UP means that it should get a register (ie -
   // it is always in LRP regions), and DOWN means that it is probably
   // on the stack (ie - it crosses HRP regions).
-  Node ***Reaches     = NEW_RESOURCE_ARRAY( Node**, _cfg._num_blocks+1 );
-  bool  **UP          = NEW_RESOURCE_ARRAY( bool*, _cfg._num_blocks+1 );
-  Node  **debug_defs  = NEW_RESOURCE_ARRAY( Node*, spill_cnt );
-  VectorSet **UP_entry= NEW_RESOURCE_ARRAY( VectorSet*, spill_cnt );
+  Node ***Reaches     = NEW_SPLIT_ARRAY( Node**, _cfg._num_blocks+1 );
+  bool  **UP          = NEW_SPLIT_ARRAY( bool*, _cfg._num_blocks+1 );
+  Node  **debug_defs  = NEW_SPLIT_ARRAY( Node*, spill_cnt );
+  VectorSet **UP_entry= NEW_SPLIT_ARRAY( VectorSet*, spill_cnt );
 
   // Initialize Reaches & UP
   for( bidx = 0; bidx < _cfg._num_blocks+1; bidx++ ) {
-    Reaches[bidx]     = NEW_RESOURCE_ARRAY( Node*, spill_cnt );
-    UP[bidx]          = NEW_RESOURCE_ARRAY( bool, spill_cnt );
+    Reaches[bidx]     = NEW_SPLIT_ARRAY( Node*, spill_cnt );
+    UP[bidx]          = NEW_SPLIT_ARRAY( bool, spill_cnt );
     Node **Reachblock = Reaches[bidx];
     bool *UPblock     = UP[bidx];
     for( slidx = 0; slidx < spill_cnt; slidx++ ) {
@@ -514,9 +523,11 @@
     }
   }
 
+#undef NEW_SPLIT_ARRAY
+
   // Initialize to array of empty vectorsets
   for( slidx = 0; slidx < spill_cnt; slidx++ )
-    UP_entry[slidx] = new VectorSet(Thread::current()->resource_area());
+    UP_entry[slidx] = new VectorSet(split_arena);
 
   //----------PASS 1----------
   //----------Propagation & Node Insertion Code----------
@@ -794,12 +805,15 @@
                   if( i < n->req() ) break;
                   insert_point--;
                 }
+                uint orig_eidx = b->end_idx();
                 maxlrg = split_DEF( n1, b, insert_point, maxlrg, Reachblock, debug_defs, splits, slidx);
                 // If it wasn't split bail
                 if (!maxlrg) {
                   return 0;
                 }
-                insidx++;
+                // Spill of NULL check mem op goes into the following block.
+                if (b->end_idx() > orig_eidx)
+                  insidx++;
               }
               // This is a new DEF, so update UP
               UPblock[slidx] = false;
@@ -960,7 +974,7 @@
             // Grab register mask info
             const RegMask &dmask = def->out_RegMask();
             const RegMask &umask = n->in_RegMask(inpidx);
-
+            bool is_vect = RegMask::is_vector(def->ideal_reg());
             assert(inpidx < oopoff, "cannot use-split oop map info");
 
             bool dup = UPblock[slidx];
@@ -972,7 +986,7 @@
             if( !umask.is_AllStack() &&
                 (int)umask.Size() <= lrgs(useidx).num_regs() &&
                 (!def->rematerialize() ||
-                 umask.is_misaligned_Pair())) {
+                 !is_vect && umask.is_misaligned_pair())) {
               // These need a Split regardless of overlap or pressure
               // SPLIT - NO DEF - NO CISC SPILL
               maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
@@ -1123,10 +1137,12 @@
         // Grab UP info for DEF
         const RegMask &dmask = n->out_RegMask();
         bool defup = dmask.is_UP();
+        int ireg = n->ideal_reg();
+        bool is_vect = RegMask::is_vector(ireg);
         // Only split at Def if this is a HRP block or bound (and spilled once)
         if( !n->rematerialize() &&
-            (((dmask.is_bound1() || dmask.is_bound2() || dmask.is_misaligned_Pair()) &&
-             (deflrg._direct_conflict || deflrg._must_spill)) ||
+            (((dmask.is_bound(ireg) || !is_vect && dmask.is_misaligned_pair()) &&
+              (deflrg._direct_conflict || deflrg._must_spill)) ||
              // Check for LRG being up in a register and we are inside a high
              // pressure area.  Spill it down immediately.
              (defup && is_high_pressure(b,&deflrg,insidx))) ) {
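
The hunks above thread a dedicated split_arena through PhaseChaitin::Split() and
guard it with a ResourceMark, so the splitter's temporary arrays are freed when
the phase returns instead of piling up in the thread's resource area. A minimal
standalone sketch of that mark/release idiom, using a simplified bump arena
rather than HotSpot's real ResourceArea/ResourceMark classes:

#include <cstddef>
#include <cstdlib>
#include <vector>

// Simplified arena: HotSpot's ResourceArea is chunked, but the idea of
// releasing everything allocated after a mark is the same.
class Arena {
  std::vector<char*> _blocks;
public:
  ~Arena() { for (char* b : _blocks) std::free(b); }
  void* allocate_bytes(std::size_t n) {
    char* p = static_cast<char*>(std::malloc(n));
    _blocks.push_back(p);
    return p;
  }
  std::size_t mark() const { return _blocks.size(); }
  void release_to(std::size_t m) {      // free everything allocated after m
    while (_blocks.size() > m) { std::free(_blocks.back()); _blocks.pop_back(); }
  }
};

// Scoped mark, like ResourceMark(split_arena): frees the arena tail on exit.
class ScopedMark {
  Arena&      _a;
  std::size_t _m;
public:
  explicit ScopedMark(Arena& a) : _a(a), _m(a.mark()) {}
  ~ScopedMark() { _a.release_to(_m); }
};

static unsigned split_like_phase(Arena* split_arena, unsigned maxlrg) {
  ScopedMark rm(*split_arena);          // everything below dies on return
  unsigned* lrg2reach = static_cast<unsigned*>(
      split_arena->allocate_bytes(maxlrg * sizeof(unsigned)));
  for (unsigned i = 0; i < maxlrg; i++) lrg2reach[i] = 0;
  return maxlrg;                        // arrays released by ~ScopedMark here
}

int main() {
  Arena arena;
  return (int)(split_like_phase(&arena, 8) - 8);   // 0 on success
}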
--- a/src/share/vm/opto/regmask.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/regmask.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -129,11 +129,34 @@
   0
 );
 
+//=============================================================================
+bool RegMask::is_vector(uint ireg) {
+  return (ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY);
+}
+
+int RegMask::num_registers(uint ireg) {
+  switch(ireg) {
+    case Op_VecY:
+      return 8;
+    case Op_VecX:
+      return 4;
+    case Op_VecD:
+    case Op_RegD:
+    case Op_RegL:
+#ifdef _LP64
+    case Op_RegP:
+#endif
+      return 2;
+  }
+  // Op_VecS and the rest of the ideal registers.
+  return 1;
+}
+
 //------------------------------find_first_pair--------------------------------
 // Find the lowest-numbered register pair in the mask.  Return the
 // HIGHEST register number in the pair, or BAD if no pairs.
 OptoReg::Name RegMask::find_first_pair() const {
-  VerifyPairs();
+  verify_pairs();
   for( int i = 0; i < RM_SIZE; i++ ) {
     if( _A[i] ) {               // Found some bits
       int bit = _A[i] & -_A[i]; // Extract low bit
@@ -146,30 +169,30 @@
 
 //------------------------------ClearToPairs-----------------------------------
 // Clear out partial bits; leave only bit pairs
-void RegMask::ClearToPairs() {
+void RegMask::clear_to_pairs() {
   for( int i = 0; i < RM_SIZE; i++ ) {
     int bits = _A[i];
     bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair
     bits |= (bits>>1);          // Smear 1 hi-bit into a pair
     _A[i] = bits;
   }
-  VerifyPairs();
+  verify_pairs();
 }
 
 //------------------------------SmearToPairs-----------------------------------
 // Smear out partial bits; leave only bit pairs
-void RegMask::SmearToPairs() {
+void RegMask::smear_to_pairs() {
   for( int i = 0; i < RM_SIZE; i++ ) {
     int bits = _A[i];
     bits |= ((bits & 0x55555555)<<1); // Smear lo bit hi per pair
     bits |= ((bits & 0xAAAAAAAA)>>1); // Smear hi bit lo per pair
     _A[i] = bits;
   }
-  VerifyPairs();
+  verify_pairs();
 }
 
 //------------------------------is_aligned_pairs-------------------------------
-bool RegMask::is_aligned_Pairs() const {
+bool RegMask::is_aligned_pairs() const {
   // Assert that the register mask contains only bit pairs.
   for( int i = 0; i < RM_SIZE; i++ ) {
     int bits = _A[i];
@@ -204,7 +227,7 @@
 
 //------------------------------is_bound2--------------------------------------
 // Return TRUE if the mask contains an adjacent pair of bits and no other bits.
-int RegMask::is_bound2() const {
+int RegMask::is_bound_pair() const {
   if( is_AllStack() ) return false;
 
   int bit = -1;                 // Set to hold the one bit allowed
@@ -226,6 +249,132 @@
   return true;
 }
 
+static int low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 };
+//------------------------------find_first_set---------------------------------
+// Find the lowest-numbered register set in the mask.  Return the
+// HIGHEST register number in the set, or BAD if no sets.
+// Works also for size 1.
+OptoReg::Name RegMask::find_first_set(int size) const {
+  verify_sets(size);
+  for (int i = 0; i < RM_SIZE; i++) {
+    if (_A[i]) {                // Found some bits
+      int bit = _A[i] & -_A[i]; // Extract low bit
+      // Convert to bit number, return hi bit of the set
+      return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1));
+    }
+  }
+  return OptoReg::Bad;
+}
+
+//------------------------------clear_to_sets----------------------------------
+// Clear out partial bits; leave only aligned adjacent bit pairs
+void RegMask::clear_to_sets(int size) {
+  if (size == 1) return;
+  assert(2 <= size && size <= 8, "update low bits table");
+  assert(is_power_of_2(size), "sanity");
+  int low_bits_mask = low_bits[size>>2];
+  for (int i = 0; i < RM_SIZE; i++) {
+    int bits = _A[i];
+    int sets = (bits & low_bits_mask);
+    for (int j = 1; j < size; j++) {
+      sets = (bits & (sets<<1)); // filter bits which produce whole sets
+    }
+    sets |= (sets>>1);           // Smear 1 hi-bit into a set
+    if (size > 2) {
+      sets |= (sets>>2);         // Smear 2 hi-bits into a set
+      if (size > 4) {
+        sets |= (sets>>4);       // Smear 4 hi-bits into a set
+      }
+    }
+    _A[i] = sets;
+  }
+  verify_sets(size);
+}
+
+//------------------------------smear_to_sets----------------------------------
+// Smear out partial bits to aligned adjacent bit sets
+void RegMask::smear_to_sets(int size) {
+  if (size == 1) return;
+  assert(2 <= size && size <= 8, "update low bits table");
+  assert(is_power_of_2(size), "sanity");
+  int low_bits_mask = low_bits[size>>2];
+  for (int i = 0; i < RM_SIZE; i++) {
+    int bits = _A[i];
+    int sets = 0;
+    for (int j = 0; j < size; j++) {
+      sets |= (bits & low_bits_mask);  // collect partial bits
+      bits  = bits>>1;
+    }
+    sets |= (sets<<1);           // Smear 1 lo-bit  into a set
+    if (size > 2) {
+      sets |= (sets<<2);         // Smear 2 lo-bits into a set
+      if (size > 4) {
+        sets |= (sets<<4);       // Smear 4 lo-bits into a set
+      }
+    }
+    _A[i] = sets;
+  }
+  verify_sets(size);
+}
+
+//------------------------------is_aligned_set--------------------------------
+bool RegMask::is_aligned_sets(int size) const {
+  if (size == 1) return true;
+  assert(2 <= size && size <= 8, "update low bits table");
+  assert(is_power_of_2(size), "sanity");
+  int low_bits_mask = low_bits[size>>2];
+  // Assert that the register mask contains only bit sets.
+  for (int i = 0; i < RM_SIZE; i++) {
+    int bits = _A[i];
+    while (bits) {              // Check bits for whole sets
+      int bit = bits & -bits;   // Extract low bit
+      // A low bit outside the aligned positions means the set is misaligned.
+      if ((bit & low_bits_mask) == 0) return false;
+      // Do extra work since (bit << size) may overflow.
+      int hi_bit = bit << (size-1); // high bit
+      int set = hi_bit + ((hi_bit-1) & ~(bit-1));
+      // Check for aligned adjacent bits in this set
+      if ((bits & set) != set) return false;
+      bits -= set;  // Remove this set
+    }
+  }
+  return true;
+}
+
+//------------------------------is_bound_set-----------------------------------
+// Return TRUE if the mask contains one adjacent set of bits and no other bits.
+// Works also for size 1.
+int RegMask::is_bound_set(int size) const {
+  if( is_AllStack() ) return false;
+  assert(1 <= size && size <= 8, "update low bits table");
+  int bit = -1;                 // Set to hold the one bit allowed
+  for (int i = 0; i < RM_SIZE; i++) {
+    if (_A[i]) {                // Found some bits
+      if (bit != -1)
+        return false;           // Already had bits, so fail
+      bit = _A[i] & -_A[i];     // Extract 1 bit from mask
+      int hi_bit = bit << (size-1); // high bit
+      if (hi_bit != 0) {        // Bit set stays in same word?
+        int set = hi_bit + ((hi_bit-1) & ~(bit-1));
+        if (set != _A[i])
+          return false;         // Require adjacent bit set and no more bits
+      } else {                  // Else it's a split-set case
+        if (((-1) & ~(bit-1)) != _A[i])
+          return false;         // Found many bits, so fail
+        i++;                    // Skip iteration forward and check high part
+        assert(size <= 8, "update next code");
+        // The lower 24 bits should be 0 since this is the split case and size <= 8.
+        int set = bit>>24;
+        set = set & -set; // Remove sign extension.
+        set = (((set << size) - 1) >> 8);
+        if (_A[i] != set) return false; // Require 1 lo bit in next word
+      }
+    }
+  }
+  // True for both the empty mask and for a bit set
+  return true;
+}
+
 //------------------------------is_UP------------------------------------------
 // UP means register only, Register plus stack, or stack only is DOWN
 bool RegMask::is_UP() const {
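
clear_to_sets()/smear_to_sets()/is_aligned_sets() above generalize the old pair
routines from 2 adjacent bits to aligned groups of 2, 4, or 8 bits, driven by
the low_bits masks. A self-contained check of the clear_to_sets() bit trick on
a single 32-bit word, verified against a brute-force reference (a sketch of the
word-level math only, not the RegMask class):

#include <cassert>
#include <cstdint>
#include <cstdio>

static const uint32_t low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 };

// Same trick as RegMask::clear_to_sets(), applied to one word: keep only
// aligned groups of `size` adjacent bits in which every bit is set.
static uint32_t clear_to_sets_word(uint32_t bits, int size) {
  uint32_t sets = bits & low_bits[size >> 2];
  for (int j = 1; j < size; j++)
    sets = bits & (sets << 1);     // survives only if all `size` bits present
  sets |= sets >> 1;               // smear the surviving hi-bit over the set
  if (size > 2) { sets |= sets >> 2; if (size > 4) sets |= sets >> 4; }
  return sets;
}

// Brute-force reference: test each aligned group directly.
static uint32_t reference(uint32_t bits, int size) {
  uint32_t out = 0;
  for (int pos = 0; pos + size <= 32; pos += size) {
    uint32_t grp = ((1u << size) - 1) << pos;
    if ((bits & grp) == grp) out |= grp;
  }
  return out;
}

int main() {
  for (int size = 2; size <= 8; size <<= 1)
    for (uint32_t b = 0; b < (1u << 16); b++)  // exhaustive over low 16 bits
      assert(clear_to_sets_word(b, size) == reference(b, size));
  std::printf("clear_to_sets bit trick matches the reference\n");
  return 0;
}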
--- a/src/share/vm/opto/regmask.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/regmask.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -113,7 +113,11 @@
   // the controlling alignment constraint.  Note that this alignment
   // requirement is internal to the allocator, and independent of any
   // particular platform.
-  enum { SlotsPerLong = 2 };
+  enum { SlotsPerLong = 2,
+         SlotsPerVecS = 1,
+         SlotsPerVecD = 2,
+         SlotsPerVecX = 4,
+         SlotsPerVecY = 8 };
 
   // A constructor only used by the ADLC output.  All mask fields are filled
   // in directly.  Calls to this look something like RM(1,2,3,4);
@@ -193,20 +197,53 @@
   OptoReg::Name find_first_pair() const;
 
   // Clear out partial bits; leave only aligned adjacent bit pairs.
-  void ClearToPairs();
+  void clear_to_pairs();
   // Smear out partial bits; leave only aligned adjacent bit pairs.
-  void SmearToPairs();
+  void smear_to_pairs();
   // Verify that the mask contains only aligned adjacent bit pairs
-  void VerifyPairs() const { assert( is_aligned_Pairs(), "mask is not aligned, adjacent pairs" ); }
+  void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); }
   // Test that the mask contains only aligned adjacent bit pairs
-  bool is_aligned_Pairs() const;
+  bool is_aligned_pairs() const;
 
   // mask is a pair of misaligned registers
-  bool is_misaligned_Pair() const { return Size()==2 && !is_aligned_Pairs();}
+  bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); }
   // Test for single register
   int is_bound1() const;
   // Test for a single adjacent pair
-  int is_bound2() const;
+  int is_bound_pair() const;
+  // Test for a single adjacent set of ideal register's size.
+  int is_bound(uint ireg) const {
+    if (is_vector(ireg)) {
+      if (is_bound_set(num_registers(ireg)))
+        return true;
+    } else if (is_bound1() || is_bound_pair()) {
+      return true;
+    }
+    return false;
+  }
+
+  // Find the lowest-numbered register set in the mask.  Return the
+  // HIGHEST register number in the set, or BAD if no sets.
+  // Assert that the mask contains only bit sets.
+  OptoReg::Name find_first_set(int size) const;
+
+  // Clear out partial bits; leave only aligned adjacent bit sets of size.
+  void clear_to_sets(int size);
+  // Smear out partial bits to aligned adjacent bit sets.
+  void smear_to_sets(int size);
+  // Verify that the mask contains only aligned adjacent bit sets
+  void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); }
+  // Test that the mask contains only aligned adjacent bit sets
+  bool is_aligned_sets(int size) const;
+
+  // mask is a set of misaligned registers
+  bool is_misaligned_set(int size) const { return (int)Size()==size && !is_aligned_sets(size);}
+
+  // Test for a single adjacent set
+  int is_bound_set(int size) const;
+
+  static bool is_vector(uint ireg);
+  static int num_registers(uint ireg);
 
   // Fast overlap test.  Non-zero if any registers in common.
   int overlap( const RegMask &rm ) const {
@@ -280,9 +317,15 @@
 
   static bool can_represent(OptoReg::Name reg) {
     // NOTE: -1 in computation reflects the usage of the last
-    //       bit of the regmask as an infinite stack flag.
+    //       bit of the regmask as an infinite stack flag; see
+    //       can_represent_arg() for the variant that also keeps the
+    //       mask aligned for the largest value (VecY).
     return (int)reg < (int)(CHUNK_SIZE-1);
   }
+  static bool can_represent_arg(OptoReg::Name reg) {
+    // NOTE: -SlotsPerVecY in computation reflects the need
+    //       to keep mask aligned for largest value (VecY).
+    return (int)reg < (int)(CHUNK_SIZE-SlotsPerVecY);
+  }
 };
 
 // Do not use this constant directly in client code!
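
The new SlotsPerVec* constants and is_bound(ireg) above make boundedness depend
on the ideal register type: scalars keep the single-bit and pair tests, while
vectors go through is_bound_set(num_registers(ireg)). A small sketch of that
slot mapping under the same assumptions (32-bit mask slots; VecS/VecD/VecX/VecY
occupy 1/2/4/8 slots; the enum below is illustrative, not the real Op_* ids):

#include <cassert>

enum IdealReg { RegI, RegL, RegD, VecS, VecD, VecX, VecY };

static bool is_vector(IdealReg r) {
  return r == VecS || r == VecD || r == VecX || r == VecY;
}

static int num_registers(IdealReg r) {
  switch (r) {
    case VecY: return 8;                // 256-bit vector = 8 mask slots
    case VecX: return 4;                // 128-bit vector = 4 mask slots
    case VecD: case RegD: case RegL:
               return 2;                // 64-bit values = an adjacent pair
    default:   return 1;                // VecS and 32-bit scalars
  }
}

int main() {
  // A mask bound to a single VecX value must be one aligned 4-slot set,
  // which is what is_bound(ireg) checks via is_bound_set(4).
  assert(is_vector(VecX) && num_registers(VecX) == 4);
  assert(!is_vector(RegL) && num_registers(RegL) == 2);  // pair-bound scalar
  return 0;
}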
--- a/src/share/vm/opto/runtime.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/runtime.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -709,9 +709,9 @@
   return TypeFunc::make(domain, range);
 }
 
-//-------------- currentTimeMillis
+//-------------- currentTimeMillis, currentTimeNanos, etc.
 
-const TypeFunc* OptoRuntime::current_time_millis_Type() {
+const TypeFunc* OptoRuntime::void_long_Type() {
   // create input type (domain)
   const Type **fields = TypeTuple::fields(0);
   const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+0, fields);
@@ -880,46 +880,6 @@
   }
 JRT_END
 
-//-----------------------------------------------------------------------------
-// implicit exception support.
-
-static void report_null_exception_in_code_cache(address exception_pc) {
-  ResourceMark rm;
-  CodeBlob* n = CodeCache::find_blob(exception_pc);
-  if (n != NULL) {
-    tty->print_cr("#");
-    tty->print_cr("# HotSpot Runtime Error, null exception in generated code");
-    tty->print_cr("#");
-    tty->print_cr("# pc where exception happened = " INTPTR_FORMAT, exception_pc);
-
-    if (n->is_nmethod()) {
-      methodOop method = ((nmethod*)n)->method();
-      tty->print_cr("# Method where it happened %s.%s ", Klass::cast(method->method_holder())->name()->as_C_string(), method->name()->as_C_string());
-      tty->print_cr("#");
-      if (ShowMessageBoxOnError && UpdateHotSpotCompilerFileOnError &&
-          CompilerOracle::has_command_file()) {
-        const char* title    = "HotSpot Runtime Error";
-        const char* question = "Do you want to exclude compilation of this method in future runs?";
-        if (os::message_box(title, question)) {
-          CompilerOracle::append_comment_to_file("");
-          CompilerOracle::append_comment_to_file("Null exception in compiled code resulted in the following exclude");
-          CompilerOracle::append_comment_to_file("");
-          CompilerOracle::append_exclude_to_file(method);
-          tty->print_cr("#");
-          tty->print_cr("# %s has been updated to exclude the specified method", CompileCommandFile);
-          tty->print_cr("#");
-        }
-      }
-      fatal("Implicit null exception happened in compiled method");
-    } else {
-      n->print();
-      fatal("Implicit null exception happened in generated stub");
-    }
-  }
-  fatal("Implicit null exception at wrong place");
-}
-
-
 //-------------------------------------------------------------------------------------
 // register policy
 
--- a/src/share/vm/opto/runtime.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/runtime.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,7 +55,7 @@
 // code in various ways.  Currently they are used by the lock coarsening code
 //
 
-class NamedCounter : public CHeapObj {
+class NamedCounter : public CHeapObj<mtCompiler> {
 public:
     enum CounterTag {
     NoTag,
@@ -268,7 +268,7 @@
   static const TypeFunc* Math_DD_D_Type(); // mod,pow & friends
   static const TypeFunc* modf_Type();
   static const TypeFunc* l2f_Type();
-  static const TypeFunc* current_time_millis_Type();
+  static const TypeFunc* void_long_Type();
 
   static const TypeFunc* flush_windows_Type();
 
--- a/src/share/vm/opto/split_if.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/split_if.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -137,9 +137,7 @@
             Node *iff_ctrl = iff->is_If() ? iff->in(0) : get_ctrl(iff);
             Node *x = bol->clone();
             register_new_node(x, iff_ctrl);
-            _igvn.hash_delete(iff);
-            iff->set_req(1, x);
-            _igvn._worklist.push(iff);
+            _igvn.replace_input_of(iff, 1, x);
           }
           _igvn.remove_dead_node( bol );
           --i;
@@ -151,9 +149,7 @@
         assert( bol->in(1) == n, "" );
         Node *x = n->clone();
         register_new_node(x, get_ctrl(bol));
-        _igvn.hash_delete(bol);
-        bol->set_req(1, x);
-        _igvn._worklist.push(bol);
+        _igvn.replace_input_of(bol, 1, x);
       }
       _igvn.remove_dead_node( n );
 
@@ -387,9 +383,7 @@
     if( use->in(i) == def )
       break;
   assert( i < use->req(), "def should be among use's inputs" );
-  _igvn.hash_delete(use);
-  use->set_req(i, new_def);
-  _igvn._worklist.push(use);
+  _igvn.replace_input_of(use, i, new_def);
 }
 
 //------------------------------do_split_if------------------------------------
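
Each split_if.cpp hunk above replaces the open-coded hash_delete / set_req /
worklist-push triple with PhaseIterGVN::replace_input_of. A sketch of what such
a helper bundles, with illustrative stand-in types (the real classes live in
opto/phaseX.hpp; only the three-step shape is taken from the hunks above):

#include <cstdio>
#include <vector>

// Illustrative stand-ins, not the real opto Node / PhaseIterGVN.
struct Node {
  std::vector<Node*> in;                           // input edges
  void set_req(unsigned i, Node* n) { in[i] = n; } // rewire input slot i
};

struct PhaseIterGVNSketch {
  std::vector<Node*> _worklist;
  void hash_delete(Node*) { /* unhash: a node's hash depends on its inputs */ }

  // The triple deleted in three places above: unhash the user, rewire the
  // input, then requeue the user so IGVN revisits the changed node.
  void replace_input_of(Node* use, unsigned i, Node* new_input) {
    hash_delete(use);
    use->set_req(i, new_input);
    _worklist.push_back(use);
  }
};

int main() {
  Node a, b, use;
  use.in = { &a };
  PhaseIterGVNSketch igvn;
  igvn.replace_input_of(&use, 0, &b);
  std::printf("rewired: %s\n", use.in[0] == &b ? "ok" : "bug");
  return 0;
}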
--- a/src/share/vm/opto/stringopts.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/stringopts.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -533,7 +533,17 @@
         if (arg->is_Proj() && arg->in(0)->is_CallStaticJava()) {
           CallStaticJavaNode* csj = arg->in(0)->as_CallStaticJava();
           if (csj->method() != NULL &&
-              csj->method()->intrinsic_id() == vmIntrinsics::_Integer_toString) {
+              csj->method()->intrinsic_id() == vmIntrinsics::_Integer_toString &&
+              arg->outcnt() == 1) {
+            // _control is the list of StringBuilder call nodes which
+            // will be replaced by new String code after this optimization.
+            // The Integer::toString() call is not part of the StringBuilder
+            // call chain. It can be eliminated only if its result is used
+            // solely by this SB call chain.
+            // Another limitation: it must have a single use, because we
+            // cannot tell whether it is used only by this SB call chain
+            // until all related SB call nodes have been collected.
+            assert(arg->unique_out() == cnode, "sanity");
             sc->add_control(csj);
             sc->push_int(csj->in(TypeFunc::Parms));
             continue;
@@ -929,8 +939,8 @@
 }
 
 Node* PhaseStringOpts::fetch_static_field(GraphKit& kit, ciField* field) {
-  const TypeKlassPtr* klass_type = TypeKlassPtr::make(field->holder());
-  Node* klass_node = __ makecon(klass_type);
+  const TypeInstPtr* mirror_type = TypeInstPtr::make(field->holder()->java_mirror());
+  Node* klass_node = __ makecon(mirror_type);
   BasicType bt = field->layout_type();
   ciType* field_klass = field->type();
 
@@ -945,6 +955,7 @@
       // and may yield a vacuous result if the field is of interface type.
       type = TypeOopPtr::make_from_constant(con, true)->isa_oopptr();
       assert(type != NULL, "field singleton type must be consistent");
+      return __ makecon(type);
     } else {
       type = TypeOopPtr::make_from_klass(field_klass->as_klass());
     }
@@ -954,7 +965,7 @@
 
   return kit.make_load(NULL, kit.basic_plus_adr(klass_node, field->offset_in_bytes()),
                        type, T_OBJECT,
-                       C->get_alias_index(klass_type->add_offset(field->offset_in_bytes())));
+                       C->get_alias_index(mirror_type->add_offset(field->offset_in_bytes())));
 }
 
 Node* PhaseStringOpts::int_stringSize(GraphKit& kit, Node* arg) {
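
The outcnt() == 1 guard above is the key safety condition: the result of the
Integer::toString() call may be folded into the StringBuilder chain only when
the chain is provably its sole user. A toy model of that use-count check
(illustrative node type, not the real Node/CallStaticJavaNode classes):

#include <cassert>
#include <vector>

struct N {
  std::vector<N*> outs;                       // users of this node's value
  int outcnt() const { return (int)outs.size(); }
  N*  unique_out() const { return outs.size() == 1 ? outs[0] : nullptr; }
};

// Absorb `producer` into the chain rooted at `consumer` only when safe.
static bool try_absorb(N* producer, N* consumer) {
  if (producer->outcnt() != 1) return false;  // another user still needs it
  assert(producer->unique_out() == consumer); // mirrors the sanity assert
  return true;                                // the chain owns the value
}

int main() {
  N to_string, append, other;
  to_string.outs = { &append };
  assert(try_absorb(&to_string, &append));    // single use: may eliminate
  to_string.outs.push_back(&other);
  assert(!try_absorb(&to_string, &append));   // second user: keep the call
  return 0;
}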
--- a/src/share/vm/opto/subnode.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/subnode.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -554,9 +554,7 @@
       return TypeInt::CC_GE;
     } else if (hi0 <= lo1) {
       // Check for special case in Hashtable::get.  (See below.)
-      if ((jint)lo0 >= 0 && (jint)lo1 >= 0 &&
-          in(1)->Opcode() == Op_ModI &&
-          in(1)->in(2) == in(2) )
+      if ((jint)lo0 >= 0 && (jint)lo1 >= 0 && is_index_range_check())
         return TypeInt::CC_LT;
       return TypeInt::CC_LE;
     }
@@ -567,13 +565,17 @@
   // to be positive.
   // (This is a gross hack, since the sub method never
   // looks at the structure of the node in any other case.)
-  if ((jint)lo0 >= 0 && (jint)lo1 >= 0 &&
-      in(1)->Opcode() == Op_ModI &&
-      in(1)->in(2)->uncast() == in(2)->uncast())
+  if ((jint)lo0 >= 0 && (jint)lo1 >= 0 && is_index_range_check())
     return TypeInt::CC_LT;
   return TypeInt::CC;                   // else use worst case results
 }
 
+bool CmpUNode::is_index_range_check() const {
+  // Check for the "(X ModI Y) CmpU Y" shape
+  return (in(1)->Opcode() == Op_ModI &&
+          in(1)->in(2)->eqv_uncast(in(2)));
+}
+
 //------------------------------Idealize---------------------------------------
 Node *CmpINode::Ideal( PhaseGVN *phase, bool can_reshape ) {
   if (phase->type(in(2))->higher_equal(TypeInt::ZERO)) {
@@ -702,12 +704,84 @@
     return TypeInt::CC;
 }
 
+static inline Node* isa_java_mirror_load(PhaseGVN* phase, Node* n) {
+  // Return the klass node for
+  //   LoadP(AddP(foo:Klass, #java_mirror))
+  //   or NULL if not matching.
+  if (n->Opcode() != Op_LoadP) return NULL;
+
+  const TypeInstPtr* tp = phase->type(n)->isa_instptr();
+  if (!tp || tp->klass() != phase->C->env()->Class_klass()) return NULL;
+
+  Node* adr = n->in(MemNode::Address);
+  intptr_t off = 0;
+  Node* k = AddPNode::Ideal_base_and_offset(adr, phase, off);
+  if (k == NULL)  return NULL;
+  const TypeKlassPtr* tkp = phase->type(k)->isa_klassptr();
+  if (!tkp || off != in_bytes(Klass::java_mirror_offset())) return NULL;
+
+  // We've found the klass node of a Java mirror load.
+  return k;
+}
+
+static inline Node* isa_const_java_mirror(PhaseGVN* phase, Node* n) {
+  // for ConP(Foo.class) return ConP(Foo.klass)
+  // otherwise return NULL
+  if (!n->is_Con()) return NULL;
+
+  const TypeInstPtr* tp = phase->type(n)->isa_instptr();
+  if (!tp) return NULL;
+
+  ciType* mirror_type = tp->java_mirror_type();
+  // TypeInstPtr::java_mirror_type() returns non-NULL for compile-
+  // time Class constants only.
+  if (!mirror_type) return NULL;
+
+  // x.getClass() == int.class can never be true (for all primitive types)
+  // Return a ConP(NULL) node for this case.
+  if (mirror_type->is_classless()) {
+    return phase->makecon(TypePtr::NULL_PTR);
+  }
+
+  // return the ConP(Foo.klass)
+  assert(mirror_type->is_klass(), "mirror_type should represent a klassOop");
+  return phase->makecon(TypeKlassPtr::make(mirror_type->as_klass()));
+}
+
 //------------------------------Ideal------------------------------------------
-// Check for the case of comparing an unknown klass loaded from the primary
+// Normalize comparisons between Java mirror loads to compare the klass instead.
+//
+// Also check for the case of comparing an unknown klass loaded from the primary
 // super-type array vs a known klass with no subtypes.  This amounts to
 // checking to see an unknown klass subtypes a known klass with no subtypes;
 // this only happens on an exact match.  We can shorten this test by 1 load.
 Node *CmpPNode::Ideal( PhaseGVN *phase, bool can_reshape ) {
+  // Normalize comparisons between Java mirrors into comparisons of the low-
+  // level klass, where a dependent load could be shortened.
+  //
+  // The new pattern has a nice effect of matching the same pattern used in the
+  // fast path of instanceof/checkcast/Class.isInstance(), which allows
+  // a redundant exact type check to be optimized away by GVN.
+  // For example, in
+  //   if (x.getClass() == Foo.class) {
+  //     Foo foo = (Foo) x;
+  //     // ... use foo ...
+  //   }
+  // a CmpPNode could be shared between if_acmpne and checkcast
+  {
+    Node* k1 = isa_java_mirror_load(phase, in(1));
+    Node* k2 = isa_java_mirror_load(phase, in(2));
+    Node* conk2 = isa_const_java_mirror(phase, in(2));
+
+    if (k1 && (k2 || conk2)) {
+      Node* lhs = k1;
+      Node* rhs = (k2 != NULL) ? k2 : conk2;
+      this->set_req(1, lhs);
+      this->set_req(2, rhs);
+      return this;
+    }
+  }
+
   // Constant pointer on right?
   const TypeKlassPtr* t2 = phase->type(in(2))->isa_klassptr();
   if (t2 == NULL || !t2->klass_is_exact())
@@ -1314,7 +1388,5 @@
   if( t2->base() != Type::DoubleCon ) return Type::DOUBLE;
   double d1 = t1->getd();
   double d2 = t2->getd();
-  if( d1 < 0.0 ) return Type::DOUBLE;
-  if( d2 < 0.0 ) return Type::DOUBLE;
   return TypeD::make( StubRoutines::intrinsic_pow( d1, d2 ) );
 }
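
is_index_range_check() above names the "(X ModI Y) CmpU Y" shape produced by
Hashtable::get-style indexing: for non-negative operands the remainder is
always unsigned-less-than the divisor, which is why sub() can answer
TypeInt::CC_LT. A quick exhaustive check of that arithmetic fact in plain C++:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // With x >= 0 and y > 0, (x % y) lies in [0, y), so the unsigned compare
  // (x % y) <u y always holds -- exactly the fact CmpUNode::sub() relies on.
  for (int32_t y = 1; y < 1000; y++)
    for (int32_t x = 0; x < 1000; x++)
      assert((uint32_t)(x % y) < (uint32_t)y);
  std::printf("(x %% y) CmpU y is always 'less than' for non-negative x, y\n");
  return 0;
}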
--- a/src/share/vm/opto/subnode.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/subnode.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -158,6 +158,7 @@
   CmpUNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
   virtual int Opcode() const;
   virtual const Type *sub( const Type *, const Type * ) const;
+  bool is_index_range_check() const;
 };
 
 //------------------------------CmpPNode---------------------------------------
--- a/src/share/vm/opto/superword.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/superword.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -67,6 +67,10 @@
 
 //------------------------------transform_loop---------------------------
 void SuperWord::transform_loop(IdealLoopTree* lpt) {
+  assert(UseSuperWord, "should be");
+  // Do vectors exist on this architecture?
+  if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
+
   assert(lpt->_head->is_CountedLoop(), "must be");
   CountedLoopNode *cl = lpt->_head->as_CountedLoop();
 
@@ -89,15 +93,12 @@
   Node *pre_opaq1 = pre_end->limit();
   if (pre_opaq1->Opcode() != Op_Opaque1) return;
 
-  // Do vectors exist on this architecture?
-  if (vector_width_in_bytes() == 0) return;
-
   init(); // initialize data structures
 
   set_lpt(lpt);
   set_lp(cl);
 
- // For now, define one block which is the entire loop body
+  // For now, define one block which is the entire loop body
   set_bb(cl);
 
   assert(_packset.length() == 0, "packset must be empty");
@@ -177,7 +178,7 @@
   Node_List memops;
   for (int i = 0; i < _block.length(); i++) {
     Node* n = _block.at(i);
-    if (n->is_Mem() && in_bb(n) &&
+    if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&
         is_java_primitive(n->as_Mem()->memory_type())) {
       int align = memory_alignment(n->as_Mem(), 0);
       if (align != bottom_align) {
@@ -185,54 +186,141 @@
       }
     }
   }
-  if (memops.size() == 0) return;
 
-  // Find a memory reference to align to.  The pre-loop trip count
-  // is modified to align this reference to a vector-aligned address
-  find_align_to_ref(memops);
-  if (align_to_ref() == NULL) return;
+  Node_List align_to_refs;
+  int best_iv_adjustment = 0;
+  MemNode* best_align_to_mem_ref = NULL;
 
-  SWPointer align_to_ref_p(align_to_ref(), this);
-  int offset = align_to_ref_p.offset_in_bytes();
-  int scale  = align_to_ref_p.scale_in_bytes();
-  int vw              = vector_width_in_bytes();
-  int stride_sign     = (scale * iv_stride()) > 0 ? 1 : -1;
-  int iv_adjustment   = (stride_sign * vw - (offset % vw)) % vw;
-
-#ifndef PRODUCT
-  if (TraceSuperWord)
-    tty->print_cr("\noffset = %d iv_adjustment = %d  elt_align = %d scale = %d iv_stride = %d",
-                  offset, iv_adjustment, align_to_ref_p.memory_size(), align_to_ref_p.scale_in_bytes(), iv_stride());
-#endif
+  while (memops.size() != 0) {
+    // Find a memory reference to align to.
+    MemNode* mem_ref = find_align_to_ref(memops);
+    if (mem_ref == NULL) break;
+    align_to_refs.push(mem_ref);
+    int iv_adjustment = get_iv_adjustment(mem_ref);
 
-  // Set alignment relative to "align_to_ref"
-  for (int i = memops.size() - 1; i >= 0; i--) {
-    MemNode* s = memops.at(i)->as_Mem();
-    SWPointer p2(s, this);
-    if (p2.comparable(align_to_ref_p)) {
-      int align = memory_alignment(s, iv_adjustment);
-      set_alignment(s, align);
-    } else {
-      memops.remove(i);
+    if (best_align_to_mem_ref == NULL) {
+      // Set memory reference which is the best from all memory operations
+      // to be used for alignment. The pre-loop trip count is modified to align
+      // this reference to a vector-aligned address.
+      best_align_to_mem_ref = mem_ref;
+      best_iv_adjustment = iv_adjustment;
     }
-  }
 
-  // Create initial pack pairs of memory operations
-  for (uint i = 0; i < memops.size(); i++) {
-    Node* s1 = memops.at(i);
-    for (uint j = 0; j < memops.size(); j++) {
-      Node* s2 = memops.at(j);
-      if (s1 != s2 && are_adjacent_refs(s1, s2)) {
-        int align = alignment(s1);
-        if (stmts_can_pack(s1, s2, align)) {
-          Node_List* pair = new Node_List();
-          pair->push(s1);
-          pair->push(s2);
-          _packset.append(pair);
+    SWPointer align_to_ref_p(mem_ref, this);
+    // Set alignment relative to "align_to_ref" for all related memory operations.
+    for (int i = memops.size() - 1; i >= 0; i--) {
+      MemNode* s = memops.at(i)->as_Mem();
+      if (isomorphic(s, mem_ref)) {
+        SWPointer p2(s, this);
+        if (p2.comparable(align_to_ref_p)) {
+          int align = memory_alignment(s, iv_adjustment);
+          set_alignment(s, align);
         }
       }
     }
-  }
+
+    // Create initial pack pairs of memory operations for which
+    // alignment is set and vectors will be aligned.
+    bool create_pack = true;
+    if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
+      if (!Matcher::misaligned_vectors_ok()) {
+        int vw = vector_width(mem_ref);
+        int vw_best = vector_width(best_align_to_mem_ref);
+        if (vw > vw_best) {
+          // Do not vectorize a memory access with more elements per vector
+          // if unaligned memory accesses are not allowed, because the number
+          // of iterations in the pre-loop will not be enough to align it.
+          create_pack = false;
+        }
+      }
+    } else {
+      if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
+        // Can't allow vectorization of unaligned memory accesses with the
+        // same type, since they could be overlapping accesses to the same array.
+        create_pack = false;
+      } else {
+        // Allow independent (different type) unaligned memory operations
+        // if HW supports them.
+        if (!Matcher::misaligned_vectors_ok()) {
+          create_pack = false;
+        } else {
+          // Check if packs of the same memory type but
+          // with a different alignment were created before.
+          for (uint i = 0; i < align_to_refs.size(); i++) {
+            MemNode* mr = align_to_refs.at(i)->as_Mem();
+            if (same_velt_type(mr, mem_ref) &&
+                memory_alignment(mr, iv_adjustment) != 0)
+              create_pack = false;
+          }
+        }
+      }
+    }
+    if (create_pack) {
+      for (uint i = 0; i < memops.size(); i++) {
+        Node* s1 = memops.at(i);
+        int align = alignment(s1);
+        if (align == top_align) continue;
+        for (uint j = 0; j < memops.size(); j++) {
+          Node* s2 = memops.at(j);
+          if (alignment(s2) == top_align) continue;
+          if (s1 != s2 && are_adjacent_refs(s1, s2)) {
+            if (stmts_can_pack(s1, s2, align)) {
+              Node_List* pair = new Node_List();
+              pair->push(s1);
+              pair->push(s2);
+              _packset.append(pair);
+            }
+          }
+        }
+      }
+    } else { // Don't create unaligned pack
+      // First, remove remaining memory ops of the same type from the list.
+      for (int i = memops.size() - 1; i >= 0; i--) {
+        MemNode* s = memops.at(i)->as_Mem();
+        if (same_velt_type(s, mem_ref)) {
+          memops.remove(i);
+        }
+      }
+
+      // Second, remove already constructed packs of the same type.
+      for (int i = _packset.length() - 1; i >= 0; i--) {
+        Node_List* p = _packset.at(i);
+        MemNode* s = p->at(0)->as_Mem();
+        if (same_velt_type(s, mem_ref)) {
+          remove_pack_at(i);
+        }
+      }
+
+      // If needed find the best memory reference for loop alignment again.
+      if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
+        // Put memory ops from remaining packs back on memops list for
+        // the best alignment search.
+        uint orig_msize = memops.size();
+        for (int i = 0; i < _packset.length(); i++) {
+          Node_List* p = _packset.at(i);
+          MemNode* s = p->at(0)->as_Mem();
+          assert(!same_velt_type(s, mem_ref), "sanity");
+          memops.push(s);
+        }
+        // Assign to the outer best_align_to_mem_ref: a local declaration
+        // here would shadow it and leave the final alignment choice stale.
+        best_align_to_mem_ref = find_align_to_ref(memops);
+        if (best_align_to_mem_ref == NULL) break;
+        best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
+        // Restore list.
+        while (memops.size() > orig_msize)
+          (void)memops.pop();
+      }
+    } // unaligned memory accesses
+
+    // Remove used mem nodes.
+    for (int i = memops.size() - 1; i >= 0; i--) {
+      MemNode* m = memops.at(i)->as_Mem();
+      if (alignment(m) != top_align) {
+        memops.remove(i);
+      }
+    }
+
+  } // while (memops.size() != 0)
+  set_align_to_ref(best_align_to_mem_ref);
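
The rewritten loop above decides, per candidate memory reference, whether a
pack may be created at all. A compact restatement of that policy as a
standalone predicate, under simplified assumptions (alignment is taken
relative to the best reference, misaligned_ok stands for
Matcher::misaligned_vectors_ok(), and the types are illustrative):

#include <vector>

struct Ref { int type; int alignment; int vector_width; };

// prior models align_to_refs; best models best_align_to_mem_ref.
static bool may_create_pack(const Ref& mem, const Ref& best,
                            const std::vector<Ref>& prior, bool misaligned_ok) {
  if (mem.alignment == 0) {              // aligned with the best reference
    // A wider vector than the best ref cannot be aligned by the pre-loop.
    return misaligned_ok || mem.vector_width <= best.vector_width;
  }
  if (mem.type == best.type)             // unaligned and same type:
    return false;                        // could overlap the same array
  if (!misaligned_ok) return false;      // HW lacks unaligned vector access
  for (const Ref& r : prior)             // conflicting earlier alignment?
    if (r.type == mem.type && r.alignment != 0) return false;
  return true;
}

int main() {
  Ref best  = { /*type*/ 0, /*alignment*/ 0, /*vector_width*/ 16 };
  Ref other = { /*type*/ 1, /*alignment*/ 8, /*vector_width*/ 16 };
  // Unaligned but of a different type: allowed only on misaligned-capable HW.
  return may_create_pack(other, best, { best }, true) ? 0 : 1;
}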
 
 #ifndef PRODUCT
   if (TraceSuperWord) {
@@ -246,7 +334,7 @@
 // Find a memory reference to align the loop induction variable to.
 // Looks first at stores then at loads, looking for a memory reference
 // with the largest number of references similar to it.
-void SuperWord::find_align_to_ref(Node_List &memops) {
+MemNode* SuperWord::find_align_to_ref(Node_List &memops) {
   GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
 
   // Count number of comparable memory ops
@@ -270,20 +358,28 @@
     }
   }
 
-  // Find Store (or Load) with the greatest number of "comparable" references
+  // Find Store (or Load) with the greatest number of "comparable" references,
+  // biggest vector size, smallest data size and smallest iv offset.
   int max_ct        = 0;
+  int max_vw        = 0;
   int max_idx       = -1;
   int min_size      = max_jint;
   int min_iv_offset = max_jint;
   for (uint j = 0; j < memops.size(); j++) {
     MemNode* s = memops.at(j)->as_Mem();
     if (s->is_Store()) {
+      int vw = vector_width_in_bytes(s);
+      assert(vw > 1, "sanity");
       SWPointer p(s, this);
-      if (cmp_ct.at(j) > max_ct ||
-          cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
-                                     data_size(s) == min_size &&
-                                        p.offset_in_bytes() < min_iv_offset)) {
+      if (cmp_ct.at(j) >  max_ct ||
+          cmp_ct.at(j) == max_ct &&
+            (vw >  max_vw ||
+             vw == max_vw &&
+              (data_size(s) <  min_size ||
+               data_size(s) == min_size &&
+                 (p.offset_in_bytes() < min_iv_offset)))) {
         max_ct = cmp_ct.at(j);
+        max_vw = vw;
         max_idx = j;
         min_size = data_size(s);
         min_iv_offset = p.offset_in_bytes();
@@ -295,12 +391,18 @@
     for (uint j = 0; j < memops.size(); j++) {
       MemNode* s = memops.at(j)->as_Mem();
       if (s->is_Load()) {
+        int vw = vector_width_in_bytes(s);
+        assert(vw > 1, "sanity");
         SWPointer p(s, this);
-        if (cmp_ct.at(j) > max_ct ||
-            cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
-                                       data_size(s) == min_size &&
-                                          p.offset_in_bytes() < min_iv_offset)) {
+        if (cmp_ct.at(j) >  max_ct ||
+            cmp_ct.at(j) == max_ct &&
+              (vw >  max_vw ||
+               vw == max_vw &&
+                (data_size(s) <  min_size ||
+                 data_size(s) == min_size &&
+                   (p.offset_in_bytes() < min_iv_offset)))) {
           max_ct = cmp_ct.at(j);
+          max_vw = vw;
           max_idx = j;
           min_size = data_size(s);
           min_iv_offset = p.offset_in_bytes();
@@ -309,10 +411,7 @@
     }
   }
 
-  if (max_ct > 0)
-    set_align_to_ref(memops.at(max_idx)->as_Mem());
-
-#ifndef PRODUCT
+#ifdef ASSERT
   if (TraceSuperWord && Verbose) {
     tty->print_cr("\nVector memops after find_align_to_refs");
     for (uint i = 0; i < memops.size(); i++) {
@@ -321,6 +420,17 @@
     }
   }
 #endif
+
+  if (max_ct > 0) {
+#ifdef ASSERT
+    if (TraceSuperWord) {
+      tty->print("\nVector align to node: ");
+      memops.at(max_idx)->as_Mem()->dump();
+    }
+#endif
+    return memops.at(max_idx)->as_Mem();
+  }
+  return NULL;
 }
 
 //------------------------------ref_is_alignable---------------------------
@@ -341,7 +451,8 @@
 
   // If initial offset from start of object is computable,
   // compute alignment within the vector.
-  int vw = vector_width_in_bytes();
+  int vw = vector_width_in_bytes(p.mem());
+  assert(vw > 1, "sanity");
   if (vw % span == 0) {
     Node* init_nd = pre_end->init_trip();
     if (init_nd->is_Con() && p.invar() == NULL) {
@@ -361,6 +472,25 @@
   return false;
 }
 
+//---------------------------get_iv_adjustment---------------------------
+// Calculate the loop's iv adjustment for this memory reference.
+int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
+  SWPointer align_to_ref_p(mem_ref, this);
+  int offset = align_to_ref_p.offset_in_bytes();
+  int scale  = align_to_ref_p.scale_in_bytes();
+  int vw       = vector_width_in_bytes(mem_ref);
+  assert(vw > 1, "sanity");
+  int stride_sign   = (scale * iv_stride()) > 0 ? 1 : -1;
+  int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
+
+#ifndef PRODUCT
+  if (TraceSuperWord)
+    tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
+                  offset, iv_adjustment, align_to_ref_p.memory_size(), scale, iv_stride(), vw);
+#endif
+  return iv_adjustment;
+}
+
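
A worked instance of the get_iv_adjustment() formula above, assuming a 16-byte
vector, 4-byte elements (scale = 4), a positive stride, and an initial offset
of 4 bytes: the pre-loop must cover 12 more bytes (three iterations) before the
reference lands on a 16-byte boundary.

#include <cassert>

int main() {
  int offset = 4, scale = 4, iv_stride = 1, vw = 16;   // sample inputs
  int stride_sign   = (scale * iv_stride) > 0 ? 1 : -1;
  int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
  assert(iv_adjustment == 12);                  // (1*16 - 4) % 16 == 12
  assert((offset + iv_adjustment) % vw == 0);   // now 16-byte aligned
  return 0;
}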
 //---------------------------dependence_graph---------------------------
 // Construct dependency graph.
 // Add dependence edges to load/store nodes for memory dependence
@@ -488,9 +618,13 @@
 bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
 
   // Do not use superword for non-primitives
-  if((s1->is_Mem() && !is_java_primitive(s1->as_Mem()->memory_type())) ||
-     (s2->is_Mem() && !is_java_primitive(s2->as_Mem()->memory_type())))
+  BasicType bt1 = velt_basic_type(s1);
+  BasicType bt2 = velt_basic_type(s2);
+  if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
     return false;
+  if (Matcher::max_vector_size(bt1) < 2) {
+    return false; // No vectors for this type
+  }
 
   if (isomorphic(s1, s2)) {
     if (independent(s1, s2)) {
@@ -552,7 +686,7 @@
   if (s1->Opcode() != s2->Opcode()) return false;
   if (s1->req() != s2->req()) return false;
   if (s1->in(0) != s2->in(0)) return false;
-  if (velt_type(s1) != velt_type(s2)) return false;
+  if (!same_velt_type(s1, s2)) return false;
   return true;
 }
 
@@ -595,14 +729,16 @@
 //------------------------------set_alignment---------------------------
 void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
   set_alignment(s1, align);
-  set_alignment(s2, align + data_size(s1));
+  if (align == top_align || align == bottom_align) {
+    set_alignment(s2, align);
+  } else {
+    set_alignment(s2, align + data_size(s1));
+  }
 }
 
 //------------------------------data_size---------------------------
 int SuperWord::data_size(Node* s) {
-  const Type* t = velt_type(s);
-  BasicType  bt = t->array_element_basic_type();
-  int bsize = type2aelembytes(bt);
+  int bsize = type2aelembytes(velt_basic_type(s));
   assert(bsize != 0, "valid size");
   return bsize;
 }
@@ -631,9 +767,9 @@
 //------------------------------follow_use_defs---------------------------
 // Extend the packset by visiting operand definitions of nodes in pack p
 bool SuperWord::follow_use_defs(Node_List* p) {
+  assert(p->size() == 2, "just checking");
   Node* s1 = p->at(0);
   Node* s2 = p->at(1);
-  assert(p->size() == 2, "just checking");
   assert(s1->req() == s2->req(), "just checking");
   assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
 
@@ -718,7 +854,12 @@
     for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
     for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
     if (i1 != i2) {
-      return false;
+      if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
+        // Further analysis relies on operands position matching.
+        u2->swap_edges(i1, i2);
+      } else {
+        return false;
+      }
     }
   } while (i1 < ct);
   return true;
@@ -727,7 +868,7 @@
 //------------------------------est_savings---------------------------
 // Estimate the savings from executing s1 and s2 as a pack
 int SuperWord::est_savings(Node* s1, Node* s2) {
-  int save = 2 - 1; // 2 operations per instruction in packed form
+  int save_in = 2 - 1; // 2 operations per instruction in packed form
 
   // inputs
   for (uint i = 1; i < s1->req(); i++) {
@@ -735,17 +876,18 @@
     Node* x2 = s2->in(i);
     if (x1 != x2) {
       if (are_adjacent_refs(x1, x2)) {
-        save += adjacent_profit(x1, x2);
+        save_in += adjacent_profit(x1, x2);
       } else if (!in_packset(x1, x2)) {
-        save -= pack_cost(2);
+        save_in -= pack_cost(2);
       } else {
-        save += unpack_cost(2);
+        save_in += unpack_cost(2);
       }
     }
   }
 
   // uses of result
   uint ct = 0;
+  int save_use = 0;
   for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
     Node* s1_use = s1->fast_out(i);
     for (int j = 0; j < _packset.length(); j++) {
@@ -756,7 +898,7 @@
           if (p->at(p->size()-1) == s2_use) {
             ct++;
             if (are_adjacent_refs(s1_use, s2_use)) {
-              save += adjacent_profit(s1_use, s2_use);
+              save_use += adjacent_profit(s1_use, s2_use);
             }
           }
         }
@@ -764,10 +906,10 @@
     }
   }
 
-  if (ct < s1->outcnt()) save += unpack_cost(1);
-  if (ct < s2->outcnt()) save += unpack_cost(1);
+  if (ct < s1->outcnt()) save_use += unpack_cost(1);
+  if (ct < s2->outcnt()) save_use += unpack_cost(1);
 
-  return save;
+  return MAX2(save_in, save_use);
 }
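
est_savings() now tracks input-side and use-side savings separately and returns
their maximum rather than a single running sum, so one clearly profitable side
is enough to keep a candidate pack. A toy computation with illustrative unit
costs (not HotSpot's actual pack_cost/unpack_cost values):

#include <algorithm>
#include <cassert>

int main() {
  // Input side: the packed instruction saves 1, but one operand pair
  // must itself be packed first (illustrative cost of 2).
  int save_in  = (2 - 1) - 2;                   // = -1
  // Use side: one result pair feeds an adjacent packed user (+1).
  int save_use = 1;
  assert(save_in + save_use == 0);              // old-style sum: break-even
  assert(std::max(save_in, save_use) == 1);     // MAX2: still profitable
  return 0;
}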
 
 //------------------------------costs---------------------------
@@ -778,8 +920,9 @@
 //------------------------------combine_packs---------------------------
 // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
 void SuperWord::combine_packs() {
-  bool changed;
-  do {
+  bool changed = true;
+  // Combine packs regardless of the max vector size.
+  while (changed) {
     changed = false;
     for (int i = 0; i < _packset.length(); i++) {
       Node_List* p1 = _packset.at(i);
@@ -787,6 +930,7 @@
       for (int j = 0; j < _packset.length(); j++) {
         Node_List* p2 = _packset.at(j);
         if (p2 == NULL) continue;
+        if (i == j) continue;
         if (p1->at(p1->size()-1) == p2->at(0)) {
           for (uint k = 1; k < p2->size(); k++) {
             p1->push(p2->at(k));
@@ -796,8 +940,39 @@
         }
       }
     }
-  } while (changed);
+  }
 
+  // Split packs whose size is greater than the max vector size.
+  for (int i = 0; i < _packset.length(); i++) {
+    Node_List* p1 = _packset.at(i);
+    if (p1 != NULL) {
+      BasicType bt = velt_basic_type(p1->at(0));
+      uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector
+      assert(is_power_of_2(max_vlen), "sanity");
+      uint psize = p1->size();
+      if (!is_power_of_2(psize)) {
+        // Skip a pack which can't become a vector.
+        // case1: for(...) { a[i] = i; }     element values differ (i+x)
+        // case2: for(...) { a[i] = b[i+1]; } can't align both load and store
+        _packset.at_put(i, NULL);
+        continue;
+      }
+      if (psize > max_vlen) {
+        Node_List* pack = new Node_List();
+        for (uint j = 0; j < psize; j++) {
+          pack->push(p1->at(j));
+          if (pack->size() >= max_vlen) {
+            assert(is_power_of_2(pack->size()), "sanity");
+            _packset.append(pack);
+            pack = new Node_List();
+          }
+        }
+        _packset.at_put(i, NULL);
+      }
+    }
+  }
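
The splitting pass above keeps only power-of-two packs and chops anything
longer than Matcher::max_vector_size(bt) into max-sized chunks. A small sketch
of that chunking, assuming max_vlen = 4 (illustrative containers, not
Node_List/GrowableArray):

#include <cassert>
#include <cstddef>
#include <vector>

static bool is_power_of_2(std::size_t x) { return x && (x & (x - 1)) == 0; }

// Split one pack into vectors of at most max_vlen elements, as above.
static std::vector<std::vector<int>> split_pack(const std::vector<int>& p1,
                                                std::size_t max_vlen) {
  std::vector<std::vector<int>> out;
  if (!is_power_of_2(p1.size())) return out;    // can't become a vector: drop
  if (p1.size() <= max_vlen) { out.push_back(p1); return out; }
  std::vector<int> pack;
  for (int n : p1) {
    pack.push_back(n);
    if (pack.size() >= max_vlen) { out.push_back(pack); pack.clear(); }
  }
  return out;                                   // p1.size()/max_vlen chunks
}

int main() {
  std::vector<int> sixteen(16), seven(7);
  assert(split_pack(sixteen, 4).size() == 4);   // 16 elts -> 4 vectors of 4
  assert(split_pack(seven, 4).empty());         // size 7: pack is discarded
  return 0;
}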
+
+  // Compress list.
   for (int i = _packset.length() - 1; i >= 0; i--) {
     Node_List* p1 = _packset.at(i);
     if (p1 == NULL) {
@@ -880,8 +1055,22 @@
 // Can code be generated for pack p?
 bool SuperWord::implemented(Node_List* p) {
   Node* p0 = p->at(0);
-  int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0));
-  return vopc > 0 && Matcher::has_match_rule(vopc);
+  return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0));
+}
+
+//------------------------------same_inputs--------------------------
+// For pack p, are all idx operands the same?
+static bool same_inputs(Node_List* p, int idx) {
+  Node* p0 = p->at(0);
+  uint vlen = p->size();
+  Node* p0_def = p0->in(idx);
+  for (uint i = 1; i < vlen; i++) {
+    Node* pi = p->at(i);
+    Node* pi_def = pi->in(idx);
+    if (p0_def != pi_def)
+      return false;
+  }
+  return true;
 }
 
 //------------------------------profitable---------------------------
@@ -889,7 +1078,7 @@
 bool SuperWord::profitable(Node_List* p) {
   Node* p0 = p->at(0);
   uint start, end;
-  vector_opd_range(p0, &start, &end);
+  VectorNode::vector_operands(p0, &start, &end);
 
   // Return false if some input is not vector and inside block
   for (uint i = start; i < end; i++) {
@@ -897,15 +1086,20 @@
       // For now, return false if not scalar promotion case (inputs are the same.)
       // Later, implement PackNode and allow differing, non-vector inputs
       // (maybe just the ones from outside the block.)
-      Node* p0_def = p0->in(i);
-      for (uint j = 1; j < p->size(); j++) {
-        Node* use = p->at(j);
-        Node* def = use->in(i);
-        if (p0_def != def)
-          return false;
+      if (!same_inputs(p, i)) {
+        return false;
       }
     }
   }
+  if (VectorNode::is_shift(p0)) {
+    // For now, return false if shift count is vector because
+    // hw does not support it.
+    if (is_vector_use(p0, 2))
+      return false;
+    // For the same reason return false if different shift counts.
+    if (!same_inputs(p, 2))
+      return false;
+  }
   if (!p0->is_Store()) {
     // For now, return false if not all uses are vector.
     // Later, implement ExtractNode and allow non-vector uses (maybe
@@ -939,62 +1133,56 @@
 }
 
 //-------------------------------remove_and_insert-------------------
-//remove "current" from its current position in the memory graph and insert
-//it after the appropriate insertion point (lip or uip)
+// Remove "current" from its current position in the memory graph and insert
+// it after the appropriate insertion point (lip or uip).
 void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,
                                   Node *uip, Unique_Node_List &sched_before) {
   Node* my_mem = current->in(MemNode::Memory);
-  _igvn.hash_delete(current);
-  _igvn.hash_delete(my_mem);
+  bool sched_up = sched_before.member(current);
 
-  //remove current_store from its current position in the memmory graph
+  // remove current_store from its current position in the memory graph
   for (DUIterator i = current->outs(); current->has_out(i); i++) {
     Node* use = current->out(i);
     if (use->is_Mem()) {
       assert(use->in(MemNode::Memory) == current, "must be");
-      _igvn.hash_delete(use);
       if (use == prev) { // connect prev to my_mem
-        use->set_req(MemNode::Memory, my_mem);
+        _igvn.replace_input_of(use, MemNode::Memory, my_mem);
+        --i; //deleted this edge; rescan position
       } else if (sched_before.member(use)) {
-        _igvn.hash_delete(uip);
-        use->set_req(MemNode::Memory, uip);
+        if (!sched_up) { // Will be moved together with current
+          _igvn.replace_input_of(use, MemNode::Memory, uip);
+          --i; //deleted this edge; rescan position
+        }
       } else {
-        _igvn.hash_delete(lip);
-        use->set_req(MemNode::Memory, lip);
+        if (sched_up) { // Will be moved together with current
+          _igvn.replace_input_of(use, MemNode::Memory, lip);
+          --i; //deleted this edge; rescan position
+        }
       }
-      _igvn._worklist.push(use);
-      --i; //deleted this edge; rescan position
     }
   }
 
-  bool sched_up = sched_before.member(current);
   Node *insert_pt =  sched_up ?  uip : lip;
-  _igvn.hash_delete(insert_pt);
 
   // all uses of insert_pt's memory state should use current's instead
   for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) {
     Node* use = insert_pt->out(i);
     if (use->is_Mem()) {
       assert(use->in(MemNode::Memory) == insert_pt, "must be");
-      _igvn.hash_delete(use);
-      use->set_req(MemNode::Memory, current);
-      _igvn._worklist.push(use);
+      _igvn.replace_input_of(use, MemNode::Memory, current);
       --i; //deleted this edge; rescan position
     } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) {
       uint pos; //lip (lower insert point) must be the last one in the memory slice
-      _igvn.hash_delete(use);
       for (pos=1; pos < use->req(); pos++) {
         if (use->in(pos) == insert_pt) break;
       }
-      use->set_req(pos, current);
-      _igvn._worklist.push(use);
+      _igvn.replace_input_of(use, pos, current);
       --i;
     }
   }
 
   //connect current to insert_pt
-  current->set_req(MemNode::Memory, insert_pt);
-  _igvn._worklist.push(current);
+  _igvn.replace_input_of(current, MemNode::Memory, insert_pt);
 }
 
 //------------------------------co_locate_pack----------------------------------
@@ -1031,7 +1219,7 @@
         if (use->is_Mem() && use != previous)
           memops.push(use);
       }
-      if(current == first) break;
+      if (current == first) break;
       previous = current;
       current  = current->in(MemNode::Memory)->as_Mem();
     }
@@ -1044,27 +1232,37 @@
           Node *s2 = memops.at(j);
           if (!independent(s1, s2)) {
             if (in_pack(s2, pk) || schedule_before_pack.member(s2)) {
-              schedule_before_pack.push(s1); //s1 must be scheduled before
+              schedule_before_pack.push(s1); // s1 must be scheduled before
               Node_List* mem_pk = my_pack(s1);
               if (mem_pk != NULL) {
                 for (uint ii = 0; ii < mem_pk->size(); ii++) {
-                  Node* s = mem_pk->at(ii); // follow partner
+                  Node* s = mem_pk->at(ii);  // follow partner
                   if (memops.member(s) && !schedule_before_pack.member(s))
                     schedule_before_pack.push(s);
                 }
               }
+              break;
             }
           }
         }
       }
     }
 
+    Node*    upper_insert_pt = first->in(MemNode::Memory);
+    // Following code moves loads connected to upper_insert_pt below aliased stores.
+    // Collect such loads here and reconnect them back to upper_insert_pt later.
+    memops.clear();
+    for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) {
+      Node* use = upper_insert_pt->out(i);
+      if (!use->is_Store())
+        memops.push(use);
+    }
+
     MemNode* lower_insert_pt = last;
-    Node*    upper_insert_pt = first->in(MemNode::Memory);
     previous                 = last; //previous store in pk
     current                  = last->in(MemNode::Memory)->as_Mem();
 
-    //start scheduling from "last" to "first"
+    // start scheduling from "last" to "first"
     while (true) {
       assert(in_bb(current), "stay in block");
       assert(in_pack(previous, pk), "previous stays in pack");
@@ -1072,20 +1270,15 @@
 
       if (in_pack(current, pk)) {
         // Forward users of my memory state (except "previous) to my input memory state
-        _igvn.hash_delete(current);
         for (DUIterator i = current->outs(); current->has_out(i); i++) {
           Node* use = current->out(i);
           if (use->is_Mem() && use != previous) {
             assert(use->in(MemNode::Memory) == current, "must be");
-            _igvn.hash_delete(use);
             if (schedule_before_pack.member(use)) {
-              _igvn.hash_delete(upper_insert_pt);
-              use->set_req(MemNode::Memory, upper_insert_pt);
+              _igvn.replace_input_of(use, MemNode::Memory, upper_insert_pt);
             } else {
-              _igvn.hash_delete(lower_insert_pt);
-              use->set_req(MemNode::Memory, lower_insert_pt);
+              _igvn.replace_input_of(use, MemNode::Memory, lower_insert_pt);
             }
-            _igvn._worklist.push(use);
             --i; // deleted this edge; rescan position
           }
         }
@@ -1097,6 +1290,14 @@
       if (current == first) break;
       current = my_mem->as_Mem();
     } // end while
+
+    // Reconnect loads back to upper_insert_pt.
+    for (uint i = 0; i < memops.size(); i++) {
+      Node *ld = memops.at(i);
+      if (ld->in(MemNode::Memory) != upper_insert_pt) {
+        _igvn.replace_input_of(ld, MemNode::Memory, upper_insert_pt);
+      }
+    }
   } else if (pk->at(0)->is_Load()) { //load
     // all loads in the pack should have the same memory state. By default,
     // we use the memory state of the last load. However, if any load could
@@ -1122,9 +1323,7 @@
     // Give each load the same memory state
     for (uint i = 0; i < pk->size(); i++) {
       LoadNode* ld = pk->at(i)->as_Load();
-      _igvn.hash_delete(ld);
-      ld->set_req(MemNode::Memory, mem_input);
-      _igvn._worklist.push(ld);
+      _igvn.replace_input_of(ld, MemNode::Memory, mem_input);
     }
   }
 }
@@ -1159,35 +1358,36 @@
       Node* vn = NULL;
       Node* low_adr = p->at(0);
       Node* first   = executed_first(p);
+      int   opc = n->Opcode();
       if (n->is_Load()) {
-        int   opc = n->Opcode();
+        Node* ctl = n->in(MemNode::Control);
+        Node* mem = first->in(MemNode::Memory);
+        Node* adr = low_adr->in(MemNode::Address);
+        const TypePtr* atyp = n->adr_type();
+        vn = LoadVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
+      } else if (n->is_Store()) {
+        // Promote value to be stored to vector
+        Node* val = vector_opd(p, MemNode::ValueIn);
         Node* ctl = n->in(MemNode::Control);
         Node* mem = first->in(MemNode::Memory);
         Node* adr = low_adr->in(MemNode::Address);
         const TypePtr* atyp = n->adr_type();
-        vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen);
-
-      } else if (n->is_Store()) {
-        // Promote value to be stored to vector
-        Node* val = vector_opd(p, MemNode::ValueIn);
-
-        int   opc = n->Opcode();
-        Node* ctl = n->in(MemNode::Control);
-        Node* mem = first->in(MemNode::Memory);
-        Node* adr = low_adr->in(MemNode::Address);
-        const TypePtr* atyp = n->adr_type();
-        vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
-
+        vn = StoreVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
       } else if (n->req() == 3) {
         // Promote operands to vector
         Node* in1 = vector_opd(p, 1);
         Node* in2 = vector_opd(p, 2);
-        vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n));
-
+        if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) {
+          // Move invariant vector input into second position to avoid register spilling.
+          Node* tmp = in1;
+          in1 = in2;
+          in2 = tmp;
+        }
+        vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n));
       } else {
         ShouldNotReachHere();
       }
-
+      assert(vn != NULL, "sanity");
       _phase->_igvn.register_new_node_with_optimizer(vn);
       _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
       for (uint j = 0; j < p->size(); j++) {
@@ -1195,6 +1395,12 @@
         _igvn.replace_node(pm, vn);
       }
       _igvn._worklist.push(vn);
+#ifdef ASSERT
+      if (TraceNewVectors) {
+        tty->print("new Vector node: ");
+        vn->dump();
+      }
+#endif
     }
   }
 }
@@ -1206,46 +1412,79 @@
   uint vlen = p->size();
   Node* opd = p0->in(opd_idx);
 
-  bool same_opd = true;
-  for (uint i = 1; i < vlen; i++) {
-    Node* pi = p->at(i);
-    Node* in = pi->in(opd_idx);
-    if (opd != in) {
-      same_opd = false;
-      break;
-    }
-  }
-
-  if (same_opd) {
-    if (opd->is_Vector() || opd->is_VectorLoad()) {
+  if (same_inputs(p, opd_idx)) {
+    if (opd->is_Vector() || opd->is_LoadVector()) {
+      assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
       return opd; // input is matching vector
     }
-    assert(!opd->is_VectorStore(), "such vector is not expected here");
-    // Convert scalar input to vector. Use p0's type because it's container
-    // maybe smaller than the operand's container.
-    const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd);
-    const Type* p0_t  = velt_type(p0);
-    if (p0_t->higher_equal(opd_t)) opd_t = p0_t;
-    VectorNode* vn    = VectorNode::scalar2vector(_phase->C, opd, vlen, opd_t);
+    if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
+      // No vector is needed for the shift count.
+      // Vector instructions do not mask the shift count; do it here.
+      Compile* C = _phase->C;
+      Node* cnt = opd;
+      juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
+      const TypeInt* t = opd->find_int_type();
+      if (t != NULL && t->is_con()) {
+        juint shift = t->get_con();
+        if (shift > mask) { // Unsigned cmp
+          cnt = ConNode::make(C, TypeInt::make(shift & mask));
+        }
+      } else {
+        if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
+          cnt = ConNode::make(C, TypeInt::make(mask));
+          _phase->_igvn.register_new_node_with_optimizer(cnt);
+          cnt = new (C, 3) AndINode(opd, cnt);
+          _phase->_igvn.register_new_node_with_optimizer(cnt);
+          _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
+        }
+        assert(opd->bottom_type()->isa_int(), "int type only");
+        // Move a non-constant shift count into an XMM register.
+        cnt = new (_phase->C, 2) MoveI2FNode(cnt);
+      }
+      if (cnt != opd) {
+        _phase->_igvn.register_new_node_with_optimizer(cnt);
+        _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
+      }
+      return cnt;
+    }
+    assert(!opd->is_StoreVector(), "such vector is not expected here");
+    // Convert the scalar input to a vector with the same number of elements
+    // as p0's vector. Use p0's type because the size of the operand's
+    // container in the vector must match p0's size regardless of the
+    // operand's size.
+    const Type* p0_t = velt_type(p0);
+    VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, p0_t);
 
     _phase->_igvn.register_new_node_with_optimizer(vn);
     _phase->set_ctrl(vn, _phase->get_ctrl(opd));
+#ifdef ASSERT
+    if (TraceNewVectors) {
+      tty->print("new Vector node: ");
+      vn->dump();
+    }
+#endif
     return vn;
   }
 
   // Insert pack operation
-  const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd);
-  PackNode* pk = PackNode::make(_phase->C, opd, opd_t);
+  BasicType bt = velt_basic_type(p0);
+  PackNode* pk = PackNode::make(_phase->C, opd, vlen, bt);
+  DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )
 
   for (uint i = 1; i < vlen; i++) {
     Node* pi = p->at(i);
     Node* in = pi->in(opd_idx);
     assert(my_pack(in) == NULL, "Should already have been unpacked");
-    assert(opd_t == velt_type(!in_bb(in) ? pi : in), "all same type");
+    assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
     pk->add_opd(in);
   }
   _phase->_igvn.register_new_node_with_optimizer(pk);
   _phase->set_ctrl(pk, _phase->get_ctrl(opd));
+#ifdef ASSERT
+  if (TraceNewVectors) {
+    tty->print("new Vector node: ");
+    pk->dump();
+  }
+#endif
   return pk;
 }
 
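Why the shift-count masking in the hunk above is needed: Java's scalar shifts implicitly mask the count to the operand width (count & 31 for int, count & 63 for long), but vector shift instructions use the count as given, so vector_opd must materialize the masked count before moving it into the vector unit. A minimal illustrative sketch of the masking rule (plain C++, not HotSpot code):

    #include <cstdint>

    // Java semantics: only the low log2(width) bits of the count are used.
    static int32_t java_shl_int(int32_t v, uint32_t count) {
      return (int32_t)((uint32_t)v << (count & 31));  // BitsPerInt - 1
    }
    static int64_t java_shl_long(int64_t v, uint32_t count) {
      return (int64_t)((uint64_t)v << (count & 63));  // BitsPerLong - 1
    }

The code above reproduces exactly this: a constant count folds to (shift & mask), and a non-constant count gets an AndI with the mask unless its type already proves it is in range, after which MoveI2F carries it into an XMM register.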
@@ -1282,19 +1521,16 @@
 
     // Insert extract operation
     _igvn.hash_delete(def);
-    _igvn.hash_delete(use);
     int def_pos = alignment(def) / data_size(def);
-    const Type* def_t = velt_type(def);
 
-    Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t);
+    Node* ex = ExtractNode::make(_phase->C, def, def_pos, velt_basic_type(def));
     _phase->_igvn.register_new_node_with_optimizer(ex);
     _phase->set_ctrl(ex, _phase->get_ctrl(def));
-    use->set_req(idx, ex);
+    _igvn.replace_input_of(use, idx, ex);
     _igvn._worklist.push(def);
-    _igvn._worklist.push(use);
 
     bb_insert_after(ex, bb_idx(def));
-    set_velt_type(ex, def_t);
+    set_velt_type(ex, velt_type(def));
   }
 }
 
@@ -1521,10 +1757,7 @@
   // Initial type
   for (int i = 0; i < _block.length(); i++) {
     Node* n = _block.at(i);
-    const Type* t  = n->is_Mem() ? Type::get_const_basic_type(n->as_Mem()->memory_type())
-                                 : _igvn.type(n);
-    const Type* vt = container_type(t);
-    set_velt_type(n, vt);
+    set_velt_type(n, container_type(n));
   }
 
   // Propagate narrowed type backwards through operations
@@ -1532,37 +1765,27 @@
   for (int i = _block.length() - 1; i >= 0; i--) {
     Node* n = _block.at(i);
     // Only integer types need be examined
-    if (n->bottom_type()->isa_int()) {
+    const Type* vtn = velt_type(n);
+    if (vtn->basic_type() == T_INT) {
       uint start, end;
-      vector_opd_range(n, &start, &end);
+      VectorNode::vector_operands(n, &start, &end);
       const Type* vt = velt_type(n);
 
       for (uint j = start; j < end; j++) {
         Node* in  = n->in(j);
-        // Don't propagate through a type conversion
-        if (n->bottom_type() != in->bottom_type())
-          continue;
-        switch(in->Opcode()) {
-        case Op_AddI:    case Op_AddL:
-        case Op_SubI:    case Op_SubL:
-        case Op_MulI:    case Op_MulL:
-        case Op_AndI:    case Op_AndL:
-        case Op_OrI:     case Op_OrL:
-        case Op_XorI:    case Op_XorL:
-        case Op_LShiftI: case Op_LShiftL:
-        case Op_CMoveI:  case Op_CMoveL:
-          if (in_bb(in)) {
-            bool same_type = true;
-            for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
-              Node *use = in->fast_out(k);
-              if (!in_bb(use) || velt_type(use) != vt) {
-                same_type = false;
-                break;
-              }
+        // Don't propagate the type through a memory node.
+        if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT &&
+            data_size(n) < data_size(in)) {
+          bool same_type = true;
+          for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
+            Node *use = in->fast_out(k);
+            if (!in_bb(use) || !same_velt_type(use, n)) {
+              same_type = false;
+              break;
             }
-            if (same_type) {
-              set_velt_type(in, vt);
-            }
+          }
+          if (same_type) {
+            set_velt_type(in, vt);
           }
         }
       }
@@ -1587,59 +1810,40 @@
   if (!p.valid()) {
     return bottom_align;
   }
+  int vw = vector_width_in_bytes(s);
+  if (vw < 2) {
+    return bottom_align; // No vectors for this type
+  }
   int offset  = p.offset_in_bytes();
   offset     += iv_adjust_in_bytes;
-  int off_rem = offset % vector_width_in_bytes();
-  int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes();
+  int off_rem = offset % vw;
+  int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
   return off_mod;
 }
 
 //---------------------------container_type---------------------------
 // Smallest type containing range of values
-const Type* SuperWord::container_type(const Type* t) {
-  const Type* tp = t->make_ptr();
-  if (tp && tp->isa_aryptr()) {
-    t = tp->is_aryptr()->elem();
+const Type* SuperWord::container_type(Node* n) {
+  if (n->is_Mem()) {
+    return Type::get_const_basic_type(n->as_Mem()->memory_type());
   }
+  const Type* t = _igvn.type(n);
   if (t->basic_type() == T_INT) {
-    if (t->higher_equal(TypeInt::BOOL))  return TypeInt::BOOL;
-    if (t->higher_equal(TypeInt::BYTE))  return TypeInt::BYTE;
-    if (t->higher_equal(TypeInt::CHAR))  return TypeInt::CHAR;
-    if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT;
+    // The narrow type of an arithmetic operation is determined later by
+    // propagating the types of the memory operations.
     return TypeInt::INT;
   }
   return t;
 }
 
-//-------------------------vector_opd_range-----------------------
-// (Start, end] half-open range defining which operands are vector
-void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) {
-  switch (n->Opcode()) {
-  case Op_LoadB:   case Op_LoadUS:
-  case Op_LoadI:   case Op_LoadL:
-  case Op_LoadF:   case Op_LoadD:
-  case Op_LoadP:
-    *start = 0;
-    *end   = 0;
-    return;
-  case Op_StoreB:  case Op_StoreC:
-  case Op_StoreI:  case Op_StoreL:
-  case Op_StoreF:  case Op_StoreD:
-  case Op_StoreP:
-    *start = MemNode::ValueIn;
-    *end   = *start + 1;
-    return;
-  case Op_LShiftI: case Op_LShiftL:
-    *start = 1;
-    *end   = 2;
-    return;
-  case Op_CMoveI:  case Op_CMoveL:  case Op_CMoveF:  case Op_CMoveD:
-    *start = 2;
-    *end   = n->req();
-    return;
+bool SuperWord::same_velt_type(Node* n1, Node* n2) {
+  const Type* vt1 = velt_type(n1);
+  const Type* vt2 = velt_type(n2);
+  if (vt1->basic_type() == T_INT && vt2->basic_type() == T_INT) {
+    // Compare vectors element sizes for integer types.
+    return data_size(n1) == data_size(n2);
   }
-  *start = 1;
-  *end   = n->req(); // default is all operands
+  return vt1 == vt2;
 }
 
 //------------------------------in_packset---------------------------
@@ -1733,12 +1937,13 @@
   assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
 
   SWPointer align_to_ref_p(align_to_ref, this);
+  assert(align_to_ref_p.valid(), "sanity");
 
   // Given:
   //     lim0 == original pre loop limit
   //     V == v_align (power of 2)
   //     invar == extra invariant piece of the address expression
-  //     e == k [ +/- invar ]
+  //     e == offset [ +/- invar ]
   //
   // When reassociating expressions involving '%' the basic rules are:
   //     (a - b) % k == 0   =>  a % k == b % k
@@ -1785,17 +1990,18 @@
   //     N = (V - (e - lim0)) % V
   //     lim = lim0 - (V - (e - lim0)) % V
 
+  int vw = vector_width_in_bytes(align_to_ref);
   int stride   = iv_stride();
   int scale    = align_to_ref_p.scale_in_bytes();
   int elt_size = align_to_ref_p.memory_size();
-  int v_align  = vector_width_in_bytes() / elt_size;
-  int k        = align_to_ref_p.offset_in_bytes() / elt_size;
+  int v_align  = vw / elt_size;
+  assert(v_align > 1, "sanity");
+  int offset   = align_to_ref_p.offset_in_bytes() / elt_size;
+  Node *offsn  = _igvn.intcon(offset);
 
-  Node *kn   = _igvn.intcon(k);
-
-  Node *e = kn;
+  Node *e = offsn;
   if (align_to_ref_p.invar() != NULL) {
-    // incorporate any extra invariant piece producing k +/- invar >>> log2(elt)
+    // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt)
     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
     Node* aref     = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt);
     _phase->_igvn.register_new_node_with_optimizer(aref);
@@ -1808,6 +2014,25 @@
     _phase->_igvn.register_new_node_with_optimizer(e);
     _phase->set_ctrl(e, pre_ctrl);
   }
+  if (vw > ObjectAlignmentInBytes) {
+    // incorporate the array base address: e += (base & (vw-1)) >>> log2(elt)
+    Node* xbase = new (_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base());
+    _phase->_igvn.register_new_node_with_optimizer(xbase);
+#ifdef _LP64
+    xbase  = new (_phase->C, 2) ConvL2INode(xbase);
+    _phase->_igvn.register_new_node_with_optimizer(xbase);
+#endif
+    Node* mask = _igvn.intcon(vw-1);
+    Node* masked_xbase  = new (_phase->C, 3) AndINode(xbase, mask);
+    _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
+    Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
+    Node* bref     = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt);
+    _phase->_igvn.register_new_node_with_optimizer(bref);
+    _phase->set_ctrl(bref, pre_ctrl);
+    e = new (_phase->C, 3) AddINode(e, bref);
+    _phase->_igvn.register_new_node_with_optimizer(e);
+    _phase->set_ctrl(e, pre_ctrl);
+  }
 
   // compute e +/- lim0
   if (scale < 0) {
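The limit adjustment derived in the comments of this last hunk is easier to check with concrete numbers. An illustrative sketch (plain C++, hypothetical helper name) of the quoted case N = (V - (e - lim0)) % V, lim = lim0 - N, including the wrap-around fixup for C's signed '%' that the generated IR handles separately:

    // lim0: original pre-loop limit; V: v_align, elements per vector
    // (power of 2); e: offset term of the address expression, in elements.
    static int adjusted_pre_loop_limit(int lim0, int V, int e) {
      int n = (V - (e - lim0)) % V;
      if (n < 0) n += V;           // C's % keeps the dividend's sign
      return lim0 - n;
    }
    // e.g. lim0 = 5, V = 4, e = 0  =>  n = 9 % 4 = 1, lim = 4,
    // the nearest boundary at which the reference is vector aligned.

The new vw > ObjectAlignmentInBytes branch extends e with (base & (vw-1)) >>> log2(elt): once vectors are wider than the object alignment, the object base itself can no longer be assumed vector aligned, so its misalignment has to enter the same computation.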
--- a/src/share/vm/opto/superword.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/superword.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -264,8 +264,14 @@
                                      _iv = lp->as_CountedLoop()->phi()->as_Phi(); }
   int      iv_stride()             { return lp()->as_CountedLoop()->stride_con(); }
 
-  int vector_width_in_bytes()      { return Matcher::vector_width_in_bytes(); }
-
+  int vector_width(Node* n) {
+    BasicType bt = velt_basic_type(n);
+    return MIN2(ABS(iv_stride()), Matcher::max_vector_size(bt));
+  }
+  int vector_width_in_bytes(Node* n) {
+    BasicType bt = velt_basic_type(n);
+    return vector_width(n)*type2aelembytes(bt);
+  }
   MemNode* align_to_ref()            { return _align_to_ref; }
   void  set_align_to_ref(MemNode* m) { _align_to_ref = m; }
 
@@ -298,7 +304,9 @@
 
   // vector element type
   const Type* velt_type(Node* n)             { return _node_info.adr_at(bb_idx(n))->_velt_type; }
+  BasicType velt_basic_type(Node* n)         { return velt_type(n)->array_element_basic_type(); }
   void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; }
+  bool same_velt_type(Node* n1, Node* n2);
 
   // my_pack
   Node_List* my_pack(Node* n)                { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }
@@ -311,7 +319,9 @@
   // Find the adjacent memory references and create pack pairs for them.
   void find_adjacent_refs();
   // Find a memory reference to align the loop induction variable to.
-  void find_align_to_ref(Node_List &memops);
+  MemNode* find_align_to_ref(Node_List &memops);
+  // Calculate the loop's iv adjustment for this memory op.
+  int get_iv_adjustment(MemNode* mem);
   // Can the preloop align the reference to position zero in the vector?
   bool ref_is_alignable(SWPointer& p);
   // Construct dependency graph.
@@ -394,7 +404,7 @@
  // [Start, end) half-open range defining which operands are vector
   void vector_opd_range(Node* n, uint* start, uint* end);
   // Smallest type containing range of values
-  static const Type* container_type(const Type* t);
+  const Type* container_type(Node* n);
   // Adjust pre-loop limit so that in main loop, a load/store reference
   // to align_to_ref will be a position zero in the vector.
   void align_initial_loop_index(MemNode* align_to_ref);
@@ -462,6 +472,7 @@
 
   Node* base()            { return _base; }
   Node* adr()             { return _adr; }
+  MemNode* mem()          { return _mem; }
   int   scale_in_bytes()  { return _scale; }
   Node* invar()           { return _invar; }
   bool  negate_invar()    { return _negate_invar; }
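The per-node vector_width accessors above replace the old global vector_width_in_bytes(): the usable vector length now depends on the element type via Matcher::max_vector_size(bt) and is additionally capped by |iv_stride()|, since the stride bounds how many iterations can be combined into one vector. A toy sketch of the same computation (illustrative C++ with max_vector_size stubbed; not HotSpot code):

    #include <algorithm>
    #include <cstdlib>

    // Stub standing in for Matcher::max_vector_size(bt):
    // e.g. 16-byte registers holding 4-byte ints -> 4 elements.
    static const int stub_max_vector_size = 4;

    static int vector_width_elems(int iv_stride_con) {
      return std::min(std::abs(iv_stride_con), stub_max_vector_size);
    }
    // width in bytes = vector_width_elems(stride) * elem_bytes,
    // mirroring vector_width_in_bytes(n) in the header above.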
--- a/src/share/vm/opto/type.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/type.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -60,6 +60,10 @@
 
   T_ILLEGAL,    // Tuple
   T_ARRAY,      // Array
+  T_ILLEGAL,    // VectorS
+  T_ILLEGAL,    // VectorD
+  T_ILLEGAL,    // VectorX
+  T_ILLEGAL,    // VectorY
 
   T_ADDRESS,    // AnyPtr   // shows up in factory methods for NULL_PTR
   T_ADDRESS,    // RawPtr
@@ -208,7 +212,7 @@
   // locking.
 
   Arena* save = current->type_arena();
-  Arena* shared_type_arena = new Arena();
+  Arena* shared_type_arena = new (mtCompiler) Arena();
 
   current->set_type_arena(shared_type_arena);
   _shared_type_dict =
@@ -414,6 +418,24 @@
   // get_zero_type() should not happen for T_CONFLICT
   _zero_type[T_CONFLICT]= NULL;
 
+  // Predefined vector types; this requires _const_basic_type[] to be initialized.
+  if (Matcher::vector_size_supported(T_BYTE,4)) {
+    TypeVect::VECTS = TypeVect::make(T_BYTE,4);
+  }
+  if (Matcher::vector_size_supported(T_FLOAT,2)) {
+    TypeVect::VECTD = TypeVect::make(T_FLOAT,2);
+  }
+  if (Matcher::vector_size_supported(T_FLOAT,4)) {
+    TypeVect::VECTX = TypeVect::make(T_FLOAT,4);
+  }
+  if (Matcher::vector_size_supported(T_FLOAT,8)) {
+    TypeVect::VECTY = TypeVect::make(T_FLOAT,8);
+  }
+  mreg2type[Op_VecS] = TypeVect::VECTS;
+  mreg2type[Op_VecD] = TypeVect::VECTD;
+  mreg2type[Op_VecX] = TypeVect::VECTX;
+  mreg2type[Op_VecY] = TypeVect::VECTY;
+
   // Restore working type arena.
   current->set_type_arena(save);
   current->set_type_dict(NULL);
@@ -668,6 +690,10 @@
 
   Bad,          // Tuple - handled in v-call
   Bad,          // Array - handled in v-call
+  Bad,          // VectorS - handled in v-call
+  Bad,          // VectorD - handled in v-call
+  Bad,          // VectorX - handled in v-call
+  Bad,          // VectorY - handled in v-call
 
   Bad,          // AnyPtr - handled in v-call
   Bad,          // RawPtr - handled in v-call
@@ -728,8 +754,8 @@
 //------------------------------data-------------------------------------------
 const char * const Type::msg[Type::lastype] = {
   "bad","control","top","int:","long:","half", "narrowoop:",
-  "tuple:", "aryptr",
-  "anyptr:", "rawptr:", "java:", "inst:", "ary:", "klass:",
+  "tuple:", "array:", "vectors:", "vectord:", "vectorx:", "vectory:",
+  "anyptr:", "rawptr:", "java:", "inst:", "aryptr:", "klass:",
   "func", "abIO", "return_address", "memory",
   "float_top", "ftcon:", "float",
   "double_top", "dblcon:", "double",
@@ -790,7 +816,7 @@
 //------------------------------isa_oop_ptr------------------------------------
 // Return true if type is an oop pointer type.  False for raw pointers.
 static char isa_oop_ptr_tbl[Type::lastype] = {
-  0,0,0,0,0,0,0/*narrowoop*/,0/*tuple*/, 0/*ary*/,
+  0,0,0,0,0,0,0/*narrowoop*/,0/*tuple*/, 0/*array*/, 0, 0, 0, 0/*vector*/,
   0/*anyptr*/,0/*rawptr*/,1/*OopPtr*/,1/*InstPtr*/,1/*AryPtr*/,1/*KlassPtr*/,
   0/*func*/,0,0/*return_address*/,0,
   /*floats*/0,0,0, /*doubles*/0,0,0,
@@ -1926,6 +1952,121 @@
   return false;
 }
 
+//==============================TypeVect=======================================
+// Convenience common pre-built types.
+const TypeVect *TypeVect::VECTS = NULL; //  32-bit vectors
+const TypeVect *TypeVect::VECTD = NULL; //  64-bit vectors
+const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors
+const TypeVect *TypeVect::VECTY = NULL; // 256-bit vectors
+
+//------------------------------make-------------------------------------------
+const TypeVect* TypeVect::make(const Type *elem, uint length) {
+  BasicType elem_bt = elem->array_element_basic_type();
+  assert(is_java_primitive(elem_bt), "only primitive types in vector");
+  assert(length > 1 && is_power_of_2(length), "vector length is power of 2");
+  assert(Matcher::vector_size_supported(elem_bt, length), "length in range");
+  int size = length * type2aelembytes(elem_bt);
+  switch (Matcher::vector_ideal_reg(size)) {
+  case Op_VecS:
+    return (TypeVect*)(new TypeVectS(elem, length))->hashcons();
+  case Op_VecD:
+  case Op_RegD:
+    return (TypeVect*)(new TypeVectD(elem, length))->hashcons();
+  case Op_VecX:
+    return (TypeVect*)(new TypeVectX(elem, length))->hashcons();
+  case Op_VecY:
+    return (TypeVect*)(new TypeVectY(elem, length))->hashcons();
+  }
+  ShouldNotReachHere();
+  return NULL;
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types.  It returns a new Type object.
+const Type *TypeVect::xmeet( const Type *t ) const {
+  // Perform a fast test for common case; meeting the same types together.
+  if( this == t ) return this;  // Meeting same type-rep?
+
+  // Current "this->_base" is Vector
+  switch (t->base()) {          // switch on original type
+
+  case Bottom:                  // Ye Olde Default
+    return t;
+
+  default:                      // All else is a mistake
+    typerr(t);
+
+  case VectorS:
+  case VectorD:
+  case VectorX:
+  case VectorY: {                // Meeting 2 vectors?
+    const TypeVect* v = t->is_vect();
+    assert(  base() == v->base(), "");
+    assert(length() == v->length(), "");
+    assert(element_basic_type() == v->element_basic_type(), "");
+    return TypeVect::make(_elem->xmeet(v->_elem), _length);
+  }
+  case Top:
+    break;
+  }
+  return this;
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeVect::xdual() const {
+  return new TypeVect(base(), _elem->dual(), _length);
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeVect::eq(const Type *t) const {
+  const TypeVect *v = t->is_vect();
+  return (_elem == v->_elem) && (_length == v->_length);
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeVect::hash(void) const {
+  return (intptr_t)_elem + (intptr_t)_length;
+}
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise.   Singletons are simple
+// constants (Ldi nodes).  Vector is singleton if all elements are the same
+// constant value (when vector is created with Replicate code).
+bool TypeVect::singleton(void) const {
+// There is no Con node for vectors yet.
+//  return _elem->singleton();
+  return false;
+}
+
+bool TypeVect::empty(void) const {
+  return _elem->empty();
+}
+
+//------------------------------dump2------------------------------------------
+#ifndef PRODUCT
+void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
+  switch (base()) {
+  case VectorS:
+    st->print("vectors["); break;
+  case VectorD:
+    st->print("vectord["); break;
+  case VectorX:
+    st->print("vectorx["); break;
+  case VectorY:
+    st->print("vectory["); break;
+  default:
+    ShouldNotReachHere();
+  }
+  st->print("%d]:{", _length);
+  _elem->dump2(d, depth, st);
+  st->print("}");
+}
+#endif
+
+
 //=============================================================================
 // Convenience common pre-built types.
 const TypePtr *TypePtr::NULL_PTR;
@@ -4144,7 +4285,7 @@
 // Print a 'flattened' signature
 static const char * const flat_type_msg[Type::lastype] = {
   "bad","control","top","int","long","_", "narrowoop",
-  "tuple:", "array:",
+  "tuple:", "array:", "vectors:", "vectord:", "vectorx:", "vectory:",
   "ptr", "rawptr", "ptr", "ptr", "ptr", "ptr",
   "func", "abIO", "return_address", "mem",
   "float_top", "ftcon:", "flt",
--- a/src/share/vm/opto/type.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/type.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,11 @@
 class   TypeNarrowOop;
 class   TypeAry;
 class   TypeTuple;
+class   TypeVect;
+class     TypeVectS;
+class     TypeVectD;
+class     TypeVectX;
+class     TypeVectY;
 class   TypePtr;
 class     TypeRawPtr;
 class     TypeOopPtr;
@@ -78,6 +83,10 @@
 
     Tuple,                      // Method signature or object layout
     Array,                      // Array types
+    VectorS,                    //  32bit Vector types
+    VectorD,                    //  64bit Vector types
+    VectorX,                    // 128bit Vector types
+    VectorY,                    // 256bit Vector types
 
     AnyPtr,                     // Any old raw, klass, inst, or array pointer
     RawPtr,                     // Raw (non-oop) pointers
@@ -222,6 +231,8 @@
   const TypeF      *isa_float_constant() const;  // Returns NULL if not a FloatCon
   const TypeTuple  *is_tuple() const;            // Collection of fields, NOT a pointer
   const TypeAry    *is_ary() const;              // Array, NOT array pointer
+  const TypeVect   *is_vect() const;             // Vector
+  const TypeVect   *isa_vect() const;            // Returns NULL if not a Vector
   const TypePtr    *is_ptr() const;              // Asserts it is a ptr type
   const TypePtr    *isa_ptr() const;             // Returns NULL if not ptr type
   const TypeRawPtr *isa_rawptr() const;          // NOT Java oop
@@ -574,6 +585,69 @@
 #endif
 };
 
+//------------------------------TypeVect---------------------------------------
+// Class of Vector Types
+class TypeVect : public Type {
+  const Type*   _elem;  // Vector's element type
+  const uint  _length;  // Elements in vector (power of 2)
+
+protected:
+  TypeVect(TYPES t, const Type* elem, uint length) : Type(t),
+    _elem(elem), _length(length) {}
+
+public:
+  const Type* element_type() const { return _elem; }
+  BasicType element_basic_type() const { return _elem->array_element_basic_type(); }
+  uint length() const { return _length; }
+  uint length_in_bytes() const {
+    return _length * type2aelembytes(element_basic_type());
+  }
+
+  virtual bool eq(const Type *t) const;
+  virtual int  hash() const;             // Type specific hashing
+  virtual bool singleton(void) const;    // TRUE if type is a singleton
+  virtual bool empty(void) const;        // TRUE if type is vacuous
+
+  static const TypeVect *make(const BasicType elem_bt, uint length) {
+    // Use bottom primitive type.
+    return make(get_const_basic_type(elem_bt), length);
+  }
+  // Used directly by Replicate nodes to construct a singleton vector.
+  static const TypeVect *make(const Type* elem, uint length);
+
+  virtual const Type *xmeet( const Type *t) const;
+  virtual const Type *xdual() const;     // Compute dual right now.
+
+  static const TypeVect *VECTS;
+  static const TypeVect *VECTD;
+  static const TypeVect *VECTX;
+  static const TypeVect *VECTY;
+
+#ifndef PRODUCT
+  virtual void dump2(Dict &d, uint, outputStream *st) const; // Specialized per-Type dumping
+#endif
+};
+
+class TypeVectS : public TypeVect {
+  friend class TypeVect;
+  TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {}
+};
+
+class TypeVectD : public TypeVect {
+  friend class TypeVect;
+  TypeVectD(const Type* elem, uint length) : TypeVect(VectorD, elem, length) {}
+};
+
+class TypeVectX : public TypeVect {
+  friend class TypeVect;
+  TypeVectX(const Type* elem, uint length) : TypeVect(VectorX, elem, length) {}
+};
+
+class TypeVectY : public TypeVect {
+  friend class TypeVect;
+  TypeVectY(const Type* elem, uint length) : TypeVect(VectorY, elem, length) {}
+};
+
 //------------------------------TypePtr----------------------------------------
 // Class of machine Pointer Types: raw data, instances or arrays.
 // If the _base enum is AnyPtr, then this refers to all of the above.
@@ -1113,6 +1187,15 @@
   return (TypeAry*)this;
 }
 
+inline const TypeVect *Type::is_vect() const {
+  assert( _base >= VectorS && _base <= VectorY, "Not a Vector" );
+  return (TypeVect*)this;
+}
+
+inline const TypeVect *Type::isa_vect() const {
+  return (_base >= VectorS && _base <= VectorY) ? (TypeVect*)this : NULL;
+}
+
 inline const TypePtr *Type::is_ptr() const {
   // AnyPtr is the first Ptr and KlassPtr the last, with no non-ptrs between.
   assert(_base >= AnyPtr && _base <= KlassPtr, "Not a pointer");
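The inline is_vect()/isa_vect() pair added above follows the accessor convention established in this header: is_* asserts the base tag and serves call sites that already know the type, while isa_* is the query form returning NULL. An illustrative call-site pattern (mirroring how is_ptr()/isa_ptr() are used elsewhere; t is any const Type*):

    // Query form: tolerate non-vector types.
    const TypeVect* vt = t->isa_vect();
    if (vt != NULL) {
      uint n  = vt->length();            // element count
      uint nb = vt->length_in_bytes();   // register footprint
    }
    // Assertion form: the caller has already established t is a vector.
    BasicType bt = t->is_vect()->element_basic_type();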
--- a/src/share/vm/opto/vectornode.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/vectornode.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,147 +28,16 @@
 
 //------------------------------VectorNode--------------------------------------
 
-// Return vector type for an element type and vector length.
-const Type* VectorNode::vect_type(BasicType elt_bt, uint len) {
-  assert(len <= VectorNode::max_vlen(elt_bt), "len in range");
-  switch(elt_bt) {
-  case T_BOOLEAN:
-  case T_BYTE:
-    switch(len) {
-    case 2:  return TypeInt::CHAR;
-    case 4:  return TypeInt::INT;
-    case 8:  return TypeLong::LONG;
-    }
-    break;
-  case T_CHAR:
-  case T_SHORT:
-    switch(len) {
-    case 2:  return TypeInt::INT;
-    case 4:  return TypeLong::LONG;
-    }
-    break;
-  case T_INT:
-    switch(len) {
-    case 2:  return TypeLong::LONG;
-    }
-    break;
-  case T_LONG:
-    break;
-  case T_FLOAT:
-    switch(len) {
-    case 2:  return Type::DOUBLE;
-    }
-    break;
-  case T_DOUBLE:
-    break;
-  }
-  ShouldNotReachHere();
-  return NULL;
-}
-
-// Scalar promotion
-VectorNode* VectorNode::scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t) {
-  BasicType bt = opd_t->array_element_basic_type();
-  assert(vlen <= VectorNode::max_vlen(bt), "vlen in range");
-  switch (bt) {
-  case T_BOOLEAN:
-  case T_BYTE:
-    if (vlen == 16) return new (C, 2) Replicate16BNode(s);
-    if (vlen ==  8) return new (C, 2) Replicate8BNode(s);
-    if (vlen ==  4) return new (C, 2) Replicate4BNode(s);
-    break;
-  case T_CHAR:
-    if (vlen == 8) return new (C, 2) Replicate8CNode(s);
-    if (vlen == 4) return new (C, 2) Replicate4CNode(s);
-    if (vlen == 2) return new (C, 2) Replicate2CNode(s);
-    break;
-  case T_SHORT:
-    if (vlen == 8) return new (C, 2) Replicate8SNode(s);
-    if (vlen == 4) return new (C, 2) Replicate4SNode(s);
-    if (vlen == 2) return new (C, 2) Replicate2SNode(s);
-    break;
-  case T_INT:
-    if (vlen == 4) return new (C, 2) Replicate4INode(s);
-    if (vlen == 2) return new (C, 2) Replicate2INode(s);
-    break;
-  case T_LONG:
-    if (vlen == 2) return new (C, 2) Replicate2LNode(s);
-    break;
-  case T_FLOAT:
-    if (vlen == 4) return new (C, 2) Replicate4FNode(s);
-    if (vlen == 2) return new (C, 2) Replicate2FNode(s);
-    break;
-  case T_DOUBLE:
-    if (vlen == 2) return new (C, 2) Replicate2DNode(s);
-    break;
-  }
-  ShouldNotReachHere();
-  return NULL;
-}
-
-// Return initial Pack node. Additional operands added with add_opd() calls.
-PackNode* PackNode::make(Compile* C, Node* s, const Type* opd_t) {
-  BasicType bt = opd_t->array_element_basic_type();
-  switch (bt) {
-  case T_BOOLEAN:
-  case T_BYTE:
-    return new (C, 2) PackBNode(s);
-  case T_CHAR:
-    return new (C, 2) PackCNode(s);
-  case T_SHORT:
-    return new (C, 2) PackSNode(s);
-  case T_INT:
-    return new (C, 2) PackINode(s);
-  case T_LONG:
-    return new (C, 2) PackLNode(s);
-  case T_FLOAT:
-    return new (C, 2) PackFNode(s);
-  case T_DOUBLE:
-    return new (C, 2) PackDNode(s);
-  }
-  ShouldNotReachHere();
-  return NULL;
-}
-
-// Create a binary tree form for Packs. [lo, hi) (half-open) range
-Node* PackNode::binaryTreePack(Compile* C, int lo, int hi) {
-  int ct = hi - lo;
-  assert(is_power_of_2(ct), "power of 2");
-  int mid = lo + ct/2;
-  Node* n1 = ct == 2 ? in(lo)   : binaryTreePack(C, lo,  mid);
-  Node* n2 = ct == 2 ? in(lo+1) : binaryTreePack(C, mid, hi );
-  int rslt_bsize = ct * type2aelembytes(elt_basic_type());
-  if (bottom_type()->is_floatingpoint()) {
-    switch (rslt_bsize) {
-    case  8: return new (C, 3) PackFNode(n1, n2);
-    case 16: return new (C, 3) PackDNode(n1, n2);
-    }
-  } else {
-    assert(bottom_type()->isa_int() || bottom_type()->isa_long(), "int or long");
-    switch (rslt_bsize) {
-    case  2: return new (C, 3) Pack2x1BNode(n1, n2);
-    case  4: return new (C, 3) Pack2x2BNode(n1, n2);
-    case  8: return new (C, 3) PackINode(n1, n2);
-    case 16: return new (C, 3) PackLNode(n1, n2);
-    }
-  }
-  ShouldNotReachHere();
-  return NULL;
-}
-
 // Return the vector operator for the specified scalar operation
-// and vector length.  One use is to check if the code generator
+// and vector length.  Also used to check if the code generator
 // supports the vector operation.
-int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
-  BasicType bt = opd_t->array_element_basic_type();
-  if (!(is_power_of_2(vlen) && vlen <= max_vlen(bt)))
-    return 0; // unimplemented
+int VectorNode::opcode(int sopc, BasicType bt) {
   switch (sopc) {
   case Op_AddI:
     switch (bt) {
     case T_BOOLEAN:
     case T_BYTE:      return Op_AddVB;
-    case T_CHAR:      return Op_AddVC;
+    case T_CHAR:
     case T_SHORT:     return Op_AddVS;
     case T_INT:       return Op_AddVI;
     }
@@ -186,7 +55,7 @@
     switch (bt) {
     case T_BOOLEAN:
     case T_BYTE:   return Op_SubVB;
-    case T_CHAR:   return Op_SubVC;
+    case T_CHAR:
     case T_SHORT:  return Op_SubVS;
     case T_INT:    return Op_SubVI;
     }
@@ -200,6 +69,15 @@
   case Op_SubD:
     assert(bt == T_DOUBLE, "must be");
     return Op_SubVD;
+  case Op_MulI:
+    switch (bt) {
+    case T_BOOLEAN:
+    case T_BYTE:   return 0;   // Unimplemented
+    case T_CHAR:
+    case T_SHORT:  return Op_MulVS;
+    case T_INT:    return Matcher::match_rule_supported(Op_MulVI) ? Op_MulVI : 0; // SSE4_1
+    }
+    ShouldNotReachHere();
   case Op_MulF:
     assert(bt == T_FLOAT, "must be");
     return Op_MulVF;
@@ -216,20 +94,38 @@
     switch (bt) {
     case T_BOOLEAN:
     case T_BYTE:   return Op_LShiftVB;
-    case T_CHAR:   return Op_LShiftVC;
+    case T_CHAR:
     case T_SHORT:  return Op_LShiftVS;
     case T_INT:    return Op_LShiftVI;
     }
     ShouldNotReachHere();
+  case Op_LShiftL:
+    assert(bt == T_LONG, "must be");
+    return Op_LShiftVL;
+  case Op_RShiftI:
+    switch (bt) {
+    case T_BOOLEAN:
+    case T_BYTE:   return Op_RShiftVB;
+    case T_CHAR:
+    case T_SHORT:  return Op_RShiftVS;
+    case T_INT:    return Op_RShiftVI;
+    }
+    ShouldNotReachHere();
+  case Op_RShiftL:
+    assert(bt == T_LONG, "must be");
+    return Op_RShiftVL;
   case Op_URShiftI:
     switch (bt) {
     case T_BOOLEAN:
     case T_BYTE:   return Op_URShiftVB;
-    case T_CHAR:   return Op_URShiftVC;
+    case T_CHAR:
     case T_SHORT:  return Op_URShiftVS;
     case T_INT:    return Op_URShiftVI;
     }
     ShouldNotReachHere();
+  case Op_URShiftL:
+    assert(bt == T_LONG, "must be");
+    return Op_URShiftVL;
   case Op_AndI:
   case Op_AndL:
     return Op_AndV;
@@ -241,13 +137,14 @@
     return Op_XorV;
 
   case Op_LoadB:
+  case Op_LoadUB:
   case Op_LoadUS:
   case Op_LoadS:
   case Op_LoadI:
   case Op_LoadL:
   case Op_LoadF:
   case Op_LoadD:
-    return VectorLoadNode::opcode(sopc, vlen);
+    return Op_LoadVector;
 
   case Op_StoreB:
   case Op_StoreC:
@@ -255,211 +152,252 @@
   case Op_StoreL:
   case Op_StoreF:
   case Op_StoreD:
-    return VectorStoreNode::opcode(sopc, vlen);
-  }
-  return 0; // Unimplemented
-}
-
-// Helper for above.
-int VectorLoadNode::opcode(int sopc, uint vlen) {
-  switch (sopc) {
-  case Op_LoadB:
-    switch (vlen) {
-    case  2:       return 0; // Unimplemented
-    case  4:       return Op_Load4B;
-    case  8:       return Op_Load8B;
-    case 16:       return Op_Load16B;
-    }
-    break;
-  case Op_LoadUS:
-    switch (vlen) {
-    case  2:       return Op_Load2C;
-    case  4:       return Op_Load4C;
-    case  8:       return Op_Load8C;
-    }
-    break;
-  case Op_LoadS:
-    switch (vlen) {
-    case  2:       return Op_Load2S;
-    case  4:       return Op_Load4S;
-    case  8:       return Op_Load8S;
-    }
-    break;
-  case Op_LoadI:
-    switch (vlen) {
-    case  2:       return Op_Load2I;
-    case  4:       return Op_Load4I;
-    }
-    break;
-  case Op_LoadL:
-    if (vlen == 2) return Op_Load2L;
-    break;
-  case Op_LoadF:
-    switch (vlen) {
-    case  2:       return Op_Load2F;
-    case  4:       return Op_Load4F;
-    }
-    break;
-  case Op_LoadD:
-    if (vlen == 2) return Op_Load2D;
-    break;
+    return Op_StoreVector;
   }
   return 0; // Unimplemented
 }
 
-// Helper for above
-int VectorStoreNode::opcode(int sopc, uint vlen) {
-  switch (sopc) {
-  case Op_StoreB:
-    switch (vlen) {
-    case  2:       return 0; // Unimplemented
-    case  4:       return Op_Store4B;
-    case  8:       return Op_Store8B;
-    case 16:       return Op_Store16B;
-    }
-    break;
-  case Op_StoreC:
-    switch (vlen) {
-    case  2:       return Op_Store2C;
-    case  4:       return Op_Store4C;
-    case  8:       return Op_Store8C;
-    }
+bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
+  if (is_java_primitive(bt) &&
+      (vlen > 1) && is_power_of_2(vlen) &&
+      Matcher::vector_size_supported(bt, vlen)) {
+    int vopc = VectorNode::opcode(opc, bt);
+    return vopc > 0 && Matcher::has_match_rule(vopc);
+  }
+  return false;
+}
+
+bool VectorNode::is_shift(Node* n) {
+  switch (n->Opcode()) {
+  case Op_LShiftI:
+  case Op_LShiftL:
+  case Op_RShiftI:
+  case Op_RShiftL:
+  case Op_URShiftI:
+  case Op_URShiftL:
+    return true;
+  }
+  return false;
+}
+
+// Check if input is loop invariant vector.
+bool VectorNode::is_invariant_vector(Node* n) {
+  // Only Replicate vector nodes are loop invariant for now.
+  switch (n->Opcode()) {
+  case Op_ReplicateB:
+  case Op_ReplicateS:
+  case Op_ReplicateI:
+  case Op_ReplicateL:
+  case Op_ReplicateF:
+  case Op_ReplicateD:
+    return true;
+  }
+  return false;
+}
+
+// [Start, end) half-open range defining which operands are vectors
+void VectorNode::vector_operands(Node* n, uint* start, uint* end) {
+  switch (n->Opcode()) {
+  case Op_LoadB:   case Op_LoadUB:
+  case Op_LoadS:   case Op_LoadUS:
+  case Op_LoadI:   case Op_LoadL:
+  case Op_LoadF:   case Op_LoadD:
+  case Op_LoadP:   case Op_LoadN:
+    *start = 0;
+    *end   = 0; // no vector operands
     break;
-  case Op_StoreI:
-    switch (vlen) {
-    case  2:       return Op_Store2I;
-    case  4:       return Op_Store4I;
-    }
+  case Op_StoreB:  case Op_StoreC:
+  case Op_StoreI:  case Op_StoreL:
+  case Op_StoreF:  case Op_StoreD:
+  case Op_StoreP:  case Op_StoreN:
+    *start = MemNode::ValueIn;
+    *end   = MemNode::ValueIn + 1; // 1 vector operand
     break;
-  case Op_StoreL:
-    if (vlen == 2) return Op_Store2L;
+  case Op_LShiftI:  case Op_LShiftL:
+  case Op_RShiftI:  case Op_RShiftL:
+  case Op_URShiftI: case Op_URShiftL:
+    *start = 1;
+    *end   = 2; // 1 vector operand
     break;
-  case Op_StoreF:
-    switch (vlen) {
-    case  2:       return Op_Store2F;
-    case  4:       return Op_Store4F;
-    }
+  case Op_AddI: case Op_AddL: case Op_AddF: case Op_AddD:
+  case Op_SubI: case Op_SubL: case Op_SubF: case Op_SubD:
+  case Op_MulI: case Op_MulL: case Op_MulF: case Op_MulD:
+  case Op_DivF: case Op_DivD:
+  case Op_AndI: case Op_AndL:
+  case Op_OrI:  case Op_OrL:
+  case Op_XorI: case Op_XorL:
+    *start = 1;
+    *end   = 3; // 2 vector operands
     break;
-  case Op_StoreD:
-    if (vlen == 2) return Op_Store2D;
+  case Op_CMoveI:  case Op_CMoveL:  case Op_CMoveF:  case Op_CMoveD:
+    *start = 2;
+    *end   = n->req();
     break;
+  default:
+    *start = 1;
+    *end   = n->req(); // default is all operands
   }
-  return 0; // Unimplemented
 }
 
 // Return the vector version of a scalar operation node.
-VectorNode* VectorNode::make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* opd_t) {
-  int vopc = opcode(sopc, vlen, opd_t);
+VectorNode* VectorNode::make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt) {
+  const TypeVect* vt = TypeVect::make(bt, vlen);
+  int vopc = VectorNode::opcode(opc, bt);
 
   switch (vopc) {
-  case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vlen);
-  case Op_AddVC: return new (C, 3) AddVCNode(n1, n2, vlen);
-  case Op_AddVS: return new (C, 3) AddVSNode(n1, n2, vlen);
-  case Op_AddVI: return new (C, 3) AddVINode(n1, n2, vlen);
-  case Op_AddVL: return new (C, 3) AddVLNode(n1, n2, vlen);
-  case Op_AddVF: return new (C, 3) AddVFNode(n1, n2, vlen);
-  case Op_AddVD: return new (C, 3) AddVDNode(n1, n2, vlen);
+  case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vt);
+  case Op_AddVS: return new (C, 3) AddVSNode(n1, n2, vt);
+  case Op_AddVI: return new (C, 3) AddVINode(n1, n2, vt);
+  case Op_AddVL: return new (C, 3) AddVLNode(n1, n2, vt);
+  case Op_AddVF: return new (C, 3) AddVFNode(n1, n2, vt);
+  case Op_AddVD: return new (C, 3) AddVDNode(n1, n2, vt);
+
+  case Op_SubVB: return new (C, 3) SubVBNode(n1, n2, vt);
+  case Op_SubVS: return new (C, 3) SubVSNode(n1, n2, vt);
+  case Op_SubVI: return new (C, 3) SubVINode(n1, n2, vt);
+  case Op_SubVL: return new (C, 3) SubVLNode(n1, n2, vt);
+  case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vt);
+  case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vt);
 
-  case Op_SubVB: return new (C, 3) SubVBNode(n1, n2, vlen);
-  case Op_SubVC: return new (C, 3) SubVCNode(n1, n2, vlen);
-  case Op_SubVS: return new (C, 3) SubVSNode(n1, n2, vlen);
-  case Op_SubVI: return new (C, 3) SubVINode(n1, n2, vlen);
-  case Op_SubVL: return new (C, 3) SubVLNode(n1, n2, vlen);
-  case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vlen);
-  case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vlen);
+  case Op_MulVS: return new (C, 3) MulVSNode(n1, n2, vt);
+  case Op_MulVI: return new (C, 3) MulVINode(n1, n2, vt);
+  case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vt);
+  case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vt);
+
+  case Op_DivVF: return new (C, 3) DivVFNode(n1, n2, vt);
+  case Op_DivVD: return new (C, 3) DivVDNode(n1, n2, vt);
+
+  case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vt);
+  case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vt);
+  case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vt);
+  case Op_LShiftVL: return new (C, 3) LShiftVLNode(n1, n2, vt);
+
+  case Op_RShiftVB: return new (C, 3) RShiftVBNode(n1, n2, vt);
+  case Op_RShiftVS: return new (C, 3) RShiftVSNode(n1, n2, vt);
+  case Op_RShiftVI: return new (C, 3) RShiftVINode(n1, n2, vt);
+  case Op_RShiftVL: return new (C, 3) RShiftVLNode(n1, n2, vt);
 
-  case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vlen);
-  case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vlen);
+  case Op_URShiftVB: return new (C, 3) URShiftVBNode(n1, n2, vt);
+  case Op_URShiftVS: return new (C, 3) URShiftVSNode(n1, n2, vt);
+  case Op_URShiftVI: return new (C, 3) URShiftVINode(n1, n2, vt);
+  case Op_URShiftVL: return new (C, 3) URShiftVLNode(n1, n2, vt);
 
-  case Op_DivVF: return new (C, 3) DivVFNode(n1, n2, vlen);
-  case Op_DivVD: return new (C, 3) DivVDNode(n1, n2, vlen);
+  case Op_AndV: return new (C, 3) AndVNode(n1, n2, vt);
+  case Op_OrV:  return new (C, 3) OrVNode (n1, n2, vt);
+  case Op_XorV: return new (C, 3) XorVNode(n1, n2, vt);
+  }
+  ShouldNotReachHere();
+  return NULL;
+}
 
-  case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vlen);
-  case Op_LShiftVC: return new (C, 3) LShiftVCNode(n1, n2, vlen);
-  case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vlen);
-  case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vlen);
-
-  case Op_URShiftVB: return new (C, 3) URShiftVBNode(n1, n2, vlen);
-  case Op_URShiftVC: return new (C, 3) URShiftVCNode(n1, n2, vlen);
-  case Op_URShiftVS: return new (C, 3) URShiftVSNode(n1, n2, vlen);
-  case Op_URShiftVI: return new (C, 3) URShiftVINode(n1, n2, vlen);
-
-  case Op_AndV: return new (C, 3) AndVNode(n1, n2, vlen, opd_t->array_element_basic_type());
-  case Op_OrV:  return new (C, 3) OrVNode (n1, n2, vlen, opd_t->array_element_basic_type());
-  case Op_XorV: return new (C, 3) XorVNode(n1, n2, vlen, opd_t->array_element_basic_type());
+// Scalar promotion
+VectorNode* VectorNode::scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t) {
+  BasicType bt = opd_t->array_element_basic_type();
+  const TypeVect* vt = opd_t->singleton() ? TypeVect::make(opd_t, vlen)
+                                          : TypeVect::make(bt, vlen);
+  switch (bt) {
+  case T_BOOLEAN:
+  case T_BYTE:
+    return new (C, 2) ReplicateBNode(s, vt);
+  case T_CHAR:
+  case T_SHORT:
+    return new (C, 2) ReplicateSNode(s, vt);
+  case T_INT:
+    return new (C, 2) ReplicateINode(s, vt);
+  case T_LONG:
+    return new (C, 2) ReplicateLNode(s, vt);
+  case T_FLOAT:
+    return new (C, 2) ReplicateFNode(s, vt);
+  case T_DOUBLE:
+    return new (C, 2) ReplicateDNode(s, vt);
   }
   ShouldNotReachHere();
   return NULL;
 }
 
-// Return the vector version of a scalar load node.
-VectorLoadNode* VectorLoadNode::make(Compile* C, int opc, Node* ctl, Node* mem,
-                                     Node* adr, const TypePtr* atyp, uint vlen) {
-  int vopc = opcode(opc, vlen);
-
-  switch(vopc) {
-  case Op_Load16B: return new (C, 3) Load16BNode(ctl, mem, adr, atyp);
-  case Op_Load8B:  return new (C, 3) Load8BNode(ctl, mem, adr, atyp);
-  case Op_Load4B:  return new (C, 3) Load4BNode(ctl, mem, adr, atyp);
-
-  case Op_Load8C:  return new (C, 3) Load8CNode(ctl, mem, adr, atyp);
-  case Op_Load4C:  return new (C, 3) Load4CNode(ctl, mem, adr, atyp);
-  case Op_Load2C:  return new (C, 3) Load2CNode(ctl, mem, adr, atyp);
-
-  case Op_Load8S:  return new (C, 3) Load8SNode(ctl, mem, adr, atyp);
-  case Op_Load4S:  return new (C, 3) Load4SNode(ctl, mem, adr, atyp);
-  case Op_Load2S:  return new (C, 3) Load2SNode(ctl, mem, adr, atyp);
-
-  case Op_Load4I:  return new (C, 3) Load4INode(ctl, mem, adr, atyp);
-  case Op_Load2I:  return new (C, 3) Load2INode(ctl, mem, adr, atyp);
-
-  case Op_Load2L:  return new (C, 3) Load2LNode(ctl, mem, adr, atyp);
-
-  case Op_Load4F:  return new (C, 3) Load4FNode(ctl, mem, adr, atyp);
-  case Op_Load2F:  return new (C, 3) Load2FNode(ctl, mem, adr, atyp);
-
-  case Op_Load2D:  return new (C, 3) Load2DNode(ctl, mem, adr, atyp);
+// Return initial Pack node. Additional operands added with add_opd() calls.
+PackNode* PackNode::make(Compile* C, Node* s, uint vlen, BasicType bt) {
+  const TypeVect* vt = TypeVect::make(bt, vlen);
+  switch (bt) {
+  case T_BOOLEAN:
+  case T_BYTE:
+    return new (C, 2) PackBNode(s, vt);
+  case T_CHAR:
+  case T_SHORT:
+    return new (C, 2) PackSNode(s, vt);
+  case T_INT:
+    return new (C, 2) PackINode(s, vt);
+  case T_LONG:
+    return new (C, 2) PackLNode(s, vt);
+  case T_FLOAT:
+    return new (C, 2) PackFNode(s, vt);
+  case T_DOUBLE:
+    return new (C, 2) PackDNode(s, vt);
   }
   ShouldNotReachHere();
   return NULL;
 }
 
-// Return the vector version of a scalar store node.
-VectorStoreNode* VectorStoreNode::make(Compile* C, int opc, Node* ctl, Node* mem,
-                                       Node* adr, const TypePtr* atyp, Node* val,
-                                       uint vlen) {
-  int vopc = opcode(opc, vlen);
+// Create a binary tree form for Packs. [lo, hi) (half-open) range
+PackNode* PackNode::binary_tree_pack(Compile* C, int lo, int hi) {
+  int ct = hi - lo;
+  assert(is_power_of_2(ct), "power of 2");
+  if (ct == 2) {
+    PackNode* pk = PackNode::make(C, in(lo), 2, vect_type()->element_basic_type());
+    pk->add_opd(in(lo+1));
+    return pk;
 
-  switch(vopc) {
-  case Op_Store16B: return new (C, 4) Store16BNode(ctl, mem, adr, atyp, val);
-  case Op_Store8B: return new (C, 4) Store8BNode(ctl, mem, adr, atyp, val);
-  case Op_Store4B: return new (C, 4) Store4BNode(ctl, mem, adr, atyp, val);
+  } else {
+    int mid = lo + ct/2;
+    PackNode* n1 = binary_tree_pack(C, lo,  mid);
+    PackNode* n2 = binary_tree_pack(C, mid, hi );
 
-  case Op_Store8C: return new (C, 4) Store8CNode(ctl, mem, adr, atyp, val);
-  case Op_Store4C: return new (C, 4) Store4CNode(ctl, mem, adr, atyp, val);
-  case Op_Store2C: return new (C, 4) Store2CNode(ctl, mem, adr, atyp, val);
-
-  case Op_Store4I: return new (C, 4) Store4INode(ctl, mem, adr, atyp, val);
-  case Op_Store2I: return new (C, 4) Store2INode(ctl, mem, adr, atyp, val);
-
-  case Op_Store2L: return new (C, 4) Store2LNode(ctl, mem, adr, atyp, val);
-
-  case Op_Store4F: return new (C, 4) Store4FNode(ctl, mem, adr, atyp, val);
-  case Op_Store2F: return new (C, 4) Store2FNode(ctl, mem, adr, atyp, val);
-
-  case Op_Store2D: return new (C, 4) Store2DNode(ctl, mem, adr, atyp, val);
+    BasicType bt = n1->vect_type()->element_basic_type();
+    assert(bt == n2->vect_type()->element_basic_type(), "should be the same");
+    switch (bt) {
+    case T_BOOLEAN:
+    case T_BYTE:
+      return new (C, 3) PackSNode(n1, n2, TypeVect::make(T_SHORT, 2));
+    case T_CHAR:
+    case T_SHORT:
+      return new (C, 3) PackINode(n1, n2, TypeVect::make(T_INT, 2));
+    case T_INT:
+      return new (C, 3) PackLNode(n1, n2, TypeVect::make(T_LONG, 2));
+    case T_LONG:
+      return new (C, 3) Pack2LNode(n1, n2, TypeVect::make(T_LONG, 2));
+    case T_FLOAT:
+      return new (C, 3) PackDNode(n1, n2, TypeVect::make(T_DOUBLE, 2));
+    case T_DOUBLE:
+      return new (C, 3) Pack2DNode(n1, n2, TypeVect::make(T_DOUBLE, 2));
+    }
+    ShouldNotReachHere();
   }
-  ShouldNotReachHere();
   return NULL;
 }
 
+// Return the vector version of a scalar load node.
+LoadVectorNode* LoadVectorNode::make(Compile* C, int opc, Node* ctl, Node* mem,
+                                     Node* adr, const TypePtr* atyp, uint vlen, BasicType bt) {
+  const TypeVect* vt = TypeVect::make(bt, vlen);
+  return new (C, 3) LoadVectorNode(ctl, mem, adr, atyp, vt);
+}
+
+// Return the vector version of a scalar store node.
+StoreVectorNode* StoreVectorNode::make(Compile* C, int opc, Node* ctl, Node* mem,
+                                       Node* adr, const TypePtr* atyp, Node* val,
+                                       uint vlen) {
+  return new (C, 4) StoreVectorNode(ctl, mem, adr, atyp, val);
+}
+
 // Extract a scalar element of vector.
-Node* ExtractNode::make(Compile* C, Node* v, uint position, const Type* opd_t) {
-  BasicType bt = opd_t->array_element_basic_type();
-  assert(position < VectorNode::max_vlen(bt), "pos in range");
+Node* ExtractNode::make(Compile* C, Node* v, uint position, BasicType bt) {
+  assert((int)position < Matcher::max_vector_size(bt), "pos in range");
   ConINode* pos = ConINode::make(C, (int)position);
   switch (bt) {
   case T_BOOLEAN:
+    return new (C, 3) ExtractUBNode(v, pos);
   case T_BYTE:
     return new (C, 3) ExtractBNode(v, pos);
   case T_CHAR:
@@ -478,3 +416,4 @@
   ShouldNotReachHere();
   return NULL;
 }
+
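Taken together, the rewritten factories reduce vector-node creation to a three-step protocol on the SuperWord side. A condensed sketch of the caller's flow, expressed with only the functions introduced in this diff:

    // 1. Map the scalar opcode to a vector opcode (0 = no vector form):
    //      int vopc = VectorNode::opcode(n->Opcode(), bt);
    // 2. Gate on legality: primitive bt, power-of-two vlen, matcher
    //    support for both the size and the mapped opcode:
    //      if (VectorNode::implemented(n->Opcode(), vlen, bt)) {
    // 3. Build the node, typed by TypeVect::make(bt, vlen):
    //        VectorNode* vn = VectorNode::make(C, n->Opcode(), in1, in2, vlen, bt);
    //      }
    // Loads and stores short-circuit in opcode() to Op_LoadVector /
    // Op_StoreVector and are built by LoadVectorNode::make and
    // StoreVectorNode::make instead of VectorNode::make.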
--- a/src/share/vm/opto/vectornode.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/opto/vectornode.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,49 +31,37 @@
 
 //------------------------------VectorNode--------------------------------------
 // Vector Operation
-class VectorNode : public Node {
-  virtual uint size_of() const { return sizeof(*this); }
- protected:
-  uint _length; // vector length
-  virtual BasicType elt_basic_type() const = 0; // Vector element basic type
+class VectorNode : public TypeNode {
+ public:
 
-  static const Type* vect_type(BasicType elt_bt, uint len);
-  static const Type* vect_type(const Type* elt_type, uint len) {
-    return vect_type(elt_type->array_element_basic_type(), len);
+  VectorNode(Node* n1, const TypeVect* vt) : TypeNode(vt, 2) {
+    init_class_id(Class_Vector);
+    init_req(1, n1);
+  }
+  VectorNode(Node* n1, Node* n2, const TypeVect* vt) : TypeNode(vt, 3) {
+    init_class_id(Class_Vector);
+    init_req(1, n1);
+    init_req(2, n2);
   }
 
- public:
-  friend class VectorLoadNode;  // For vect_type
-  friend class VectorStoreNode; // ditto.
+  const TypeVect* vect_type() const { return type()->is_vect(); }
+  uint length() const { return vect_type()->length(); } // Vector length
+  uint length_in_bytes() const { return vect_type()->length_in_bytes(); }
 
-  VectorNode(Node* n1, uint vlen) : Node(NULL, n1), _length(vlen) {
-    init_class_id(Class_Vector);
-  }
-  VectorNode(Node* n1, Node* n2, uint vlen) : Node(NULL, n1, n2), _length(vlen) {
-    init_class_id(Class_Vector);
-  }
   virtual int Opcode() const;
 
-  uint length() const { return _length; } // Vector length
-
-  static uint max_vlen(BasicType bt) { // max vector length
-    return (uint)(Matcher::vector_width_in_bytes() / type2aelembytes(bt));
-  }
-
-  // Element and vector type
-  const Type* elt_type()  const { return Type::get_const_basic_type(elt_basic_type()); }
-  const Type* vect_type() const { return vect_type(elt_basic_type(), length()); }
-
-  virtual const Type *bottom_type() const { return vect_type(); }
-  virtual uint        ideal_reg()   const { return Matcher::vector_ideal_reg(); }
-
-  // Vector opcode from scalar opcode
-  static int opcode(int sopc, uint vlen, const Type* opd_t);
+  virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(vect_type()->length_in_bytes()); }
 
   static VectorNode* scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t);
 
-  static VectorNode* make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* elt_t);
+  static VectorNode* make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt);
 
+  static int  opcode(int opc, BasicType bt);
+  static bool implemented(int opc, uint vlen, BasicType bt);
+  static bool is_shift(Node* n);
+  static bool is_invariant_vector(Node* n);
+  // [Start, end) half-open range defining which operands are vectors
+  static void vector_operands(Node* n, uint* start, uint* end);
 };
 
 //===========================Vector=ALU=Operations====================================
@@ -81,981 +69,458 @@
 //------------------------------AddVBNode---------------------------------------
 // Vector add byte
 class AddVBNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
  public:
-  AddVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------AddVCNode---------------------------------------
-// Vector add char
-class AddVCNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
-  AddVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  AddVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------AddVSNode---------------------------------------
-// Vector add short
+// Vector add char/short
 class AddVSNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_SHORT; }
  public:
-  AddVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  AddVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------AddVINode---------------------------------------
 // Vector add int
 class AddVINode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_INT; }
  public:
-  AddVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  AddVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------AddVLNode---------------------------------------
 // Vector add long
 class AddVLNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_LONG; }
  public:
-  AddVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  AddVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------AddVFNode---------------------------------------
 // Vector add float
 class AddVFNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_FLOAT; }
  public:
-  AddVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  AddVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------AddVDNode---------------------------------------
 // Vector add double
 class AddVDNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_DOUBLE; }
  public:
-  AddVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  AddVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------SubVBNode---------------------------------------
 // Vector subtract byte
 class SubVBNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
  public:
-  SubVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------SubVCNode---------------------------------------
-// Vector subtract char
-class SubVCNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
-  SubVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  SubVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------SubVSNode---------------------------------------
 // Vector subtract short
 class SubVSNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_SHORT; }
  public:
-  SubVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  SubVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------SubVINode---------------------------------------
 // Vector subtract int
 class SubVINode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_INT; }
  public:
-  SubVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  SubVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------SubVLNode---------------------------------------
 // Vector subtract long
 class SubVLNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_LONG; }
  public:
-  SubVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  SubVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------SubVFNode---------------------------------------
 // Vector subtract float
 class SubVFNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_FLOAT; }
  public:
-  SubVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  SubVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------SubVDNode---------------------------------------
 // Vector subtract double
 class SubVDNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+  SubVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------MulVSNode---------------------------------------
+// Vector multiply short
+class MulVSNode : public VectorNode {
  public:
-  SubVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  MulVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------MulVINode---------------------------------------
+// Vector multiply int
+class MulVINode : public VectorNode {
+ public:
+  MulVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------MulVFNode---------------------------------------
 // Vector multiply float
 class MulVFNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_FLOAT; }
  public:
-  MulVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  MulVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------MulVDNode---------------------------------------
 // Vector multiply double
 class MulVDNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_DOUBLE; }
  public:
-  MulVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  MulVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------DivVFNode---------------------------------------
 // Vector divide float
 class DivVFNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_FLOAT; }
  public:
-  DivVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  DivVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------DivVDNode---------------------------------------
 // Vector divide double
 class DivVDNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_DOUBLE; }
  public:
-  DivVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  DivVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------LShiftVBNode---------------------------------------
-// Vector lshift byte
+// Vector left shift bytes
 class LShiftVBNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
  public:
-  LShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------LShiftVCNode---------------------------------------
-// Vector lshift chars
-class LShiftVCNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
-  LShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  LShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------LShiftVSNode---------------------------------------
-// Vector lshift shorts
+// Vector left shift shorts
 class LShiftVSNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_SHORT; }
  public:
-  LShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  LShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------LShiftVINode---------------------------------------
-// Vector lshift ints
+// Vector left shift ints
 class LShiftVINode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+  LShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------LShiftVLNode---------------------------------------
+// Vector left shift longs
+class LShiftVLNode : public VectorNode {
+ public:
+  LShiftVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------RShiftVBNode---------------------------------------
+// Vector right arithmetic (signed) shift bytes
+class RShiftVBNode : public VectorNode {
  public:
-  LShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  RShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------RShiftVSNode---------------------------------------
+// Vector right arithmetic (signed) shift shorts
+class RShiftVSNode : public VectorNode {
+ public:
+  RShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------RShiftVINode---------------------------------------
+// Vector right arithmetic (signed) shift ints
+class RShiftVINode : public VectorNode {
+ public:
+  RShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------RShiftVLNode---------------------------------------
+// Vector right arithmetic (signed) shift longs
+class RShiftVLNode : public VectorNode {
+ public:
+  RShiftVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------URShiftVBNode---------------------------------------
-// Vector urshift bytes
+// Vector right logical (unsigned) shift bytes
 class URShiftVBNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
  public:
-  URShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------URShiftVCNode---------------------------------------
-// Vector urshift char
-class URShiftVCNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
-  URShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  URShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------URShiftVSNode---------------------------------------
-// Vector urshift shorts
+// Vector right logical (unsigned) shift shorts
 class URShiftVSNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_SHORT; }
  public:
-  URShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  URShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------URShiftVINode---------------------------------------
-// Vector urshift ints
+// Vector right logical (unsigned) shift ints
 class URShiftVINode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_INT; }
  public:
-  URShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+  URShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
+//------------------------------URShiftVLNode---------------------------------------
+// Vector right logical (unsigned) shift longs
+class URShiftVLNode : public VectorNode {
+ public:
+  URShiftVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+
 //------------------------------AndVNode---------------------------------------
-// Vector and
+// Vector bitwise and of integers
 class AndVNode : public VectorNode {
- protected:
-  BasicType _bt;
-  virtual BasicType elt_basic_type() const { return _bt; }
  public:
-  AndVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+  AndVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------OrVNode---------------------------------------
-// Vector or
+// Vector bitwise or of integers
 class OrVNode : public VectorNode {
- protected:
-  BasicType _bt;
-  virtual BasicType elt_basic_type() const { return _bt; }
  public:
-  OrVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+  OrVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------XorVNode---------------------------------------
-// Vector xor
+// Vector bitwise xor of integers
 class XorVNode : public VectorNode {
- protected:
-  BasicType _bt;
-  virtual BasicType elt_basic_type() const { return _bt; }
  public:
-  XorVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+  XorVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
-//================================= M E M O R Y ==================================
-
+//================================= M E M O R Y ===============================
 
-//------------------------------VectorLoadNode--------------------------------------
-// Vector Load from memory
-class VectorLoadNode : public LoadNode {
-  virtual uint size_of() const { return sizeof(*this); }
-
- protected:
-  virtual BasicType elt_basic_type()  const = 0; // Vector element basic type
-  // For use in constructor
-  static const Type* vect_type(const Type* elt_type, uint len) {
-    return VectorNode::vect_type(elt_type, len);
+//------------------------------LoadVectorNode---------------------------------
+// Load Vector from memory
+class LoadVectorNode : public LoadNode {
+ public:
+  LoadVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt)
+    : LoadNode(c, mem, adr, at, vt) {
+    init_class_id(Class_LoadVector);
   }
 
- public:
-  VectorLoadNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *rt)
-    : LoadNode(c,mem,adr,at,rt) {
-    init_class_id(Class_VectorLoad);
-  }
+  const TypeVect* vect_type() const { return type()->is_vect(); }
+  uint length() const { return vect_type()->length(); } // Vector length
+
   virtual int Opcode() const;
 
-  virtual uint  length() const = 0; // Vector length
-
-  // Element and vector type
-  const Type* elt_type()  const { return Type::get_const_basic_type(elt_basic_type()); }
-  const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); }
-
-  virtual uint ideal_reg() const  { return Matcher::vector_ideal_reg(); }
+  virtual uint ideal_reg() const  { return Matcher::vector_ideal_reg(memory_size()); }
   virtual BasicType memory_type() const { return T_VOID; }
-  virtual int memory_size() const { return length()*type2aelembytes(elt_basic_type()); }
-
-  // Vector opcode from scalar opcode
-  static int opcode(int sopc, uint vlen);
-
-  static VectorLoadNode* make(Compile* C, int opc, Node* ctl, Node* mem,
-                              Node* adr, const TypePtr* atyp, uint vlen);
-};
-
-//------------------------------Load16BNode--------------------------------------
-// Vector load of 16 bytes (8bits signed) from memory
-class Load16BNode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
-  Load16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
-    : VectorLoadNode(c,mem,adr,at,vect_type(ti,16)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store16B; }
-  virtual uint length() const { return 16; }
-};
-
-//------------------------------Load8BNode--------------------------------------
-// Vector load of 8 bytes (8bits signed) from memory
-class Load8BNode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
-  Load8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
-    : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store8B; }
-  virtual uint length() const { return 8; }
-};
+  virtual int memory_size() const { return vect_type()->length_in_bytes(); }
 
-//------------------------------Load4BNode--------------------------------------
-// Vector load of 4 bytes (8bits signed) from memory
-class Load4BNode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
-  Load4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
-    : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store4B; }
-  virtual uint length() const { return 4; }
-};
-
-//------------------------------Load8CNode--------------------------------------
-// Vector load of 8 chars (16bits unsigned) from memory
-class Load8CNode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
-  Load8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
-    : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store8C; }
-  virtual uint length() const { return 8; }
-};
+  virtual int store_Opcode() const { return Op_StoreVector; }
 
-//------------------------------Load4CNode--------------------------------------
-// Vector load of 4 chars (16bits unsigned) from memory
-class Load4CNode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
-  Load4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
-    : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store4C; }
-  virtual uint length() const { return 4; }
-};
-
-//------------------------------Load2CNode--------------------------------------
-// Vector load of 2 chars (16bits unsigned) from memory
-class Load2CNode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
-  Load2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
-    : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store2C; }
-  virtual uint length() const { return 2; }
-};
-
-//------------------------------Load8SNode--------------------------------------
-// Vector load of 8 shorts (16bits signed) from memory
-class Load8SNode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
-  Load8SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
-    : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store8C; }
-  virtual uint length() const { return 8; }
+  static LoadVectorNode* make(Compile* C, int opc, Node* ctl, Node* mem,
+                              Node* adr, const TypePtr* atyp, uint vlen, BasicType bt);
 };
 
-//------------------------------Load4SNode--------------------------------------
-// Vector load of 4 shorts (16bits signed) from memory
-class Load4SNode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
-  Load4SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
-    : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store4C; }
-  virtual uint length() const { return 4; }
-};
-
-//------------------------------Load2SNode--------------------------------------
-// Vector load of 2 shorts (16bits signed) from memory
-class Load2SNode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
-  Load2SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
-    : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store2C; }
-  virtual uint length() const { return 2; }
-};
-
-//------------------------------Load4INode--------------------------------------
-// Vector load of 4 integers (32bits signed) from memory
-class Load4INode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_INT; }
- public:
-  Load4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT)
-    : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store4I; }
-  virtual uint length() const { return 4; }
-};
-
-//------------------------------Load2INode--------------------------------------
-// Vector load of 2 integers (32bits signed) from memory
-class Load2INode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_INT; }
- public:
-  Load2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT)
-    : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store2I; }
-  virtual uint length() const { return 2; }
-};
-
-//------------------------------Load2LNode--------------------------------------
-// Vector load of 2 longs (64bits signed) from memory
-class Load2LNode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_LONG; }
+//------------------------------StoreVectorNode--------------------------------
+// Store Vector to memory
+class StoreVectorNode : public StoreNode {
  public:
-  Load2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeLong *tl = TypeLong::LONG)
-    : VectorLoadNode(c,mem,adr,at,vect_type(tl,2)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store2L; }
-  virtual uint length() const { return 2; }
-};
-
-//------------------------------Load4FNode--------------------------------------
-// Vector load of 4 floats (32bits) from memory
-class Load4FNode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
-  Load4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT)
-    : VectorLoadNode(c,mem,adr,at,vect_type(t,4)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store4F; }
-  virtual uint length() const { return 4; }
-};
+  StoreVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+    : StoreNode(c, mem, adr, at, val) {
+    assert(val->is_Vector() || val->is_LoadVector(), "sanity");
+    init_class_id(Class_StoreVector);
+  }
 
-//------------------------------Load2FNode--------------------------------------
-// Vector load of 2 floats (32bits) from memory
-class Load2FNode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
-  Load2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT)
-    : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store2F; }
-  virtual uint length() const { return 2; }
-};
+  const TypeVect* vect_type() const { return in(MemNode::ValueIn)->bottom_type()->is_vect(); }
+  uint length() const { return vect_type()->length(); } // Vector length
 
-//------------------------------Load2DNode--------------------------------------
-// Vector load of 2 doubles (64bits) from memory
-class Load2DNode : public VectorLoadNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_DOUBLE; }
- public:
-  Load2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::DOUBLE)
-    : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {}
-  virtual int Opcode() const;
-  virtual int store_Opcode() const { return Op_Store2D; }
-  virtual uint length() const { return 2; }
-};
-
-
-//------------------------------VectorStoreNode--------------------------------------
-// Vector Store to memory
-class VectorStoreNode : public StoreNode {
-  virtual uint size_of() const { return sizeof(*this); }
-
- protected:
-  virtual BasicType elt_basic_type()  const = 0; // Vector element basic type
-
- public:
-  VectorStoreNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : StoreNode(c,mem,adr,at,val) {
-    init_class_id(Class_VectorStore);
-  }
   virtual int Opcode() const;
 
-  virtual uint  length() const = 0; // Vector length
-
-  // Element and vector type
-  const Type* elt_type()  const { return Type::get_const_basic_type(elt_basic_type()); }
-  const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); }
+  virtual uint ideal_reg() const  { return Matcher::vector_ideal_reg(memory_size()); }
+  virtual BasicType memory_type() const { return T_VOID; }
+  virtual int memory_size() const { return vect_type()->length_in_bytes(); }
 
-  virtual uint ideal_reg() const  { return Matcher::vector_ideal_reg(); }
-  virtual BasicType memory_type() const { return T_VOID; }
-  virtual int memory_size() const { return length()*type2aelembytes(elt_basic_type()); }
-
-  // Vector opcode from scalar opcode
-  static int opcode(int sopc, uint vlen);
-
-  static VectorStoreNode* make(Compile* C, int opc, Node* ctl, Node* mem,
+  static StoreVectorNode* make(Compile* C, int opc, Node* ctl, Node* mem,
                                Node* adr, const TypePtr* atyp, Node* val,
                                uint vlen);
 };
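On the memory side, a hedged sketch of how the two factories pair up; ctl, mem, adr, atyp and vlen are assumed to come from the surrounding graph, the rest is named above:

  // Sketch only: one wide load feeding one wide store.
  LoadVectorNode*  ld = LoadVectorNode::make(C, Op_LoadI, ctl, mem, adr, atyp,
                                             vlen, T_INT);
  StoreVectorNode* st = StoreVectorNode::make(C, Op_StoreI, ctl, mem, adr, atyp,
                                              ld, vlen);
  // Both report memory_type() == T_VOID and take their size from the vector
  // type, so the access is treated as one block of length_in_bytes() bytes.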
 
-//------------------------------Store16BNode--------------------------------------
-// Vector store of 16 bytes (8bits signed) to memory
-class Store16BNode : public VectorStoreNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
-  Store16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : VectorStoreNode(c,mem,adr,at,val) {}
-  virtual int Opcode() const;
-  virtual uint length() const { return 16; }
-};
+
+//=========================Promote_Scalar_to_Vector============================
 
-//------------------------------Store8BNode--------------------------------------
-// Vector store of 8 bytes (8bits signed) to memory
-class Store8BNode : public VectorStoreNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
+//------------------------------ReplicateBNode---------------------------------
+// Replicate byte scalar into a vector
+class ReplicateBNode : public VectorNode {
  public:
-  Store8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : VectorStoreNode(c,mem,adr,at,val) {}
-  virtual int Opcode() const;
-  virtual uint length() const { return 8; }
-};
-
-//------------------------------Store4BNode--------------------------------------
-// Vector store of 4 bytes (8bits signed) to memory
-class Store4BNode : public VectorStoreNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
-  Store4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : VectorStoreNode(c,mem,adr,at,val) {}
+  ReplicateBNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
   virtual int Opcode() const;
-  virtual uint length() const { return 4; }
-};
-
-//------------------------------Store8CNode--------------------------------------
-// Vector store of 8 chars (16bits signed/unsigned) to memory
-class Store8CNode : public VectorStoreNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
-  Store8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : VectorStoreNode(c,mem,adr,at,val) {}
-  virtual int Opcode() const;
-  virtual uint length() const { return 8; }
-};
-
-//------------------------------Store4CNode--------------------------------------
-// Vector store of 4 chars (16bits signed/unsigned) to memory
-class Store4CNode : public VectorStoreNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
-  Store4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : VectorStoreNode(c,mem,adr,at,val) {}
-  virtual int Opcode() const;
-  virtual uint length() const { return 4; }
-};
-
-//------------------------------Store2CNode--------------------------------------
-// Vector store of 2 chars (16bits signed/unsigned) to memory
-class Store2CNode : public VectorStoreNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
-  Store2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : VectorStoreNode(c,mem,adr,at,val) {}
-  virtual int Opcode() const;
-  virtual uint length() const { return 2; }
 };
 
-//------------------------------Store4INode--------------------------------------
-// Vector store of 4 integers (32bits signed) to memory
-class Store4INode : public VectorStoreNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_INT; }
- public:
-  Store4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : VectorStoreNode(c,mem,adr,at,val) {}
-  virtual int Opcode() const;
-  virtual uint length() const { return 4; }
-};
-
-//------------------------------Store2INode--------------------------------------
-// Vector store of 2 integers (32bits signed) to memory
-class Store2INode : public VectorStoreNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_INT; }
- public:
-  Store2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : VectorStoreNode(c,mem,adr,at,val) {}
-  virtual int Opcode() const;
-  virtual uint length() const { return 2; }
-};
-
-//------------------------------Store2LNode--------------------------------------
-// Vector store of 2 longs (64bits signed) to memory
-class Store2LNode : public VectorStoreNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_LONG; }
+//------------------------------ReplicateSNode---------------------------------
+// Replicate short scalar into a vector
+class ReplicateSNode : public VectorNode {
  public:
-  Store2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : VectorStoreNode(c,mem,adr,at,val) {}
-  virtual int Opcode() const;
-  virtual uint length() const { return 2; }
-};
-
-//------------------------------Store4FNode--------------------------------------
-// Vector store of 4 floats (32bits) to memory
-class Store4FNode : public VectorStoreNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
-  Store4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : VectorStoreNode(c,mem,adr,at,val) {}
-  virtual int Opcode() const;
-  virtual uint length() const { return 4; }
-};
-
-//------------------------------Store2FNode--------------------------------------
-// Vector store of 2 floats (32bits) to memory
-class Store2FNode : public VectorStoreNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
-  Store2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : VectorStoreNode(c,mem,adr,at,val) {}
-  virtual int Opcode() const;
-  virtual uint length() const { return 2; }
-};
-
-//------------------------------Store2DNode--------------------------------------
-// Vector store of 2 doubles (64bits) to memory
-class Store2DNode : public VectorStoreNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_DOUBLE; }
- public:
-  Store2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : VectorStoreNode(c,mem,adr,at,val) {}
-  virtual int Opcode() const;
-  virtual uint length() const { return 2; }
-};
-
-//=========================Promote_Scalar_to_Vector====================================
-
-//------------------------------Replicate16BNode---------------------------------------
-// Replicate byte scalar to be vector of 16 bytes
-class Replicate16BNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
-  Replicate16BNode(Node* in1) : VectorNode(in1, 16) {}
+  ReplicateSNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
   virtual int Opcode() const;
 };
 
-//------------------------------Replicate8BNode---------------------------------------
-// Replicate byte scalar to be vector of 8 bytes
-class Replicate8BNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
+//------------------------------ReplicateINode---------------------------------
+// Replicate int scalar into a vector
+class ReplicateINode : public VectorNode {
  public:
-  Replicate8BNode(Node* in1) : VectorNode(in1, 8) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------Replicate4BNode---------------------------------------
-// Replicate byte scalar to be vector of 4 bytes
-class Replicate4BNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
-  Replicate4BNode(Node* in1) : VectorNode(in1, 4) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------Replicate8CNode---------------------------------------
-// Replicate char scalar to be vector of 8 chars
-class Replicate8CNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
-  Replicate8CNode(Node* in1) : VectorNode(in1, 8) {}
+  ReplicateINode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
   virtual int Opcode() const;
 };
 
-//------------------------------Replicate4CNode---------------------------------------
-// Replicate char scalar to be vector of 4 chars
-class Replicate4CNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
-  Replicate4CNode(Node* in1) : VectorNode(in1, 4) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------Replicate2CNode---------------------------------------
-// Replicate char scalar to be vector of 2 chars
-class Replicate2CNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
+//------------------------------ReplicateLNode---------------------------------
+// Replicate long scalar into a vector
+class ReplicateLNode : public VectorNode {
  public:
-  Replicate2CNode(Node* in1) : VectorNode(in1, 2) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------Replicate8SNode---------------------------------------
-// Replicate short scalar to be vector of 8 shorts
-class Replicate8SNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
-  Replicate8SNode(Node* in1) : VectorNode(in1, 8) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------Replicate4SNode---------------------------------------
-// Replicate short scalar to be vector of 4 shorts
-class Replicate4SNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
-  Replicate4SNode(Node* in1) : VectorNode(in1, 4) {}
+  ReplicateLNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
   virtual int Opcode() const;
 };
 
-//------------------------------Replicate2SNode---------------------------------------
-// Replicate short scalar to be vector of 2 shorts
-class Replicate2SNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_SHORT; }
+//------------------------------ReplicateFNode---------------------------------
+// Replicate float scalar into a vector
+class ReplicateFNode : public VectorNode {
  public:
-  Replicate2SNode(Node* in1) : VectorNode(in1, 2) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------Replicate4INode---------------------------------------
-// Replicate int scalar to be vector of 4 ints
-class Replicate4INode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_INT; }
- public:
-  Replicate4INode(Node* in1) : VectorNode(in1, 4) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------Replicate2INode---------------------------------------
-// Replicate int scalar to be vector of 2 ints
-class Replicate2INode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_INT; }
- public:
-  Replicate2INode(Node* in1) : VectorNode(in1, 2) {}
+  ReplicateFNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
   virtual int Opcode() const;
 };
 
-//------------------------------Replicate2LNode---------------------------------------
-// Replicate long scalar to be vector of 2 longs
-class Replicate2LNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_LONG; }
+//------------------------------ReplicateDNode---------------------------------
+// Replicate double scalar into a vector
+class ReplicateDNode : public VectorNode {
  public:
-  Replicate2LNode(Node* in1) : VectorNode(in1, 2) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------Replicate4FNode---------------------------------------
-// Replicate float scalar to be vector of 4 floats
-class Replicate4FNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
-  Replicate4FNode(Node* in1) : VectorNode(in1, 4) {}
+  ReplicateDNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
   virtual int Opcode() const;
 };
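A sketch of broadcasting a loop-invariant scalar with the Replicate nodes above; the scalar node s and vlen are assumed, and the 2-edge allocation form mirrors the single-input VectorNode constructor:

  // Sketch only: splat one float across every lane.
  const TypeVect* vt = TypeVect::make(T_FLOAT, vlen);
  Node* bcast = new (C, 2) ReplicateFNode(s, vt);
  // VectorNode::scalar2vector(C, s, vlen, Type::FLOAT) reaches the same node
  // from an element type, without naming the subclass.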
 
-//------------------------------Replicate2FNode---------------------------------------
-// Replicate float scalar to be vector of 2 floats
-class Replicate2FNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
-  Replicate2FNode(Node* in1) : VectorNode(in1, 2) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------Replicate2DNode---------------------------------------
-// Replicate double scalar to be vector of 2 doubles
-class Replicate2DNode : public VectorNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_DOUBLE; }
- public:
-  Replicate2DNode(Node* in1) : VectorNode(in1, 2) {}
-  virtual int Opcode() const;
-};
-
-//========================Pack_Scalars_into_a_Vector==============================
+//========================Pack_Scalars_into_a_Vector===========================
 
 //------------------------------PackNode---------------------------------------
 // Pack parent class (not for code generation).
 class PackNode : public VectorNode {
  public:
-  PackNode(Node* in1)  : VectorNode(in1, 1) {}
-  PackNode(Node* in1, Node* n2)  : VectorNode(in1, n2, 2) {}
+  PackNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
+  PackNode(Node* in1, Node* n2, const TypeVect* vt) : VectorNode(in1, n2, vt) {}
   virtual int Opcode() const;
 
   void add_opd(Node* n) {
     add_req(n);
-    _length++;
-    assert(_length == req() - 1, "vector length matches edge count");
   }
 
   // Create a binary tree form for Packs. [lo, hi) (half-open) range
-  Node* binaryTreePack(Compile* C, int lo, int hi);
+  PackNode* binary_tree_pack(Compile* C, int lo, int hi);
 
-  static PackNode* make(Compile* C, Node* s, const Type* elt_t);
+  static PackNode* make(Compile* C, Node* s, uint vlen, BasicType bt);
 };
 
 //------------------------------PackBNode---------------------------------------
 // Pack byte scalars into vector
 class PackBNode : public PackNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
  public:
-  PackBNode(Node* in1)  : PackNode(in1) {}
-  virtual int Opcode() const;
-};
-
-//------------------------------PackCNode---------------------------------------
-// Pack char scalars into vector
-class PackCNode : public PackNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
-  PackCNode(Node* in1)  : PackNode(in1) {}
+  PackBNode(Node* in1, const TypeVect* vt)  : PackNode(in1, vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------PackSNode---------------------------------------
 // Pack short scalars into a vector
 class PackSNode : public PackNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_SHORT; }
  public:
-  PackSNode(Node* in1)  : PackNode(in1) {}
+  PackSNode(Node* in1, const TypeVect* vt)  : PackNode(in1, vt) {}
+  PackSNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------PackINode---------------------------------------
 // Pack integer scalars into a vector
 class PackINode : public PackNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_INT; }
  public:
-  PackINode(Node* in1)  : PackNode(in1) {}
-  PackINode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+  PackINode(Node* in1, const TypeVect* vt)  : PackNode(in1, vt) {}
+  PackINode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------PackLNode---------------------------------------
 // Pack long scalars into a vector
 class PackLNode : public PackNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_LONG; }
  public:
-  PackLNode(Node* in1)  : PackNode(in1) {}
-  PackLNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+  PackLNode(Node* in1, const TypeVect* vt)  : PackNode(in1, vt) {}
+  PackLNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------Pack2LNode--------------------------------------
+// Pack 2 long scalars into a vector
+class Pack2LNode : public PackNode {
+ public:
+  Pack2LNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------PackFNode---------------------------------------
 // Pack float scalars into vector
 class PackFNode : public PackNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_FLOAT; }
  public:
-  PackFNode(Node* in1)  : PackNode(in1) {}
-  PackFNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+  PackFNode(Node* in1, const TypeVect* vt)  : PackNode(in1, vt) {}
+  PackFNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
   virtual int Opcode() const;
 };
 
 //------------------------------PackDNode---------------------------------------
 // Pack double scalars into a vector
 class PackDNode : public PackNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_DOUBLE; }
  public:
-  PackDNode(Node* in1)  : PackNode(in1) {}
-  PackDNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+  PackDNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+  PackDNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
   virtual int Opcode() const;
 };
 
-// The Pack2xN nodes assist code generation.  They are created from
-// Pack4C, etc. nodes in final_graph_reshape in the form of a
-// balanced, binary tree.
-
-//------------------------------Pack2x1BNode-----------------------------------------
-// Pack 2 1-byte integers into vector of 2 bytes
-class Pack2x1BNode : public PackNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_BYTE; }
+//------------------------------Pack2DNode--------------------------------------
+// Pack 2 double scalars into a vector
+class Pack2DNode : public PackNode {
  public:
-  Pack2x1BNode(Node *in1, Node* in2) : PackNode(in1, in2) {}
+  Pack2DNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
   virtual int Opcode() const;
-  virtual uint ideal_reg() const { return Op_RegI; }
 };
 
-//------------------------------Pack2x2BNode---------------------------------------
-// Pack 2 2-byte integers into vector of 4 bytes
-class Pack2x2BNode : public PackNode {
- protected:
-  virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
-  Pack2x2BNode(Node *in1, Node* in2) : PackNode(in1, in2) {}
-  virtual int Opcode() const;
-  virtual uint ideal_reg() const { return Op_RegI; }
-};
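The PackNode API above subsumes the fixed-width Pack and Pack2xN classes while keeping the balanced binary tree form the removed comment described; a hedged sketch of the intended flow, with a hypothetical per-lane accessor def(i) standing in for the real definitions:

  // Sketch only: gather vlen int lanes, then rebalance into the binary tree
  // form (Pack2L/Pack2D pairs) that code generation expects.
  PackNode* p = PackNode::make(C, def(0), vlen, T_INT);   // lane 0
  for (uint i = 1; i < vlen; i++) {
    p->add_opd(def(i));                                   // append lane i
  }
  PackNode* tree = p->binary_tree_pack(C, 1, p->req());   // inputs [1, req())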
 
 //========================Extract_Scalar_from_Vector===============================
 
@@ -1069,7 +534,7 @@
   virtual int Opcode() const;
   uint  pos() const { return in(2)->get_int(); }
 
-  static Node* make(Compile* C, Node* v, uint position, const Type* opd_t);
+  static Node* make(Compile* C, Node* v, uint position, BasicType bt);
 };
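And the reverse direction, sketched against the make() signature above with an assumed vector value v:

  // Sketch only: read lane 0 back out as a scalar; make() chooses the
  // Extract[U]B/C/S/I/L/F/D subclass from the BasicType.
  Node* lane0 = ExtractNode::make(C, v, 0, T_INT);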
 
 //------------------------------ExtractBNode---------------------------------------
@@ -1082,6 +547,16 @@
   virtual uint ideal_reg() const { return Op_RegI; }
 };
 
+//------------------------------ExtractUBNode--------------------------------------
+// Extract a boolean from a vector at position "pos"
+class ExtractUBNode : public ExtractNode {
+ public:
+  ExtractUBNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+  virtual int Opcode() const;
+  virtual const Type *bottom_type() const { return TypeInt::INT; }
+  virtual uint ideal_reg() const { return Op_RegI; }
+};
+
 //------------------------------ExtractCNode---------------------------------------
 // Extract a char from a vector at position "pos"
 class ExtractCNode : public ExtractNode {
--- a/src/share/vm/precompiled/precompiled.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/precompiled/precompiled.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -293,13 +293,10 @@
 # include "c1/c1_globals.hpp"
 #endif // COMPILER1
 #ifndef SERIALGC
-# include "gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp"
 # include "gc_implementation/concurrentMarkSweep/cmsOopClosures.hpp"
 # include "gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp"
 # include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp"
-# include "gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp"
 # include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
-# include "gc_implementation/concurrentMarkSweep/freeList.hpp"
 # include "gc_implementation/concurrentMarkSweep/promotionInfo.hpp"
 # include "gc_implementation/g1/dirtyCardQueue.hpp"
 # include "gc_implementation/g1/g1BlockOffsetTable.hpp"
@@ -309,7 +306,6 @@
 # include "gc_implementation/g1/g1_specialized_oop_closures.hpp"
 # include "gc_implementation/g1/ptrQueue.hpp"
 # include "gc_implementation/g1/satbQueue.hpp"
-# include "gc_implementation/parNew/parGCAllocBuffer.hpp"
 # include "gc_implementation/parNew/parOopClosures.hpp"
 # include "gc_implementation/parallelScavenge/objectStartArray.hpp"
 # include "gc_implementation/parallelScavenge/parMarkBitMap.hpp"
@@ -325,6 +321,7 @@
 # include "gc_implementation/parallelScavenge/psYoungGen.hpp"
 # include "gc_implementation/shared/gcAdaptivePolicyCounters.hpp"
 # include "gc_implementation/shared/gcPolicyCounters.hpp"
+# include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #endif // SERIALGC
 
 #endif // !DONT_USE_PRECOMPILED_HEADER
--- a/src/share/vm/prims/jni.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jni.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -33,6 +33,7 @@
 #ifndef SERIALGC
 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #endif // SERIALGC
+#include "memory/allocation.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/gcLocker.inline.hpp"
 #include "memory/oopFactory.hpp"
@@ -3270,7 +3271,7 @@
   int s_len = java_lang_String::length(s);
   typeArrayOop s_value = java_lang_String::value(s);
   int s_offset = java_lang_String::offset(s);
-  jchar* buf = NEW_C_HEAP_ARRAY(jchar, s_len + 1);  // add one for zero termination
+  jchar* buf = NEW_C_HEAP_ARRAY(jchar, s_len + 1, mtInternal);  // add one for zero termination
   if (s_len > 0) {
     memcpy(buf, s_value->char_at_addr(s_offset), sizeof(jchar)*s_len);
   }
@@ -3363,7 +3364,7 @@
 #endif /* USDT2 */
   oop java_string = JNIHandles::resolve_non_null(string);
   size_t length = java_lang_String::utf8_length(java_string);
-  char* result = AllocateHeap(length + 1, "GetStringUTFChars");
+  char* result = AllocateHeap(length + 1, mtInternal);
   java_lang_String::as_utf8_string(java_string, result, (int) length + 1);
   if (isCopy != NULL) *isCopy = JNI_TRUE;
 #ifndef USDT2
@@ -3619,7 +3620,7 @@
      * Avoid asserts in typeArrayOop. */ \
     result = (ElementType*)get_bad_address(); \
   } else { \
-    result = NEW_C_HEAP_ARRAY(ElementType, len); \
+    result = NEW_C_HEAP_ARRAY(ElementType, len, mtInternal); \
     /* copy the array to the c chunk */ \
     memcpy(result, a->Tag##_at_addr(0), sizeof(ElementType)*len); \
   } \
@@ -3656,7 +3657,7 @@
      * Avoid asserts in typeArrayOop. */ \
     result = (ElementType*)get_bad_address(); \
   } else { \
-    result = NEW_C_HEAP_ARRAY(ElementType, len); \
+    result = NEW_C_HEAP_ARRAY(ElementType, len, mtInternal); \
     /* copy the array to the c chunk */ \
     memcpy(result, a->Tag##_at_addr(0), sizeof(ElementType)*len); \
   } \
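The new third argument at these call sites is the native-memory-tracking tag. A minimal sketch of the allocate/free pairing, assuming the array-free macro takes the same tag (only the calls shown in this patch are confirmed):

  // Sketch only: allocation and free carry the same memory-type tag.
  jchar* buf = NEW_C_HEAP_ARRAY(jchar, len + 1, mtInternal);
  // ... use buf ...
  FREE_C_HEAP_ARRAY(jchar, buf, mtInternal);  // assumed counterpart macro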
--- a/src/share/vm/prims/jniCheck.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jniCheck.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1308,7 +1308,7 @@
     assert (isCopy == NULL || *isCopy == JNI_TRUE, "GetStringChars didn't return a copy as expected");
 
     size_t len = UNCHECKED()->GetStringLength(env,str) + 1; // + 1 for NULL termination
-    jint* tagLocation = (jint*) AllocateHeap(len * sizeof(jchar) + sizeof(jint), "checked_jni_GetStringChars");
+    jint* tagLocation = (jint*) AllocateHeap(len * sizeof(jchar) + sizeof(jint), mtInternal);
     *tagLocation = STRING_TAG;
     jchar* newResult = (jchar*) (tagLocation + 1);
     memcpy(newResult, result, len * sizeof(jchar));
@@ -1378,13 +1378,13 @@
     assert (isCopy == NULL || *isCopy == JNI_TRUE, "GetStringUTFChars didn't return a copy as expected");
 
     size_t len = strlen(result) + 1; // + 1 for NULL termination
-    jint* tagLocation = (jint*) AllocateHeap(len + sizeof(jint), "checked_jni_GetStringUTFChars");
+    jint* tagLocation = (jint*) AllocateHeap(len + sizeof(jint), mtInternal);
     *tagLocation = STRING_UTF_TAG;
     char* newResult = (char*) (tagLocation + 1);
     strcpy(newResult, result);
     // Avoiding call to UNCHECKED()->ReleaseStringUTFChars() since that will fire unexpected dtrace probes
     // Note that the dtrace arguments for the allocated memory will not match up with this solution.
-    FreeHeap((char*)result);
+    FreeHeap((char*)result, mtInternal);
 
     functionExit(env);
     return newResult;
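The checked-JNI buffers above use a tag-prefix layout; sketched in isolation with a hypothetical payload_len:

  // Sketch only: [ jint tag | payload ... ]; callers see only the payload.
  jint* tagLocation = (jint*) AllocateHeap(sizeof(jint) + payload_len, mtInternal);
  *tagLocation = STRING_UTF_TAG;               // marks the buffer kind
  char* payload = (char*) (tagLocation + 1);   // handed back to user code
  // On release: step back one jint, verify the tag, then free the block.
  FreeHeap((char*) (((jint*) payload) - 1), mtInternal);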
--- a/src/share/vm/prims/jvm.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvm.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -35,6 +35,7 @@
 #include "oops/fieldStreams.hpp"
 #include "oops/instanceKlass.hpp"
 #include "oops/objArrayKlass.hpp"
+#include "oops/methodOop.hpp"
 #include "prims/jvm.h"
 #include "prims/jvm_misc.hpp"
 #include "prims/jvmtiExport.hpp"
@@ -345,9 +346,13 @@
   // Do this after setting user properties to prevent people
   // from setting the value with a -D option, as requested.
   {
-    char as_chars[256];
-    jio_snprintf(as_chars, sizeof(as_chars), INTX_FORMAT, MaxDirectMemorySize);
-    PUTPROP(props, "sun.nio.MaxDirectMemorySize", as_chars);
+    if (FLAG_IS_DEFAULT(MaxDirectMemorySize)) {
+      PUTPROP(props, "sun.nio.MaxDirectMemorySize", "-1");
+    } else {
+      char as_chars[256];
+      jio_snprintf(as_chars, sizeof(as_chars), UINTX_FORMAT, MaxDirectMemorySize);
+      PUTPROP(props, "sun.nio.MaxDirectMemorySize", as_chars);
+    }
   }
 
   // JVM monitoring and management support
@@ -1301,9 +1306,6 @@
 // Inner class reflection ///////////////////////////////////////////////////////////////////////////////
 
 JVM_ENTRY(jobjectArray, JVM_GetDeclaredClasses(JNIEnv *env, jclass ofClass))
-  const int inner_class_info_index = 0;
-  const int outer_class_info_index = 1;
-
   JvmtiVMObjectAllocEventCollector oam;
   // ofClass is a reference to a java_lang_Class object. The mirror object
   // of an instanceKlass
@@ -1315,26 +1317,26 @@
   }
 
   instanceKlassHandle k(thread, java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(ofClass)));
-
-  if (k->inner_classes()->length() == 0) {
+  InnerClassesIterator iter(k);
+
+  if (iter.length() == 0) {
     // Neither an inner nor outer class
     oop result = oopFactory::new_objArray(SystemDictionary::Class_klass(), 0, CHECK_NULL);
     return (jobjectArray)JNIHandles::make_local(env, result);
   }
 
   // find inner class info
-  typeArrayHandle    icls(thread, k->inner_classes());
   constantPoolHandle cp(thread, k->constants());
-  int length = icls->length();
+  int length = iter.length();
 
   // Allocate temp. result array
   objArrayOop r = oopFactory::new_objArray(SystemDictionary::Class_klass(), length/4, CHECK_NULL);
   objArrayHandle result (THREAD, r);
   int members = 0;
 
-  for(int i = 0; i < length; i += 4) {
-    int ioff = icls->ushort_at(i + inner_class_info_index);
-    int ooff = icls->ushort_at(i + outer_class_info_index);
+  for (; !iter.done(); iter.next()) {
+    int ioff = iter.inner_class_info_index();
+    int ooff = iter.outer_class_info_index();
 
     if (ioff != 0 && ooff != 0) {
       // Check to see if the name matches the class we're looking for
@@ -1392,17 +1394,13 @@
                                                      bool* inner_is_member,
                                                      TRAPS) {
   Thread* thread = THREAD;
-  const int inner_class_info_index = inner_class_inner_class_info_offset;
-  const int outer_class_info_index = inner_class_outer_class_info_offset;
-
-  if (k->inner_classes()->length() == 0) {
+  InnerClassesIterator iter(k);
+  if (iter.length() == 0) {
     // No inner class info => no declaring class
     return NULL;
   }
 
-  typeArrayHandle i_icls(thread, k->inner_classes());
   constantPoolHandle i_cp(thread, k->constants());
-  int i_length = i_icls->length();
 
   bool found = false;
   klassOop ok;
@@ -1410,10 +1408,10 @@
   *inner_is_member = false;
 
   // Find inner_klass attribute
-  for (int i = 0; i < i_length && !found; i += inner_class_next_offset) {
-    int ioff = i_icls->ushort_at(i + inner_class_info_index);
-    int ooff = i_icls->ushort_at(i + outer_class_info_index);
-    int noff = i_icls->ushort_at(i + inner_class_inner_name_offset);
+  for (; !iter.done() && !found; iter.next()) {
+    int ioff = iter.inner_class_info_index();
+    int ooff = iter.outer_class_info_index();
+    int noff = iter.inner_name_index();
     if (ioff != 0) {
       // Check to see if the name matches the class we're looking for
       // before attempting to find the class.
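Both rewrites above drop the hand-maintained quadruple offsets in favor of the new InnerClassesIterator. A minimal read-only sketch of the iterator as used in these hunks; note that length() returns the raw u2 count (four per entry), matching the old array length, which is why callers still divide by four where an entry count is needed:

    // Sketch: walk every InnerClasses entry of instanceKlassHandle 'k'.
    // Each iterator position is one (inner_class_info, outer_class_info,
    // inner_name, inner_access_flags) quadruple of the attribute.
    InnerClassesIterator iter(k);
    for (; !iter.done(); iter.next()) {
      int ioff = iter.inner_class_info_index();  // CP index, 0 if absent
      int ooff = iter.outer_class_info_index();  // CP index, 0 if absent
      int noff = iter.inner_name_index();        // CP index of simple name
      if (ioff == 0) continue;  // the JVM spec allows zeroed entries
      // resolve ioff/ooff against k->constants(), as the code above does
    }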
@@ -2186,11 +2184,11 @@
   klassOop k = java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(cls));
   k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
   oop method = instanceKlass::cast(k)->methods()->obj_at(method_index);
-  typeArrayOop extable = methodOop(method)->exception_table();
-  entry->start_pc   = extable->int_at(entry_index * 4);
-  entry->end_pc     = extable->int_at(entry_index * 4 + 1);
-  entry->handler_pc = extable->int_at(entry_index * 4 + 2);
-  entry->catchType  = extable->int_at(entry_index * 4 + 3);
+  ExceptionTable extable((methodOop(method)));
+  entry->start_pc   = extable.start_pc(entry_index);
+  entry->end_pc     = extable.end_pc(entry_index);
+  entry->handler_pc = extable.handler_pc(entry_index);
+  entry->catchType  = extable.catch_type_index(entry_index);
 JVM_END
 
 
@@ -2199,7 +2197,7 @@
   klassOop k = java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(cls));
   k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
   oop method = instanceKlass::cast(k)->methods()->obj_at(method_index);
-  return methodOop(method)->exception_table()->length() / 4;
+  return methodOop(method)->exception_table_length();
 JVM_END
 
 
@@ -2243,7 +2241,7 @@
   klassOop k = java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(cls));
   k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
   oop method = instanceKlass::cast(k)->methods()->obj_at(method_index);
-  return methodOop(method)->max_stack();
+  return methodOop(method)->verifier_max_stack();
 JVM_END
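The last three hunks in this file move jvm.cpp off the raw exception_table() int array (four ints per entry, indexed by hand with a *4 stride) and onto the ExceptionTable accessor class, with exception_table_length() and verifier_max_stack() as companion helpers on methodOop. A sketch of filling one entry, assuming the JVM_ExceptionTableEntryType layout used by JVM_GetMethodIxExceptionTableEntry above ('m' is a methodOop, 'index' a valid entry number):

    // Sketch: copy entry 'index' of methodOop 'm' into '*entry'.
    ExceptionTable extable(m);                 // accessor over m's table
    assert(index >= 0 && index < extable.length(), "entry index in range");
    entry->start_pc   = extable.start_pc(index);         // first protected bci
    entry->end_pc     = extable.end_pc(index);           // exclusive end bci
    entry->handler_pc = extable.handler_pc(index);       // handler entry bci
    entry->catchType  = extable.catch_type_index(index); // CP index, 0 = catch-all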
 
 
--- a/src/share/vm/prims/jvm.h	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvm.h	Sat Oct 20 00:08:35 2012 -0700
@@ -634,7 +634,7 @@
 JVM_AssertionStatusDirectives(JNIEnv *env, jclass unused);
 
 /*
- * sun.misc.AtomicLong
+ * java.util.concurrent.atomic.AtomicLong
  */
 JNIEXPORT jboolean JNICALL
 JVM_SupportsCX8(void);
--- a/src/share/vm/prims/jvmtiClassFileReconstituter.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiClassFileReconstituter.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -191,15 +191,14 @@
     }
   }
 
-  typeArrayHandle exception_table(thread(), const_method->exception_table());
-  int exception_table_length = exception_table->length();
-  int exception_table_entries = exception_table_length / 4;
+  ExceptionTable exception_table(method());
+  int exception_table_length = exception_table.length();
   int code_size = const_method->code_size();
   int size =
     2+2+4 +                                // max_stack, max_locals, code_length
     code_size +                            // code
     2 +                                    // exception_table_length
-    (2+2+2+2) * exception_table_entries +  // exception_table
+    (2+2+2+2) * exception_table_length +   // exception_table
     2 +                                    // attributes_count
     attr_size;                             // attributes
 
@@ -209,12 +208,12 @@
   write_u2(method->max_locals());
   write_u4(code_size);
   copy_bytecodes(method, (unsigned char*)writeable_address(code_size));
-  write_u2(exception_table_entries);
-  for (int index = 0; index < exception_table_length; ) {
-    write_u2(exception_table->int_at(index++));
-    write_u2(exception_table->int_at(index++));
-    write_u2(exception_table->int_at(index++));
-    write_u2(exception_table->int_at(index++));
+  write_u2(exception_table_length);
+  for (int index = 0; index < exception_table_length; index++) {
+    write_u2(exception_table.start_pc(index));
+    write_u2(exception_table.end_pc(index));
+    write_u2(exception_table.handler_pc(index));
+    write_u2(exception_table.catch_type_index(index));
   }
   write_u2(attr_count);
   if (line_num_cnt != 0) {
@@ -296,8 +295,8 @@
 
 // Compute the number of entries in the InnerClasses attribute
 u2 JvmtiClassFileReconstituter::inner_classes_attribute_length() {
-  typeArrayOop inner_class_list = ikh()->inner_classes();
-  return (inner_class_list == NULL) ? 0 : inner_class_list->length();
+  InnerClassesIterator iter(ikh());
+  return iter.length();
 }
 
 // Write an annotation attribute.  The VM stores them in raw form, so all we need
@@ -328,26 +327,20 @@
 // JVMSpec|     } classes[number_of_classes];
 // JVMSpec|   }
 void JvmtiClassFileReconstituter::write_inner_classes_attribute(int length) {
-  typeArrayOop inner_class_list = ikh()->inner_classes();
-  guarantee(inner_class_list != NULL && inner_class_list->length() == length,
+  InnerClassesIterator iter(ikh());
+  guarantee(iter.length() != 0 && iter.length() == length,
             "caller must check");
-  typeArrayHandle inner_class_list_h(thread(), inner_class_list);
-  assert (length % instanceKlass::inner_class_next_offset == 0, "just checking");
   u2 entry_count = length / instanceKlass::inner_class_next_offset;
   u4 size = 2 + entry_count * (2+2+2+2);
 
   write_attribute_name_index("InnerClasses");
   write_u4(size);
   write_u2(entry_count);
-  for (int i = 0; i < length; i += instanceKlass::inner_class_next_offset) {
-    write_u2(inner_class_list_h->ushort_at(
-                      i + instanceKlass::inner_class_inner_class_info_offset));
-    write_u2(inner_class_list_h->ushort_at(
-                      i + instanceKlass::inner_class_outer_class_info_offset));
-    write_u2(inner_class_list_h->ushort_at(
-                      i + instanceKlass::inner_class_inner_name_offset));
-    write_u2(inner_class_list_h->ushort_at(
-                      i + instanceKlass::inner_class_access_flags_offset));
+  for (; !iter.done(); iter.next()) {
+    write_u2(iter.inner_class_info_index());
+    write_u2(iter.outer_class_info_index());
+    write_u2(iter.inner_name_index());
+    write_u2(iter.inner_access_flags());
   }
 }
 
@@ -732,8 +725,8 @@
       case Bytecodes::_invokedynamic   :  // fall through
       case Bytecodes::_invokeinterface :
         assert(len == 3 ||
-               (code == Bytecodes::_invokeinterface && len ==5) ||
-               (code == Bytecodes::_invokedynamic   && len ==5),
+               (code == Bytecodes::_invokeinterface && len == 5) ||
+               (code == Bytecodes::_invokedynamic   && len == 5),
                "sanity check");
 
         int cpci = Bytes::get_native_u2(bcp+1);
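A worked check of the size arithmetic in the first hunk above: ExceptionTable::length() counts entries, and each entry is serialized as four u2 values, so the Code attribute body comes out as:

    // Sketch: byte size of the reconstituted Code attribute body.
    int size =
        2 + 2 + 4                            // max_stack, max_locals, code_length
      + code_size                            // the bytecodes themselves
      + 2                                    // exception_table_length (u2)
      + (2+2+2+2) * exception_table_length   // start/end/handler pc + catch type
      + 2                                    // attributes_count
      + attr_size;                           // nested attributes
    // e.g. 16 bytes of code, one handler, no nested attributes:
    // 8 + 16 + 2 + 8 + 2 + 0 = 36 bytes.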
--- a/src/share/vm/prims/jvmtiClassFileReconstituter.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiClassFileReconstituter.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -68,11 +68,11 @@
 
   ~JvmtiConstantPoolReconstituter() {
     if (_symmap != NULL) {
-      os::free(_symmap);
+      os::free(_symmap, mtClass);
       _symmap = NULL;
     }
     if (_classmap != NULL) {
-      os::free(_classmap);
+      os::free(_classmap, mtClass);
       _classmap = NULL;
     }
   }
--- a/src/share/vm/prims/jvmtiCodeBlobEvents.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiCodeBlobEvents.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -157,7 +157,7 @@
   assert(_global_code_blobs == NULL, "checking");
 
   // create the global list
-  _global_code_blobs = new (ResourceObj::C_HEAP) GrowableArray<JvmtiCodeBlobDesc*>(50,true);
+  _global_code_blobs = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<JvmtiCodeBlobDesc*>(50,true);
 
   // iterate over the stub code descriptors and put them in the list first.
   int index = 0;
@@ -247,7 +247,7 @@
     int pcds_in_method;
 
     pcds_in_method = (nm->scopes_pcs_end() - nm->scopes_pcs_begin());
-    map = NEW_C_HEAP_ARRAY(jvmtiAddrLocationMap, pcds_in_method);
+    map = NEW_C_HEAP_ARRAY(jvmtiAddrLocationMap, pcds_in_method, mtInternal);
 
     address scopes_data = nm->scopes_data_begin();
     for( pcd = nm->scopes_pcs_begin(); pcd < nm->scopes_pcs_end(); ++pcd ) {
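From here on, most of the churn is Native Memory Tracking (NMT) plumbing: each C-heap allocation now names a memory category (mtInternal throughout the JVMTI code), and the matching free must carry the same tag. A sketch of the array macros as used in this file:

    // Sketch: NMT-tagged C-heap array; alloc and free name the same category.
    jvmtiAddrLocationMap* map =
        NEW_C_HEAP_ARRAY(jvmtiAddrLocationMap, pcds_in_method, mtInternal);
    // ... fill in the map entries ...
    FREE_C_HEAP_ARRAY(jvmtiAddrLocationMap, map, mtInternal);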
--- a/src/share/vm/prims/jvmtiEnv.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiEnv.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1012,7 +1012,7 @@
 
   // growable array of jvmti monitors info on the C-heap
   GrowableArray<jvmtiMonitorStackDepthInfo*> *owned_monitors_list =
-      new (ResourceObj::C_HEAP) GrowableArray<jvmtiMonitorStackDepthInfo*>(1, true);
+      new (ResourceObj::C_HEAP, mtInternal) GrowableArray<jvmtiMonitorStackDepthInfo*>(1, true);
 
   uint32_t debug_bits = 0;
   if (is_thread_fully_suspended(java_thread, true, &debug_bits)) {
@@ -1057,7 +1057,7 @@
 
   // growable array of jvmti monitors info on the C-heap
   GrowableArray<jvmtiMonitorStackDepthInfo*> *owned_monitors_list =
-         new (ResourceObj::C_HEAP) GrowableArray<jvmtiMonitorStackDepthInfo*>(1, true);
+         new (ResourceObj::C_HEAP, mtInternal) GrowableArray<jvmtiMonitorStackDepthInfo*>(1, true);
 
   uint32_t debug_bits = 0;
   if (is_thread_fully_suspended(java_thread, true, &debug_bits)) {
--- a/src/share/vm/prims/jvmtiEnvBase.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiEnvBase.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -381,7 +381,7 @@
     _native_method_prefixes = NULL;
   } else {
     // there are prefixes, allocate an array to hold them, and fill it
-    char** new_prefixes = (char**)os::malloc((prefix_count) * sizeof(char*));
+    char** new_prefixes = (char**)os::malloc((prefix_count) * sizeof(char*), mtInternal);
     if (new_prefixes == NULL) {
       return JVMTI_ERROR_OUT_OF_MEMORY;
     }
@@ -1150,7 +1150,7 @@
 
 ResourceTracker::ResourceTracker(JvmtiEnv* env) {
   _env = env;
-  _allocations = new (ResourceObj::C_HEAP) GrowableArray<unsigned char*>(20, true);
+  _allocations = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<unsigned char*>(20, true);
   _failed = false;
 }
 ResourceTracker::~ResourceTracker() {
--- a/src/share/vm/prims/jvmtiEnvBase.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiEnvBase.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -52,7 +52,7 @@
 // done via JNI GetEnv() call. Multiple attachments are
 // allowed in jvmti.
 
-class JvmtiEnvBase : public CHeapObj {
+class JvmtiEnvBase : public CHeapObj<mtInternal> {
 
  private:
 
@@ -175,7 +175,7 @@
     if (size == 0) {
       *mem_ptr = NULL;
     } else {
-      *mem_ptr = (unsigned char *)os::malloc((size_t)size);
+      *mem_ptr = (unsigned char *)os::malloc((size_t)size, mtInternal);
       if (*mem_ptr == NULL) {
         return JVMTI_ERROR_OUT_OF_MEMORY;
       }
@@ -185,7 +185,7 @@
 
   jvmtiError deallocate(unsigned char* mem) {
     if (mem != NULL) {
-      os::free(mem);
+      os::free(mem, mtInternal);
     }
     return JVMTI_ERROR_NONE;
   }
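The same category threading shows up in two more forms here: os::malloc/os::free take a trailing memory-type argument, and CHeapObj is now a class template parameterized by category, so each heap-allocated JVMTI class states its NMT bucket in its base-class list. A combined sketch (the class name is hypothetical; the calls are the ones used above):

    // Sketch: raw C-heap allocation with an NMT category ...
    unsigned char* mem = (unsigned char*)os::malloc((size_t)size, mtInternal);
    if (mem == NULL) return JVMTI_ERROR_OUT_OF_MEMORY;
    // ... use mem ...
    os::free(mem, mtInternal);   // tag must match the allocation

    // ... and a class tracked under the same bucket (hypothetical name):
    class JvmtiScratchBuffer : public CHeapObj<mtInternal> {
      // operator new/delete are inherited from CHeapObj<mtInternal>
    };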
--- a/src/share/vm/prims/jvmtiEnvThreadState.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiEnvThreadState.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -95,7 +95,7 @@
 //
 
 JvmtiFramePops::JvmtiFramePops() {
-  _pops = new (ResourceObj::C_HEAP) GrowableArray<int> (2, true);
+  _pops = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int> (2, true);
 }
 
 JvmtiFramePops::~JvmtiFramePops() {
--- a/src/share/vm/prims/jvmtiEnvThreadState.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiEnvThreadState.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -76,7 +76,7 @@
 // It records which frames on a thread's stack should post frame_pop events when they're exited.
 //
 
-class JvmtiFramePops : public CHeapObj {
+class JvmtiFramePops : public CHeapObj<mtInternal> {
  private:
   GrowableArray<int>* _pops;
 
@@ -107,7 +107,7 @@
 // 3: Location of last executed instruction, used to filter out duplicate
 //    events due to instruction rewriting.
 
-class JvmtiEnvThreadState : public CHeapObj {
+class JvmtiEnvThreadState : public CHeapObj<mtInternal> {
 private:
   friend class JvmtiEnv;
   JavaThread        *_thread;
--- a/src/share/vm/prims/jvmtiExport.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiExport.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -617,7 +617,7 @@
       if (caching_needed && *_cached_data_ptr == NULL) {
         // data has been changed by the new retransformable agent
         // and it hasn't already been cached, cache it
-        *_cached_data_ptr = (unsigned char *)os::malloc(_curr_len);
+        *_cached_data_ptr = (unsigned char *)os::malloc(_curr_len, mtInternal);
         memcpy(*_cached_data_ptr, _curr_data, _curr_len);
         *_cached_length_ptr = _curr_len;
       }
@@ -720,7 +720,7 @@
     JvmtiCodeBlobEvents::build_jvmti_addr_location_map(nm, &_map, &_map_length);
   }
   ~JvmtiCompiledMethodLoadEventMark() {
-     FREE_C_HEAP_ARRAY(jvmtiAddrLocationMap, _map);
+     FREE_C_HEAP_ARRAY(jvmtiAddrLocationMap, _map, mtInternal);
   }
 
   jint code_size() { return _code_size; }
@@ -2323,7 +2323,7 @@
 // register a stub
 void JvmtiDynamicCodeEventCollector::register_stub(const char* name, address start, address end) {
  if (_code_blobs == NULL) {
-   _code_blobs = new (ResourceObj::C_HEAP) GrowableArray<JvmtiCodeBlobDesc*>(1,true);
+   _code_blobs = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<JvmtiCodeBlobDesc*>(1,true);
  }
  _code_blobs->append(new JvmtiCodeBlobDesc(name, start, end));
 }
@@ -2357,7 +2357,7 @@
 void JvmtiVMObjectAllocEventCollector::record_allocation(oop obj) {
   assert(is_enabled(), "VM object alloc event collector is not enabled");
   if (_allocated == NULL) {
-    _allocated = new (ResourceObj::C_HEAP) GrowableArray<oop>(1, true);
+    _allocated = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<oop>(1, true);
   }
   _allocated->push(obj);
 }
--- a/src/share/vm/prims/jvmtiExport.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiExport.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -350,7 +350,7 @@
 
 // Support class used by JvmtiDynamicCodeEventCollector and others. It
 // describes a single code blob by name and address range.
-class JvmtiCodeBlobDesc : public CHeapObj {
+class JvmtiCodeBlobDesc : public CHeapObj<mtInternal> {
  private:
   char _name[64];
   address _code_begin;
--- a/src/share/vm/prims/jvmtiExtensions.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiExtensions.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -49,8 +49,8 @@
 // event. The function and the event are registered here.
 //
 void JvmtiExtensions::register_extensions() {
-  _ext_functions = new (ResourceObj::C_HEAP) GrowableArray<jvmtiExtensionFunctionInfo*>(1,true);
-  _ext_events = new (ResourceObj::C_HEAP) GrowableArray<jvmtiExtensionEventInfo*>(1,true);
+  _ext_functions = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<jvmtiExtensionFunctionInfo*>(1,true);
+  _ext_events = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<jvmtiExtensionEventInfo*>(1,true);
 
   // register our extension function
   static jvmtiParamInfo func_params[] = {
--- a/src/share/vm/prims/jvmtiGetLoadedClasses.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiGetLoadedClasses.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -152,7 +152,7 @@
 
   // Public methods that get called within the scope of the closure
   void allocate() {
-    _list = NEW_C_HEAP_ARRAY(Handle, _count);
+    _list = NEW_C_HEAP_ARRAY(Handle, _count, mtInternal);
     assert(_list != NULL, "Out of memory");
     if (_list == NULL) {
       _count = 0;
--- a/src/share/vm/prims/jvmtiImpl.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiImpl.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -98,8 +98,8 @@
 void GrowableCache::recache() {
   int len = _elements->length();
 
-  FREE_C_HEAP_ARRAY(address, _cache);
-  _cache = NEW_C_HEAP_ARRAY(address,len+1);
+  FREE_C_HEAP_ARRAY(address, _cache, mtInternal);
+  _cache = NEW_C_HEAP_ARRAY(address,len+1, mtInternal);
 
   for (int i=0; i<len; i++) {
     _cache[i] = _elements->at(i)->getCacheValue();
@@ -142,13 +142,13 @@
 GrowableCache::~GrowableCache() {
   clear();
   delete _elements;
-  FREE_C_HEAP_ARRAY(address, _cache);
+  FREE_C_HEAP_ARRAY(address, _cache, mtInternal);
 }
 
 void GrowableCache::initialize(void *this_obj, void listener_fun(void *, address*) ) {
   _this_obj       = this_obj;
   _listener_fun   = listener_fun;
-  _elements       = new (ResourceObj::C_HEAP) GrowableArray<GrowableElement*>(5,true);
+  _elements       = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<GrowableElement*>(5,true);
   recache();
 }
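C-heap GrowableArrays follow the same pattern: the ResourceObj::C_HEAP placement form of operator new now also takes the memory type, while the trailing 'true' argument still marks the backing store itself as C-heap rather than resource-area memory. A sketch mirroring the initialize()/recache() pair above:

    // Sketch: an NMT-tagged GrowableArray living entirely on the C heap.
    GrowableArray<GrowableElement*>* elems =
        new (ResourceObj::C_HEAP, mtInternal)
            GrowableArray<GrowableElement*>(5, true /* C-heap backing store */);
    // ... append/iterate as usual ...
    delete elems;  // as in ~GrowableCache() above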
 
--- a/src/share/vm/prims/jvmtiImpl.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiImpl.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -64,7 +64,7 @@
 // to update its pointer to the address cache.
 //
 
-class GrowableElement : public CHeapObj {
+class GrowableElement : public CHeapObj<mtInternal> {
 public:
   virtual address getCacheValue()          =0;
   virtual bool equals(GrowableElement* e)  =0;
@@ -130,7 +130,7 @@
 // Note   : typesafe wrapper for GrowableCache of JvmtiBreakpoint
 //
 
-class JvmtiBreakpointCache : public CHeapObj {
+class JvmtiBreakpointCache : public CHeapObj<mtInternal> {
 
 private:
   GrowableCache _cache;
@@ -258,7 +258,7 @@
 // CHeap allocated to emphasize its similarity to JvmtiFramePops.
 //
 
-class JvmtiBreakpoints : public CHeapObj {
+class JvmtiBreakpoints : public CHeapObj<mtInternal> {
 private:
 
   JvmtiBreakpointCache _bps;
@@ -496,7 +496,7 @@
 class JvmtiDeferredEventQueue : AllStatic {
   friend class JvmtiDeferredEvent;
  private:
-  class QueueNode : public CHeapObj {
+  class QueueNode : public CHeapObj<mtInternal> {
    private:
     JvmtiDeferredEvent _event;
     QueueNode* _next;
--- a/src/share/vm/prims/jvmtiRawMonitor.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiRawMonitor.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -27,7 +27,7 @@
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/thread.hpp"
 
-GrowableArray<JvmtiRawMonitor*> *JvmtiPendingMonitors::_monitors = new (ResourceObj::C_HEAP) GrowableArray<JvmtiRawMonitor*>(1,true);
+GrowableArray<JvmtiRawMonitor*> *JvmtiPendingMonitors::_monitors = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<JvmtiRawMonitor*>(1,true);
 
 void JvmtiPendingMonitors::transition_raw_monitors() {
   assert((Threads::number_of_threads()==1),
@@ -53,7 +53,7 @@
 
 JvmtiRawMonitor::JvmtiRawMonitor(const char *name) {
 #ifdef ASSERT
-  _name = strcpy(NEW_C_HEAP_ARRAY(char, strlen(name) + 1), name);
+  _name = strcpy(NEW_C_HEAP_ARRAY(char, strlen(name) + 1, mtInternal), name);
 #else
   _name = NULL;
 #endif
--- a/src/share/vm/prims/jvmtiRedefineClasses.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiRedefineClasses.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -831,7 +831,7 @@
 jvmtiError VM_RedefineClasses::load_new_class_versions(TRAPS) {
   // For consistency allocate memory using os::malloc wrapper.
   _scratch_classes = (instanceKlassHandle *)
-    os::malloc(sizeof(instanceKlassHandle) * _class_count);
+    os::malloc(sizeof(instanceKlassHandle) * _class_count, mtInternal);
   if (_scratch_classes == NULL) {
     return JVMTI_ERROR_OUT_OF_MEMORY;
   }
@@ -2400,44 +2400,33 @@
   // new constant indices as needed. The inner classes info is a
   // quadruple:
   // (inner_class_info, outer_class_info, inner_name, inner_access_flags)
-  typeArrayOop inner_class_list = scratch_class->inner_classes();
-  int icl_length = (inner_class_list == NULL) ? 0 : inner_class_list->length();
-  if (icl_length > 0) {
-    typeArrayHandle inner_class_list_h(THREAD, inner_class_list);
-    for (int i = 0; i < icl_length;
-         i += instanceKlass::inner_class_next_offset) {
-      int cur_index = inner_class_list_h->ushort_at(i
-                        + instanceKlass::inner_class_inner_class_info_offset);
-      if (cur_index == 0) {
-        continue;  // JVM spec. allows null inner class refs so skip it
-      }
-      int new_index = find_new_index(cur_index);
-      if (new_index != 0) {
-        RC_TRACE_WITH_THREAD(0x00080000, THREAD,
-          ("inner_class_info change: %d to %d", cur_index, new_index));
-        inner_class_list_h->ushort_at_put(i
-          + instanceKlass::inner_class_inner_class_info_offset, new_index);
-      }
-      cur_index = inner_class_list_h->ushort_at(i
-                    + instanceKlass::inner_class_outer_class_info_offset);
-      new_index = find_new_index(cur_index);
-      if (new_index != 0) {
-        RC_TRACE_WITH_THREAD(0x00080000, THREAD,
-          ("outer_class_info change: %d to %d", cur_index, new_index));
-        inner_class_list_h->ushort_at_put(i
-          + instanceKlass::inner_class_outer_class_info_offset, new_index);
-      }
-      cur_index = inner_class_list_h->ushort_at(i
-                    + instanceKlass::inner_class_inner_name_offset);
-      new_index = find_new_index(cur_index);
-      if (new_index != 0) {
-        RC_TRACE_WITH_THREAD(0x00080000, THREAD,
-          ("inner_name change: %d to %d", cur_index, new_index));
-        inner_class_list_h->ushort_at_put(i
-          + instanceKlass::inner_class_inner_name_offset, new_index);
-      }
-    } // end for each inner class
-  } // end if we have inner classes
+  InnerClassesIterator iter(scratch_class);
+  for (; !iter.done(); iter.next()) {
+    int cur_index = iter.inner_class_info_index();
+    if (cur_index == 0) {
+      continue;  // JVM spec. allows null inner class refs so skip it
+    }
+    int new_index = find_new_index(cur_index);
+    if (new_index != 0) {
+      RC_TRACE_WITH_THREAD(0x00080000, THREAD,
+        ("inner_class_info change: %d to %d", cur_index, new_index));
+      iter.set_inner_class_info_index(new_index);
+    }
+    cur_index = iter.outer_class_info_index();
+    new_index = find_new_index(cur_index);
+    if (new_index != 0) {
+      RC_TRACE_WITH_THREAD(0x00080000, THREAD,
+        ("outer_class_info change: %d to %d", cur_index, new_index));
+      iter.set_outer_class_info_index(new_index);
+    }
+    cur_index = iter.inner_name_index();
+    new_index = find_new_index(cur_index);
+    if (new_index != 0) {
+      RC_TRACE_WITH_THREAD(0x00080000, THREAD,
+        ("inner_name change: %d to %d", cur_index, new_index));
+      iter.set_inner_name_index(new_index);
+    }
+  } // end for each inner class
 
   // Attach each method in klass to the new constant pool and update
   // to use new constant pool indices as needed:
@@ -2489,23 +2478,17 @@
     // to use new constant pool indices as needed. The exception table
     // holds quadruple entries of the form:
     //   (beg_bci, end_bci, handler_bci, klass_index)
-    const int beg_bci_offset     = 0;
-    const int end_bci_offset     = 1;
-    const int handler_bci_offset = 2;
-    const int klass_index_offset = 3;
-    const int entry_size         = 4;
-
-    typeArrayHandle ex_table (THREAD, method->exception_table());
-    int ext_length = ex_table->length();
-    assert(ext_length % entry_size == 0, "exception table format has changed");
-
-    for (int j = 0; j < ext_length; j += entry_size) {
-      int cur_index = ex_table->int_at(j + klass_index_offset);
+
+    ExceptionTable ex_table(method());
+    int ext_length = ex_table.length();
+
+    for (int j = 0; j < ext_length; j++) {
+      int cur_index = ex_table.catch_type_index(j);
       int new_index = find_new_index(cur_index);
       if (new_index != 0) {
         RC_TRACE_WITH_THREAD(0x00080000, THREAD,
           ("ext-klass_index change: %d to %d", cur_index, new_index));
-        ex_table->int_at_put(j + klass_index_offset, new_index);
+        ex_table.set_catch_type_index(j, new_index);
       }
     } // end for each exception table entry
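Both loops above preserve the redefinition logic exactly while routing reads and writes through the new accessors, so every remap collapses to the same three-step pattern (find_new_index() is this file's old-to-new constant-pool mapping; zero means no remapping is recorded):

    // Sketch: remap one constant-pool index through a getter/setter pair,
    // e.g. catch_type_index / set_catch_type_index on an ExceptionTable.
    int cur_index = ex_table.catch_type_index(j);
    int new_index = find_new_index(cur_index);   // 0 => leave entry alone
    if (new_index != 0) {
      ex_table.set_catch_type_index(j, new_index);
    }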
 
--- a/src/share/vm/prims/jvmtiTagMap.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiTagMap.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -55,7 +55,7 @@
 // and the tag value. In addition an entry includes a next pointer which
 // is used to chain entries together.
 
-class JvmtiTagHashmapEntry : public CHeapObj {
+class JvmtiTagHashmapEntry : public CHeapObj<mtInternal> {
  private:
   friend class JvmtiTagMap;
 
@@ -106,7 +106,7 @@
 // entries. It also provides a function to iterate over all entries
 // in the hashmap.
 
-class JvmtiTagHashmap : public CHeapObj {
+class JvmtiTagHashmap : public CHeapObj<mtInternal> {
  private:
   friend class JvmtiTagMap;
 
@@ -150,7 +150,7 @@
     _resize_threshold = (int)(_load_factor * _size);
     _resizing_enabled = true;
     size_t s = initial_size * sizeof(JvmtiTagHashmapEntry*);
-    _table = (JvmtiTagHashmapEntry**)os::malloc(s);
+    _table = (JvmtiTagHashmapEntry**)os::malloc(s, mtInternal);
     if (_table == NULL) {
       vm_exit_out_of_memory(s, "unable to allocate initial hashtable for jvmti object tags");
     }
@@ -188,7 +188,7 @@
 
     // allocate new table
     size_t s = new_size * sizeof(JvmtiTagHashmapEntry*);
-    JvmtiTagHashmapEntry** new_table = (JvmtiTagHashmapEntry**)os::malloc(s);
+    JvmtiTagHashmapEntry** new_table = (JvmtiTagHashmapEntry**)os::malloc(s, mtInternal);
     if (new_table == NULL) {
       warning("unable to allocate larger hashtable for jvmti object tags");
       set_resizing_enabled(false);
@@ -585,7 +585,7 @@
     _o = klassOop_if_java_lang_Class(o);
 
     // object size
-    _obj_size = _o->size() * wordSize;
+    _obj_size = (jlong)_o->size() * wordSize;
 
     // record the context
     _tag_map = tag_map;
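The (jlong) cast above is an overflow fix rather than a cosmetic one: size() yields a word count in a 32-bit int, and on a 32-bit VM the multiply by wordSize happens in 32 bits before the widening assignment, so sizes past 2 GB wrap. Forcing the left operand to jlong first keeps the arithmetic in 64 bits:

    // Sketch of the failure mode on a 32-bit VM (wordSize == 4):
    int   words = 800 * M;                  // ~800M words = ~3.2 GB object
    jlong bad   = words * wordSize;         // 32-bit multiply wraps, then widens
    jlong good  = (jlong)words * wordSize;  // widened first, exact result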
@@ -776,7 +776,7 @@
 // For each field it holds the field index (as defined by the JVMTI specification),
 // the field type, and the offset.
 
-class ClassFieldDescriptor: public CHeapObj {
+class ClassFieldDescriptor: public CHeapObj<mtInternal> {
  private:
   int _field_index;
   int _field_offset;
@@ -790,7 +790,7 @@
   int field_offset() const  { return _field_offset; }
 };
 
-class ClassFieldMap: public CHeapObj {
+class ClassFieldMap: public CHeapObj<mtInternal> {
  private:
   enum {
     initial_field_count = 5
@@ -821,7 +821,8 @@
 };
 
 ClassFieldMap::ClassFieldMap() {
-  _fields = new (ResourceObj::C_HEAP) GrowableArray<ClassFieldDescriptor*>(initial_field_count, true);
+  _fields = new (ResourceObj::C_HEAP, mtInternal)
+    GrowableArray<ClassFieldDescriptor*>(initial_field_count, true);
 }
 
 ClassFieldMap::~ClassFieldMap() {
@@ -892,7 +893,7 @@
 // heap iteration and avoid creating a field map for each object in the heap
 // (only need to create the map when the first instance of a class is encountered).
 //
-class JvmtiCachedClassFieldMap : public CHeapObj {
+class JvmtiCachedClassFieldMap : public CHeapObj<mtInternal> {
  private:
    enum {
      initial_class_count = 200
@@ -957,7 +958,8 @@
 // record that the given instanceKlass is caching a field map
 void JvmtiCachedClassFieldMap::add_to_class_list(instanceKlass* ik) {
   if (_class_list == NULL) {
-    _class_list = new (ResourceObj::C_HEAP) GrowableArray<instanceKlass*>(initial_class_count, true);
+    _class_list = new (ResourceObj::C_HEAP, mtInternal)
+      GrowableArray<instanceKlass*>(initial_class_count, true);
   }
   _class_list->push(ik);
 }
@@ -1160,7 +1162,7 @@
 
     // get offset and field value
     int offset = field->field_offset();
-    address addr = (address)k + offset;
+    address addr = (address)k->java_mirror() + offset;
     jvalue value;
     copy_to_jvalue(&value, addr, value_type);
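The addr fix above accounts for static fields living in the java.lang.Class mirror rather than inside the klassOop itself in this VM layout, so a static field's offset must be applied to k->java_mirror(); the old (address)k + offset read unrelated klass metadata. A sketch of the corrected read, using the same helpers as the hunk:

    // Sketch: read one static field; statics are laid out in the mirror.
    int offset   = field->field_offset();              // offset into the mirror
    address addr = (address)k->java_mirror() + offset; // not (address)k + offset
    jvalue value;
    copy_to_jvalue(&value, addr, value_type);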
 
@@ -1526,8 +1528,8 @@
     _env = env;
     _tags = (jlong*)tags;
     _tag_count = tag_count;
-    _object_results = new (ResourceObj::C_HEAP) GrowableArray<jobject>(1,true);
-    _tag_results = new (ResourceObj::C_HEAP) GrowableArray<uint64_t>(1,true);
+    _object_results = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<jobject>(1,true);
+    _tag_results = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<uint64_t>(1,true);
   }
 
   ~TagObjectCollector() {
@@ -1672,8 +1674,8 @@
   Universe::heap()->ensure_parsability(false);  // no need to retire TLABs
 
   // create stacks for interesting headers
-  _saved_mark_stack = new (ResourceObj::C_HEAP) GrowableArray<markOop>(4000, true);
-  _saved_oop_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true);
+  _saved_mark_stack = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<markOop>(4000, true);
+  _saved_oop_stack = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<oop>(4000, true);
 
   if (UseBiasedLocking) {
     BiasedLocking::preserve_marks();
@@ -2712,7 +2714,7 @@
   bool _reporting_string_values;
 
   GrowableArray<oop>* create_visit_stack() {
-    return new (ResourceObj::C_HEAP) GrowableArray<oop>(initial_visit_stack_size, true);
+    return new (ResourceObj::C_HEAP, mtInternal) GrowableArray<oop>(initial_visit_stack_size, true);
   }
 
   // accessors
@@ -2923,7 +2925,8 @@
           oop entry;
           if (tag.is_string()) {
             entry = pool->resolved_string_at(i);
-            assert(java_lang_String::is_instance(entry), "must be string");
+            assert(java_lang_String::is_instance(entry) ||
+                   pool->is_pseudo_string_at(i), "must be string");
           } else {
             entry = Klass::cast(pool->resolved_klass_at(i))->java_mirror();
           }
@@ -3162,9 +3165,6 @@
         if (fr->is_entry_frame()) {
           last_entry_frame = fr;
         }
-        if (fr->is_ricochet_frame()) {
-          fr->oops_ricochet_do(blk, vf->register_map());
-        }
       }
 
       vf = vf->sender();
--- a/src/share/vm/prims/jvmtiTagMap.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiTagMap.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -41,7 +41,7 @@
 class JvmtiTagHashmapEntry;
 class JvmtiTagHashmapEntryClosure;
 
-class JvmtiTagMap :  public CHeapObj {
+class JvmtiTagMap :  public CHeapObj<mtInternal> {
  private:
 
   enum{
--- a/src/share/vm/prims/jvmtiThreadState.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiThreadState.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -72,7 +72,7 @@
 //
 // The Jvmti state for each thread (across all JvmtiEnv):
 // 1. Local table of enabled events.
-class JvmtiThreadState : public CHeapObj {
+class JvmtiThreadState : public CHeapObj<mtInternal> {
  private:
   friend class JvmtiEnv;
   JavaThread        *_thread;
--- a/src/share/vm/prims/jvmtiUtil.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/jvmtiUtil.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -40,7 +40,7 @@
   if (_single_threaded_resource_area == NULL) {
     // lazily create the single threaded resource area
    // pick a size which is not a standard one since the pools don't exist yet
-    _single_threaded_resource_area = new ResourceArea(Chunk::non_pool_size);
+    _single_threaded_resource_area = new (mtInternal) ResourceArea(Chunk::non_pool_size);
   }
   return _single_threaded_resource_area;
 }
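Arenas are tagged the same way, through an operator new overload that takes the memory type directly:

    // Sketch: lazily create an NMT-tagged ResourceArea.
    ResourceArea* ra = new (mtInternal) ResourceArea(Chunk::non_pool_size);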
--- a/src/share/vm/prims/methodHandleWalk.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2089 +0,0 @@
-/*
- * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "interpreter/rewriter.hpp"
-#include "memory/oopFactory.hpp"
-#include "prims/methodHandleWalk.hpp"
-
-/*
- * JSR 292 reference implementation: method handle structure analysis
- */
-
-#ifdef PRODUCT
-#define print_method_handle(mh) {}
-#else //PRODUCT
-extern "C" void print_method_handle(oop mh);
-#endif //PRODUCT
-
-// -----------------------------------------------------------------------------
-// MethodHandleChain
-
-void MethodHandleChain::set_method_handle(Handle mh, TRAPS) {
-  if (!java_lang_invoke_MethodHandle::is_instance(mh()))  lose("bad method handle", CHECK);
-
-  // set current method handle and unpack partially
-  _method_handle = mh;
-  _is_last       = false;
-  _is_bound      = false;
-  _arg_slot      = -1;
-  _arg_type      = T_VOID;
-  _conversion    = -1;
-  _last_invoke   = Bytecodes::_nop;  //arbitrary non-garbage
-
-  if (java_lang_invoke_DirectMethodHandle::is_instance(mh())) {
-    set_last_method(mh(), THREAD);
-    return;
-  }
-  if (java_lang_invoke_AdapterMethodHandle::is_instance(mh())) {
-    _conversion = AdapterMethodHandle_conversion();
-    assert(_conversion != -1, "bad conv value");
-    assert(java_lang_invoke_BoundMethodHandle::is_instance(mh()), "also BMH");
-  }
-  if (java_lang_invoke_BoundMethodHandle::is_instance(mh())) {
-    if (!is_adapter())          // keep AMH and BMH separate in this model
-      _is_bound = true;
-    _arg_slot = BoundMethodHandle_vmargslot();
-    oop target = MethodHandle_vmtarget_oop();
-    if (!is_bound() || java_lang_invoke_MethodHandle::is_instance(target)) {
-      _arg_type = compute_bound_arg_type(target, NULL, _arg_slot, CHECK);
-    } else if (target != NULL && target->is_method()) {
-      methodOop m = (methodOop) target;
-      _arg_type = compute_bound_arg_type(NULL, m, _arg_slot, CHECK);
-      set_last_method(mh(), CHECK);
-    } else {
-      _is_bound = false;  // lose!
-    }
-  }
-  if (is_bound() && _arg_type == T_VOID) {
-    lose("bad vmargslot", CHECK);
-  }
-  if (!is_bound() && !is_adapter()) {
-    lose("unrecognized MH type", CHECK);
-  }
-}
-
-
-void MethodHandleChain::set_last_method(oop target, TRAPS) {
-  _is_last = true;
-  KlassHandle receiver_limit; int flags = 0;
-  _last_method = MethodHandles::decode_method(target, receiver_limit, flags);
-  if ((flags & MethodHandles::_dmf_has_receiver) == 0)
-    _last_invoke = Bytecodes::_invokestatic;
-  else if ((flags & MethodHandles::_dmf_does_dispatch) == 0)
-    _last_invoke = Bytecodes::_invokespecial;
-  else if ((flags & MethodHandles::_dmf_from_interface) != 0)
-    _last_invoke = Bytecodes::_invokeinterface;
-  else
-    _last_invoke = Bytecodes::_invokevirtual;
-}
-
-
-BasicType MethodHandleChain::compute_bound_arg_type(oop target, methodOop m, int arg_slot, TRAPS) {
-  // There is no direct indication of whether the argument is primitive or not.
-  // It is implied by the _vmentry code, and by the MethodType of the target.
-  BasicType arg_type = T_VOID;
-  if (target != NULL) {
-    oop mtype = java_lang_invoke_MethodHandle::type(target);
-    int arg_num = MethodHandles::argument_slot_to_argnum(mtype, arg_slot);
-    if (arg_num >= 0) {
-      oop ptype = java_lang_invoke_MethodType::ptype(mtype, arg_num);
-      arg_type = java_lang_Class::as_BasicType(ptype);
-    }
-  } else if (m != NULL) {
-    // figure out the argument type from the slot
-    // FIXME: make this explicit in the MH
-    int cur_slot = m->size_of_parameters();
-    if (arg_slot >= cur_slot)
-      return T_VOID;
-    if (!m->is_static()) {
-      cur_slot -= type2size[T_OBJECT];
-      if (cur_slot == arg_slot)
-        return T_OBJECT;
-    }
-    ResourceMark rm(THREAD);
-    for (SignatureStream ss(m->signature()); !ss.is_done(); ss.next()) {
-      BasicType bt = ss.type();
-      cur_slot -= type2size[bt];
-      if (cur_slot <= arg_slot) {
-        if (cur_slot == arg_slot)
-          arg_type = bt;
-        break;
-      }
-    }
-  }
-  if (arg_type == T_ARRAY)
-    arg_type = T_OBJECT;
-  return arg_type;
-}
-
-
-void MethodHandleChain::lose(const char* msg, TRAPS) {
-  _lose_message = msg;
-#ifdef ASSERT
-  if (Verbose) {
-    tty->print_cr(INTPTR_FORMAT " lose: %s", _method_handle(), msg);
-    print();
-  }
-#endif
-  if (!THREAD->is_Java_thread() || ((JavaThread*)THREAD)->thread_state() != _thread_in_vm) {
-    // throw a preallocated exception
-    THROW_OOP(Universe::virtual_machine_error_instance());
-  }
-  THROW_MSG(vmSymbols::java_lang_InternalError(), msg);
-}
-
-
-#ifdef ASSERT
-static const char* adapter_ops[] = {
-  "retype_only"  ,
-  "retype_raw"   ,
-  "check_cast"   ,
-  "prim_to_prim" ,
-  "ref_to_prim"  ,
-  "prim_to_ref"  ,
-  "swap_args"    ,
-  "rot_args"     ,
-  "dup_args"     ,
-  "drop_args"    ,
-  "collect_args" ,
-  "spread_args"  ,
-  "fold_args"
-};
-
-static const char* adapter_op_to_string(int op) {
-  if (op >= 0 && op < (int)ARRAY_SIZE(adapter_ops))
-    return adapter_ops[op];
-  return "unknown_op";
-}
-
-void MethodHandleChain::print(oopDesc* m) {
-  HandleMark hm;
-  ResourceMark rm;
-  Handle mh(m);
-  EXCEPTION_MARK;
-  MethodHandleChain mhc(mh, THREAD);
-  if (HAS_PENDING_EXCEPTION) {
-    oop ex = THREAD->pending_exception();
-    CLEAR_PENDING_EXCEPTION;
-    ex->print();
-    return;
-  }
-  mhc.print();
-}
-
-
-void MethodHandleChain::print() {
-  EXCEPTION_MARK;
-  print_impl(THREAD);
-  if (HAS_PENDING_EXCEPTION) {
-    oop ex = THREAD->pending_exception();
-    CLEAR_PENDING_EXCEPTION;
-    ex->print();
-  }
-}
-
-void MethodHandleChain::print_impl(TRAPS) {
-  ResourceMark rm;
-
-  MethodHandleChain chain(_root, CHECK);
-  for (;;) {
-    tty->print(INTPTR_FORMAT ": ", chain.method_handle()());
-    if (chain.is_bound()) {
-      tty->print("bound: arg_type %s arg_slot %d",
-                 type2name(chain.bound_arg_type()),
-                 chain.bound_arg_slot());
-      oop o = chain.bound_arg_oop();
-      if (o != NULL) {
-        if (o->is_instance()) {
-          tty->print(" instance %s", o->klass()->klass_part()->internal_name());
-          if (java_lang_invoke_CountingMethodHandle::is_instance(o)) {
-            tty->print(" vmcount: %d", java_lang_invoke_CountingMethodHandle::vmcount(o));
-          }
-        } else {
-          o->print();
-        }
-      }
-      oop vmt = chain.vmtarget_oop();
-      if (vmt != NULL) {
-        if (vmt->is_method()) {
-          tty->print(" ");
-          methodOop(vmt)->print_short_name(tty);
-        } else if (java_lang_invoke_MethodHandle::is_instance(vmt)) {
-          tty->print(" method handle " INTPTR_FORMAT, vmt);
-        } else {
-          ShouldNotReachHere();
-        }
-      }
-    } else if (chain.is_adapter()) {
-      tty->print("adapter: arg_slot %d conversion op %s",
-                 chain.adapter_arg_slot(),
-                 adapter_op_to_string(chain.adapter_conversion_op()));
-      switch (chain.adapter_conversion_op()) {
-        case java_lang_invoke_AdapterMethodHandle::OP_RETYPE_ONLY:
-          if (java_lang_invoke_CountingMethodHandle::is_instance(chain.method_handle_oop())) {
-            tty->print(" vmcount: %d", java_lang_invoke_CountingMethodHandle::vmcount(chain.method_handle_oop()));
-          }
-        case java_lang_invoke_AdapterMethodHandle::OP_RETYPE_RAW:
-        case java_lang_invoke_AdapterMethodHandle::OP_CHECK_CAST:
-        case java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_PRIM:
-        case java_lang_invoke_AdapterMethodHandle::OP_REF_TO_PRIM:
-          break;
-
-        case java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF: {
-          tty->print(" src_type = %s", type2name(chain.adapter_conversion_src_type()));
-          break;
-        }
-
-        case java_lang_invoke_AdapterMethodHandle::OP_SWAP_ARGS:
-        case java_lang_invoke_AdapterMethodHandle::OP_ROT_ARGS: {
-          int dest_arg_slot = chain.adapter_conversion_vminfo();
-          tty->print(" dest_arg_slot %d type %s", dest_arg_slot, type2name(chain.adapter_conversion_src_type()));
-          break;
-        }
-
-        case java_lang_invoke_AdapterMethodHandle::OP_DUP_ARGS:
-        case java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS: {
-          int dup_slots = chain.adapter_conversion_stack_pushes();
-          tty->print(" pushes %d", dup_slots);
-          break;
-        }
-
-        case java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS:
-        case java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS: {
-          int coll_slots = chain.MethodHandle_vmslots();
-          tty->print(" coll_slots %d", coll_slots);
-          break;
-        }
-
-        case java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS: {
-          // Check the required length.
-          int spread_slots = 1 + chain.adapter_conversion_stack_pushes();
-          tty->print(" spread_slots %d", spread_slots);
-          break;
-        }
-
-        default:
-          tty->print_cr("bad adapter conversion");
-          break;
-      }
-    } else {
-      // DMH
-      tty->print("direct: ");
-      chain.last_method_oop()->print_short_name(tty);
-    }
-
-    tty->print(" (");
-    objArrayOop ptypes = java_lang_invoke_MethodType::ptypes(chain.method_type_oop());
-    for (int i = ptypes->length() - 1; i >= 0; i--) {
-      BasicType t = java_lang_Class::as_BasicType(ptypes->obj_at(i));
-      if (t == T_ARRAY) t = T_OBJECT;
-      tty->print("%c", type2char(t));
-      if (t == T_LONG || t == T_DOUBLE) tty->print("_");
-    }
-    tty->print(")");
-    BasicType rtype = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::rtype(chain.method_type_oop()));
-    if (rtype == T_ARRAY) rtype = T_OBJECT;
-    tty->print("%c", type2char(rtype));
-    tty->cr();
-    if (!chain.is_last()) {
-      chain.next(CHECK);
-    } else {
-      break;
-    }
-  }
-}
-#endif
-
-
-// -----------------------------------------------------------------------------
-// MethodHandleWalker
-
-Bytecodes::Code MethodHandleWalker::conversion_code(BasicType src, BasicType dest) {
-  if (is_subword_type(src)) {
-    src = T_INT;          // all subword src types act like int
-  }
-  if (src == dest) {
-    return Bytecodes::_nop;
-  }
-
-#define SRC_DEST(s,d) (((int)(s) << 4) + (int)(d))
-  switch (SRC_DEST(src, dest)) {
-  case SRC_DEST(T_INT, T_LONG):           return Bytecodes::_i2l;
-  case SRC_DEST(T_INT, T_FLOAT):          return Bytecodes::_i2f;
-  case SRC_DEST(T_INT, T_DOUBLE):         return Bytecodes::_i2d;
-  case SRC_DEST(T_INT, T_BYTE):           return Bytecodes::_i2b;
-  case SRC_DEST(T_INT, T_CHAR):           return Bytecodes::_i2c;
-  case SRC_DEST(T_INT, T_SHORT):          return Bytecodes::_i2s;
-
-  case SRC_DEST(T_LONG, T_INT):           return Bytecodes::_l2i;
-  case SRC_DEST(T_LONG, T_FLOAT):         return Bytecodes::_l2f;
-  case SRC_DEST(T_LONG, T_DOUBLE):        return Bytecodes::_l2d;
-
-  case SRC_DEST(T_FLOAT, T_INT):          return Bytecodes::_f2i;
-  case SRC_DEST(T_FLOAT, T_LONG):         return Bytecodes::_f2l;
-  case SRC_DEST(T_FLOAT, T_DOUBLE):       return Bytecodes::_f2d;
-
-  case SRC_DEST(T_DOUBLE, T_INT):         return Bytecodes::_d2i;
-  case SRC_DEST(T_DOUBLE, T_LONG):        return Bytecodes::_d2l;
-  case SRC_DEST(T_DOUBLE, T_FLOAT):       return Bytecodes::_d2f;
-  }
-#undef SRC_DEST
-
-  // cannot do it in one step, or at all
-  return Bytecodes::_illegal;
-}
-
-
-// -----------------------------------------------------------------------------
-// MethodHandleWalker::walk
-//
-MethodHandleWalker::ArgToken
-MethodHandleWalker::walk(TRAPS) {
-  ArgToken empty = ArgToken();  // Empty return value.
-
-  walk_incoming_state(CHECK_(empty));
-
-  for (;;) {
-    set_method_handle(chain().method_handle_oop());
-
-    assert(_outgoing_argc == argument_count_slow(), "empty slots under control");
-
-    if (chain().is_adapter()) {
-      int conv_op = chain().adapter_conversion_op();
-      int arg_slot = chain().adapter_arg_slot();
-
-      // Check that the arg_slot is valid.  In most cases it must be
-      // within range of the current arguments but there are some
-      // exceptions.  Those are sanity checked in their implementation
-      // below.
-      if ((arg_slot < 0 || arg_slot >= _outgoing.length()) &&
-          conv_op > java_lang_invoke_AdapterMethodHandle::OP_RETYPE_RAW &&
-          conv_op != java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS &&
-          conv_op != java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS) {
-        lose(err_msg("bad argument index %d", arg_slot), CHECK_(empty));
-      }
-
-      bool retain_original_args = false;  // used by fold/collect logic
-
-      // perform the adapter action
-      switch (conv_op) {
-      case java_lang_invoke_AdapterMethodHandle::OP_RETYPE_ONLY:
-        // No changes to arguments; pass the bits through.
-        break;
-
-      case java_lang_invoke_AdapterMethodHandle::OP_RETYPE_RAW: {
-        // To keep the verifier happy, emit bitwise ("raw") conversions as needed.
-        // See MethodHandles::same_basic_type_for_arguments for allowed conversions.
-        Handle incoming_mtype(THREAD, chain().method_type_oop());
-        Handle outgoing_mtype;
-        {
-          oop outgoing_mh_oop = chain().vmtarget_oop();
-          if (!java_lang_invoke_MethodHandle::is_instance(outgoing_mh_oop))
-            lose("outgoing target not a MethodHandle", CHECK_(empty));
-          outgoing_mtype = Handle(THREAD, java_lang_invoke_MethodHandle::type(outgoing_mh_oop));
-        }
-
-        int nptypes = java_lang_invoke_MethodType::ptype_count(outgoing_mtype());
-        if (nptypes != java_lang_invoke_MethodType::ptype_count(incoming_mtype()))
-          lose("incoming and outgoing parameter count do not agree", CHECK_(empty));
-
-        // Argument types.
-        for (int i = 0, slot = _outgoing.length() - 1; slot >= 0; slot--) {
-          if (arg_type(slot) == T_VOID)  continue;
-
-          klassOop  src_klass = NULL;
-          klassOop  dst_klass = NULL;
-          BasicType src = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::ptype(incoming_mtype(), i), &src_klass);
-          BasicType dst = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::ptype(outgoing_mtype(), i), &dst_klass);
-          retype_raw_argument_type(src, dst, slot, CHECK_(empty));
-          i++;  // We need to skip void slots at the top of the loop.
-        }
-
-        // Return type.
-        {
-          BasicType src = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::rtype(incoming_mtype()));
-          BasicType dst = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::rtype(outgoing_mtype()));
-          retype_raw_return_type(src, dst, CHECK_(empty));
-        }
-        break;
-      }
-
-      case java_lang_invoke_AdapterMethodHandle::OP_CHECK_CAST: {
-        // checkcast the Nth outgoing argument in place
-        klassOop dest_klass = NULL;
-        BasicType dest = java_lang_Class::as_BasicType(chain().adapter_arg_oop(), &dest_klass);
-        assert(dest == T_OBJECT, "");
-        ArgToken arg = _outgoing.at(arg_slot);
-        assert(dest == arg.basic_type(), "");
-        arg = make_conversion(T_OBJECT, dest_klass, Bytecodes::_checkcast, arg, CHECK_(empty));
-        // replace the object by the result of the cast, to make the compiler happy:
-        change_argument(T_OBJECT, arg_slot, T_OBJECT, arg);
-        debug_only(dest_klass = (klassOop)badOop);
-        break;
-      }
-
-      case java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_PRIM: {
-        // i2l, etc., on the Nth outgoing argument in place
-        BasicType src = chain().adapter_conversion_src_type(),
-                  dest = chain().adapter_conversion_dest_type();
-        ArgToken arg = _outgoing.at(arg_slot);
-        Bytecodes::Code bc = conversion_code(src, dest);
-        if (bc == Bytecodes::_nop) {
-          break;
-        } else if (bc != Bytecodes::_illegal) {
-          arg = make_conversion(dest, NULL, bc, arg, CHECK_(empty));
-        } else if (is_subword_type(dest)) {
-          bc = conversion_code(src, T_INT);
-          if (bc != Bytecodes::_illegal) {
-            arg = make_conversion(dest, NULL, bc, arg, CHECK_(empty));
-            bc = conversion_code(T_INT, dest);
-            arg = make_conversion(dest, NULL, bc, arg, CHECK_(empty));
-          }
-        }
-        if (bc == Bytecodes::_illegal) {
-          lose(err_msg("bad primitive conversion for %s -> %s", type2name(src), type2name(dest)), CHECK_(empty));
-        }
-        change_argument(src, arg_slot, dest, arg);
-        break;
-      }
-
-      case java_lang_invoke_AdapterMethodHandle::OP_REF_TO_PRIM: {
-        // checkcast to wrapper type & call intValue, etc.
-        BasicType dest = chain().adapter_conversion_dest_type();
-        ArgToken arg = _outgoing.at(arg_slot);
-        arg = make_conversion(T_OBJECT, SystemDictionary::box_klass(dest),
-                              Bytecodes::_checkcast, arg, CHECK_(empty));
-        vmIntrinsics::ID unboxer = vmIntrinsics::for_unboxing(dest);
-        if (unboxer == vmIntrinsics::_none) {
-          lose("no unboxing method", CHECK_(empty));
-        }
-        ArgToken arglist[2];
-        arglist[0] = arg;         // outgoing 'this'
-        arglist[1] = ArgToken();  // sentinel
-        arg = make_invoke(methodHandle(), unboxer, Bytecodes::_invokevirtual, false, 1, &arglist[0], CHECK_(empty));
-        change_argument(T_OBJECT, arg_slot, dest, arg);
-        break;
-      }
-
-      case java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF: {
-        // call wrapper type.valueOf
-        BasicType src = chain().adapter_conversion_src_type();
-        vmIntrinsics::ID boxer = vmIntrinsics::for_boxing(src);
-        if (boxer == vmIntrinsics::_none) {
-          lose("no boxing method", CHECK_(empty));
-        }
-        ArgToken arg = _outgoing.at(arg_slot);
-        ArgToken arglist[2];
-        arglist[0] = arg;         // outgoing value
-        arglist[1] = ArgToken();  // sentinel
-        arg = make_invoke(methodHandle(), boxer, Bytecodes::_invokestatic, false, 1, &arglist[0], CHECK_(empty));
-        change_argument(src, arg_slot, T_OBJECT, arg);
-        break;
-      }
-
-      case java_lang_invoke_AdapterMethodHandle::OP_SWAP_ARGS: {
-        int dest_arg_slot = chain().adapter_conversion_vminfo();
-        if (!has_argument(dest_arg_slot)) {
-          lose("bad swap index", CHECK_(empty));
-        }
-        // a simple swap between two arguments
-        if (arg_slot > dest_arg_slot) {
-          int tmp = arg_slot;
-          arg_slot = dest_arg_slot;
-          dest_arg_slot = tmp;
-        }
-        ArgToken a1 = _outgoing.at(arg_slot);
-        ArgToken a2 = _outgoing.at(dest_arg_slot);
-        change_argument(a2.basic_type(), dest_arg_slot, a1);
-        change_argument(a1.basic_type(), arg_slot, a2);
-        break;
-      }
-
-      case java_lang_invoke_AdapterMethodHandle::OP_ROT_ARGS: {
-        int limit_raw  = chain().adapter_conversion_vminfo();
-        bool rot_down  = (arg_slot < limit_raw);
-        int limit_bias = (rot_down ? MethodHandles::OP_ROT_ARGS_DOWN_LIMIT_BIAS : 0);
-        int limit_slot = limit_raw - limit_bias;
-        if ((uint)limit_slot > (uint)_outgoing.length()) {
-          lose("bad rotate index", CHECK_(empty));
-        }
-        // Rotate the source argument (plus following N slots) into the
-        // position occupied by the dest argument (plus following N slots).
-        int rotate_count = type2size[chain().adapter_conversion_src_type()];
-        // (no other rotate counts are currently supported)
-        if (rot_down) {
-          for (int i = 0; i < rotate_count; i++) {
-            ArgToken temp = _outgoing.at(arg_slot);
-            _outgoing.remove_at(arg_slot);
-            _outgoing.insert_before(limit_slot - 1, temp);
-          }
-        } else { // arg_slot > limit_slot => rotate_up
-          for (int i = 0; i < rotate_count; i++) {
-            ArgToken temp = _outgoing.at(arg_slot + rotate_count - 1);
-            _outgoing.remove_at(arg_slot + rotate_count - 1);
-            _outgoing.insert_before(limit_slot, temp);
-          }
-        }
-        assert(_outgoing_argc == argument_count_slow(), "empty slots under control");
-        break;
-      }
-
-      case java_lang_invoke_AdapterMethodHandle::OP_DUP_ARGS: {
-        int dup_slots = chain().adapter_conversion_stack_pushes();
-        if (dup_slots <= 0) {
-          lose("bad dup count", CHECK_(empty));
-        }
-        for (int i = 0; i < dup_slots; i++) {
-          ArgToken dup = _outgoing.at(arg_slot + 2*i);
-          if (dup.basic_type() != T_VOID)     _outgoing_argc += 1;
-          _outgoing.insert_before(i, dup);
-        }
-        assert(_outgoing_argc == argument_count_slow(), "empty slots under control");
-        break;
-      }
-
-      case java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS: {
-        int drop_slots = -chain().adapter_conversion_stack_pushes();
-        if (drop_slots <= 0) {
-          lose("bad drop count", CHECK_(empty));
-        }
-        for (int i = 0; i < drop_slots; i++) {
-          ArgToken drop = _outgoing.at(arg_slot);
-          if (drop.basic_type() != T_VOID)    _outgoing_argc -= 1;
-          _outgoing.remove_at(arg_slot);
-        }
-        assert(_outgoing_argc == argument_count_slow(), "empty slots under control");
-        break;
-      }
-
-      case java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS:
-        retain_original_args = true;   // and fall through:
-      case java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS: {
-        // call argument MH recursively
-        //{static int x; if (!x++) print_method_handle(chain().method_handle_oop()); --x;}
-        Handle recursive_mh(THREAD, chain().adapter_arg_oop());
-        if (!java_lang_invoke_MethodHandle::is_instance(recursive_mh())) {
-          lose("recursive target not a MethodHandle", CHECK_(empty));
-        }
-        Handle recursive_mtype(THREAD, java_lang_invoke_MethodHandle::type(recursive_mh()));
-        int argc = java_lang_invoke_MethodType::ptype_count(recursive_mtype());
-        int coll_slots = java_lang_invoke_MethodHandle::vmslots(recursive_mh());
-        BasicType rtype = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::rtype(recursive_mtype()));
-        ArgToken* arglist = NEW_RESOURCE_ARRAY(ArgToken, 1 + argc + 1);  // 1+: mh, +1: sentinel
-        arglist[0] = make_oop_constant(recursive_mh(), CHECK_(empty));
-        if (arg_slot < 0 || coll_slots < 0 || arg_slot + coll_slots > _outgoing.length()) {
-          lose("bad fold/collect arg slot", CHECK_(empty));
-        }
-        for (int i = 0, slot = arg_slot + coll_slots - 1; slot >= arg_slot; slot--) {
-          ArgToken arg_state = _outgoing.at(slot);
-          BasicType  arg_type  = arg_state.basic_type();
-          if (arg_type == T_VOID)  continue;
-          ArgToken arg = _outgoing.at(slot);
-          if (i >= argc) { lose("bad fold/collect arg", CHECK_(empty)); }
-          arglist[1+i] = arg;
-          if (!retain_original_args)
-            change_argument(arg_type, slot, T_VOID, ArgToken(tt_void));
-          i++;
-        }
-        arglist[1+argc] = ArgToken();  // sentinel
-        oop invoker = java_lang_invoke_MethodTypeForm::vmlayout(
-                          java_lang_invoke_MethodType::form(recursive_mtype()) );
-        if (invoker == NULL || !invoker->is_method()) {
-          lose("bad vmlayout slot", CHECK_(empty));
-        }
-        // FIXME: consider inlining the invokee at the bytecode level
-        ArgToken ret = make_invoke(methodHandle(THREAD, methodOop(invoker)), vmIntrinsics::_invokeGeneric,
-                                   Bytecodes::_invokevirtual, false, 1+argc, &arglist[0], CHECK_(empty));
-        // The iid = _invokeGeneric really means to adjust reference types as needed.
-        DEBUG_ONLY(invoker = NULL);
-        if (rtype == T_OBJECT) {
-          klassOop rklass = java_lang_Class::as_klassOop( java_lang_invoke_MethodType::rtype(recursive_mtype()) );
-          if (rklass != SystemDictionary::Object_klass() &&
-              !Klass::cast(rklass)->is_interface()) {
-            // preserve type safety
-            ret = make_conversion(T_OBJECT, rklass, Bytecodes::_checkcast, ret, CHECK_(empty));
-          }
-        }
-        if (rtype != T_VOID) {
-          int ret_slot = arg_slot + (retain_original_args ? coll_slots : 0);
-          change_argument(T_VOID, ret_slot, rtype, ret);
-        }
-        break;
-      }
-
-      case java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS: {
-        klassOop array_klass_oop = NULL;
-        BasicType array_type = java_lang_Class::as_BasicType(chain().adapter_arg_oop(),
-                                                             &array_klass_oop);
-        assert(array_type == T_OBJECT, "");
-        assert(Klass::cast(array_klass_oop)->oop_is_array(), "");
-        arrayKlassHandle array_klass(THREAD, array_klass_oop);
-        debug_only(array_klass_oop = (klassOop)badOop);
-
-        klassOop element_klass_oop = NULL;
-        BasicType element_type = java_lang_Class::as_BasicType(array_klass->component_mirror(),
-                                                               &element_klass_oop);
-        KlassHandle element_klass(THREAD, element_klass_oop);
-        debug_only(element_klass_oop = (klassOop)badOop);
-
-        // Fetch the argument, which we will cast to the required array type.
-        ArgToken arg = _outgoing.at(arg_slot);
-        assert(arg.basic_type() == T_OBJECT, "");
-        ArgToken array_arg = arg;
-        array_arg = make_conversion(T_OBJECT, array_klass(), Bytecodes::_checkcast, array_arg, CHECK_(empty));
-        change_argument(T_OBJECT, arg_slot, T_VOID, ArgToken(tt_void));
-
-        // Check the required length.
-        int spread_slots = 1 + chain().adapter_conversion_stack_pushes();
-        int spread_length = spread_slots;
-        if (type2size[element_type] == 2) {
-          if (spread_slots % 2 != 0)  spread_slots = -1;  // force error
-          spread_length = spread_slots / 2;
-        }
-        if (spread_slots < 0) {
-          lose("bad spread length", CHECK_(empty));
-        }
-
-        jvalue   length_jvalue;  length_jvalue.i = spread_length;
-        ArgToken length_arg = make_prim_constant(T_INT, &length_jvalue, CHECK_(empty));
-        // Call a built-in method known to the JVM to validate the length.
-        ArgToken arglist[3];
-        arglist[0] = array_arg;   // value to check
-        arglist[1] = length_arg;  // length to check
-        arglist[2] = ArgToken();  // sentinel
-        make_invoke(methodHandle(), vmIntrinsics::_checkSpreadArgument,
-                    Bytecodes::_invokestatic, false, 2, &arglist[0], CHECK_(empty));
-
-        // Spread out the array elements.
-        Bytecodes::Code aload_op = Bytecodes::_nop;
-        switch (element_type) {
-        case T_INT:       aload_op = Bytecodes::_iaload; break;
-        case T_LONG:      aload_op = Bytecodes::_laload; break;
-        case T_FLOAT:     aload_op = Bytecodes::_faload; break;
-        case T_DOUBLE:    aload_op = Bytecodes::_daload; break;
-        case T_OBJECT:    aload_op = Bytecodes::_aaload; break;
-        case T_BOOLEAN:   // fall through:
-        case T_BYTE:      aload_op = Bytecodes::_baload; break;
-        case T_CHAR:      aload_op = Bytecodes::_caload; break;
-        case T_SHORT:     aload_op = Bytecodes::_saload; break;
-        default:          lose("primitive array NYI", CHECK_(empty));
-        }
-        int ap = arg_slot;
-        for (int i = 0; i < spread_length; i++) {
-          jvalue   offset_jvalue;  offset_jvalue.i = i;
-          ArgToken offset_arg = make_prim_constant(T_INT, &offset_jvalue, CHECK_(empty));
-          ArgToken element_arg = make_fetch(element_type, element_klass(), aload_op, array_arg, offset_arg, CHECK_(empty));
-          change_argument(T_VOID, ap, element_type, element_arg);
-          //ap += type2size[element_type];  // don't do this; insert next arg to *right* of previous
-        }
-        break;
-      }
-
-      default:
-        lose("bad adapter conversion", CHECK_(empty));
-        break;
-      }
-    }
-
-    if (chain().is_bound()) {
-      // push a new argument
-      BasicType arg_type  = chain().bound_arg_type();
-      jint      arg_slot  = chain().bound_arg_slot();
-      oop       arg_oop   = chain().bound_arg_oop();
-      ArgToken  arg;
-      if (arg_type == T_OBJECT) {
-        arg = make_oop_constant(arg_oop, CHECK_(empty));
-      } else {
-        jvalue arg_value;
-        BasicType bt = java_lang_boxing_object::get_value(arg_oop, &arg_value);
-        if (bt == arg_type || (bt == T_INT && is_subword_type(arg_type))) {
-          arg = make_prim_constant(arg_type, &arg_value, CHECK_(empty));
-        } else {
-          lose(err_msg("bad bound value: arg_type %s boxing %s", type2name(arg_type), type2name(bt)), CHECK_(empty));
-        }
-      }
-      DEBUG_ONLY(arg_oop = badOop);
-      change_argument(T_VOID, arg_slot, arg_type, arg);
-    }
-
-    // this test must come after the body of the loop
-    if (!chain().is_last()) {
-      chain().next(CHECK_(empty));
-    } else {
-      break;
-    }
-  }
-
-  // finish the sequence with a tail-call to the ultimate target
-  // parameters are passed in logical order (receiver first), not slot order
-  ArgToken* arglist = NEW_RESOURCE_ARRAY(ArgToken, _outgoing.length() + 1);
-  int ap = 0;
-  for (int i = _outgoing.length() - 1; i >= 0; i--) {
-    ArgToken arg_state = _outgoing.at(i);
-    if (arg_state.basic_type() == T_VOID)  continue;
-    arglist[ap++] = _outgoing.at(i);
-  }
-  assert(ap == _outgoing_argc, "");
-  arglist[ap] = ArgToken();  // add a sentinel, for the sake of asserts
-  return make_invoke(chain().last_method(),
-                     vmIntrinsics::_none,
-                     chain().last_invoke_code(), true,
-                     ap, arglist, THREAD);
-}
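
Editorial note on the marshaling loop above: _outgoing is kept in slot order, with the top of stack at index 0 and the receiver at the highest index, so iterating from the highest index down yields receiver-first logical order, and the T_VOID filler slots that model the second halves of long/double values are skipped. A minimal standalone sketch of just that reordering (the names Tok and logical_order are illustrative, not from the VM sources):

    #include <cassert>
    #include <vector>

    enum BasicType { T_INT, T_LONG, T_OBJECT, T_VOID };
    struct Tok { BasicType bt; int id; };

    // Highest slot index first (receiver), skipping T_VOID filler slots.
    std::vector<Tok> logical_order(const std::vector<Tok>& outgoing) {
      std::vector<Tok> arglist;
      for (int i = (int)outgoing.size() - 1; i >= 0; i--) {
        if (outgoing[i].bt == T_VOID)  continue;  // filler, not a real argument
        arglist.push_back(outgoing[i]);
      }
      return arglist;
    }

    int main() {
      // Slot order (TOS first): a long value, its filler, then the receiver.
      std::vector<Tok> outgoing = { {T_LONG, 2}, {T_VOID, -1}, {T_OBJECT, 1} };
      std::vector<Tok> logical  = logical_order(outgoing);
      assert(logical.size() == 2 && logical[0].id == 1 && logical[1].id == 2);
      return 0;
    }
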
-
-
-// -----------------------------------------------------------------------------
-// MethodHandleWalker::walk_incoming_state
-//
-void MethodHandleWalker::walk_incoming_state(TRAPS) {
-  Handle mtype(THREAD, chain().method_type_oop());
-  int nptypes = java_lang_invoke_MethodType::ptype_count(mtype());
-  _outgoing_argc = nptypes;
-  int argp = nptypes - 1;
-  if (argp >= 0) {
-    _outgoing.at_grow(argp, ArgToken(tt_void)); // presize
-  }
-  for (int i = 0; i < nptypes; i++) {
-    klassOop  arg_type_klass = NULL;
-    BasicType arg_type = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::ptype(mtype(), i), &arg_type_klass);
-    int index = new_local_index(arg_type);
-    ArgToken arg = make_parameter(arg_type, arg_type_klass, index, CHECK);
-    DEBUG_ONLY(arg_type_klass = (klassOop) NULL);
-    _outgoing.at_put(argp, arg);
-    if (type2size[arg_type] == 2) {
-      // add the extra slot, so we can model the JVM stack
-      _outgoing.insert_before(argp+1, ArgToken(tt_void));
-    }
-    --argp;
-  }
-  // call make_parameter at the end of the list for the return type
-  klassOop  ret_type_klass = NULL;
-  BasicType ret_type = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::rtype(mtype()), &ret_type_klass);
-  ArgToken  ret = make_parameter(ret_type, ret_type_klass, -1, CHECK);
-  // ignore ret; client can catch it if needed
-
-  assert(_outgoing_argc == argument_count_slow(), "empty slots under control");
-
-  verify_args_and_signature(CHECK);
-}
-
-
-#ifdef ASSERT
-void MethodHandleWalker::verify_args_and_signature(TRAPS) {
-  int index = _outgoing.length() - 1;
-  objArrayOop ptypes = java_lang_invoke_MethodType::ptypes(chain().method_type_oop());
-  for (int i = 0, limit = ptypes->length(); i < limit; i++) {
-    BasicType t = java_lang_Class::as_BasicType(ptypes->obj_at(i));
-    if (t == T_ARRAY) t = T_OBJECT;
-    if (t == T_LONG || t == T_DOUBLE) {
-      assert(T_VOID == _outgoing.at(index).basic_type(), "types must match");
-      index--;
-    }
-    assert(t == _outgoing.at(index).basic_type(), "types must match");
-    index--;
-  }
-}
-#endif
-
-
-// -----------------------------------------------------------------------------
-// MethodHandleWalker::change_argument
-//
-// This is messy because some kinds of arguments are paired with
-// companion slots containing an empty value.
-void MethodHandleWalker::change_argument(BasicType old_type, int slot, const ArgToken& new_arg) {
-  BasicType new_type = new_arg.basic_type();
-  int old_size = type2size[old_type];
-  int new_size = type2size[new_type];
-  if (old_size == new_size) {
-    // simple case first
-    _outgoing.at_put(slot, new_arg);
-  } else if (old_size > new_size) {
-    for (int i = old_size - 1; i >= new_size; i--) {
-      assert((i != 0) == (_outgoing.at(slot + i).basic_type() == T_VOID), "");
-      _outgoing.remove_at(slot + i);
-    }
-    if (new_size > 0)
-      _outgoing.at_put(slot, new_arg);
-    else
-      _outgoing_argc -= 1;      // deleted a real argument
-  } else {
-    for (int i = old_size; i < new_size; i++) {
-      _outgoing.insert_before(slot + i, ArgToken(tt_void));
-    }
-    _outgoing.at_put(slot, new_arg);
-    if (old_size == 0)
-      _outgoing_argc += 1;      // inserted a real argument
-  }
-  assert(_outgoing_argc == argument_count_slow(), "empty slots under control");
-}
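
To make the size bookkeeping above concrete, here is a self-contained model of the three cases (same size, shrink, grow), assuming the HotSpot convention that type2size is 2 for long/double, 1 for one-word types, and 0 for T_VOID. Everything here (Model, change) is an editorial sketch, not VM code:

    #include <cassert>
    #include <vector>

    enum BasicType { T_INT, T_LONG, T_VOID };
    static int size_of(BasicType bt) { return bt == T_LONG ? 2 : (bt == T_VOID ? 0 : 1); }

    struct Model {
      std::vector<BasicType> slots;
      int argc = 0;                       // counts non-void ("real") entries

      void change(BasicType old_bt, int slot, BasicType new_bt) {
        int old_size = size_of(old_bt), new_size = size_of(new_bt);
        if (old_size == new_size) {                    // simple replacement
          slots[slot] = new_bt;
        } else if (old_size > new_size) {              // shrink: drop filler
          for (int i = old_size - 1; i >= new_size; i--)
            slots.erase(slots.begin() + slot + i);
          if (new_size > 0) slots[slot] = new_bt; else argc--;
        } else {                                       // grow: insert filler
          for (int i = old_size; i < new_size; i++)
            slots.insert(slots.begin() + slot + i, T_VOID);
          slots[slot] = new_bt;
          if (old_size == 0) argc++;
        }
      }
    };

    int main() {
      Model m;  m.slots = { T_INT };  m.argc = 1;
      m.change(T_INT, 0, T_LONG);     // widen: a companion T_VOID slot appears
      assert(m.slots.size() == 2 && m.slots[1] == T_VOID && m.argc == 1);
      m.change(T_LONG, 0, T_VOID);    // delete the argument entirely
      assert(m.slots.empty() && m.argc == 0);
      return 0;
    }
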
-
-
-#ifdef ASSERT
-int MethodHandleWalker::argument_count_slow() {
-  int args_seen = 0;
-  for (int i = _outgoing.length() - 1; i >= 0; i--) {
-    if (_outgoing.at(i).basic_type() != T_VOID) {
-      ++args_seen;
-      if (_outgoing.at(i).basic_type() == T_LONG ||
-          _outgoing.at(i).basic_type() == T_DOUBLE) {
-        assert(_outgoing.at(i + 1).basic_type() == T_VOID, "empty slot must follow a two-word value");
-      }
-    } else {
-      assert(_outgoing.at(i - 1).basic_type() == T_LONG ||
-             _outgoing.at(i - 1).basic_type() == T_DOUBLE, "empty slot must follow a two-word value");
-    }
-  }
-  return args_seen;
-}
-#endif
-
-
-// -----------------------------------------------------------------------------
-// MethodHandleWalker::retype_raw_conversion
-//
-// Do the raw retype conversions for OP_RETYPE_RAW.
-void MethodHandleWalker::retype_raw_conversion(BasicType src, BasicType dst, bool for_return, int slot, TRAPS) {
-  if (src != dst) {
-    if (MethodHandles::same_basic_type_for_returns(src, dst, /*raw*/ true)) {
-      if (MethodHandles::is_float_fixed_reinterpretation_cast(src, dst)) {
-        vmIntrinsics::ID iid = vmIntrinsics::for_raw_conversion(src, dst);
-        if (iid == vmIntrinsics::_none) {
-          lose("no raw conversion method", CHECK);
-        }
-        ArgToken arglist[2];
-        if (!for_return) {
-          // argument type conversion
-          ArgToken arg = _outgoing.at(slot);
-          assert(arg.token_type() >= tt_symbolic || src == arg.basic_type(), "sanity");
-          arglist[0] = arg;         // outgoing 'this'
-          arglist[1] = ArgToken();  // sentinel
-          arg = make_invoke(methodHandle(), iid, Bytecodes::_invokestatic, false, 1, &arglist[0], CHECK);
-          change_argument(src, slot, dst, arg);
-        } else {
-          // return type conversion
-          if (_return_conv == vmIntrinsics::_none) {
-            _return_conv = iid;
-          } else if (_return_conv == vmIntrinsics::for_raw_conversion(dst, src)) {
-            _return_conv = vmIntrinsics::_none;
-          } else if (_return_conv != zero_return_conv()) {
-            lose(err_msg("requested raw return conversion not allowed: %s -> %s (before %s)", type2name(src), type2name(dst), vmIntrinsics::name_at(_return_conv)), CHECK);
-          }
-        }
-      } else {
-        // Nothing to do.
-      }
-    } else if (for_return && (!is_subword_type(src) || !is_subword_type(dst))) {
-      // This can occur in exception-throwing MHs, which have a fictitious return value encoded as Void or Empty.
-      _return_conv = zero_return_conv();
-    } else if (src == T_OBJECT && is_java_primitive(dst)) {
-      // ref-to-prim: discard ref, push zero
-      lose("requested ref-to-prim conversion not expected", CHECK);
-    } else {
-      lose(err_msg("requested raw conversion not allowed: %s -> %s", type2name(src), type2name(dst)), CHECK);
-    }
-  }
-}
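
The float-fixed reinterpretation casts handled above must preserve the bit pattern rather than the numeric value; the VM routes them through vmIntrinsics::for_raw_conversion, and the Java-level analogue is Float.floatToRawIntBits. A plain C++ illustration of that distinction (an editorial sketch, not the VM's code path):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Bit-preserving reinterpretation, unlike the value-converting f2i bytecode.
    static int32_t raw_f2i(float f)   { int32_t i; std::memcpy(&i, &f, sizeof i); return i; }
    static float   raw_i2f(int32_t i) { float f;   std::memcpy(&f, &i, sizeof f); return f; }

    int main() {
      float pi = 3.14159f;
      assert(raw_i2f(raw_f2i(pi)) == pi);    // round-trips exactly
      assert(raw_f2i(1.0f) == 0x3f800000);   // the IEEE-754 bits, not the int 1
      return 0;
    }
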
-
-
-// -----------------------------------------------------------------------------
-// MethodHandleCompiler
-
-MethodHandleCompiler::MethodHandleCompiler(Handle root, Symbol* name, Symbol* signature, int invoke_count, bool is_invokedynamic, TRAPS)
-  : MethodHandleWalker(root, is_invokedynamic, THREAD),
-    _invoke_count(invoke_count),
-    _thread(THREAD),
-    _bytecode(THREAD, 50),
-    _constants(THREAD, 10),
-    _non_bcp_klasses(THREAD, 5),
-    _cur_stack(0),
-    _max_stack(0),
-    _rtype(T_ILLEGAL),
-    _selectAlternative_bci(-1),
-    _taken_count(0),
-    _not_taken_count(0)
-{
-
-  // Element zero is always the null constant.
-  (void) _constants.append(NULL);
-
-  // Set name and signature index.
-  _name_index      = cpool_symbol_put(name);
-  _signature_index = cpool_symbol_put(signature);
-
-  // To make the resulting methods more recognizable by
-  // stack walkers and compiler heuristics,
-  // we put them in holder class MethodHandle.
-  // See klass_is_method_handle_adapter_holder
-  // and methodOopDesc::is_method_handle_adapter.
-  _target_klass = SystemDictionaryHandles::MethodHandle_klass();
-
-  check_non_bcp_klasses(java_lang_invoke_MethodHandle::type(root()), CHECK);
-
-  // Get return type klass.
-  Handle first_mtype(THREAD, chain().method_type_oop());
-  // _rklass is NULL for primitives.
-  _rtype = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::rtype(first_mtype()), &_rklass);
-  if (_rtype == T_ARRAY)  _rtype = T_OBJECT;
-
-  ArgumentSizeComputer args(signature);
-  int params = args.size() + 1;  // Incoming arguments plus receiver.
-  _num_params = for_invokedynamic() ? params - 1 : params;  // XXX Check if callee is static?
-}
-
-
-// -----------------------------------------------------------------------------
-// MethodHandleCompiler::compile
-//
-// Compile this MethodHandle into a bytecode adapter and return a
-// methodOop.
-methodHandle MethodHandleCompiler::compile(TRAPS) {
-  assert(_thread == THREAD, "must be same thread");
-  methodHandle nullHandle;
-  (void) walk(CHECK_(nullHandle));
-  record_non_bcp_klasses();
-  return get_method_oop(CHECK_(nullHandle));
-}
-
-
-void MethodHandleCompiler::emit_bc(Bytecodes::Code op, int index, int args_size) {
-  Bytecodes::check(op);  // Are we legal?
-
-  switch (op) {
-  // b
-  case Bytecodes::_aconst_null:
-  case Bytecodes::_iconst_m1:
-  case Bytecodes::_iconst_0:
-  case Bytecodes::_iconst_1:
-  case Bytecodes::_iconst_2:
-  case Bytecodes::_iconst_3:
-  case Bytecodes::_iconst_4:
-  case Bytecodes::_iconst_5:
-  case Bytecodes::_lconst_0:
-  case Bytecodes::_lconst_1:
-  case Bytecodes::_fconst_0:
-  case Bytecodes::_fconst_1:
-  case Bytecodes::_fconst_2:
-  case Bytecodes::_dconst_0:
-  case Bytecodes::_dconst_1:
-  case Bytecodes::_iload_0:
-  case Bytecodes::_iload_1:
-  case Bytecodes::_iload_2:
-  case Bytecodes::_iload_3:
-  case Bytecodes::_lload_0:
-  case Bytecodes::_lload_1:
-  case Bytecodes::_lload_2:
-  case Bytecodes::_lload_3:
-  case Bytecodes::_fload_0:
-  case Bytecodes::_fload_1:
-  case Bytecodes::_fload_2:
-  case Bytecodes::_fload_3:
-  case Bytecodes::_dload_0:
-  case Bytecodes::_dload_1:
-  case Bytecodes::_dload_2:
-  case Bytecodes::_dload_3:
-  case Bytecodes::_aload_0:
-  case Bytecodes::_aload_1:
-  case Bytecodes::_aload_2:
-  case Bytecodes::_aload_3:
-  case Bytecodes::_istore_0:
-  case Bytecodes::_istore_1:
-  case Bytecodes::_istore_2:
-  case Bytecodes::_istore_3:
-  case Bytecodes::_lstore_0:
-  case Bytecodes::_lstore_1:
-  case Bytecodes::_lstore_2:
-  case Bytecodes::_lstore_3:
-  case Bytecodes::_fstore_0:
-  case Bytecodes::_fstore_1:
-  case Bytecodes::_fstore_2:
-  case Bytecodes::_fstore_3:
-  case Bytecodes::_dstore_0:
-  case Bytecodes::_dstore_1:
-  case Bytecodes::_dstore_2:
-  case Bytecodes::_dstore_3:
-  case Bytecodes::_astore_0:
-  case Bytecodes::_astore_1:
-  case Bytecodes::_astore_2:
-  case Bytecodes::_astore_3:
-  case Bytecodes::_iand:
-  case Bytecodes::_i2l:
-  case Bytecodes::_i2f:
-  case Bytecodes::_i2d:
-  case Bytecodes::_i2b:
-  case Bytecodes::_i2c:
-  case Bytecodes::_i2s:
-  case Bytecodes::_l2i:
-  case Bytecodes::_l2f:
-  case Bytecodes::_l2d:
-  case Bytecodes::_f2i:
-  case Bytecodes::_f2l:
-  case Bytecodes::_f2d:
-  case Bytecodes::_d2i:
-  case Bytecodes::_d2l:
-  case Bytecodes::_d2f:
-  case Bytecodes::_iaload:
-  case Bytecodes::_laload:
-  case Bytecodes::_faload:
-  case Bytecodes::_daload:
-  case Bytecodes::_aaload:
-  case Bytecodes::_baload:
-  case Bytecodes::_caload:
-  case Bytecodes::_saload:
-  case Bytecodes::_ireturn:
-  case Bytecodes::_lreturn:
-  case Bytecodes::_freturn:
-  case Bytecodes::_dreturn:
-  case Bytecodes::_areturn:
-  case Bytecodes::_return:
-    assert(Bytecodes::format_bits(op, false) == Bytecodes::_fmt_b, "wrong bytecode format");
-    _bytecode.push(op);
-    break;
-
-  // bi
-  case Bytecodes::_ldc:
-    assert(Bytecodes::format_bits(op, false) == (Bytecodes::_fmt_b|Bytecodes::_fmt_has_k), "wrong bytecode format");
-    if (index == (index & 0xff)) {
-      _bytecode.push(op);
-      _bytecode.push(index);
-    } else {
-      _bytecode.push(Bytecodes::_ldc_w);
-      _bytecode.push(index >> 8);
-      _bytecode.push(index);
-    }
-    break;
-
-  case Bytecodes::_iload:
-  case Bytecodes::_lload:
-  case Bytecodes::_fload:
-  case Bytecodes::_dload:
-  case Bytecodes::_aload:
-  case Bytecodes::_istore:
-  case Bytecodes::_lstore:
-  case Bytecodes::_fstore:
-  case Bytecodes::_dstore:
-  case Bytecodes::_astore:
-    assert(Bytecodes::format_bits(op, false) == Bytecodes::_fmt_bi, "wrong bytecode format");
-    if (index == (index & 0xff)) {
-      _bytecode.push(op);
-      _bytecode.push(index);
-    } else {
-      // index doesn't fit in a u1; use the wide form, which takes a u2 index
-      _bytecode.push(Bytecodes::_wide);
-      _bytecode.push(op);
-      _bytecode.push(index >> 8);
-      _bytecode.push(index);
-    }
-    break;
-
-  // bkk
-  case Bytecodes::_ldc_w:
-  case Bytecodes::_ldc2_w:
-  case Bytecodes::_checkcast:
-    assert(Bytecodes::format_bits(op, false) == Bytecodes::_fmt_bkk, "wrong bytecode format");
-    assert((unsigned short) index == index, "index does not fit in 16-bit");
-    _bytecode.push(op);
-    _bytecode.push(index >> 8);
-    _bytecode.push(index);
-    break;
-
-  // bJJ
-  case Bytecodes::_invokestatic:
-  case Bytecodes::_invokespecial:
-  case Bytecodes::_invokevirtual:
-    assert(Bytecodes::format_bits(op, false) == Bytecodes::_fmt_bJJ, "wrong bytecode format");
-    assert((unsigned short) index == index, "index does not fit in 16-bit");
-    _bytecode.push(op);
-    _bytecode.push(index >> 8);
-    _bytecode.push(index);
-    break;
-
-  case Bytecodes::_invokeinterface:
-    assert(Bytecodes::format_bits(op, false) == Bytecodes::_fmt_bJJ, "wrong bytecode format");
-    assert((unsigned short) index == index, "index does not fit in 16-bit");
-    assert(args_size > 0, "valid args_size");
-    _bytecode.push(op);
-    _bytecode.push(index >> 8);
-    _bytecode.push(index);
-    _bytecode.push(args_size);
-    _bytecode.push(0);
-    break;
-
-  case Bytecodes::_ifeq:
-    assert((unsigned short) index == index, "index does not fit in 16-bit");
-    _bytecode.push(op);
-    _bytecode.push(index >> 8);
-    _bytecode.push(index);
-    break;
-
-  default:
-    ShouldNotReachHere();
-  }
-}
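
The u1-versus-wide choice in the bi cases above follows the class-file format: the short form carries a one-byte local index, while a wide-prefixed instruction carries a two-byte index. A standalone sketch of that encoding decision (emit_bi is a hypothetical helper; the opcode values are the real JVM ones):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    const uint8_t WIDE  = 0xc4;  // Bytecodes::_wide
    const uint8_t ILOAD = 0x15;  // Bytecodes::_iload

    void emit_bi(std::vector<uint8_t>& code, uint8_t op, int index) {
      if (index == (index & 0xff)) {            // fits in a u1
        code.push_back(op);
        code.push_back((uint8_t)index);
      } else {                                  // wide form: u2 index
        code.push_back(WIDE);
        code.push_back(op);
        code.push_back((uint8_t)(index >> 8));
        code.push_back((uint8_t)index);
      }
    }

    int main() {
      std::vector<uint8_t> code;
      emit_bi(code, ILOAD, 5);     // 2 bytes: iload 5
      emit_bi(code, ILOAD, 300);   // 4 bytes: wide iload 300
      assert(code.size() == 6 && code[2] == WIDE);
      return 0;
    }
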
-
-void MethodHandleCompiler::update_branch_dest(int src, int dst) {
-  switch (_bytecode.at(src)) {
-    case Bytecodes::_ifeq:
-      dst -= src; // compute the offset
-      assert((unsigned short) dst == dst, "index does not fit in 16-bit");
-      _bytecode.at_put(src + 1, dst >> 8);
-      _bytecode.at_put(src + 2, dst);
-      break;
-    default:
-      ShouldNotReachHere();
-  }
-}
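
Branch offsets in JVM bytecode are relative to the address of the branch instruction itself, which is why the code above computes dst -= src before patching the two offset bytes that emit_bc left as placeholders. A small sketch of the same two-pass patching (patch_branch is an illustrative helper; the opcode values are the real ones):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    const uint8_t IFEQ = 0x99, NOP = 0x00;

    void patch_branch(std::vector<uint8_t>& code, int src, int dst) {
      int offset = dst - src;                  // relative to the ifeq itself
      code[src + 1] = (uint8_t)(offset >> 8);
      code[src + 2] = (uint8_t)offset;
    }

    int main() {
      //  0: ifeq <placeholder>   3: nop   4: nop   5: <branch target>
      std::vector<uint8_t> code = { IFEQ, 0, 0, NOP, NOP };
      patch_branch(code, 0, 5);
      assert(code[1] == 0 && code[2] == 5);
      return 0;
    }
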
-
-void MethodHandleCompiler::emit_load(ArgToken arg) {
-  TokenType tt = arg.token_type();
-  BasicType bt = arg.basic_type();
-
-  switch (tt) {
-    case tt_parameter:
-    case tt_temporary:
-      emit_load(bt, arg.index());
-      break;
-    case tt_constant:
-      emit_load_constant(arg);
-      break;
-    case tt_illegal:
-    case tt_void:
-    default:
-      ShouldNotReachHere();
-  }
-}
-
-
-void MethodHandleCompiler::emit_load(BasicType bt, int index) {
-  if (index <= 3) {
-    switch (bt) {
-    case T_BOOLEAN: case T_BYTE: case T_CHAR: case T_SHORT:
-    case T_INT:    emit_bc(Bytecodes::cast(Bytecodes::_iload_0 + index)); break;
-    case T_LONG:   emit_bc(Bytecodes::cast(Bytecodes::_lload_0 + index)); break;
-    case T_FLOAT:  emit_bc(Bytecodes::cast(Bytecodes::_fload_0 + index)); break;
-    case T_DOUBLE: emit_bc(Bytecodes::cast(Bytecodes::_dload_0 + index)); break;
-    case T_OBJECT: emit_bc(Bytecodes::cast(Bytecodes::_aload_0 + index)); break;
-    default:
-      ShouldNotReachHere();
-    }
-  }
-  else {
-    switch (bt) {
-    case T_BOOLEAN: case T_BYTE: case T_CHAR: case T_SHORT:
-    case T_INT:    emit_bc(Bytecodes::_iload, index); break;
-    case T_LONG:   emit_bc(Bytecodes::_lload, index); break;
-    case T_FLOAT:  emit_bc(Bytecodes::_fload, index); break;
-    case T_DOUBLE: emit_bc(Bytecodes::_dload, index); break;
-    case T_OBJECT: emit_bc(Bytecodes::_aload, index); break;
-    default:
-      ShouldNotReachHere();
-    }
-  }
-  stack_push(bt);
-}
-
-void MethodHandleCompiler::emit_store(BasicType bt, int index) {
-  if (index <= 3) {
-    switch (bt) {
-    case T_BOOLEAN: case T_BYTE: case T_CHAR: case T_SHORT:
-    case T_INT:    emit_bc(Bytecodes::cast(Bytecodes::_istore_0 + index)); break;
-    case T_LONG:   emit_bc(Bytecodes::cast(Bytecodes::_lstore_0 + index)); break;
-    case T_FLOAT:  emit_bc(Bytecodes::cast(Bytecodes::_fstore_0 + index)); break;
-    case T_DOUBLE: emit_bc(Bytecodes::cast(Bytecodes::_dstore_0 + index)); break;
-    case T_OBJECT: emit_bc(Bytecodes::cast(Bytecodes::_astore_0 + index)); break;
-    default:
-      ShouldNotReachHere();
-    }
-  }
-  else {
-    switch (bt) {
-    case T_BOOLEAN: case T_BYTE: case T_CHAR: case T_SHORT:
-    case T_INT:    emit_bc(Bytecodes::_istore, index); break;
-    case T_LONG:   emit_bc(Bytecodes::_lstore, index); break;
-    case T_FLOAT:  emit_bc(Bytecodes::_fstore, index); break;
-    case T_DOUBLE: emit_bc(Bytecodes::_dstore, index); break;
-    case T_OBJECT: emit_bc(Bytecodes::_astore, index); break;
-    default:
-      ShouldNotReachHere();
-    }
-  }
-  stack_pop(bt);
-}
-
-
-void MethodHandleCompiler::emit_load_constant(ArgToken arg) {
-  BasicType bt = arg.basic_type();
-  if (is_subword_type(bt)) bt = T_INT;
-  switch (bt) {
-  case T_INT: {
-    jint value = arg.get_jint();
-    if (-1 <= value && value <= 5)
-      emit_bc(Bytecodes::cast(Bytecodes::_iconst_0 + value));
-    else
-      emit_bc(Bytecodes::_ldc, cpool_int_put(value));
-    break;
-  }
-  case T_LONG: {
-    jlong value = arg.get_jlong();
-    if (0 <= value && value <= 1)
-      emit_bc(Bytecodes::cast(Bytecodes::_lconst_0 + (int) value));
-    else
-      emit_bc(Bytecodes::_ldc2_w, cpool_long_put(value));
-    break;
-  }
-  case T_FLOAT: {
-    jfloat value  = arg.get_jfloat();
-    if (value == 0.0 || value == 1.0 || value == 2.0)
-      emit_bc(Bytecodes::cast(Bytecodes::_fconst_0 + (int) value));
-    else
-      emit_bc(Bytecodes::_ldc, cpool_float_put(value));
-    break;
-  }
-  case T_DOUBLE: {
-    jdouble value = arg.get_jdouble();
-    if (value == 0.0 || value == 1.0)
-      emit_bc(Bytecodes::cast(Bytecodes::_dconst_0 + (int) value));
-    else
-      emit_bc(Bytecodes::_ldc2_w, cpool_double_put(value));
-    break;
-  }
-  case T_OBJECT: {
-    Handle value = arg.object();
-    if (value.is_null()) {
-      emit_bc(Bytecodes::_aconst_null);
-      break;
-    }
-    if (java_lang_Class::is_instance(value())) {
-      klassOop k = java_lang_Class::as_klassOop(value());
-      if (k != NULL) {
-        emit_bc(Bytecodes::_ldc, cpool_klass_put(k));
-        break;
-      }
-    }
-    emit_bc(Bytecodes::_ldc, cpool_object_put(value));
-    break;
-  }
-  default:
-    ShouldNotReachHere();
-  }
-  stack_push(bt);
-}
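
The constant-loading shortcuts above mirror the class-file format's compact encodings: ints in [-1, 5], longs 0/1, floats 0/1/2, and doubles 0/1 each have dedicated one-byte opcodes, so only other values need a constant-pool entry and an ldc-family instruction. A sketch of the int case (real opcode values, hypothetical helper):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    const uint8_t ICONST_0 = 0x03, LDC = 0x12;

    void emit_int_constant(std::vector<uint8_t>& code, int value, int cp_index) {
      if (-1 <= value && value <= 5) {
        code.push_back((uint8_t)(ICONST_0 + value));  // iconst_m1 .. iconst_5
      } else {
        code.push_back(LDC);
        code.push_back((uint8_t)cp_index);            // assumes index fits in a u1
      }
    }

    int main() {
      std::vector<uint8_t> code;
      emit_int_constant(code, -1, 0);   // iconst_m1 (0x02), no pool entry needed
      emit_int_constant(code, 42, 7);   // ldc #7
      assert(code[0] == 0x02 && code[1] == LDC && code[2] == 7);
      return 0;
    }
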
-
-
-MethodHandleWalker::ArgToken
-MethodHandleCompiler::make_conversion(BasicType type, klassOop tk, Bytecodes::Code op,
-                                      const ArgToken& src, TRAPS) {
-
-  BasicType srctype = src.basic_type();
-  TokenType tt = src.token_type();
-  int index = -1;
-
-  switch (op) {
-  case Bytecodes::_i2l:
-  case Bytecodes::_i2f:
-  case Bytecodes::_i2d:
-  case Bytecodes::_i2b:
-  case Bytecodes::_i2c:
-  case Bytecodes::_i2s:
-
-  case Bytecodes::_l2i:
-  case Bytecodes::_l2f:
-  case Bytecodes::_l2d:
-
-  case Bytecodes::_f2i:
-  case Bytecodes::_f2l:
-  case Bytecodes::_f2d:
-
-  case Bytecodes::_d2i:
-  case Bytecodes::_d2l:
-  case Bytecodes::_d2f:
-    if (tt == tt_constant) {
-      emit_load_constant(src);
-    } else {
-      emit_load(srctype, src.index());
-    }
-    stack_pop(srctype);  // pop the src type
-    emit_bc(op);
-    stack_push(type);    // push the dest value
-    if (tt != tt_constant)
-      index = src.index();
-    if (srctype != type || index == -1)
-      index = new_local_index(type);
-    emit_store(type, index);
-    break;
-
-  case Bytecodes::_checkcast:
-    if (tt == tt_constant) {
-      emit_load_constant(src);
-    } else {
-      emit_load(srctype, src.index());
-    }
-    emit_bc(op, cpool_klass_put(tk));
-    check_non_bcp_klass(tk, CHECK_(src));
-    // Allocate a new local for the type so that we don't hide the
-    // previous type from the verifier.
-    index = new_local_index(type);
-    emit_store(srctype, index);
-    break;
-
-  case Bytecodes::_nop:
-    // nothing to do
-    return src;
-
-  default:
-    if (op == Bytecodes::_illegal)
-      lose(err_msg("no such primitive conversion: %s -> %s", type2name(src.basic_type()), type2name(type)), THREAD);
-    else
-      lose(err_msg("bad primitive conversion op: %s", Bytecodes::name(op)), THREAD);
-    return make_prim_constant(type, &zero_jvalue, THREAD);
-  }
-
-  return make_parameter(type, tk, index, THREAD);
-}
-
-
-// -----------------------------------------------------------------------------
-// MethodHandleCompiler
-//
-
-// Values used by the compiler.
-jvalue MethodHandleCompiler::zero_jvalue = { 0 };
-jvalue MethodHandleCompiler::one_jvalue  = { 1 };
-
-// Fetch any values from CountingMethodHandles and capture them for profiles
-bool MethodHandleCompiler::fetch_counts(ArgToken arg1, ArgToken arg2) {
-  int count1 = -1, count2 = -1;
-  if (arg1.token_type() == tt_constant && arg1.basic_type() == T_OBJECT &&
-      java_lang_invoke_CountingMethodHandle::is_instance(arg1.object()())) {
-    count1 = java_lang_invoke_CountingMethodHandle::vmcount(arg1.object()());
-  }
-  if (arg2.token_type() == tt_constant && arg2.basic_type() == T_OBJECT &&
-      java_lang_invoke_CountingMethodHandle::is_instance(arg2.object()())) {
-    count2 = java_lang_invoke_CountingMethodHandle::vmcount(arg2.object()());
-  }
-  int total = count1 + count2;
-  if (count1 != -1 && count2 != -1 && total != 0) {
-    // Normalize the collected counts to the invoke count
-    if (count1 != 0) _not_taken_count = (int)(_invoke_count * count1 / (double)total);
-    if (count2 != 0) _taken_count = (int)(_invoke_count * count2 / (double)total);
-    return true;
-  }
-  return false;
-}
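
A worked example of the normalization above, with made-up numbers: if the call site has executed 1000 times and the two CountingMethodHandles recorded 30 and 10 invocations, the branch profile is rescaled to 750/250 so it sums to the invoke count:

    #include <cassert>

    int main() {
      int invoke_count = 1000, count1 = 30, count2 = 10;  // hypothetical counts
      int total = count1 + count2;
      int not_taken = (int)(invoke_count * count1 / (double)total);  // 750
      int taken     = (int)(invoke_count * count2 / (double)total);  // 250
      assert(not_taken == 750 && taken == 250);
      return 0;
    }
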
-
-// Emit bytecodes for the given invoke instruction.
-MethodHandleWalker::ArgToken
-MethodHandleCompiler::make_invoke(methodHandle m, vmIntrinsics::ID iid,
-                                  Bytecodes::Code op, bool tailcall,
-                                  int argc, MethodHandleWalker::ArgToken* argv,
-                                  TRAPS) {
-  ArgToken zero;
-  if (m.is_null()) {
-    // Get the intrinsic methodOop.
-    m = methodHandle(THREAD, vmIntrinsics::method_for(iid));
-    if (m.is_null()) {
-      lose(vmIntrinsics::name_at(iid), CHECK_(zero));
-    }
-  }
-
-  klassOop klass     = m->method_holder();
-  Symbol*  name      = m->name();
-  Symbol*  signature = m->signature();
-
-  if (iid == vmIntrinsics::_invokeGeneric &&
-      argc >= 1 && argv[0].token_type() == tt_constant) {
-    assert(m->intrinsic_id() == vmIntrinsics::_invokeExact, "");
-    Handle receiver = argv[0].object();
-    Handle rtype(THREAD, java_lang_invoke_MethodHandle::type(receiver()));
-    Handle mtype(THREAD, m->method_handle_type());
-    if (rtype() != mtype()) {
-      assert(java_lang_invoke_MethodType::form(rtype()) ==
-             java_lang_invoke_MethodType::form(mtype()),
-             "must be the same shape");
-      // customize m to the exact required rtype
-      bool has_non_bcp_klass = check_non_bcp_klasses(rtype(), CHECK_(zero));
-      TempNewSymbol sig2 = java_lang_invoke_MethodType::as_signature(rtype(), true, CHECK_(zero));
-      methodHandle m2;
-      if (!has_non_bcp_klass) {
-        methodOop m2_oop = SystemDictionary::find_method_handle_invoke(m->name(), sig2,
-                                                                       KlassHandle(), CHECK_(zero));
-        m2 = methodHandle(THREAD, m2_oop);
-      }
-      if (m2.is_null()) {
-        // just build it fresh
-        m2 = methodOopDesc::make_invoke_method(klass, m->name(), sig2, rtype, CHECK_(zero));
-        if (m2.is_null())
-          lose(err_msg("no customized invoker %s", sig2->as_utf8()), CHECK_(zero));
-      }
-      m = m2;
-      signature = m->signature();
-    }
-  }
-
-  if (m->intrinsic_id() == vmIntrinsics::_selectAlternative &&
-      fetch_counts(argv[1], argv[2])) {
-    assert(argc == 3, "three arguments");
-    assert(tailcall, "only");
-
-    // emit the bytecodes inline so we can drop profile data into them:
-    //   0:   iload_0
-    emit_load(argv[0]);
-    //   1:   ifeq    8
-    _selectAlternative_bci = _bytecode.length();
-    emit_bc(Bytecodes::_ifeq, 0); // emit placeholder offset
-    //   4:   aload_1
-    emit_load(argv[1]);
-    //   5:   areturn
-    emit_bc(Bytecodes::_areturn);
-    //   8:   aload_2
-    update_branch_dest(_selectAlternative_bci, cur_bci());
-    emit_load(argv[2]);
-    //   9:   areturn
-    emit_bc(Bytecodes::_areturn);
-    return ArgToken();  // Dummy return value.
-  }
-
-  check_non_bcp_klass(klass, CHECK_(zero));
-  if (m->is_method_handle_invoke()) {
-    check_non_bcp_klasses(m->method_handle_type(), CHECK_(zero));
-  }
-
-  // Count the number of arguments, not their size in slots
-  ArgumentCount asc(signature);
-  assert(argc == asc.size() + ((op == Bytecodes::_invokestatic || op == Bytecodes::_invokedynamic) ? 0 : 1),
-         "argc mismatch");
-
-  for (int i = 0; i < argc; i++) {
-    ArgToken arg = argv[i];
-    TokenType tt = arg.token_type();
-    BasicType bt = arg.basic_type();
-
-    switch (tt) {
-    case tt_parameter:
-    case tt_temporary:
-      emit_load(bt, arg.index());
-      break;
-    case tt_constant:
-      emit_load_constant(arg);
-      break;
-    case tt_illegal:
-      // Sentinel.
-      assert(i == (argc - 1), "sentinel must be last entry");
-      break;
-    case tt_void:
-    default:
-      ShouldNotReachHere();
-    }
-  }
-
-  // Populate constant pool.
-  int name_index          = cpool_symbol_put(name);
-  int signature_index     = cpool_symbol_put(signature);
-  int name_and_type_index = cpool_name_and_type_put(name_index, signature_index);
-  int klass_index         = cpool_klass_put(klass);
-  int methodref_index     = cpool_methodref_put(op, klass_index, name_and_type_index, m);
-
-  // Generate invoke.
-  switch (op) {
-  case Bytecodes::_invokestatic:
-  case Bytecodes::_invokespecial:
-  case Bytecodes::_invokevirtual:
-    emit_bc(op, methodref_index);
-    break;
-
-  case Bytecodes::_invokeinterface: {
-    ArgumentSizeComputer asc(signature);
-    emit_bc(op, methodref_index, asc.size() + 1);
-    break;
-  }
-
-  default:
-    ShouldNotReachHere();
-  }
-
-  // If tailcall, we have walked all the way to a direct method handle.
-  // Otherwise, make a recursive call to some helper routine.
-  BasicType rbt = m->result_type();
-  if (rbt == T_ARRAY)  rbt = T_OBJECT;
-  stack_push(rbt);  // The return value is already pushed onto the stack.
-  ArgToken ret;
-  if (tailcall) {
-    if (return_conv() == zero_return_conv()) {
-      rbt = T_VOID;  // discard value
-    } else if (return_conv() != vmIntrinsics::_none) {
-      // return value conversion
-      int index = new_local_index(rbt);
-      emit_store(rbt, index);
-      ArgToken arglist[2];
-      arglist[0] = ArgToken(tt_temporary, rbt, index);
-      arglist[1] = ArgToken();  // sentinel
-      ret = make_invoke(methodHandle(), return_conv(), Bytecodes::_invokestatic, false, 1, &arglist[0], CHECK_(zero));
-      set_return_conv(vmIntrinsics::_none);
-      rbt = ret.basic_type();
-      emit_load(rbt, ret.index());
-    }
-    if (rbt != _rtype) {
-      if (rbt == T_VOID) {
-        // push a zero of the right sort
-        if (_rtype == T_OBJECT) {
-          zero = make_oop_constant(NULL, CHECK_(zero));
-        } else {
-          zero = make_prim_constant(_rtype, &zero_jvalue, CHECK_(zero));
-        }
-        emit_load_constant(zero);
-      } else if (_rtype == T_VOID) {
-        // We'll emit a _return with something on the stack.
-        // It's OK to ignore what's on the stack.
-      } else if (rbt == T_INT && is_subword_type(_rtype)) {
-        // Convert value to match return type.
-        switch (_rtype) {
-        case T_BOOLEAN: {
-          // boolean is treated as a one-bit unsigned integer.
-          // Cf. API documentation: java/lang/invoke/MethodHandles.html#explicitCastArguments
-          ArgToken one = make_prim_constant(T_INT, &one_jvalue, CHECK_(zero));
-          emit_load_constant(one);
-          emit_bc(Bytecodes::_iand);
-          break;
-        }
-        case T_BYTE:    emit_bc(Bytecodes::_i2b); break;
-        case T_CHAR:    emit_bc(Bytecodes::_i2c); break;
-        case T_SHORT:   emit_bc(Bytecodes::_i2s); break;
-        default: ShouldNotReachHere();
-        }
-      } else if (is_subword_type(rbt) && (is_subword_type(_rtype) || (_rtype == T_INT))) {
-        // The subword type was returned as an int and will be passed
-        // on as an int.
-      } else {
-        lose("unknown conversion", CHECK_(zero));
-      }
-    }
-    switch (_rtype) {
-    case T_BOOLEAN: case T_BYTE: case T_CHAR: case T_SHORT:
-    case T_INT:    emit_bc(Bytecodes::_ireturn); break;
-    case T_LONG:   emit_bc(Bytecodes::_lreturn); break;
-    case T_FLOAT:  emit_bc(Bytecodes::_freturn); break;
-    case T_DOUBLE: emit_bc(Bytecodes::_dreturn); break;
-    case T_VOID:   emit_bc(Bytecodes::_return);  break;
-    case T_OBJECT:
-      if (_rklass.not_null() && _rklass() != SystemDictionary::Object_klass() && !Klass::cast(_rklass())->is_interface()) {
-        emit_bc(Bytecodes::_checkcast, cpool_klass_put(_rklass()));
-        check_non_bcp_klass(_rklass(), CHECK_(zero));
-      }
-      emit_bc(Bytecodes::_areturn);
-      break;
-    default: ShouldNotReachHere();
-    }
-    ret = ArgToken();  // Dummy return value.
-  }
-  else {
-    int index = new_local_index(rbt);
-    switch (rbt) {
-    case T_BOOLEAN: case T_BYTE: case T_CHAR:  case T_SHORT:
-    case T_INT:     case T_LONG: case T_FLOAT: case T_DOUBLE:
-    case T_OBJECT:
-      emit_store(rbt, index);
-      ret = ArgToken(tt_temporary, rbt, index);
-      break;
-    case T_VOID:
-      ret = ArgToken(tt_void);
-      break;
-    default:
-      ShouldNotReachHere();
-    }
-  }
-
-  return ret;
-}
-
-MethodHandleWalker::ArgToken
-MethodHandleCompiler::make_fetch(BasicType type, klassOop tk, Bytecodes::Code op,
-                                 const MethodHandleWalker::ArgToken& base,
-                                 const MethodHandleWalker::ArgToken& offset,
-                                 TRAPS) {
-  switch (base.token_type()) {
-    case tt_parameter:
-    case tt_temporary:
-      emit_load(base.basic_type(), base.index());
-      break;
-    case tt_constant:
-      emit_load_constant(base);
-      break;
-    default:
-      ShouldNotReachHere();
-  }
-  switch (offset.token_type()) {
-    case tt_parameter:
-    case tt_temporary:
-      emit_load(offset.basic_type(), offset.index());
-      break;
-    case tt_constant:
-      emit_load_constant(offset);
-      break;
-    default:
-      ShouldNotReachHere();
-  }
-  emit_bc(op);
-  int index = new_local_index(type);
-  emit_store(type, index);
-  return ArgToken(tt_temporary, type, index);
-}
-
-
-int MethodHandleCompiler::cpool_primitive_put(BasicType bt, jvalue* con) {
-  jvalue con_copy;
-  assert(bt < T_OBJECT, "");
-  if (type2aelembytes(bt) < jintSize) {
-    // widen to int
-    con_copy = (*con);
-    con = &con_copy;
-    switch (bt) {
-    case T_BOOLEAN: con->i = (con->z ? 1 : 0); break;
-    case T_BYTE:    con->i = con->b;           break;
-    case T_CHAR:    con->i = con->c;           break;
-    case T_SHORT:   con->i = con->s;           break;
-    default: ShouldNotReachHere();
-    }
-    bt = T_INT;
-  }
-
-//   for (int i = 1, imax = _constants.length(); i < imax; i++) {
-//     ConstantValue* con = _constants.at(i);
-//     if (con != NULL && con->is_primitive() && con.basic_type() == bt) {
-//       bool match = false;
-//       switch (type2size[bt]) {
-//       case 1:  if (pcon->_value.i == con->i)  match = true;  break;
-//       case 2:  if (pcon->_value.j == con->j)  match = true;  break;
-//       }
-//       if (match)
-//         return i;
-//     }
-//   }
-  ConstantValue* cv = new ConstantValue(bt, *con);
-  int index = _constants.append(cv);
-
-  // Long and double entries take two slots, so we add another empty entry.
-  if (type2size[bt] == 2)
-    (void) _constants.append(NULL);
-
-  return index;
-}
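
The widening above matches how the JVM models subword values: booleans, bytes, chars, and shorts live on the operand stack (and here, in the pool) as 32-bit ints, sign- or zero-extended by type. A standalone sketch of the same rule (widen_to_int is an illustrative name, not a VM function):

    #include <cassert>

    enum BasicType { T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT };

    int widen_to_int(BasicType bt, long bits) {
      switch (bt) {
      case T_BOOLEAN: return bits ? 1 : 0;
      case T_BYTE:    return (signed char)bits;     // sign-extend
      case T_CHAR:    return (unsigned short)bits;  // zero-extend
      case T_SHORT:   return (short)bits;           // sign-extend
      }
      return (int)bits;
    }

    int main() {
      assert(widen_to_int(T_BYTE, 0xFF)   == -1);     // 0xFF as a byte is -1
      assert(widen_to_int(T_CHAR, 0xFFFF) == 65535);  // chars are unsigned
      return 0;
    }
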
-
-bool MethodHandleCompiler::check_non_bcp_klasses(Handle method_type, TRAPS) {
-  bool res = false;
-  for (int i = -1, len = java_lang_invoke_MethodType::ptype_count(method_type()); i < len; i++) {
-    oop ptype = (i == -1
-                 ? java_lang_invoke_MethodType::rtype(method_type())
-                 : java_lang_invoke_MethodType::ptype(method_type(), i));
-    res |= check_non_bcp_klass(java_lang_Class::as_klassOop(ptype), CHECK_(false));
-  }
-  return res;
-}
-
-bool MethodHandleCompiler::check_non_bcp_klass(klassOop klass, TRAPS) {
-  klass = methodOopDesc::check_non_bcp_klass(klass);
-  if (klass != NULL) {
-    Symbol* name = Klass::cast(klass)->name();
-    for (int i = _non_bcp_klasses.length() - 1; i >= 0; i--) {
-      klassOop k2 = _non_bcp_klasses.at(i)();
-      if (Klass::cast(k2)->name() == name) {
-        if (k2 != klass) {
-          lose(err_msg("unsupported klass name alias %s", name->as_utf8()), THREAD);
-        }
-        return true;
-      }
-    }
-    _non_bcp_klasses.append(KlassHandle(THREAD, klass));
-    return true;
-  }
-  return false;
-}
-
-void MethodHandleCompiler::record_non_bcp_klasses() {
-  // Append extra klasses to constant pool, to guide klass lookup.
-  for (int k = 0; k < _non_bcp_klasses.length(); k++) {
-    klassOop non_bcp_klass = _non_bcp_klasses.at(k)();
-    bool add_to_cp = true;
-    for (int j = 1; j < _constants.length(); j++) {
-      ConstantValue* cv = _constants.at(j);
-      if (cv != NULL && cv->tag() == JVM_CONSTANT_Class
-          && cv->klass_oop() == non_bcp_klass) {
-        add_to_cp = false;
-        break;
-      }
-    }
-    if (add_to_cp)  cpool_klass_put(non_bcp_klass);
-  }
-}
-
-constantPoolHandle MethodHandleCompiler::get_constant_pool(TRAPS) const {
-  constantPoolHandle nullHandle;
-  constantPoolOop cpool_oop = oopFactory::new_constantPool(_constants.length(),
-                                                           oopDesc::IsSafeConc,
-                                                           CHECK_(nullHandle));
-  constantPoolHandle cpool(THREAD, cpool_oop);
-
-  // Fill the real constant pool, skipping the zero element.
-  for (int i = 1; i < _constants.length(); i++) {
-    ConstantValue* cv = _constants.at(i);
-    switch (cv->tag()) {
-    case JVM_CONSTANT_Utf8:        cpool->symbol_at_put(       i, cv->symbol()                         ); break;
-    case JVM_CONSTANT_Integer:     cpool->int_at_put(          i, cv->get_jint()                       ); break;
-    case JVM_CONSTANT_Float:       cpool->float_at_put(        i, cv->get_jfloat()                     ); break;
-    case JVM_CONSTANT_Long:        cpool->long_at_put(         i, cv->get_jlong()                      ); break;
-    case JVM_CONSTANT_Double:      cpool->double_at_put(       i, cv->get_jdouble()                    ); break;
-    case JVM_CONSTANT_Class:       cpool->klass_at_put(        i, cv->klass_oop()                      ); break;
-    case JVM_CONSTANT_Methodref:   cpool->method_at_put(       i, cv->first_index(), cv->second_index()); break;
-    case JVM_CONSTANT_InterfaceMethodref:
-                                cpool->interface_method_at_put(i, cv->first_index(), cv->second_index()); break;
-    case JVM_CONSTANT_NameAndType: cpool->name_and_type_at_put(i, cv->first_index(), cv->second_index()); break;
-    case JVM_CONSTANT_Object:      cpool->object_at_put(       i, cv->object_oop()                     ); break;
-    default: ShouldNotReachHere();
-    }
-
-    switch (cv->tag()) {
-    case JVM_CONSTANT_Long:
-    case JVM_CONSTANT_Double:
-      i++;  // Skip empty entry.
-      assert(_constants.at(i) == NULL, "empty entry");
-      break;
-    }
-  }
-
-  cpool->set_preresolution();
-
-  // Set the constant pool holder to the target method's class.
-  cpool->set_pool_holder(_target_klass());
-
-  return cpool;
-}
-
-
-methodHandle MethodHandleCompiler::get_method_oop(TRAPS) {
-  methodHandle empty;
-  // Create a method that holds the generated bytecode.  invokedynamic
-  // has no receiver, normal MH calls do.
-  int flags_bits;
-  if (for_invokedynamic())
-    flags_bits = (/*JVM_MH_INVOKE_BITS |*/ JVM_ACC_PUBLIC | JVM_ACC_FINAL | JVM_ACC_SYNTHETIC | JVM_ACC_STATIC);
-  else
-    flags_bits = (/*JVM_MH_INVOKE_BITS |*/ JVM_ACC_PUBLIC | JVM_ACC_FINAL | JVM_ACC_SYNTHETIC);
-
-  // Create a new method
-  methodHandle m;
-  {
-    methodOop m_oop = oopFactory::new_method(bytecode_length(),
-                                             accessFlags_from(flags_bits),
-                                             0, 0, 0, oopDesc::IsSafeConc, CHECK_(empty));
-    m = methodHandle(THREAD, m_oop);
-  }
-
-  constantPoolHandle cpool = get_constant_pool(CHECK_(empty));
-  m->set_constants(cpool());
-
-  m->set_name_index(_name_index);
-  m->set_signature_index(_signature_index);
-
-  m->set_code((address) bytecode());
-
-  m->set_max_stack(_max_stack);
-  m->set_max_locals(max_locals());
-  m->set_size_of_parameters(_num_params);
-
-  typeArrayHandle exception_handlers(THREAD, Universe::the_empty_int_array());
-  m->set_exception_table(exception_handlers());
-
-  // Rewrite the method and set up the constant pool cache.
-  objArrayOop m_array = oopFactory::new_system_objArray(1, CHECK_(empty));
-  objArrayHandle methods(THREAD, m_array);
-  methods->obj_at_put(0, m());
-  Rewriter::rewrite(_target_klass(), cpool, methods, CHECK_(empty));  // Use fake class.
-  Rewriter::relocate_and_link(_target_klass(), methods, CHECK_(empty));  // Use fake class.
-
-  // Pre-resolve selected CP cache entries, to avoid problems with class loader scoping.
-  constantPoolCacheHandle cpc(THREAD, cpool->cache());
-  for (int i = 0; i < cpc->length(); i++) {
-    ConstantPoolCacheEntry* e = cpc->entry_at(i);
-    assert(!e->is_secondary_entry(), "no indy instructions in here, yet");
-    int constant_pool_index = e->constant_pool_index();
-    ConstantValue* cv = _constants.at(constant_pool_index);
-    if (!cv->has_linkage())  continue;
-    methodHandle m = cv->linkage();
-    int index;
-    switch (cv->tag()) {
-    case JVM_CONSTANT_Methodref:
-      index = m->vtable_index();
-      if (m->is_static()) {
-        e->set_method(Bytecodes::_invokestatic, m, index);
-      } else {
-        e->set_method(Bytecodes::_invokespecial, m, index);
-        e->set_method(Bytecodes::_invokevirtual, m, index);
-      }
-      break;
-    case JVM_CONSTANT_InterfaceMethodref:
-      index = klassItable::compute_itable_index(m());
-      e->set_interface_call(m, index);
-      break;
-    }
-  }
-
-  // Set the invocation counter's count to the invoke count of the
-  // original call site.
-  InvocationCounter* ic = m->invocation_counter();
-  ic->set(InvocationCounter::wait_for_compile, _invoke_count);
-
-  // Create a new MDO
-  {
-    methodDataOop mdo = oopFactory::new_methodData(m, CHECK_(empty));
-    assert(m->method_data() == NULL, "there should not be an MDO yet");
-    m->set_method_data(mdo);
-
-    bool found_selectAlternative = false;
-    // Iterate over all profile data and set the count of the counter
-    // data entries to the original call site counter.
-    for (ProfileData* profile_data = mdo->first_data();
-         mdo->is_valid(profile_data);
-         profile_data = mdo->next_data(profile_data)) {
-      if (profile_data->is_CounterData()) {
-        CounterData* counter_data = profile_data->as_CounterData();
-        counter_data->set_count(_invoke_count);
-      }
-      if (profile_data->is_BranchData() &&
-          profile_data->bci() == _selectAlternative_bci) {
-        BranchData* bd = profile_data->as_BranchData();
-        bd->set_taken(_taken_count);
-        bd->set_not_taken(_not_taken_count);
-        found_selectAlternative = true;
-      }
-    }
-    assert(_selectAlternative_bci == -1 || found_selectAlternative, "must have found profile entry");
-  }
-
-#ifndef PRODUCT
-  if (TraceMethodHandles) {
-    m->print();
-    m->print_codes();
-  }
-#endif //PRODUCT
-
-  assert(m->is_method_handle_adapter(), "must be recognized as an adapter");
-  return m;
-}
-
-
-#ifndef PRODUCT
-
-// MH printer for debugging.
-
-class MethodHandlePrinter : public MethodHandleWalker {
-private:
-  outputStream* _out;
-  bool          _verbose;
-  int           _temp_num;
-  int           _param_state;
-  stringStream  _strbuf;
-  const char* strbuf() {
-    const char* s = _strbuf.as_string();
-    _strbuf.reset();
-    return s;
-  }
-  ArgToken token(const char* str, BasicType type) {
-    return ArgToken(str, type);
-  }
-  const char* string(ArgToken token) {
-    return token.str();
-  }
-  void start_params() {
-    _param_state <<= 1;
-    _out->print("(");
-  }
-  void end_params() {
-    if (_verbose)  _out->print("\n");
-    _out->print(") => {");
-    _param_state >>= 1;
-  }
-  void put_type_name(BasicType type, klassOop tk, outputStream* s) {
-    const char* kname = NULL;
-    if (tk != NULL)
-      kname = Klass::cast(tk)->external_name();
-    s->print("%s", (kname != NULL) ? kname : type2name(type));
-  }
-  ArgToken maybe_make_temp(const char* statement_op, BasicType type, const char* temp_name) {
-    const char* value = strbuf();
-    if (!_verbose)  return token(value, type);
-    // make an explicit binding for each separate value
-    _strbuf.print("%s%d", temp_name, ++_temp_num);
-    const char* temp = strbuf();
-    _out->print("\n  %s %s %s = %s;", statement_op, type2name(type), temp, value);
-    return token(temp, type);
-  }
-
-public:
-  MethodHandlePrinter(Handle root, bool verbose, outputStream* out, TRAPS)
-    : MethodHandleWalker(root, false, THREAD),
-      _out(out),
-      _verbose(verbose),
-      _param_state(0),
-      _temp_num(0)
-  {
-    out->print("MethodHandle:");
-    java_lang_invoke_MethodType::print_signature(java_lang_invoke_MethodHandle::type(root()), out);
-    out->print(" : #");
-    start_params();
-  }
-  virtual ArgToken make_parameter(BasicType type, klassOop tk, int argnum, TRAPS) {
-    if (argnum < 0) {
-      end_params();
-      return token("return", type);
-    }
-    if ((_param_state & 1) == 0) {
-      _param_state |= 1;
-      _out->print(_verbose ? "\n  " : "");
-    } else {
-      _out->print(_verbose ? ",\n  " : ", ");
-    }
-    if (argnum >= _temp_num)
-      _temp_num = argnum;
-    // generate an argument name
-    _strbuf.print("a%d", argnum);
-    const char* arg = strbuf();
-    put_type_name(type, tk, _out);
-    _out->print(" %s", arg);
-    return token(arg, type);
-  }
-  virtual ArgToken make_oop_constant(oop con, TRAPS) {
-    if (con == NULL)
-      _strbuf.print("null");
-    else
-      con->print_value_on(&_strbuf);
-    if (_strbuf.size() == 0) {  // yuck
-      _strbuf.print("(a ");
-      put_type_name(T_OBJECT, con->klass(), &_strbuf);
-      _strbuf.print(")");
-    }
-    return maybe_make_temp("constant", T_OBJECT, "k");
-  }
-  virtual ArgToken make_prim_constant(BasicType type, jvalue* con, TRAPS) {
-    java_lang_boxing_object::print(type, con, &_strbuf);
-    return maybe_make_temp("constant", type, "k");
-  }
-  void print_bytecode_name(Bytecodes::Code op) {
-    if (Bytecodes::is_defined(op))
-      _strbuf.print("%s", Bytecodes::name(op));
-    else
-      _strbuf.print("bytecode_%d", (int) op);
-  }
-  virtual ArgToken make_conversion(BasicType type, klassOop tk, Bytecodes::Code op, const ArgToken& src, TRAPS) {
-    print_bytecode_name(op);
-    _strbuf.print("(%s", string(src));
-    if (tk != NULL) {
-      _strbuf.print(", ");
-      put_type_name(type, tk, &_strbuf);
-    }
-    _strbuf.print(")");
-    return maybe_make_temp("convert", type, "v");
-  }
-  virtual ArgToken make_fetch(BasicType type, klassOop tk, Bytecodes::Code op, const ArgToken& base, const ArgToken& offset, TRAPS) {
-    _strbuf.print("%s(%s, %s", Bytecodes::name(op), string(base), string(offset));
-    if (tk != NULL) {
-      _strbuf.print(", ");
-      put_type_name(type, tk, &_strbuf);
-    }
-    _strbuf.print(")");
-    return maybe_make_temp("fetch", type, "x");
-  }
-  virtual ArgToken make_invoke(methodHandle m, vmIntrinsics::ID iid,
-                               Bytecodes::Code op, bool tailcall,
-                               int argc, ArgToken* argv, TRAPS) {
-    Symbol* name;
-    Symbol* sig;
-    if (m.not_null()) {
-      name = m->name();
-      sig  = m->signature();
-    } else {
-      name = vmSymbols::symbol_at(vmIntrinsics::name_for(iid));
-      sig  = vmSymbols::symbol_at(vmIntrinsics::signature_for(iid));
-    }
-    _strbuf.print("%s %s%s(", Bytecodes::name(op), name->as_C_string(), sig->as_C_string());
-    for (int i = 0; i < argc; i++) {
-      _strbuf.print("%s%s", (i > 0 ? ", " : ""), string(argv[i]));
-    }
-    _strbuf.print(")");
-    if (!tailcall) {
-      BasicType rt = char2type(sig->byte_at(sig->utf8_length()-1));
-      if (rt == T_ILLEGAL)  rt = T_OBJECT;  // ';' at the end of '(...)L...;'
-      return maybe_make_temp("invoke", rt, "x");
-    } else {
-      const char* ret = strbuf();
-      _out->print(_verbose ? "\n  return " : " ");
-      _out->print("%s", ret);
-      _out->print(_verbose ? "\n}\n" : " }");
-    }
-    return ArgToken();
-  }
-
-  virtual void set_method_handle(oop mh) {
-    if (WizardMode && Verbose) {
-      tty->print("\n--- next target: ");
-      mh->print();
-    }
-  }
-
-  static void print(Handle root, bool verbose, outputStream* out, TRAPS) {
-    ResourceMark rm;
-    MethodHandlePrinter printer(root, verbose, out, CHECK);
-    printer.walk(CHECK);
-    out->print("\n");
-  }
-  static void print(Handle root, bool verbose = Verbose, outputStream* out = tty) {
-    Thread* THREAD = Thread::current();
-    ResourceMark rm;
-    MethodHandlePrinter printer(root, verbose, out, THREAD);
-    if (!HAS_PENDING_EXCEPTION)
-      printer.walk(THREAD);
-    if (HAS_PENDING_EXCEPTION) {
-      oop ex = PENDING_EXCEPTION;
-      CLEAR_PENDING_EXCEPTION;
-      out->print(" *** ");
-      if (printer.lose_message() != NULL)  out->print("%s ", printer.lose_message());
-      out->print("}");
-    }
-    out->print("\n");
-  }
-};
-
-extern "C"
-void print_method_handle(oop mh) {
-  if (!mh->is_oop()) {
-    tty->print_cr("*** not a method handle: "PTR_FORMAT, (intptr_t)mh);
-  } else if (java_lang_invoke_MethodHandle::is_instance(mh)) {
-    MethodHandlePrinter::print(mh);
-  } else {
-    tty->print("*** not a method handle: ");
-    mh->print();
-  }
-}
-
-#endif // PRODUCT
--- a/src/share/vm/prims/methodHandleWalk.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,486 +0,0 @@
-/*
- * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_PRIMS_METHODHANDLEWALK_HPP
-#define SHARE_VM_PRIMS_METHODHANDLEWALK_HPP
-
-#include "prims/methodHandles.hpp"
-
-// Low-level parser for method handle chains.
-class MethodHandleChain : StackObj {
-public:
-  typedef MethodHandles::EntryKind EntryKind;
-
-private:
-  Handle        _root;          // original target
-  Handle        _method_handle; // current target
-  bool          _is_last;       // final guy in chain
-  bool          _is_bound;      // has a bound argument
-  BasicType     _arg_type;      // if is_bound, the bound argument type
-  int           _arg_slot;      // if is_bound or is_adapter, affected argument slot
-  jint          _conversion;    // conversion field of AMH or -1
-  methodHandle  _last_method;   // if is_last, which method we target
-  Bytecodes::Code _last_invoke; // if is_last, type of invoke
-  const char*   _lose_message;  // saved argument to lose()
-
-  void set_method_handle(Handle target, TRAPS);
-  void set_last_method(oop target, TRAPS);
-  static BasicType compute_bound_arg_type(oop target, methodOop m, int arg_slot, TRAPS);
-
-  oop MethodHandle_type_oop()          { return java_lang_invoke_MethodHandle::type(method_handle_oop()); }
-  oop MethodHandle_vmtarget_oop()      { return java_lang_invoke_MethodHandle::vmtarget(method_handle_oop()); }
-  int MethodHandle_vmslots()           { return java_lang_invoke_MethodHandle::vmslots(method_handle_oop()); }
-  int DirectMethodHandle_vmindex()     { return java_lang_invoke_DirectMethodHandle::vmindex(method_handle_oop()); }
-  oop BoundMethodHandle_argument_oop() { return java_lang_invoke_BoundMethodHandle::argument(method_handle_oop()); }
-  int BoundMethodHandle_vmargslot()    { return java_lang_invoke_BoundMethodHandle::vmargslot(method_handle_oop()); }
-  int AdapterMethodHandle_conversion() { return java_lang_invoke_AdapterMethodHandle::conversion(method_handle_oop()); }
-
-#ifdef ASSERT
-  void print_impl(TRAPS);
-#endif
-
-public:
-  MethodHandleChain(Handle root, TRAPS)
-    : _root(root)
-  { set_method_handle(root, THREAD); }
-
-  bool is_adapter()             { return _conversion != -1; }
-  bool is_bound()               { return _is_bound; }
-  bool is_last()                { return _is_last; }
-
-  void next(TRAPS) {
-    assert(!is_last(), "");
-    set_method_handle(MethodHandle_vmtarget_oop(), THREAD);
-  }
-
-  Handle root()                 { return _root; }
-  Handle method_handle()        { return _method_handle; }
-  oop    method_handle_oop()    { return _method_handle(); }
-  oop    method_type_oop()      { return MethodHandle_type_oop(); }
-  oop    vmtarget_oop()         { return MethodHandle_vmtarget_oop(); }
-
-  jint adapter_conversion()     { assert(is_adapter(), ""); return _conversion; }
-  int  adapter_conversion_op()  { return MethodHandles::adapter_conversion_op(adapter_conversion()); }
-  BasicType adapter_conversion_src_type()
-                                { return MethodHandles::adapter_conversion_src_type(adapter_conversion()); }
-  BasicType adapter_conversion_dest_type()
-                                { return MethodHandles::adapter_conversion_dest_type(adapter_conversion()); }
-  int  adapter_conversion_stack_move()
-                                { return MethodHandles::adapter_conversion_stack_move(adapter_conversion()); }
-  int  adapter_conversion_stack_pushes()
-                                { return adapter_conversion_stack_move() / MethodHandles::stack_move_unit(); }
-  int  adapter_conversion_vminfo()
-                                { return MethodHandles::adapter_conversion_vminfo(adapter_conversion()); }
-  int adapter_arg_slot()        { assert(is_adapter(), ""); return _arg_slot; }
-  oop adapter_arg_oop()         { assert(is_adapter(), ""); return BoundMethodHandle_argument_oop(); }
-
-  BasicType bound_arg_type()    { assert(is_bound(), ""); return _arg_type; }
-  int       bound_arg_slot()    { assert(is_bound(), ""); return _arg_slot; }
-  oop       bound_arg_oop()     { assert(is_bound(), ""); return BoundMethodHandle_argument_oop(); }
-
-  methodHandle last_method()    { assert(is_last(), ""); return _last_method; }
-  methodOop last_method_oop()   { assert(is_last(), ""); return _last_method(); }
-  Bytecodes::Code last_invoke_code() { assert(is_last(), ""); return _last_invoke; }
-
-  void lose(const char* msg, TRAPS);
-  const char* lose_message()    { return _lose_message; }
-
-#ifdef ASSERT
-  // Print a symbolic description of a method handle chain, including
-  // the signature for each method.  The signatures are printed in
-  // slot order to make it easier to understand.
-  void print();
-  static void print(oopDesc* mh);
-#endif
-};
-
-
-// Structure walker for method handles.
-// Does abstract interpretation on top of low-level parsing.
-// You supply the tokens shuffled by the abstract interpretation.
-class MethodHandleWalker : StackObj {
-public:
-  // Stack values:
-  enum TokenType {
-    tt_void,
-    tt_parameter,
-    tt_temporary,
-    tt_constant,
-    tt_symbolic,
-    tt_illegal
-  };
-
-  // Argument token:
-  class ArgToken {
-  private:
-    TokenType _tt;
-    BasicType _bt;
-    jvalue    _value;
-    Handle    _handle;
-
-  public:
-    ArgToken(TokenType tt = tt_illegal) : _tt(tt), _bt(tt == tt_void ? T_VOID : T_ILLEGAL) {
-      assert(tt == tt_illegal || tt == tt_void, "invalid token type");
-    }
-
-    ArgToken(TokenType tt, BasicType bt, int index) : _tt(tt), _bt(bt) {
-      assert(_tt == tt_parameter || _tt == tt_temporary, "must have index");
-      _value.i = index;
-    }
-
-    ArgToken(BasicType bt, jvalue value) : _tt(tt_constant), _bt(bt), _value(value) { assert(_bt != T_OBJECT, "wrong constructor"); }
-    ArgToken(Handle handle) : _tt(tt_constant), _bt(T_OBJECT), _handle(handle) {}
-
-    ArgToken(const char* str, BasicType type) : _tt(tt_symbolic), _bt(type) {
-      _value.j = (intptr_t)str;
-    }
-
-    TokenType token_type()  const { return _tt; }
-    BasicType basic_type()  const { return _bt; }
-    bool      has_index()   const { return _tt == tt_parameter || _tt == tt_temporary; }
-    int       index()       const { assert(has_index(), "must have index"); return _value.i; }
-    Handle    object()      const { assert(_bt == T_OBJECT, "wrong accessor"); assert(_tt == tt_constant, "value type"); return _handle; }
-    const char* str()       const { assert(_tt == tt_symbolic, "string type"); return (const char*)(intptr_t)_value.j; }
-
-    jint      get_jint()    const { assert(_bt == T_INT || is_subword_type(_bt), "wrong accessor"); assert(_tt == tt_constant, "value types"); return _value.i; }
-    jlong     get_jlong()   const { assert(_bt == T_LONG, "wrong accessor");   assert(_tt == tt_constant, "value types"); return _value.j; }
-    jfloat    get_jfloat()  const { assert(_bt == T_FLOAT, "wrong accessor");  assert(_tt == tt_constant, "value types"); return _value.f; }
-    jdouble   get_jdouble() const { assert(_bt == T_DOUBLE, "wrong accessor"); assert(_tt == tt_constant, "value types"); return _value.d; }
-  };
-
-private:
-  MethodHandleChain _chain;
-  bool              _for_invokedynamic;
-  int               _local_index;
-
-  // This array is kept in an unusual order, indexed by low-level "slot number".
-  // TOS is always _outgoing.at(0), so simple pushes and pops shift the whole _outgoing array.
-  // If there is a receiver in the current argument list, it is at _outgoing.at(_outgoing.length()-1).
-  // If a value at _outgoing.at(n) is T_LONG or T_DOUBLE, the value at _outgoing.at(n+1) is T_VOID.
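-  // Illustration (derived from the invariants above): an outgoing list
-  // (receiver r, int x, long y) is laid out as at(0)=y:T_LONG (TOS),
-  // at(1)=T_VOID, at(2)=x:T_INT, at(3)=r:T_OBJECT.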
-  GrowableArray<ArgToken>  _outgoing;       // current outgoing parameter slots
-  int                      _outgoing_argc;  // # non-empty outgoing slots
-
-  vmIntrinsics::ID _return_conv;            // Return conversion required by raw retypes.
-
-  // Replace a value of type old_type at slot (and maybe slot+1) with the new value.
-  // If old_type != T_VOID, remove the old argument at that point.
-  // If new_type != T_VOID, insert the new argument at that point.
-  // Insert or delete a second empty slot as needed.
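-  // For example, replacing a T_INT with a T_LONG value removes the one old
-  // slot and inserts two new ones (the value plus a trailing T_VOID slot).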
-  void change_argument(BasicType old_type, int slot, const ArgToken& new_arg);
-  void change_argument(BasicType old_type, int slot, BasicType type, const ArgToken& new_arg) {
-    assert(type == new_arg.basic_type(), "must agree");
-    change_argument(old_type, slot, new_arg);
-  }
-
-  // Raw retype conversions for OP_RAW_RETYPE.
-  void retype_raw_conversion(BasicType src, BasicType dst, bool for_return, int slot, TRAPS);
-  void retype_raw_argument_type(BasicType src, BasicType dst, int slot, TRAPS) { retype_raw_conversion(src, dst, false, slot, CHECK); }
-  void retype_raw_return_type(  BasicType src, BasicType dst,           TRAPS) { retype_raw_conversion(src, dst, true,  -1,   CHECK); }
-
-  BasicType arg_type(int slot) {
-    return _outgoing.at(slot).basic_type();
-  }
-  bool has_argument(int slot) {
-    return arg_type(slot) < T_VOID;
-  }
-
-#ifdef ASSERT
-  int argument_count_slow();
-#endif
-
-  // Return a bytecode for converting src to dest, if one exists.
-  Bytecodes::Code conversion_code(BasicType src, BasicType dest);
-
-  void walk_incoming_state(TRAPS);
-
-  void verify_args_and_signature(TRAPS) NOT_DEBUG_RETURN;
-
-public:
-  MethodHandleWalker(Handle root, bool for_invokedynamic, TRAPS)
-    : _chain(root, THREAD),
-      _for_invokedynamic(for_invokedynamic),
-      _outgoing(THREAD, 10),
-      _outgoing_argc(0),
-      _return_conv(vmIntrinsics::_none)
-  {
-    _local_index = for_invokedynamic ? 0 : 1;
-  }
-
-  MethodHandleChain& chain() { return _chain; }
-
-  bool for_invokedynamic() const { return _for_invokedynamic; }
-
-  vmIntrinsics::ID return_conv() const { return _return_conv; }
-  void set_return_conv(vmIntrinsics::ID c) { _return_conv = c; }
-  static vmIntrinsics::ID zero_return_conv() { return vmIntrinsics::_min; }
-
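-  // Allocate the next local variable slot(s) for bt; T_LONG and T_DOUBLE
-  // consume two slots (see type2size), other value types one.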
-  int new_local_index(BasicType bt) {
-    //int index = _for_invokedynamic ? _local_index : _local_index - 1;
-    int index = _local_index;
-    _local_index += type2size[bt];
-    return index;
-  }
-
-  int max_locals() const { return _local_index; }
-
-  // plug-in abstract interpretation steps:
-  virtual ArgToken make_parameter(BasicType type, klassOop tk, int argnum, TRAPS) = 0;
-  virtual ArgToken make_prim_constant(BasicType type, jvalue* con, TRAPS) = 0;
-  virtual ArgToken make_oop_constant(oop con, TRAPS) = 0;
-  virtual ArgToken make_conversion(BasicType type, klassOop tk, Bytecodes::Code op, const ArgToken& src, TRAPS) = 0;
-  virtual ArgToken make_fetch(BasicType type, klassOop tk, Bytecodes::Code op, const ArgToken& base, const ArgToken& offset, TRAPS) = 0;
-  virtual ArgToken make_invoke(methodHandle m, vmIntrinsics::ID iid, Bytecodes::Code op, bool tailcall, int argc, ArgToken* argv, TRAPS) = 0;
-
-  // For make_invoke, the methodHandle can be NULL if the intrinsic ID
-  // is something other than vmIntrinsics::_none.
-
-  // and in case anyone cares to relate the previous actions to the chain:
-  virtual void set_method_handle(oop mh) { }
-
-  void lose(const char* msg, TRAPS) { chain().lose(msg, THREAD); }
-  const char* lose_message()        { return chain().lose_message(); }
-
-  ArgToken walk(TRAPS);
-};
-
-
-// An abstract interpreter for method handle chains.
-// Produces an account of the semantics of a chain, in terms of a static IR.
-// The IR happens to be JVM bytecodes.
-class MethodHandleCompiler : public MethodHandleWalker {
-private:
-  int          _invoke_count;  // number of times the original call site has been executed
-  KlassHandle  _rklass;        // Return type for casting.
-  BasicType    _rtype;
-  KlassHandle  _target_klass;
-  Thread*      _thread;
-
-  int          _selectAlternative_bci; // These are used for capturing profiles from GWTs (guardWithTest combinators)
-  int          _taken_count;
-  int          _not_taken_count;
-
-  // Values used by the compiler.
-  static jvalue zero_jvalue;
-  static jvalue one_jvalue;
-
-  // Fake constant pool entry.
-  class ConstantValue : public ResourceObj {
-  private:
-    int       _tag;   // Constant pool tag type.
-    JavaValue _value;
-    Handle    _handle;
-    Symbol*   _sym;
-    methodHandle _method;  // pre-linkage
-
-  public:
-    // Constructor for oop types.
-    ConstantValue(int tag, Handle con) : _tag(tag), _handle(con) {
-      assert(tag == JVM_CONSTANT_Class  ||
-             tag == JVM_CONSTANT_String ||
-             tag == JVM_CONSTANT_Object, "must be oop type");
-    }
-
-    ConstantValue(int tag, Symbol* con) : _tag(tag), _sym(con) {
-      assert(tag == JVM_CONSTANT_Utf8, "must be symbol type");
-    }
-
-    // Constructors for constant pool reference entries (Fieldref through NameAndType).
-    ConstantValue(int tag, int index) : _tag(tag) {
-      assert(JVM_CONSTANT_Fieldref <= tag && tag <= JVM_CONSTANT_NameAndType, "must be ref type");
-      _value.set_jint(index);
-    }
-    ConstantValue(int tag, int first_index, int second_index) : _tag(tag) {
-      assert(JVM_CONSTANT_Fieldref <= tag && tag <= JVM_CONSTANT_NameAndType, "must be ref type");
-      _value.set_jint(first_index << 16 | second_index);
-    }
-
-    // Constructor for primitive types.
-    ConstantValue(BasicType bt, jvalue con) {
-      _value.set_type(bt);
-      switch (bt) {
-      case T_INT:    _tag = JVM_CONSTANT_Integer; _value.set_jint(   con.i); break;
-      case T_LONG:   _tag = JVM_CONSTANT_Long;    _value.set_jlong(  con.j); break;
-      case T_FLOAT:  _tag = JVM_CONSTANT_Float;   _value.set_jfloat( con.f); break;
-      case T_DOUBLE: _tag = JVM_CONSTANT_Double;  _value.set_jdouble(con.d); break;
-      default: ShouldNotReachHere();
-      }
-    }
-
-    int       tag()          const { return _tag; }
-    Symbol*   symbol()       const { return _sym; }
-    klassOop  klass_oop()    const { return (klassOop)  _handle(); }
-    oop       object_oop()   const { return _handle(); }
-    int       index()        const { return _value.get_jint(); }
-    int       first_index()  const { return _value.get_jint() >> 16; }
-    int       second_index() const { return _value.get_jint() & 0x0000FFFF; }
-
-    bool      is_primitive() const { return is_java_primitive(_value.get_type()); }
-    jint      get_jint()     const { return _value.get_jint();    }
-    jlong     get_jlong()    const { return _value.get_jlong();   }
-    jfloat    get_jfloat()   const { return _value.get_jfloat();  }
-    jdouble   get_jdouble()  const { return _value.get_jdouble(); }
-
-    void set_linkage(methodHandle method) {
-      assert(_method.is_null(), "");
-      _method = method;
-    }
-    bool     has_linkage()   const { return _method.not_null(); }
-    methodHandle linkage()   const { return _method; }
-  };
-
-  // Fake constant pool.
-  GrowableArray<ConstantValue*> _constants;
-
-  // Non-BCP classes that appear in associated MethodTypes (require special handling).
-  GrowableArray<KlassHandle> _non_bcp_klasses;
-
-  // Accumulated compiler state:
-  GrowableArray<unsigned char> _bytecode;
-
-  int _cur_stack;
-  int _max_stack;
-  int _num_params;
-  int _name_index;
-  int _signature_index;
-
-  void stack_push(BasicType bt) {
-    _cur_stack += type2size[bt];
-    if (_cur_stack > _max_stack) _max_stack = _cur_stack;
-  }
-  void stack_pop(BasicType bt) {
-    _cur_stack -= type2size[bt];
-    assert(_cur_stack >= 0, "sanity");
-  }
-
-  unsigned char* bytecode()        const { return _bytecode.adr_at(0); }
-  int            bytecode_length() const { return _bytecode.length(); }
-  int            cur_bci()         const { return _bytecode.length(); }
-
-  // Fake constant pool.
-  int cpool_oop_put(int tag, Handle con) {
-    if (con.is_null())  return 0;
-    ConstantValue* cv = new ConstantValue(tag, con);
-    return _constants.append(cv);
-  }
-
-  int cpool_symbol_put(int tag, Symbol* con) {
-    if (con == NULL)  return 0;
-    ConstantValue* cv = new ConstantValue(tag, con);
-    con->increment_refcount();
-    return _constants.append(cv);
-  }
-
-  int cpool_oop_reference_put(int tag, int first_index, int second_index, methodHandle method) {
-    if (first_index == 0 && second_index == 0)  return 0;
-    assert(first_index != 0 && second_index != 0, "no zero indexes");
-    ConstantValue* cv = new ConstantValue(tag, first_index, second_index);
-    if (method.not_null())  cv->set_linkage(method);
-    return _constants.append(cv);
-  }
-
-  int cpool_primitive_put(BasicType type, jvalue* con);
-
-  bool check_non_bcp_klasses(Handle method_type, TRAPS);
-  bool check_non_bcp_klass(klassOop klass, TRAPS);
-  void record_non_bcp_klasses();
-
-  int cpool_int_put(jint value) {
-    jvalue con; con.i = value;
-    return cpool_primitive_put(T_INT, &con);
-  }
-  int cpool_long_put(jlong value) {
-    jvalue con; con.j = value;
-    return cpool_primitive_put(T_LONG, &con);
-  }
-  int cpool_float_put(jfloat value) {
-    jvalue con; con.f = value;
-    return cpool_primitive_put(T_FLOAT, &con);
-  }
-  int cpool_double_put(jdouble value) {
-    jvalue con; con.d = value;
-    return cpool_primitive_put(T_DOUBLE, &con);
-  }
-
-  int cpool_object_put(Handle obj) {
-    return cpool_oop_put(JVM_CONSTANT_Object, obj);
-  }
-  int cpool_symbol_put(Symbol* sym) {
-    return cpool_symbol_put(JVM_CONSTANT_Utf8, sym);
-  }
-  int cpool_klass_put(klassOop klass) {
-    return cpool_oop_put(JVM_CONSTANT_Class, klass);
-  }
-  int cpool_methodref_put(Bytecodes::Code op, int class_index, int name_and_type_index, methodHandle method) {
-    int tag = (op == Bytecodes::_invokeinterface ? JVM_CONSTANT_InterfaceMethodref : JVM_CONSTANT_Methodref);
-    return cpool_oop_reference_put(tag, class_index, name_and_type_index, method);
-  }
-  int cpool_name_and_type_put(int name_index, int signature_index) {
-    return cpool_oop_reference_put(JVM_CONSTANT_NameAndType, name_index, signature_index, methodHandle());
-  }
-
-  void emit_bc(Bytecodes::Code op, int index = 0, int args_size = -1);
-  void update_branch_dest(int src, int dst);
-  void emit_load(ArgToken arg);
-  void emit_load(BasicType bt, int index);
-  void emit_store(BasicType bt, int index);
-  void emit_load_constant(ArgToken arg);
-
-  virtual ArgToken make_parameter(BasicType type, klassOop tk, int argnum, TRAPS) {
-    return ArgToken(tt_parameter, type, argnum);
-  }
-  virtual ArgToken make_oop_constant(oop con, TRAPS) {
-    Handle h(THREAD, con);
-    return ArgToken(h);
-  }
-  virtual ArgToken make_prim_constant(BasicType type, jvalue* con, TRAPS) {
-    return ArgToken(type, *con);
-  }
-
-  virtual ArgToken make_conversion(BasicType type, klassOop tk, Bytecodes::Code op, const ArgToken& src, TRAPS);
-  virtual ArgToken make_fetch(BasicType type, klassOop tk, Bytecodes::Code op, const ArgToken& base, const ArgToken& offset, TRAPS);
-  virtual ArgToken make_invoke(methodHandle m, vmIntrinsics::ID iid, Bytecodes::Code op, bool tailcall, int argc, ArgToken* argv, TRAPS);
-
-  // Check for profiling information on a GWT (guardWithTest) and return true if it's found
-  bool fetch_counts(ArgToken a1, ArgToken a2);
-
-  // Get a real constant pool.
-  constantPoolHandle get_constant_pool(TRAPS) const;
-
-  // Get a real methodOop.
-  methodHandle get_method_oop(TRAPS);
-
-public:
-  MethodHandleCompiler(Handle root, Symbol* name, Symbol* signature, int invoke_count, bool for_invokedynamic, TRAPS);
-
-  // Compile the given MH chain into bytecode.
-  methodHandle compile(TRAPS);
-
-  // Tests if the given class is a MH adapter holder.
-  static bool klass_is_method_handle_adapter_holder(klassOop klass) {
-    return (klass == SystemDictionary::MethodHandle_klass());
-  }
-};
-
-#endif // SHARE_VM_PRIMS_METHODHANDLEWALK_HPP
--- a/src/share/vm/prims/methodHandles.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/methodHandles.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -30,166 +30,30 @@
 #include "memory/allocation.inline.hpp"
 #include "memory/oopFactory.hpp"
 #include "prims/methodHandles.hpp"
-#include "prims/methodHandleWalk.hpp"
 #include "runtime/compilationPolicy.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/reflection.hpp"
 #include "runtime/signature.hpp"
 #include "runtime/stubRoutines.hpp"
 
+
 /*
  * JSR 292 reference implementation: method handles
+ * The JDK 7 reference implementation represented method handle
+ * combinations as chains.  Each link in the chain had a "vmentry"
+ * field which pointed at a bit of assembly code which performed
+ * one transformation before dispatching to the next link in the chain.
+ *
+ * The current reference implementation pushes almost all code generation
+ * responsibility to (trusted) Java code.  A method handle contains a
+ * pointer to its "LambdaForm", which embodies all details of the method
+ * handle's behavior.  The LambdaForm is a normal Java object, managed
+ * by a runtime coded in Java.
  */
 
 bool MethodHandles::_enabled = false; // set true after successful native linkage
-
-MethodHandleEntry* MethodHandles::_entries[MethodHandles::_EK_LIMIT] = {NULL};
-const char*        MethodHandles::_entry_names[_EK_LIMIT+1] = {
-  "raise_exception",
-  "invokestatic",               // how a MH emulates invokestatic
-  "invokespecial",              // ditto for the other invokes...
-  "invokevirtual",
-  "invokeinterface",
-  "bound_ref",                  // these are for BMH...
-  "bound_int",
-  "bound_long",
-  "bound_ref_direct",           // (direct versions have a direct methodOop)
-  "bound_int_direct",
-  "bound_long_direct",
-
-  // starting at _adapter_mh_first:
-  "adapter_retype_only",       // these are for AMH...
-  "adapter_retype_raw",
-  "adapter_check_cast",
-  "adapter_prim_to_prim",
-  "adapter_ref_to_prim",
-  "adapter_prim_to_ref",
-  "adapter_swap_args",
-  "adapter_rot_args",
-  "adapter_dup_args",
-  "adapter_drop_args",
-  "adapter_collect_args",
-  "adapter_spread_args",
-  "adapter_fold_args",
-  "adapter_unused_13",
-
-  // optimized adapter types:
-  "adapter_swap_args/1",
-  "adapter_swap_args/2",
-  "adapter_rot_args/1,up",
-  "adapter_rot_args/1,down",
-  "adapter_rot_args/2,up",
-  "adapter_rot_args/2,down",
-  "adapter_prim_to_prim/i2i",
-  "adapter_prim_to_prim/l2i",
-  "adapter_prim_to_prim/d2f",
-  "adapter_prim_to_prim/i2l",
-  "adapter_prim_to_prim/f2d",
-  "adapter_ref_to_prim/unboxi",
-  "adapter_ref_to_prim/unboxl",
-
-  // return value handlers for collect/filter/fold adapters:
-  "return/ref",
-  "return/int",
-  "return/long",
-  "return/float",
-  "return/double",
-  "return/void",
-  "return/S0/ref",
-  "return/S1/ref",
-  "return/S2/ref",
-  "return/S3/ref",
-  "return/S4/ref",
-  "return/S5/ref",
-  "return/any",
-
-  // spreading (array length cases 0, 1, ...)
-  "adapter_spread/0",
-  "adapter_spread/1/ref",
-  "adapter_spread/2/ref",
-  "adapter_spread/3/ref",
-  "adapter_spread/4/ref",
-  "adapter_spread/5/ref",
-  "adapter_spread/ref",
-  "adapter_spread/byte",
-  "adapter_spread/char",
-  "adapter_spread/short",
-  "adapter_spread/int",
-  "adapter_spread/long",
-  "adapter_spread/float",
-  "adapter_spread/double",
-
-  // blocking filter/collect conversions:
-  "adapter_collect/ref",
-  "adapter_collect/int",
-  "adapter_collect/long",
-  "adapter_collect/float",
-  "adapter_collect/double",
-  "adapter_collect/void",
-  "adapter_collect/0/ref",
-  "adapter_collect/1/ref",
-  "adapter_collect/2/ref",
-  "adapter_collect/3/ref",
-  "adapter_collect/4/ref",
-  "adapter_collect/5/ref",
-  "adapter_filter/S0/ref",
-  "adapter_filter/S1/ref",
-  "adapter_filter/S2/ref",
-  "adapter_filter/S3/ref",
-  "adapter_filter/S4/ref",
-  "adapter_filter/S5/ref",
-  "adapter_collect/2/S0/ref",
-  "adapter_collect/2/S1/ref",
-  "adapter_collect/2/S2/ref",
-  "adapter_collect/2/S3/ref",
-  "adapter_collect/2/S4/ref",
-  "adapter_collect/2/S5/ref",
-
-  // blocking fold conversions:
-  "adapter_fold/ref",
-  "adapter_fold/int",
-  "adapter_fold/long",
-  "adapter_fold/float",
-  "adapter_fold/double",
-  "adapter_fold/void",
-  "adapter_fold/1/ref",
-  "adapter_fold/2/ref",
-  "adapter_fold/3/ref",
-  "adapter_fold/4/ref",
-  "adapter_fold/5/ref",
-
-  "adapter_opt_profiling",
-
-  NULL
-};
-
-// Adapters.
 MethodHandlesAdapterBlob* MethodHandles::_adapter_code = NULL;
 
-jobject MethodHandles::_raise_exception_method;
-
-address MethodHandles::_adapter_return_handlers[CONV_TYPE_MASK+1];
-
-#ifdef ASSERT
-bool MethodHandles::spot_check_entry_names() {
-  assert(!strcmp(entry_name(_invokestatic_mh), "invokestatic"), "");
-  assert(!strcmp(entry_name(_bound_ref_mh), "bound_ref"), "");
-  assert(!strcmp(entry_name(_adapter_retype_only), "adapter_retype_only"), "");
-  assert(!strcmp(entry_name(_adapter_fold_args), "adapter_fold_args"), "");
-  assert(!strcmp(entry_name(_adapter_opt_unboxi), "adapter_ref_to_prim/unboxi"), "");
-  assert(!strcmp(entry_name(_adapter_opt_spread_char), "adapter_spread/char"), "");
-  assert(!strcmp(entry_name(_adapter_opt_spread_double), "adapter_spread/double"), "");
-  assert(!strcmp(entry_name(_adapter_opt_collect_int), "adapter_collect/int"), "");
-  assert(!strcmp(entry_name(_adapter_opt_collect_0_ref), "adapter_collect/0/ref"), "");
-  assert(!strcmp(entry_name(_adapter_opt_collect_2_S3_ref), "adapter_collect/2/S3/ref"), "");
-  assert(!strcmp(entry_name(_adapter_opt_filter_S5_ref), "adapter_filter/S5/ref"), "");
-  assert(!strcmp(entry_name(_adapter_opt_fold_3_ref), "adapter_fold/3/ref"), "");
-  assert(!strcmp(entry_name(_adapter_opt_fold_void), "adapter_fold/void"), "");
-  return true;
-}
-#endif
-
-
 //------------------------------------------------------------------------------
 // MethodHandles::generate_adapters
 //
@@ -216,36 +80,20 @@
 //
 void MethodHandlesAdapterGenerator::generate() {
   // Generate generic method handle adapters.
-  for (MethodHandles::EntryKind ek = MethodHandles::_EK_FIRST;
-       ek < MethodHandles::_EK_LIMIT;
-       ek = MethodHandles::EntryKind(1 + (int)ek)) {
-    if (MethodHandles::ek_supported(ek)) {
-      StubCodeMark mark(this, "MethodHandle", MethodHandles::entry_name(ek));
-      MethodHandles::generate_method_handle_stub(_masm, ek);
+  // Generate interpreter entries
+  for (Interpreter::MethodKind mk = Interpreter::method_handle_invoke_FIRST;
+       mk <= Interpreter::method_handle_invoke_LAST;
+       mk = Interpreter::MethodKind(1 + (int)mk)) {
+    vmIntrinsics::ID iid = Interpreter::method_handle_intrinsic(mk);
+    StubCodeMark mark(this, "MethodHandle::interpreter_entry", vmIntrinsics::name_at(iid));
+    address entry = MethodHandles::generate_method_handle_interpreter_entry(_masm, iid);
+    if (entry != NULL) {
+      Interpreter::set_entry_for_kind(mk, entry);
     }
+    // If the entry is not set, it will throw AbstractMethodError.
   }
 }
 
-
-//------------------------------------------------------------------------------
-// MethodHandles::ek_supported
-//
-bool MethodHandles::ek_supported(MethodHandles::EntryKind ek) {
-  MethodHandles::EntryKind ek_orig = MethodHandles::ek_original_kind(ek);
-  switch (ek_orig) {
-  case _adapter_unused_13:
-    return false;  // not defined yet
-  case _adapter_prim_to_ref:
-    return conv_op_supported(java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF);
-  case _adapter_collect_args:
-    return conv_op_supported(java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS);
-  case _adapter_fold_args:
-    return conv_op_supported(java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS);
-  }
-  return true;
-}
-
-
 void MethodHandles::set_enabled(bool z) {
   if (_enabled != z) {
     guarantee(z && EnableInvokeDynamic, "can only enable once, and only if -XX:+EnableInvokeDynamic");
@@ -253,217 +101,6 @@
   }
 }
 
-// Note: A method which does not have a TRAPS argument cannot block in the GC
-// or throw exceptions.  Such methods are used in this file to do something quick
-// and local, like parse a data structure.  For speed, such methods work on plain
-// oops, not handles.  Trapping methods uniformly operate on handles.
-
-methodHandle MethodHandles::decode_vmtarget(oop vmtarget, int vmindex, oop mtype,
-                                            KlassHandle& receiver_limit_result, int& decode_flags_result) {
-  if (vmtarget == NULL)  return methodHandle();
-  assert(methodOopDesc::nonvirtual_vtable_index < 0, "encoding");
-  if (vmindex < 0) {
-    // this DMH performs no dispatch; it is directly bound to a methodOop
-    // A MemberName may either be directly bound to a methodOop,
-    // or it may use the klass/index form; both forms mean the same thing.
-    methodOop m = decode_methodOop(methodOop(vmtarget), decode_flags_result);
-    if ((decode_flags_result & _dmf_has_receiver) != 0
-        && java_lang_invoke_MethodType::is_instance(mtype)) {
-      // Extract receiver type restriction from mtype.ptypes[0].
-      objArrayOop ptypes = java_lang_invoke_MethodType::ptypes(mtype);
-      oop ptype0 = (ptypes == NULL || ptypes->length() < 1) ? oop(NULL) : ptypes->obj_at(0);
-      if (java_lang_Class::is_instance(ptype0))
-        receiver_limit_result = java_lang_Class::as_klassOop(ptype0);
-    }
-    if (vmindex == methodOopDesc::nonvirtual_vtable_index) {
-      // this DMH can be an "invokespecial" version
-      decode_flags_result &= ~_dmf_does_dispatch;
-    } else {
-      assert(vmindex == methodOopDesc::invalid_vtable_index, "random vmindex?");
-    }
-    return m;
-  } else {
-    assert(vmtarget->is_klass(), "must be class or interface");
-    decode_flags_result |= MethodHandles::_dmf_does_dispatch;
-    decode_flags_result |= MethodHandles::_dmf_has_receiver;
-    receiver_limit_result = (klassOop)vmtarget;
-    Klass* tk = Klass::cast((klassOop)vmtarget);
-    if (tk->is_interface()) {
-      // an itable linkage is <interface, itable index>
-      decode_flags_result |= MethodHandles::_dmf_from_interface;
-      return klassItable::method_for_itable_index((klassOop)vmtarget, vmindex);
-    } else {
-      if (!tk->oop_is_instance())
-        tk = instanceKlass::cast(SystemDictionary::Object_klass());
-      return ((instanceKlass*)tk)->method_at_vtable(vmindex);
-    }
-  }
-}
-
-// MemberName and DirectMethodHandle have the same linkage to the JVM internals.
-// (MemberName is the non-operational name used for queries and setup.)
-
-methodHandle MethodHandles::decode_DirectMethodHandle(oop mh, KlassHandle& receiver_limit_result, int& decode_flags_result) {
-  oop vmtarget = java_lang_invoke_DirectMethodHandle::vmtarget(mh);
-  int vmindex  = java_lang_invoke_DirectMethodHandle::vmindex(mh);
-  oop mtype    = java_lang_invoke_DirectMethodHandle::type(mh);
-  return decode_vmtarget(vmtarget, vmindex, mtype, receiver_limit_result, decode_flags_result);
-}
-
-methodHandle MethodHandles::decode_BoundMethodHandle(oop mh, KlassHandle& receiver_limit_result, int& decode_flags_result) {
-  assert(java_lang_invoke_BoundMethodHandle::is_instance(mh), "");
-  assert(mh->klass() != SystemDictionary::AdapterMethodHandle_klass(), "");
-  for (oop bmh = mh;;) {
-    // Bound MHs can be stacked to bind several arguments.
-    oop target = java_lang_invoke_MethodHandle::vmtarget(bmh);
-    if (target == NULL)  return methodHandle();
-    decode_flags_result |= MethodHandles::_dmf_binds_argument;
-    klassOop tk = target->klass();
-    if (tk == SystemDictionary::BoundMethodHandle_klass()) {
-      bmh = target;
-      continue;
-    } else {
-      if (java_lang_invoke_MethodHandle::is_subclass(tk)) {
-        //assert(tk == SystemDictionary::DirectMethodHandle_klass(), "end of BMH chain must be DMH");
-        return decode_MethodHandle(target, receiver_limit_result, decode_flags_result);
-      } else {
-        // Optimized case:  binding a receiver to a non-dispatched DMH
-        // short-circuits directly to the methodOop.
-        // (The bound argument might also be something other than a receiver.)
-        assert(target->is_method(), "must be a simple method");
-        decode_flags_result |= MethodHandles::_dmf_binds_method;
-        methodOop m = (methodOop) target;
-        if (!m->is_static())
-          decode_flags_result |= MethodHandles::_dmf_has_receiver;
-        return m;
-      }
-    }
-  }
-}
-
-methodHandle MethodHandles::decode_AdapterMethodHandle(oop mh, KlassHandle& receiver_limit_result, int& decode_flags_result) {
-  assert(mh->klass() == SystemDictionary::AdapterMethodHandle_klass(), "");
-  for (oop amh = mh;;) {
-    // Adapter MHs can be stacked to convert several arguments.
-    int conv_op = adapter_conversion_op(java_lang_invoke_AdapterMethodHandle::conversion(amh));
-    decode_flags_result |= (_dmf_adapter_lsb << conv_op) & _DMF_ADAPTER_MASK;
-    oop target = java_lang_invoke_MethodHandle::vmtarget(amh);
-    if (target == NULL)  return methodHandle();
-    klassOop tk = target->klass();
-    if (tk == SystemDictionary::AdapterMethodHandle_klass()) {
-      amh = target;
-      continue;
-    } else {
-      // must be a BMH (which will bind some more arguments) or a DMH (for the final call)
-      return MethodHandles::decode_MethodHandle(target, receiver_limit_result, decode_flags_result);
-    }
-  }
-}
-
-methodHandle MethodHandles::decode_MethodHandle(oop mh, KlassHandle& receiver_limit_result, int& decode_flags_result) {
-  if (mh == NULL)  return methodHandle();
-  klassOop mhk = mh->klass();
-  assert(java_lang_invoke_MethodHandle::is_subclass(mhk), "must be a MethodHandle");
-  if (mhk == SystemDictionary::DirectMethodHandle_klass()) {
-    return decode_DirectMethodHandle(mh, receiver_limit_result, decode_flags_result);
-  } else if (mhk == SystemDictionary::BoundMethodHandle_klass()) {
-    return decode_BoundMethodHandle(mh, receiver_limit_result, decode_flags_result);
-  } else if (mhk == SystemDictionary::AdapterMethodHandle_klass()) {
-    return decode_AdapterMethodHandle(mh, receiver_limit_result, decode_flags_result);
-  } else if (java_lang_invoke_BoundMethodHandle::is_subclass(mhk)) {
-    // could be a JavaMethodHandle (but not an adapter MH)
-    return decode_BoundMethodHandle(mh, receiver_limit_result, decode_flags_result);
-  } else {
-    assert(false, "cannot parse this MH");
-    return methodHandle();  // random MH?
-  }
-}
-
-methodOop MethodHandles::decode_methodOop(methodOop m, int& decode_flags_result) {
-  assert(m->is_method(), "");
-  if (m->is_static()) {
-    // check that signature begins '(L' or '([' (not '(I', '()', etc.)
-    Symbol* sig = m->signature();
-    BasicType recv_bt = char2type(sig->byte_at(1));
-    // Note: recv_bt might be T_ILLEGAL if byte_at(2) is ')'
-    assert(sig->byte_at(0) == '(', "must be method sig");
-//     if (recv_bt == T_OBJECT || recv_bt == T_ARRAY)
-//       decode_flags_result |= _dmf_has_receiver;
-  } else {
-    // non-static method
-    decode_flags_result |= _dmf_has_receiver;
-    if (!m->can_be_statically_bound() && !m->is_initializer()) {
-      decode_flags_result |= _dmf_does_dispatch;
-      if (Klass::cast(m->method_holder())->is_interface())
-        decode_flags_result |= _dmf_from_interface;
-    }
-  }
-  return m;
-}
-
-
-// A trusted party is handing us a cookie to determine a method.
-// Let's boil it down to the method oop they really want.
-methodHandle MethodHandles::decode_method(oop x, KlassHandle& receiver_limit_result, int& decode_flags_result) {
-  decode_flags_result = 0;
-  receiver_limit_result = KlassHandle();
-  klassOop xk = x->klass();
-  if (xk == Universe::methodKlassObj()) {
-    return decode_methodOop((methodOop) x, decode_flags_result);
-  } else if (xk == SystemDictionary::MemberName_klass()) {
-    // Note: This only works if the MemberName has already been resolved.
-    return decode_MemberName(x, receiver_limit_result, decode_flags_result);
-  } else if (java_lang_invoke_MethodHandle::is_subclass(xk)) {
-    return decode_MethodHandle(x, receiver_limit_result, decode_flags_result);
-  } else if (xk == SystemDictionary::reflect_Method_klass()) {
-    oop clazz  = java_lang_reflect_Method::clazz(x);
-    int slot   = java_lang_reflect_Method::slot(x);
-    klassOop k = java_lang_Class::as_klassOop(clazz);
-    if (k != NULL && Klass::cast(k)->oop_is_instance())
-      return decode_methodOop(instanceKlass::cast(k)->method_with_idnum(slot),
-                              decode_flags_result);
-  } else if (xk == SystemDictionary::reflect_Constructor_klass()) {
-    oop clazz  = java_lang_reflect_Constructor::clazz(x);
-    int slot   = java_lang_reflect_Constructor::slot(x);
-    klassOop k = java_lang_Class::as_klassOop(clazz);
-    if (k != NULL && Klass::cast(k)->oop_is_instance())
-      return decode_methodOop(instanceKlass::cast(k)->method_with_idnum(slot),
-                              decode_flags_result);
-  } else {
-    // unrecognized object
-    assert(!x->is_method(), "already checked");
-    assert(!java_lang_invoke_MemberName::is_instance(x), "already checked");
-  }
-  return methodHandle();
-}
-
-
-int MethodHandles::decode_MethodHandle_stack_pushes(oop mh) {
-  if (mh->klass() == SystemDictionary::DirectMethodHandle_klass())
-    return 0;                   // no push/pop
-  int this_vmslots = java_lang_invoke_MethodHandle::vmslots(mh);
-  int last_vmslots = 0;
-  oop last_mh = mh;
-  for (;;) {
-    oop target = java_lang_invoke_MethodHandle::vmtarget(last_mh);
-    if (target->klass() == SystemDictionary::DirectMethodHandle_klass()) {
-      last_vmslots = java_lang_invoke_MethodHandle::vmslots(target);
-      break;
-    } else if (!java_lang_invoke_MethodHandle::is_instance(target)) {
-      // might be klass or method
-      assert(target->is_method(), "must get here with a direct ref to method");
-      last_vmslots = methodOop(target)->size_of_parameters();
-      break;
-    }
-    last_mh = target;
-  }
-  // If I am called with fewer VM slots than my ultimate callee,
-  // it must be that I push the additionally needed slots.
-  // Likewise, if I am called with more VM slots, I will pop them.
-  return (last_vmslots - this_vmslots);
-}
-
-
 // MemberName support
 
 // import java_lang_invoke_MemberName.*
@@ -472,10 +109,11 @@
   IS_CONSTRUCTOR = java_lang_invoke_MemberName::MN_IS_CONSTRUCTOR,
   IS_FIELD       = java_lang_invoke_MemberName::MN_IS_FIELD,
   IS_TYPE        = java_lang_invoke_MemberName::MN_IS_TYPE,
+  REFERENCE_KIND_SHIFT = java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT,
+  REFERENCE_KIND_MASK  = java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK,
   SEARCH_SUPERCLASSES = java_lang_invoke_MemberName::MN_SEARCH_SUPERCLASSES,
   SEARCH_INTERFACES   = java_lang_invoke_MemberName::MN_SEARCH_INTERFACES,
-  ALL_KINDS      = IS_METHOD | IS_CONSTRUCTOR | IS_FIELD | IS_TYPE,
-  VM_INDEX_UNINITIALIZED = java_lang_invoke_MemberName::VM_INDEX_UNINITIALIZED
+  ALL_KINDS      = IS_METHOD | IS_CONSTRUCTOR | IS_FIELD | IS_TYPE
 };
 
 Handle MethodHandles::new_MemberName(TRAPS) {
@@ -485,72 +123,265 @@
   return Handle(THREAD, k->allocate_instance(THREAD));
 }
 
-void MethodHandles::init_MemberName(oop mname_oop, oop target_oop) {
-  if (target_oop->klass() == SystemDictionary::reflect_Field_klass()) {
+oop MethodHandles::init_MemberName(oop mname_oop, oop target_oop) {
+  klassOop target_klass = target_oop->klass();
+  if (target_klass == SystemDictionary::reflect_Field_klass()) {
     oop clazz = java_lang_reflect_Field::clazz(target_oop); // fd.field_holder()
     int slot  = java_lang_reflect_Field::slot(target_oop);  // fd.index()
     int mods  = java_lang_reflect_Field::modifiers(target_oop);
+    oop type  = java_lang_reflect_Field::type(target_oop);
+    oop name  = java_lang_reflect_Field::name(target_oop);
     klassOop k = java_lang_Class::as_klassOop(clazz);
-    int offset = instanceKlass::cast(k)->field_offset(slot);
-    init_MemberName(mname_oop, k, accessFlags_from(mods), offset);
-  } else {
-    KlassHandle receiver_limit; int decode_flags = 0;
-    methodHandle m = MethodHandles::decode_method(target_oop, receiver_limit, decode_flags);
-    bool do_dispatch = ((decode_flags & MethodHandles::_dmf_does_dispatch) != 0);
-    init_MemberName(mname_oop, m(), do_dispatch);
+    intptr_t offset = instanceKlass::cast(k)->field_offset(slot);
+    return init_field_MemberName(mname_oop, k, accessFlags_from(mods), type, name, offset);
+  } else if (target_klass == SystemDictionary::reflect_Method_klass()) {
+    oop clazz  = java_lang_reflect_Method::clazz(target_oop);
+    int slot   = java_lang_reflect_Method::slot(target_oop);
+    klassOop k = java_lang_Class::as_klassOop(clazz);
+    if (k != NULL && Klass::cast(k)->oop_is_instance()) {
+      methodOop m = instanceKlass::cast(k)->method_with_idnum(slot);
+      return init_method_MemberName(mname_oop, m, true, k);
+    }
+  } else if (target_klass == SystemDictionary::reflect_Constructor_klass()) {
+    oop clazz  = java_lang_reflect_Constructor::clazz(target_oop);
+    int slot   = java_lang_reflect_Constructor::slot(target_oop);
+    klassOop k = java_lang_Class::as_klassOop(clazz);
+    if (k != NULL && Klass::cast(k)->oop_is_instance()) {
+      methodOop m = instanceKlass::cast(k)->method_with_idnum(slot);
+      return init_method_MemberName(mname_oop, m, false, k);
+    }
+  } else if (target_klass == SystemDictionary::MemberName_klass()) {
+    // Note: This only works if the MemberName has already been resolved.
+    oop clazz        = java_lang_invoke_MemberName::clazz(target_oop);
+    int flags        = java_lang_invoke_MemberName::flags(target_oop);
+    oop vmtarget     = java_lang_invoke_MemberName::vmtarget(target_oop);
+    intptr_t vmindex = java_lang_invoke_MemberName::vmindex(target_oop);
+    klassOop k       = java_lang_Class::as_klassOop(clazz);
+    int ref_kind     = (flags >> REFERENCE_KIND_SHIFT) & REFERENCE_KIND_MASK;
+    if (vmtarget == NULL)  return NULL;  // not resolved
+    if ((flags & IS_FIELD) != 0) {
+      assert(vmtarget->is_klass(), "field vmtarget is klassOop");
+      int basic_mods = (ref_kind_is_static(ref_kind) ? JVM_ACC_STATIC : 0);
+      // FIXME:  how does k (receiver_limit) contribute?
+      return init_field_MemberName(mname_oop, klassOop(vmtarget), accessFlags_from(basic_mods), NULL, NULL, vmindex);
+    } else if ((flags & (IS_METHOD | IS_CONSTRUCTOR)) != 0) {
+      assert(vmtarget->is_method(), "method or constructor vmtarget is methodOop");
+      return init_method_MemberName(mname_oop, methodOop(vmtarget), ref_kind_does_dispatch(ref_kind), k);
+    } else {
+      return NULL;
+    }
   }
+  return NULL;
 }
 
-void MethodHandles::init_MemberName(oop mname_oop, methodOop m, bool do_dispatch) {
-  int flags = ((m->is_initializer() ? IS_CONSTRUCTOR : IS_METHOD)
-               | (jushort)( m->access_flags().as_short() & JVM_RECOGNIZED_METHOD_MODIFIERS ));
-  oop vmtarget = m;
-  int vmindex  = methodOopDesc::invalid_vtable_index;  // implies no info yet
-  if (!do_dispatch || (flags & IS_CONSTRUCTOR) || m->can_be_statically_bound())
-    vmindex = methodOopDesc::nonvirtual_vtable_index; // implies never any dispatch
-  assert(vmindex != VM_INDEX_UNINITIALIZED, "Java sentinel value");
+oop MethodHandles::init_method_MemberName(oop mname_oop, methodOop m, bool do_dispatch,
+                                          klassOop receiver_limit) {
+  AccessFlags mods = m->access_flags();
+  int flags = (jushort)( mods.as_short() & JVM_RECOGNIZED_METHOD_MODIFIERS );
+  int vmindex = methodOopDesc::nonvirtual_vtable_index; // implies never any dispatch
+  klassOop mklass = m->method_holder();
+  if (receiver_limit == NULL)
+    receiver_limit = mklass;
+  if (m->is_initializer()) {
+    flags |= IS_CONSTRUCTOR | (JVM_REF_invokeSpecial << REFERENCE_KIND_SHIFT);
+  } else if (mods.is_static()) {
+    flags |= IS_METHOD | (JVM_REF_invokeStatic << REFERENCE_KIND_SHIFT);
+  } else if (receiver_limit != mklass &&
+             !Klass::cast(receiver_limit)->is_subtype_of(mklass)) {
+    return NULL;  // bad receiver limit
+  } else if (Klass::cast(receiver_limit)->is_interface() &&
+             Klass::cast(mklass)->is_interface()) {
+    flags |= IS_METHOD | (JVM_REF_invokeInterface << REFERENCE_KIND_SHIFT);
+    receiver_limit = mklass;  // ignore passed-in limit; interfaces are interconvertible
+    vmindex = klassItable::compute_itable_index(m);
+  } else if (mklass != receiver_limit && Klass::cast(mklass)->is_interface()) {
+    flags |= IS_METHOD | (JVM_REF_invokeVirtual << REFERENCE_KIND_SHIFT);
+    // it is a miranda method, so m->vtable_index is not what we want
+    ResourceMark rm;
+    klassVtable* vt = instanceKlass::cast(receiver_limit)->vtable();
+    vmindex = vt->index_of_miranda(m->name(), m->signature());
+  } else if (!do_dispatch || m->can_be_statically_bound()) {
+    flags |= IS_METHOD | (JVM_REF_invokeSpecial << REFERENCE_KIND_SHIFT);
+  } else {
+    flags |= IS_METHOD | (JVM_REF_invokeVirtual << REFERENCE_KIND_SHIFT);
+    vmindex = m->vtable_index();
+  }
+
+  java_lang_invoke_MemberName::set_flags(mname_oop,    flags);
+  java_lang_invoke_MemberName::set_vmtarget(mname_oop, m);
+  java_lang_invoke_MemberName::set_vmindex(mname_oop,  vmindex);   // vtable/itable index
+  java_lang_invoke_MemberName::set_clazz(mname_oop,    Klass::cast(receiver_limit)->java_mirror());
+  // Note:  name and type can be lazily computed by resolve_MemberName,
+  // if Java code needs them as resolved String and MethodType objects.
+  // The clazz must be eagerly stored, because it provides a GC
+  // root to help keep alive the methodOop.
+  // If relevant, the vtable or itable value is stored as vmindex.
+  // This is done eagerly, since it is readily available without
+  // constructing any new objects.
+  // TO DO: maybe intern mname_oop
+  return mname_oop;
+}
+
+Handle MethodHandles::init_method_MemberName(oop mname_oop, CallInfo& info, TRAPS) {
+  Handle empty;
+  if (info.resolved_appendix().not_null()) {
+    // The resolved MemberName must not be accompanied by an appendix argument,
+    // since there is no way to bind this value into the MemberName.
+    // Caller is responsible to prevent this from happening.
+    THROW_MSG_(vmSymbols::java_lang_InternalError(), "appendix", empty);
+  }
+  methodHandle m = info.resolved_method();
+  KlassHandle defc = info.resolved_klass();
+  int vmindex = -1;
+  if (defc->is_interface() && Klass::cast(m->method_holder())->is_interface()) {
+    // LinkResolver does not report itable indexes!  (fix this?)
+    vmindex = klassItable::compute_itable_index(m());
+  } else if (m->can_be_statically_bound()) {
+    // LinkResolver reports vtable index even for final methods!
+    vmindex = methodOopDesc::nonvirtual_vtable_index;
+  } else {
+    vmindex = info.vtable_index();
+  }
+  oop res = init_method_MemberName(mname_oop, m(), (vmindex >= 0), defc());
+  assert(res == NULL || (java_lang_invoke_MemberName::vmindex(res) == vmindex), "");
+  return Handle(THREAD, res);
+}
+
+oop MethodHandles::init_field_MemberName(oop mname_oop, klassOop field_holder,
+                                         AccessFlags mods, oop type, oop name,
+                                         intptr_t offset, bool is_setter) {
+  int flags = (jushort)( mods.as_short() & JVM_RECOGNIZED_FIELD_MODIFIERS );
+  flags |= IS_FIELD | ((mods.is_static() ? JVM_REF_getStatic : JVM_REF_getField) << REFERENCE_KIND_SHIFT);
+  if (is_setter)  flags += ((JVM_REF_putField - JVM_REF_getField) << REFERENCE_KIND_SHIFT);
+  oop vmtarget = field_holder;
+  int vmindex  = offset;  // determines the field uniquely when combined with static bit
+  java_lang_invoke_MemberName::set_flags(mname_oop,    flags);
   java_lang_invoke_MemberName::set_vmtarget(mname_oop, vmtarget);
   java_lang_invoke_MemberName::set_vmindex(mname_oop,  vmindex);
-  java_lang_invoke_MemberName::set_flags(mname_oop,    flags);
-  java_lang_invoke_MemberName::set_clazz(mname_oop,    Klass::cast(m->method_holder())->java_mirror());
+  java_lang_invoke_MemberName::set_clazz(mname_oop,    Klass::cast(field_holder)->java_mirror());
+  if (name != NULL)
+    java_lang_invoke_MemberName::set_name(mname_oop,   name);
+  if (type != NULL)
+    java_lang_invoke_MemberName::set_type(mname_oop,   type);
+  // Note:  name and type can be lazily computed by resolve_MemberName,
+  // if Java code needs them as resolved String and Class objects.
+  // Note that the incoming type oop might be pre-resolved (non-null).
+  // The base clazz and field offset (vmindex) must be eagerly stored,
+  // because they unambiguously identify the field.
+  // Although the fieldDescriptor::_index would also identify the field,
+  // we do not use it, because it is harder to decode.
+  // TO DO: maybe intern mname_oop
+  return mname_oop;
 }
 
-void MethodHandles::init_MemberName(oop mname_oop, klassOop field_holder, AccessFlags mods, int offset) {
-  int flags = (IS_FIELD | (jushort)( mods.as_short() & JVM_RECOGNIZED_FIELD_MODIFIERS ));
-  oop vmtarget = field_holder;
-  int vmindex  = offset;  // determines the field uniquely when combined with static bit
-  assert(vmindex != VM_INDEX_UNINITIALIZED, "bad alias on vmindex");
-  java_lang_invoke_MemberName::set_vmtarget(mname_oop, vmtarget);
-  java_lang_invoke_MemberName::set_vmindex(mname_oop,  vmindex);
-  java_lang_invoke_MemberName::set_flags(mname_oop,    flags);
-  java_lang_invoke_MemberName::set_clazz(mname_oop,    Klass::cast(field_holder)->java_mirror());
+Handle MethodHandles::init_field_MemberName(oop mname_oop, FieldAccessInfo& info, TRAPS) {
+  return Handle();
+#if 0
+  KlassHandle field_holder = info.klass();
+  intptr_t    field_offset = info.field_offset();
+  return init_field_MemberName(mname_oop, field_holder(),
+                               info.access_flags(),
+                               type, name,
+                               field_offset, false /*is_setter*/);
+#endif
 }
 
 
-methodHandle MethodHandles::decode_MemberName(oop mname, KlassHandle& receiver_limit_result, int& decode_flags_result) {
-  methodHandle empty;
-  int flags  = java_lang_invoke_MemberName::flags(mname);
-  if ((flags & (IS_METHOD | IS_CONSTRUCTOR)) == 0)  return empty;  // not invocable
-  oop vmtarget = java_lang_invoke_MemberName::vmtarget(mname);
-  int vmindex  = java_lang_invoke_MemberName::vmindex(mname);
-  if (vmindex == VM_INDEX_UNINITIALIZED)  return empty;  // not resolved
-  methodHandle m = decode_vmtarget(vmtarget, vmindex, NULL, receiver_limit_result, decode_flags_result);
-  oop clazz = java_lang_invoke_MemberName::clazz(mname);
-  if (clazz != NULL && java_lang_Class::is_instance(clazz)) {
-    klassOop klass = java_lang_Class::as_klassOop(clazz);
-    if (klass != NULL)  receiver_limit_result = klass;
+// JVMS 2.9 Special Methods:
+// A method is signature polymorphic if and only if all of the following conditions hold:
+// * It is declared in the java.lang.invoke.MethodHandle class.
+// * It has a single formal parameter of type Object[].
+// * It has a return type of Object.
+// * It has the ACC_VARARGS and ACC_NATIVE flags set.
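+// (For example, MethodHandle.invokeExact and MethodHandle.invoke are
+// declared this way.)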
+bool MethodHandles::is_method_handle_invoke_name(klassOop klass, Symbol* name) {
+  if (klass == NULL)
+    return false;
+  // The following test will fail spuriously during bootstrap of MethodHandle itself:
+  //    if (klass != SystemDictionary::MethodHandle_klass())
+  // Test the name instead:
+  if (Klass::cast(klass)->name() != vmSymbols::java_lang_invoke_MethodHandle())
+    return false;
+  Symbol* poly_sig = vmSymbols::object_array_object_signature();
+  methodOop m = instanceKlass::cast(klass)->find_method(name, poly_sig);
+  if (m == NULL)  return false;
+  int required = JVM_ACC_NATIVE | JVM_ACC_VARARGS;
+  int flags = m->access_flags().as_int();
+  return (flags & required) == required;
+}
+
+
+Symbol* MethodHandles::signature_polymorphic_intrinsic_name(vmIntrinsics::ID iid) {
+  assert(is_signature_polymorphic_intrinsic(iid), err_msg("iid=%d", iid));
+  switch (iid) {
+  case vmIntrinsics::_invokeBasic:      return vmSymbols::invokeBasic_name();
+  case vmIntrinsics::_linkToVirtual:    return vmSymbols::linkToVirtual_name();
+  case vmIntrinsics::_linkToStatic:     return vmSymbols::linkToStatic_name();
+  case vmIntrinsics::_linkToSpecial:    return vmSymbols::linkToSpecial_name();
+  case vmIntrinsics::_linkToInterface:  return vmSymbols::linkToInterface_name();
   }
-  return m;
+  assert(false, "");
+  return 0;
 }
 
+int MethodHandles::signature_polymorphic_intrinsic_ref_kind(vmIntrinsics::ID iid) {
+  switch (iid) {
+  case vmIntrinsics::_invokeBasic:      return 0;
+  case vmIntrinsics::_linkToVirtual:    return JVM_REF_invokeVirtual;
+  case vmIntrinsics::_linkToStatic:     return JVM_REF_invokeStatic;
+  case vmIntrinsics::_linkToSpecial:    return JVM_REF_invokeSpecial;
+  case vmIntrinsics::_linkToInterface:  return JVM_REF_invokeInterface;
+  }
+  assert(false, err_msg("iid=%d", iid));
+  return 0;
+}
+
+vmIntrinsics::ID MethodHandles::signature_polymorphic_name_id(Symbol* name) {
+  vmSymbols::SID name_id = vmSymbols::find_sid(name);
+  switch (name_id) {
+  // The ID _invokeGeneric stands for all non-static signature-polymorphic methods, except built-ins.
+  case vmSymbols::VM_SYMBOL_ENUM_NAME(invoke_name):           return vmIntrinsics::_invokeGeneric;
+  // The only built-in non-static signature-polymorphic method is MethodHandle.invokeBasic:
+  case vmSymbols::VM_SYMBOL_ENUM_NAME(invokeBasic_name):      return vmIntrinsics::_invokeBasic;
+
+  // There is one static signature-polymorphic method for each JVM invocation mode.
+  case vmSymbols::VM_SYMBOL_ENUM_NAME(linkToVirtual_name):    return vmIntrinsics::_linkToVirtual;
+  case vmSymbols::VM_SYMBOL_ENUM_NAME(linkToStatic_name):     return vmIntrinsics::_linkToStatic;
+  case vmSymbols::VM_SYMBOL_ENUM_NAME(linkToSpecial_name):    return vmIntrinsics::_linkToSpecial;
+  case vmSymbols::VM_SYMBOL_ENUM_NAME(linkToInterface_name):  return vmIntrinsics::_linkToInterface;
+  }
+
+  // Cover the case of invokeExact and any future variants of invokeFoo.
+  klassOop mh_klass = SystemDictionary::well_known_klass(
+                              SystemDictionary::WK_KLASS_ENUM_NAME(MethodHandle_klass) );
+  if (mh_klass != NULL && is_method_handle_invoke_name(mh_klass, name))
+    return vmIntrinsics::_invokeGeneric;
+
+  // Note: The pseudo-intrinsic _compiledLambdaForm is never linked against.
+  // Instead it is used to mark lambda forms bound to invokehandle or invokedynamic.
+  return vmIntrinsics::_none;
+}
+
+vmIntrinsics::ID MethodHandles::signature_polymorphic_name_id(klassOop klass, Symbol* name) {
+  if (klass != NULL &&
+      Klass::cast(klass)->name() == vmSymbols::java_lang_invoke_MethodHandle()) {
+    vmIntrinsics::ID iid = signature_polymorphic_name_id(name);
+    if (iid != vmIntrinsics::_none)
+      return iid;
+    if (is_method_handle_invoke_name(klass, name))
+      return vmIntrinsics::_invokeGeneric;
+  }
+  return vmIntrinsics::_none;
+}
+
+
 // convert the external string or reflective type to an internal signature
-Symbol* MethodHandles::convert_to_signature(oop type_str, bool polymorphic, TRAPS) {
+Symbol* MethodHandles::lookup_signature(oop type_str, bool intern_if_not_found, TRAPS) {
   if (java_lang_invoke_MethodType::is_instance(type_str)) {
-    return java_lang_invoke_MethodType::as_signature(type_str, polymorphic, CHECK_NULL);
+    return java_lang_invoke_MethodType::as_signature(type_str, intern_if_not_found, CHECK_NULL);
   } else if (java_lang_Class::is_instance(type_str)) {
     return java_lang_Class::as_signature(type_str, false, CHECK_NULL);
   } else if (java_lang_String::is_instance(type_str)) {
-    if (polymorphic) {
+    if (intern_if_not_found) {
       return java_lang_String::as_symbol(type_str, CHECK_NULL);
     } else {
       return java_lang_String::as_symbol_or_null(type_str);
@@ -560,121 +391,303 @@
   }
 }
 
+static const char OBJ_SIG[] = "Ljava/lang/Object;";
+enum { OBJ_SIG_LEN = 18 };
+
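+// A signature is "basic" if it mentions only the type letters V, I, J, F, D
+// and the single reference type Ljava/lang/Object;.  For example,
+// "(ILjava/lang/Object;)J" is basic, but "(S[I)Z" is not: subword and array
+// types must first be erased, as in lookup_basic_type_signature below.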
+bool MethodHandles::is_basic_type_signature(Symbol* sig) {
+  assert(vmSymbols::object_signature()->utf8_length() == (int)OBJ_SIG_LEN, "");
+  assert(vmSymbols::object_signature()->equals(OBJ_SIG), "");
+  const int len = sig->utf8_length();
+  for (int i = 0; i < len; i++) {
+    switch (sig->byte_at(i)) {
+    case 'L':
+      // only java/lang/Object is valid here
+      if (sig->index_of_at(i, OBJ_SIG, OBJ_SIG_LEN) != i)
+        return false;
+      i += OBJ_SIG_LEN-1;  //-1 because of i++ in loop
+      continue;
+    case '(': case ')': case 'V':
+    case 'I': case 'J': case 'F': case 'D':
+      continue;
+    //case '[':
+    //case 'Z': case 'B': case 'C': case 'S':
+    default:
+      return false;
+    }
+  }
+  return true;
+}
+
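+// Erase a signature to its basic form: subword types widen to I and all
+// references collapse to Ljava/lang/Object;.  For example,
+// "(SLjava/lang/String;)Z" becomes "(ILjava/lang/Object;)I".  With
+// keep_last_arg, the final argument (e.g. an injected appendix) keeps its
+// exact type.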
+Symbol* MethodHandles::lookup_basic_type_signature(Symbol* sig, bool keep_last_arg, TRAPS) {
+  Symbol* bsig = NULL;
+  if (sig == NULL) {
+    return sig;
+  } else if (is_basic_type_signature(sig)) {
+    sig->increment_refcount();
+    return sig;  // that was easy
+  } else if (sig->byte_at(0) != '(') {
+    BasicType bt = char2type(sig->byte_at(0));
+    if (is_subword_type(bt)) {
+      bsig = vmSymbols::int_signature();
+    } else {
+      assert(bt == T_OBJECT || bt == T_ARRAY, "is_basic_type_signature was false");
+      bsig = vmSymbols::object_signature();
+    }
+  } else {
+    ResourceMark rm;
+    stringStream buffer(128);
+    buffer.put('(');
+    int arg_pos = 0, keep_arg_pos = -1;
+    if (keep_last_arg)
+      keep_arg_pos = ArgumentCount(sig).size() - 1;
+    for (SignatureStream ss(sig); !ss.is_done(); ss.next()) {
+      BasicType bt = ss.type();
+      size_t this_arg_pos = buffer.size();
+      if (ss.at_return_type()) {
+        buffer.put(')');
+      }
+      if (arg_pos == keep_arg_pos) {
+        buffer.write((char*) ss.raw_bytes(),
+                     (int)   ss.raw_length());
+      } else if (bt == T_OBJECT || bt == T_ARRAY) {
+        buffer.write(OBJ_SIG, OBJ_SIG_LEN);
+      } else {
+        if (is_subword_type(bt))
+          bt = T_INT;
+        buffer.put(type2char(bt));
+      }
+      arg_pos++;
+    }
+    const char* sigstr =       buffer.base();
+    int         siglen = (int) buffer.size();
+    bsig = SymbolTable::new_symbol(sigstr, siglen, THREAD);
+  }
+  assert(is_basic_type_signature(bsig) ||
+         // detune assert in case the injected argument is not a basic type:
+         keep_last_arg, "");
+  return bsig;
+}
+
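+// Print a readable rendering of a signature.  For example, with
+// keep_basic_names "(ILjava/lang/Object;)V" prints as "(int,Object)void";
+// with keep_arrays as well, "[I" renders as "int[]".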
+void MethodHandles::print_as_basic_type_signature_on(outputStream* st,
+                                                     Symbol* sig,
+                                                     bool keep_arrays,
+                                                     bool keep_basic_names) {
+  st = st ? st : tty;
+  int len  = sig->utf8_length();
+  int array = 0;
+  bool prev_type = false;
+  for (int i = 0; i < len; i++) {
+    char ch = sig->byte_at(i);
+    switch (ch) {
+    case '(': case ')':
+      prev_type = false;
+      st->put(ch);
+      continue;
+    case '[':
+      if (!keep_basic_names && keep_arrays)
+        st->put(ch);
+      array++;
+      continue;
+    case 'L':
+      {
+        if (prev_type)  st->put(',');
+        int start = i+1, slash = start;
+        while (++i < len && (ch = sig->byte_at(i)) != ';') {
+          if (ch == '/' || ch == '.' || ch == '$')  slash = i+1;
+        }
+        if (slash < i)  start = slash;
+        if (!keep_basic_names) {
+          st->put('L');
+        } else {
+          for (int j = start; j < i; j++)
+            st->put(sig->byte_at(j));
+          prev_type = true;
+        }
+        break;
+      }
+    default:
+      {
+        if (array && char2type(ch) != T_ILLEGAL && !keep_arrays) {
+          ch = '[';
+          array = 0;
+        }
+        if (prev_type)  st->put(',');
+        const char* n = NULL;
+        if (keep_basic_names)
+          n = type2name(char2type(ch));
+        if (n == NULL) {
+          // unknown letter, or we don't want to know its name
+          st->put(ch);
+        } else {
+          st->print(n);
+          prev_type = true;
+        }
+        break;
+      }
+    }
+    // Switch break goes here to take care of array suffix:
+    if (prev_type) {
+      while (array > 0) {
+        st->print("[]");
+        --array;
+      }
+    }
+    array = 0;
+  }
+}
+
+
+
+static oop object_java_mirror() {
+  return Klass::cast(SystemDictionary::Object_klass())->java_mirror();
+}
+
+static oop field_name_or_null(Symbol* s) {
+  if (s == NULL)  return NULL;
+  return StringTable::lookup(s);
+}
+
+static oop field_signature_type_or_null(Symbol* s) {
+  if (s == NULL)  return NULL;
+  BasicType bt = FieldType::basic_type(s);
+  if (is_java_primitive(bt)) {
+    assert(s->utf8_length() == 1, "");
+    return java_lang_Class::primitive_mirror(bt);
+  }
+  // Here are some more shortcuts for common types.
+  // They are optional, since reference types can be resolved lazily.
+  if (bt == T_OBJECT) {
+    if (s == vmSymbols::object_signature()) {
+      return object_java_mirror();
+    } else if (s == vmSymbols::class_signature()) {
+      return Klass::cast(SystemDictionary::Class_klass())->java_mirror();
+    } else if (s == vmSymbols::string_signature()) {
+      return Klass::cast(SystemDictionary::String_klass())->java_mirror();
+    } else {
+      int len = s->utf8_length();
+      if (s->byte_at(0) == 'L' && s->byte_at(len-1) == ';') {
+        TempNewSymbol cname = SymbolTable::probe((const char*)&s->bytes()[1], len-2);
+        if (cname == NULL)  return NULL;
+        klassOop wkk = SystemDictionary::find_well_known_klass(cname);
+        if (wkk == NULL)  return NULL;
+        return Klass::cast(wkk)->java_mirror();
+      }
+    }
+  }
+  return NULL;
+}
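+// Informal summary of the above: primitive signatures ("I", "D", ...) map
+// straight to the primitive mirrors; a few well-known reference types
+// (Object, Class, String, plus anything in the well-known-klass table) get
+// their mirrors eagerly; everything else returns NULL for lazy resolution.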
+
 // An unresolved member name is a mere symbolic reference.
 // Resolving it plants a vmtarget/vmindex in it,
 // which refers directly to JVM internals.
-void MethodHandles::resolve_MemberName(Handle mname, TRAPS) {
+Handle MethodHandles::resolve_MemberName(Handle mname, TRAPS) {
+  Handle empty;
   assert(java_lang_invoke_MemberName::is_instance(mname()), "");
-#ifdef ASSERT
-  // If this assert throws, renegotiate the sentinel value used by the Java code,
-  // so that it is distinct from any valid vtable index value, and any special
-  // values defined in methodOopDesc::VtableIndexFlag.
-  // The point of the slop is to give the Java code and the JVM some room
-  // to independently specify sentinel values.
-  const int sentinel_slop  = 10;
-  const int sentinel_limit = methodOopDesc::highest_unused_vtable_index_value - sentinel_slop;
-  assert(VM_INDEX_UNINITIALIZED < sentinel_limit, "Java sentinel != JVM sentinels");
-#endif
-  if (java_lang_invoke_MemberName::vmindex(mname()) != VM_INDEX_UNINITIALIZED)
-    return;  // already resolved
+
+  if (java_lang_invoke_MemberName::vmtarget(mname()) != NULL) {
+    // Already resolved.
+    DEBUG_ONLY(int vmindex = java_lang_invoke_MemberName::vmindex(mname()));
+    assert(vmindex >= methodOopDesc::nonvirtual_vtable_index, "");
+    return mname;
+  }
+
   Handle defc_oop(THREAD, java_lang_invoke_MemberName::clazz(mname()));
   Handle name_str(THREAD, java_lang_invoke_MemberName::name( mname()));
   Handle type_str(THREAD, java_lang_invoke_MemberName::type( mname()));
   int    flags    =       java_lang_invoke_MemberName::flags(mname());
+  int    ref_kind =       (flags >> REFERENCE_KIND_SHIFT) & REFERENCE_KIND_MASK;
+  if (!ref_kind_is_valid(ref_kind)) {
+    THROW_MSG_(vmSymbols::java_lang_InternalError(), "obsolete MemberName format", empty);
+  }
+
+  DEBUG_ONLY(int old_vmindex);
+  assert((old_vmindex = java_lang_invoke_MemberName::vmindex(mname())) == 0, "clean input");
 
   if (defc_oop.is_null() || name_str.is_null() || type_str.is_null()) {
-    THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), "nothing to resolve");
+    THROW_MSG_(vmSymbols::java_lang_IllegalArgumentException(), "nothing to resolve", empty);
   }
 
   instanceKlassHandle defc;
   {
     klassOop defc_klassOop = java_lang_Class::as_klassOop(defc_oop());
-    if (defc_klassOop == NULL)  return;  // a primitive; no resolution possible
+    if (defc_klassOop == NULL)  return empty;  // a primitive; no resolution possible
     if (!Klass::cast(defc_klassOop)->oop_is_instance()) {
-      if (!Klass::cast(defc_klassOop)->oop_is_array())  return;
+      if (!Klass::cast(defc_klassOop)->oop_is_array())  return empty;
       defc_klassOop = SystemDictionary::Object_klass();
     }
     defc = instanceKlassHandle(THREAD, defc_klassOop);
   }
   if (defc.is_null()) {
-    THROW_MSG(vmSymbols::java_lang_InternalError(), "primitive class");
+    THROW_MSG_(vmSymbols::java_lang_InternalError(), "primitive class", empty);
   }
-  defc->link_class(CHECK);  // possible safepoint
+  defc->link_class(CHECK_(empty));  // possible safepoint
 
   // convert the external string name to an internal symbol
   TempNewSymbol name = java_lang_String::as_symbol_or_null(name_str());
-  if (name == NULL)  return;  // no such name
+  if (name == NULL)  return empty;  // no such name
   if (name == vmSymbols::class_initializer_name())
-    return; // illegal name
+    return empty; // illegal name
 
-  Handle polymorphic_method_type;
-  bool polymorphic_signature = false;
+  vmIntrinsics::ID mh_invoke_id = vmIntrinsics::_none;
   if ((flags & ALL_KINDS) == IS_METHOD &&
-      (defc() == SystemDictionary::MethodHandle_klass() &&
-       methodOopDesc::is_method_handle_invoke_name(name))) {
-    polymorphic_signature = true;
+      (defc() == SystemDictionary::MethodHandle_klass()) &&
+      (ref_kind == JVM_REF_invokeVirtual ||
+       ref_kind == JVM_REF_invokeSpecial ||
+       // static invocation mode is required for _linkToVirtual, etc.:
+       ref_kind == JVM_REF_invokeStatic)) {
+    vmIntrinsics::ID iid = signature_polymorphic_name_id(name);
+    if (iid != vmIntrinsics::_none &&
+        ((ref_kind == JVM_REF_invokeStatic) == is_signature_polymorphic_static(iid))) {
+      // Virtual methods invoke and invokeExact, plus internal invokers like _invokeBasic.
+      // For a static reference it could be an internal linkage routine like _linkToVirtual, etc.
+      mh_invoke_id = iid;
+    }
   }
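+  // In short (informal summary): virtual/special references here select the
+  // signature-polymorphic invokers (invoke, invokeExact, _invokeBasic, ...),
+  // while static references select the _linkTo* linkage routines; any other
+  // combination leaves mh_invoke_id at _none and is linked as an ordinary call.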
 
   // convert the external string or reflective type to an internal signature
-  TempNewSymbol type = convert_to_signature(type_str(), polymorphic_signature, CHECK);
-  if (java_lang_invoke_MethodType::is_instance(type_str()) && polymorphic_signature) {
-    polymorphic_method_type = type_str;  // preserve exactly
-  }
-  if (type == NULL)  return;  // no such signature exists in the VM
+  TempNewSymbol type = lookup_signature(type_str(), (mh_invoke_id != vmIntrinsics::_none), CHECK_(empty));
+  if (type == NULL)  return empty;  // no such signature exists in the VM
 
   // Time to do the lookup.
   switch (flags & ALL_KINDS) {
   case IS_METHOD:
     {
       CallInfo result;
+      bool do_dispatch = true;  // default, neutral setting
       {
-        EXCEPTION_MARK;
-        if ((flags & JVM_ACC_STATIC) != 0) {
+        assert(!HAS_PENDING_EXCEPTION, "");
+        if (ref_kind == JVM_REF_invokeStatic) {
+          //do_dispatch = false;  // no need, since statics are never dispatched
           LinkResolver::resolve_static_call(result,
                         defc, name, type, KlassHandle(), false, false, THREAD);
-        } else if (defc->is_interface()) {
+        } else if (ref_kind == JVM_REF_invokeInterface) {
           LinkResolver::resolve_interface_call(result, Handle(), defc,
                         defc, name, type, KlassHandle(), false, false, THREAD);
-        } else {
+        } else if (mh_invoke_id != vmIntrinsics::_none) {
+          assert(!is_signature_polymorphic_static(mh_invoke_id), "");
+          LinkResolver::resolve_handle_call(result,
+                        defc, name, type, KlassHandle(), THREAD);
+        } else if (ref_kind == JVM_REF_invokeSpecial) {
+          do_dispatch = false;  // force non-virtual linkage
+          LinkResolver::resolve_special_call(result,
+                        defc, name, type, KlassHandle(), false, THREAD);
+        } else if (ref_kind == JVM_REF_invokeVirtual) {
           LinkResolver::resolve_virtual_call(result, Handle(), defc,
                         defc, name, type, KlassHandle(), false, false, THREAD);
+        } else {
+          assert(false, err_msg("ref_kind=%d", ref_kind));
         }
         if (HAS_PENDING_EXCEPTION) {
-          CLEAR_PENDING_EXCEPTION;
-          break;  // go to second chance
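+          // Resolution failed; return the empty handle and let the pending
+          // exception propagate to the caller.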
+          return empty;
         }
       }
-      methodHandle m = result.resolved_method();
-      oop vmtarget = NULL;
-      int vmindex = methodOopDesc::nonvirtual_vtable_index;
-      if (defc->is_interface()) {
-        vmindex = klassItable::compute_itable_index(m());
-        assert(vmindex >= 0, "");
-      } else if (result.has_vtable_index()) {
-        vmindex = result.vtable_index();
-        assert(vmindex >= 0, "");
-      }
-      assert(vmindex != VM_INDEX_UNINITIALIZED, "");
-      if (vmindex < 0) {
-        assert(result.is_statically_bound(), "");
-        vmtarget = m();
-      } else {
-        vmtarget = result.resolved_klass()->as_klassOop();
-      }
-      int mods = (m->access_flags().as_short() & JVM_RECOGNIZED_METHOD_MODIFIERS);
-      java_lang_invoke_MemberName::set_vmtarget(mname(), vmtarget);
-      java_lang_invoke_MemberName::set_vmindex(mname(),  vmindex);
-      java_lang_invoke_MemberName::set_modifiers(mname(), mods);
-      DEBUG_ONLY(KlassHandle junk1; int junk2);
-      assert(decode_MemberName(mname(), junk1, junk2) == result.resolved_method(),
-             "properly stored for later decoding");
-      return;
+      return init_method_MemberName(mname(), result, THREAD);
     }
   case IS_CONSTRUCTOR:
     {
       CallInfo result;
       {
-        EXCEPTION_MARK;
+        assert(!HAS_PENDING_EXCEPTION, "");
         if (name == vmSymbols::object_initializer_name()) {
           LinkResolver::resolve_special_call(result,
                         defc, name, type, KlassHandle(), false, THREAD);
@@ -682,22 +695,11 @@
           break;                // will throw after end of switch
         }
         if (HAS_PENDING_EXCEPTION) {
-          CLEAR_PENDING_EXCEPTION;
-          return;
+          return empty;
         }
       }
       assert(result.is_statically_bound(), "");
-      methodHandle m = result.resolved_method();
-      oop vmtarget = m();
-      int vmindex  = methodOopDesc::nonvirtual_vtable_index;
-      int mods     = (m->access_flags().as_short() & JVM_RECOGNIZED_METHOD_MODIFIERS);
-      java_lang_invoke_MemberName::set_vmtarget(mname(), vmtarget);
-      java_lang_invoke_MemberName::set_vmindex(mname(),  vmindex);
-      java_lang_invoke_MemberName::set_modifiers(mname(), mods);
-      DEBUG_ONLY(KlassHandle junk1; int junk2);
-      assert(decode_MemberName(mname(), junk1, junk2) == result.resolved_method(),
-             "properly stored for later decoding");
-      return;
+      return init_method_MemberName(mname(), result, THREAD);
     }
   case IS_FIELD:
     {
@@ -705,54 +707,20 @@
       fieldDescriptor fd; // find_field initializes fd if found
       KlassHandle sel_klass(THREAD, instanceKlass::cast(defc())->find_field(name, type, &fd));
       // check if field exists; i.e., if a klass containing the field def has been selected
-      if (sel_klass.is_null())  return;
-      oop vmtarget = sel_klass->as_klassOop();
-      int vmindex  = fd.offset();
-      int mods     = (fd.access_flags().as_short() & JVM_RECOGNIZED_FIELD_MODIFIERS);
-      if (vmindex == VM_INDEX_UNINITIALIZED)  break;  // should not happen
-      java_lang_invoke_MemberName::set_vmtarget(mname(),  vmtarget);
-      java_lang_invoke_MemberName::set_vmindex(mname(),   vmindex);
-      java_lang_invoke_MemberName::set_modifiers(mname(), mods);
-      return;
+      if (sel_klass.is_null())  return empty;  // should not happen
+      oop type = field_signature_type_or_null(fd.signature());
+      oop name = field_name_or_null(fd.name());
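+      // Either hint may be NULL (uninterned name, lazily-resolved type);
+      // cf. the comments on field_signature_type_or_null above.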
+      bool is_setter = (ref_kind_is_valid(ref_kind) && ref_kind_is_setter(ref_kind));
+      mname = Handle(THREAD,
+                     init_field_MemberName(mname(), sel_klass->as_klassOop(),
+                                           fd.access_flags(), type, name, fd.offset(), is_setter));
+      return mname;
     }
   default:
-    THROW_MSG(vmSymbols::java_lang_InternalError(), "unrecognized MemberName format");
+    THROW_MSG_(vmSymbols::java_lang_InternalError(), "unrecognized MemberName format", empty);
   }
 
-  // Second chance.
-  if (polymorphic_method_type.not_null()) {
-    // Look on a non-null class loader.
-    Handle cur_class_loader;
-    const int nptypes = java_lang_invoke_MethodType::ptype_count(polymorphic_method_type());
-    for (int i = 0; i <= nptypes; i++) {
-      oop type_mirror;
-      if (i < nptypes)  type_mirror = java_lang_invoke_MethodType::ptype(polymorphic_method_type(), i);
-      else              type_mirror = java_lang_invoke_MethodType::rtype(polymorphic_method_type());
-      klassOop example_type = java_lang_Class::as_klassOop(type_mirror);
-      if (example_type == NULL)  continue;
-      oop class_loader = Klass::cast(example_type)->class_loader();
-      if (class_loader == NULL || class_loader == cur_class_loader())  continue;
-      cur_class_loader = Handle(THREAD, class_loader);
-      methodOop m = SystemDictionary::find_method_handle_invoke(name,
-                                                                type,
-                                                                KlassHandle(THREAD, example_type),
-                                                                THREAD);
-      if (HAS_PENDING_EXCEPTION) {
-        CLEAR_PENDING_EXCEPTION;
-        m = NULL;
-        // try again with a different class loader...
-      }
-      if (m != NULL &&
-          m->is_method_handle_invoke() &&
-          java_lang_invoke_MethodType::equals(polymorphic_method_type(), m->method_handle_type())) {
-        int mods = (m->access_flags().as_short() & JVM_RECOGNIZED_METHOD_MODIFIERS);
-        java_lang_invoke_MemberName::set_vmtarget(mname(),  m);
-        java_lang_invoke_MemberName::set_vmindex(mname(),   m->vtable_index());
-        java_lang_invoke_MemberName::set_modifiers(mname(), mods);
-        return;
-      }
-    }
-  }
+  return empty;
 }
 
 // Conversely, a member name which is only initialized from JVM internals
@@ -763,7 +731,7 @@
   assert(java_lang_invoke_MemberName::is_instance(mname()), "");
   oop vmtarget = java_lang_invoke_MemberName::vmtarget(mname());
   int vmindex  = java_lang_invoke_MemberName::vmindex(mname());
-  if (vmtarget == NULL || vmindex == VM_INDEX_UNINITIALIZED) {
+  if (vmtarget == NULL) {
     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), "nothing to expand");
   }
 
@@ -784,14 +752,12 @@
   case IS_METHOD:
   case IS_CONSTRUCTOR:
     {
-      KlassHandle receiver_limit; int decode_flags = 0;
-      methodHandle m = decode_vmtarget(vmtarget, vmindex, NULL, receiver_limit, decode_flags);
+      assert(vmtarget->is_method(), "method or constructor vmtarget is methodOop");
+      methodHandle m(THREAD, methodOop(vmtarget));
+      DEBUG_ONLY(vmtarget = NULL);  // safety
       if (m.is_null())  break;
       if (!have_defc) {
         klassOop defc = m->method_holder();
-        if (receiver_limit.not_null() && receiver_limit() != defc
-            && Klass::cast(receiver_limit())->is_subtype_of(defc))
-          defc = receiver_limit();
         java_lang_invoke_MemberName::set_clazz(mname(), Klass::cast(defc)->java_mirror());
       }
       if (!have_name) {
@@ -808,9 +774,10 @@
   case IS_FIELD:
     {
       // This is taken from LinkResolver::resolve_field, sans access checks.
-      if (!vmtarget->is_klass())  break;
+      assert(vmtarget->is_klass(), "field vmtarget is klassOop");
       if (!Klass::cast((klassOop) vmtarget)->oop_is_instance())  break;
       instanceKlassHandle defc(THREAD, (klassOop) vmtarget);
+      DEBUG_ONLY(vmtarget = NULL);  // safety
       bool is_static = ((flags & JVM_ACC_STATIC) != 0);
       fieldDescriptor fd; // find_field initializes fd if found
       if (!defc->find_field_from_offset(vmindex, is_static, &fd))
@@ -824,7 +791,11 @@
         java_lang_invoke_MemberName::set_name(mname(), name());
       }
       if (!have_type) {
-        Handle type = java_lang_String::create_from_symbol(fd.signature(), CHECK);
+        // If it is a primitive field type, don't mess with short strings like "I".
+        Handle type = field_signature_type_or_null(fd.signature());
+        if (type.is_null()) {
+          type = java_lang_String::create_from_symbol(fd.signature(), CHECK);
+        }
         java_lang_invoke_MemberName::set_type(mname(), type());
       }
       return;
@@ -882,7 +853,13 @@
         oop result = results->obj_at(rfill++);
         if (!java_lang_invoke_MemberName::is_instance(result))
           return -99;  // caller bug!
-        MethodHandles::init_MemberName(result, st.klass()->as_klassOop(), st.access_flags(), st.offset());
+        oop type = field_signature_type_or_null(st.signature());
+        oop name = field_name_or_null(st.name());
+        oop saved = MethodHandles::init_field_MemberName(result, st.klass()->as_klassOop(),
+                                                         st.access_flags(), type, name,
+                                                         st.offset());
+        if (saved != result)
+          results->obj_at_put(rfill-1, saved);  // show saved instance to user
       } else if (++overflow >= overflow_limit) {
         match_flags = 0; break; // got tired of looking at overflow
       }
@@ -930,7 +907,9 @@
         oop result = results->obj_at(rfill++);
         if (!java_lang_invoke_MemberName::is_instance(result))
           return -99;  // caller bug!
-        MethodHandles::init_MemberName(result, m, true);
+        oop saved = MethodHandles::init_method_MemberName(result, m, true, NULL);
+        if (saved != result)
+          results->obj_at_put(rfill-1, saved);  // show saved instance to user
       } else if (++overflow >= overflow_limit) {
         match_flags = 0; break; // got tired of looking at overflow
       }
@@ -941,1925 +920,16 @@
   return rfill + overflow;
 }
 
-
-// Decode this java.lang.Class object into an instanceKlass, if possible.
-// Throw IAE if not
-instanceKlassHandle MethodHandles::resolve_instance_klass(oop java_mirror_oop, TRAPS) {
-  instanceKlassHandle empty;
-  klassOop caller = NULL;
-  if (java_lang_Class::is_instance(java_mirror_oop)) {
-    caller = java_lang_Class::as_klassOop(java_mirror_oop);
-  }
-  if (caller == NULL || !Klass::cast(caller)->oop_is_instance()) {
-    THROW_MSG_(vmSymbols::java_lang_IllegalArgumentException(), "not a class", empty);
-  }
-  return instanceKlassHandle(THREAD, caller);
-}
-
-
-
-// Decode the vmtarget field of a method handle.
-// Sanitize out methodOops, klassOops, and any other non-Java data.
-// This is for debugging and reflection.
-oop MethodHandles::encode_target(Handle mh, int format, TRAPS) {
-  assert(java_lang_invoke_MethodHandle::is_instance(mh()), "must be a MH");
-  if (format == ETF_FORCE_DIRECT_HANDLE ||
-      format == ETF_COMPILE_DIRECT_HANDLE) {
-    // Internal function for stress testing.
-    Handle mt = java_lang_invoke_MethodHandle::type(mh());
-    int invocation_count = 10000;
-    TempNewSymbol signature = java_lang_invoke_MethodType::as_signature(mt(), true, CHECK_NULL);
-    bool omit_receiver_argument = true;
-    MethodHandleCompiler mhc(mh, vmSymbols::invoke_name(), signature, invocation_count, omit_receiver_argument, CHECK_NULL);
-    methodHandle m = mhc.compile(CHECK_NULL);
-    if (StressMethodHandleWalk && Verbose || PrintMiscellaneous) {
-      tty->print_cr("MethodHandleNatives.getTarget(%s)",
-                    format == ETF_FORCE_DIRECT_HANDLE ? "FORCE_DIRECT" : "COMPILE_DIRECT");
-      if (Verbose) {
-        m->print_codes();
-      }
-    }
-    if (StressMethodHandleWalk) {
-      InterpreterOopMap mask;
-      OopMapCache::compute_one_oop_map(m, m->code_size() - 1, &mask);
-    }
-    if ((format == ETF_COMPILE_DIRECT_HANDLE ||
-         CompilationPolicy::must_be_compiled(m))
-        && !instanceKlass::cast(m->method_holder())->is_not_initialized()
-        && CompilationPolicy::can_be_compiled(m)) {
-      // Force compilation
-      CompileBroker::compile_method(m, InvocationEntryBci,
-                                    CompilationPolicy::policy()->initial_compile_level(),
-                                    methodHandle(), 0, "MethodHandleNatives.getTarget",
-                                    CHECK_NULL);
-    }
-    // Now wrap m in a DirectMethodHandle.
-    instanceKlassHandle dmh_klass(THREAD, SystemDictionary::DirectMethodHandle_klass());
-    Handle dmh = dmh_klass->allocate_instance_handle(CHECK_NULL);
-    JavaValue ignore_result(T_VOID);
-    Symbol* init_name = vmSymbols::object_initializer_name();
-    Symbol* init_sig  = vmSymbols::notifyGenericMethodType_signature();
-    JavaCalls::call_special(&ignore_result, dmh,
-                            SystemDictionaryHandles::MethodHandle_klass(), init_name, init_sig,
-                            java_lang_invoke_MethodHandle::type(mh()), CHECK_NULL);
-    MethodHandles::init_DirectMethodHandle(dmh, m, false, CHECK_NULL);
-    return dmh();
-  }
-  if (format == ETF_HANDLE_OR_METHOD_NAME) {
-    oop target = java_lang_invoke_MethodHandle::vmtarget(mh());
-    if (target == NULL) {
-      return NULL;                // unformed MH
-    }
-    klassOop tklass = target->klass();
-    if (Klass::cast(tklass)->is_subclass_of(SystemDictionary::Object_klass())) {
-      return target;              // target is another MH (or something else?)
-    }
-  }
-  if (format == ETF_DIRECT_HANDLE) {
-    oop target = mh();
-    for (;;) {
-      if (target->klass() == SystemDictionary::DirectMethodHandle_klass()) {
-        return target;
-      }
-      if (!java_lang_invoke_MethodHandle::is_instance(target)){
-        return NULL;                // unformed MH
-      }
-      target = java_lang_invoke_MethodHandle::vmtarget(target);
-    }
-  }
-  // cases of metadata in MH.vmtarget:
-  // - AMH can have methodOop for static invoke with bound receiver
-  // - DMH can have methodOop for static invoke (on variable receiver)
-  // - DMH can have klassOop for dispatched (non-static) invoke
-  KlassHandle receiver_limit; int decode_flags = 0;
-  methodHandle m = decode_MethodHandle(mh(), receiver_limit, decode_flags);
-  if (m.is_null())  return NULL;
-  switch (format) {
-  case ETF_REFLECT_METHOD:
-    // same as jni_ToReflectedMethod:
-    if (m->is_initializer()) {
-      return Reflection::new_constructor(m, THREAD);
-    } else {
-      return Reflection::new_method(m, UseNewReflection, false, THREAD);
-    }
-
-  case ETF_HANDLE_OR_METHOD_NAME:   // method, not handle
-  case ETF_METHOD_NAME:
-    {
-      if (SystemDictionary::MemberName_klass() == NULL)  break;
-      instanceKlassHandle mname_klass(THREAD, SystemDictionary::MemberName_klass());
-      mname_klass->initialize(CHECK_NULL);
-      Handle mname = mname_klass->allocate_instance_handle(CHECK_NULL);  // possible safepoint
-      java_lang_invoke_MemberName::set_vmindex(mname(), VM_INDEX_UNINITIALIZED);
-      bool do_dispatch = ((decode_flags & MethodHandles::_dmf_does_dispatch) != 0);
-      init_MemberName(mname(), m(), do_dispatch);
-      expand_MemberName(mname, 0, CHECK_NULL);
-      return mname();
-    }
-  }
-
-  // Unknown format code.
-  char msg[50];
-  jio_snprintf(msg, sizeof(msg), "unknown getTarget format=%d", format);
-  THROW_MSG_NULL(vmSymbols::java_lang_IllegalArgumentException(), msg);
-}
-
-static const char* always_null_names[] = {
-  "java/lang/Void",
-  "java/lang/Null",
-  //"java/lang/Nothing",
-  "sun/dyn/empty/Empty",
-  "sun/invoke/empty/Empty",
-  NULL
-};
-
-static bool is_always_null_type(klassOop klass) {
-  if (klass == NULL)  return false;  // safety
-  if (!Klass::cast(klass)->oop_is_instance())  return false;
-  instanceKlass* ik = instanceKlass::cast(klass);
-  // Must be on the boot class path:
-  if (ik->class_loader() != NULL)  return false;
-  // Check the name.
-  Symbol* name = ik->name();
-  for (int i = 0; ; i++) {
-    const char* test_name = always_null_names[i];
-    if (test_name == NULL)  break;
-    if (name->equals(test_name))
-      return true;
-  }
-  return false;
-}
-
-bool MethodHandles::class_cast_needed(klassOop src, klassOop dst) {
-  if (dst == NULL)  return true;
-  if (src == NULL)  return (dst != SystemDictionary::Object_klass());
-  if (src == dst || dst == SystemDictionary::Object_klass())
-    return false;                               // quickest checks
-  Klass* srck = Klass::cast(src);
-  Klass* dstk = Klass::cast(dst);
-  if (dstk->is_interface()) {
-    // interface receivers can safely be viewed as untyped,
-    // because interface calls always include a dynamic check
-    //dstk = Klass::cast(SystemDictionary::Object_klass());
-    return false;
-  }
-  if (srck->is_interface()) {
-    // interface arguments must be viewed as untyped
-    //srck = Klass::cast(SystemDictionary::Object_klass());
-    return true;
-  }
-  if (is_always_null_type(src)) {
-    // some source types are known to be never instantiated;
-    // they represent references which are always null
-    // such null references never fail to convert safely
-    return false;
-  }
-  return !srck->is_subclass_of(dstk->as_klassOop());
-}
-
-static oop object_java_mirror() {
-  return Klass::cast(SystemDictionary::Object_klass())->java_mirror();
-}
-
-bool MethodHandles::is_float_fixed_reinterpretation_cast(BasicType src, BasicType dst) {
-  if (src == T_FLOAT)   return dst == T_INT;
-  if (src == T_INT)     return dst == T_FLOAT;
-  if (src == T_DOUBLE)  return dst == T_LONG;
-  if (src == T_LONG)    return dst == T_DOUBLE;
-  return false;
-}
-
-bool MethodHandles::same_basic_type_for_arguments(BasicType src,
-                                                  BasicType dst,
-                                                  bool raw,
-                                                  bool for_return) {
-  if (for_return) {
-    // return values can always be forgotten:
-    if (dst == T_VOID)  return true;
-    if (src == T_VOID)  return raw && (dst == T_INT);
-    // We allow caller to receive a garbage int, which is harmless.
-    // This trick is pulled by trusted code (see VerifyType.canPassRaw).
-  }
-  assert(src != T_VOID && dst != T_VOID, "should not be here");
-  if (src == dst)  return true;
-  if (type2size[src] != type2size[dst])  return false;
-  if (src == T_OBJECT || dst == T_OBJECT)  return false;
-  if (raw)  return true;  // bitwise reinterpretation; caller guarantees safety
-  // allow reinterpretation casts for integral widening
-  if (is_subword_type(src)) { // subwords can fit in int or other subwords
-    if (dst == T_INT)         // any subword fits in an int
-      return true;
-    if (src == T_BOOLEAN)     // boolean fits in any subword
-      return is_subword_type(dst);
-    if (src == T_BYTE && dst == T_SHORT)
-      return true;            // remaining case: byte fits in short
-  }
-  // allow float/fixed reinterpretation casts
-  if (is_float_fixed_reinterpretation_cast(src, dst))
-    return true;
-  return false;
-}
-
-const char* MethodHandles::check_method_receiver(methodOop m,
-                                                 klassOop passed_recv_type) {
-  assert(!m->is_static(), "caller resp.");
-  if (passed_recv_type == NULL)
-    return "receiver type is primitive";
-  if (class_cast_needed(passed_recv_type, m->method_holder())) {
-    Klass* formal = Klass::cast(m->method_holder());
-    return SharedRuntime::generate_class_cast_message("receiver type",
-                                                      formal->external_name());
-  }
-  return NULL;                  // checks passed
-}
-
-// Verify that m's signature can be called type-safely by a method handle
-// of the given method type 'mtype'.
-// It takes a TRAPS argument because it must perform symbol lookups.
-void MethodHandles::verify_method_signature(methodHandle m,
-                                            Handle mtype,
-                                            int first_ptype_pos,
-                                            KlassHandle insert_ptype,
-                                            TRAPS) {
-  Handle mhi_type;
-  if (m->is_method_handle_invoke()) {
-    // use this more exact typing instead of the symbolic signature:
-    mhi_type = Handle(THREAD, m->method_handle_type());
-  }
-  objArrayHandle ptypes(THREAD, java_lang_invoke_MethodType::ptypes(mtype()));
-  int pnum = first_ptype_pos;
-  int pmax = ptypes->length();
-  int anum = 0;                 // method argument
-  const char* err = NULL;
-  ResourceMark rm(THREAD);
-  for (SignatureStream ss(m->signature()); !ss.is_done(); ss.next()) {
-    oop ptype_oop = NULL;
-    if (ss.at_return_type()) {
-      if (pnum != pmax)
-        { err = "too many arguments"; break; }
-      ptype_oop = java_lang_invoke_MethodType::rtype(mtype());
-    } else {
-      if (pnum >= pmax)
-        { err = "not enough arguments"; break; }
-      if (pnum >= 0)
-        ptype_oop = ptypes->obj_at(pnum);
-      else if (insert_ptype.is_null())
-        ptype_oop = NULL;
-      else
-        ptype_oop = insert_ptype->java_mirror();
-      pnum += 1;
-      anum += 1;
-    }
-    KlassHandle pklass;
-    BasicType   ptype = T_OBJECT;
-    bool   have_ptype = false;
-    // missing ptype_oop does not match any non-reference; use Object to report the error
-    pklass = SystemDictionaryHandles::Object_klass();
-    if (ptype_oop != NULL) {
-      have_ptype = true;
-      klassOop pklass_oop = NULL;
-      ptype = java_lang_Class::as_BasicType(ptype_oop, &pklass_oop);
-      pklass = KlassHandle(THREAD, pklass_oop);
-    }
-    ptype_oop = NULL; //done with this
-    KlassHandle aklass;
-    BasicType   atype = ss.type();
-    if (atype == T_ARRAY)  atype = T_OBJECT; // fold all refs to T_OBJECT
-    if (atype == T_OBJECT) {
-      if (!have_ptype) {
-        // null matches any reference
-        continue;
-      }
-      if (mhi_type.is_null()) {
-        // If we fail to resolve types at this point, we will usually throw an error.
-        TempNewSymbol name = ss.as_symbol_or_null();
-        if (name != NULL) {
-          instanceKlass* mk = instanceKlass::cast(m->method_holder());
-          Handle loader(THREAD, mk->class_loader());
-          Handle domain(THREAD, mk->protection_domain());
-          klassOop aklass_oop = SystemDictionary::resolve_or_null(name, loader, domain, CHECK);
-          if (aklass_oop != NULL)
-            aklass = KlassHandle(THREAD, aklass_oop);
-          if (aklass.is_null() &&
-              pklass.not_null() &&
-              loader.is_null() &&
-              pklass->name() == name)
-            // accept name equivalence here, since that's the best we can do
-            aklass = pklass;
-        }
-      } else {
-        // for method handle invokers we don't look at the name in the signature
-        oop atype_oop;
-        if (ss.at_return_type())
-          atype_oop = java_lang_invoke_MethodType::rtype(mhi_type());
-        else
-          atype_oop = java_lang_invoke_MethodType::ptype(mhi_type(), anum-1);
-        klassOop aklass_oop = NULL;
-        atype = java_lang_Class::as_BasicType(atype_oop, &aklass_oop);
-        aklass = KlassHandle(THREAD, aklass_oop);
-      }
-    }
-    if (!ss.at_return_type()) {
-      err = check_argument_type_change(ptype, pklass(), atype, aklass(), anum);
-    } else {
-      err = check_return_type_change(atype, aklass(), ptype, pklass()); // note reversal!
-    }
-    if (err != NULL)  break;
-  }
-
-  if (err != NULL) {
-#ifndef PRODUCT
-    if (PrintMiscellaneous && (Verbose || WizardMode)) {
-      tty->print("*** verify_method_signature failed: ");
-      java_lang_invoke_MethodType::print_signature(mtype(), tty);
-      tty->cr();
-      tty->print_cr("    first_ptype_pos = %d, insert_ptype = "UINTX_FORMAT, first_ptype_pos, insert_ptype());
-      tty->print("    Failing method: ");
-      m->print();
-    }
-#endif //PRODUCT
-    THROW_MSG(vmSymbols::java_lang_InternalError(), err);
-  }
-}
-
-// Main routine for verifying the MethodHandle.type of a proposed
-// direct or bound-direct method handle.
-void MethodHandles::verify_method_type(methodHandle m,
-                                       Handle mtype,
-                                       bool has_bound_recv,
-                                       KlassHandle bound_recv_type,
-                                       TRAPS) {
-  bool m_needs_receiver = !m->is_static();
-
-  const char* err = NULL;
-
-  int first_ptype_pos = m_needs_receiver ? 1 : 0;
-  if (has_bound_recv) {
-    first_ptype_pos -= 1;  // ptypes do not include the bound argument; start earlier in them
-    if (m_needs_receiver && bound_recv_type.is_null())
-      { err = "bound receiver is not an object"; goto die; }
-  }
-
-  if (m_needs_receiver && err == NULL) {
-    objArrayOop ptypes = java_lang_invoke_MethodType::ptypes(mtype());
-    if (ptypes->length() < first_ptype_pos)
-      { err = "receiver argument is missing"; goto die; }
-    if (has_bound_recv)
-      err = check_method_receiver(m(), bound_recv_type->as_klassOop());
-    else
-      err = check_method_receiver(m(), java_lang_Class::as_klassOop(ptypes->obj_at(first_ptype_pos-1)));
-    if (err != NULL)  goto die;
-  }
-
-  // Check the other arguments for mistypes.
-  verify_method_signature(m, mtype, first_ptype_pos, bound_recv_type, CHECK);
-  return;
-
- die:
-  THROW_MSG(vmSymbols::java_lang_InternalError(), err);
-}
-
-void MethodHandles::verify_vmslots(Handle mh, TRAPS) {
-  // Verify vmslots.
-  int check_slots = argument_slot_count(java_lang_invoke_MethodHandle::type(mh()));
-  if (java_lang_invoke_MethodHandle::vmslots(mh()) != check_slots) {
-    THROW_MSG(vmSymbols::java_lang_InternalError(), "bad vmslots in BMH");
-  }
-}
-
-void MethodHandles::verify_vmargslot(Handle mh, int argnum, int argslot, TRAPS) {
-  // Verify that argslot points at the given argnum.
-  int check_slot = argument_slot(java_lang_invoke_MethodHandle::type(mh()), argnum);
-  if (argslot != check_slot || argslot < 0) {
-    ResourceMark rm;
-    const char* fmt = "for argnum of %d, vmargslot is %d, should be %d";
-    size_t msglen = strlen(fmt) + 3*11 + 1;
-    char* msg = NEW_RESOURCE_ARRAY(char, msglen);
-    jio_snprintf(msg, msglen, fmt, argnum, argslot, check_slot);
-    THROW_MSG(vmSymbols::java_lang_InternalError(), msg);
-  }
-}
-
-// Verify the correspondence between two method types.
-// Apart from the advertised changes, caller method type X must
-// be able to invoke the callee method Y type with no violations
-// of type integrity.
-// Return NULL if all is well, else a short error message.
-const char* MethodHandles::check_method_type_change(oop src_mtype, int src_beg, int src_end,
-                                                    int insert_argnum, oop insert_type,
-                                                    int change_argnum, oop change_type,
-                                                    int delete_argnum,
-                                                    oop dst_mtype, int dst_beg, int dst_end,
-                                                    bool raw) {
-  objArrayOop src_ptypes = java_lang_invoke_MethodType::ptypes(src_mtype);
-  objArrayOop dst_ptypes = java_lang_invoke_MethodType::ptypes(dst_mtype);
-
-  int src_max = src_ptypes->length();
-  int dst_max = dst_ptypes->length();
-
-  if (src_end == -1)  src_end = src_max;
-  if (dst_end == -1)  dst_end = dst_max;
-
-  assert(0 <= src_beg && src_beg <= src_end && src_end <= src_max, "oob");
-  assert(0 <= dst_beg && dst_beg <= dst_end && dst_end <= dst_max, "oob");
-
-  // pending actions; set to -1 when done:
-  int ins_idx = insert_argnum, chg_idx = change_argnum, del_idx = delete_argnum;
-
-  const char* err = NULL;
-
-  // Walk along each array of parameter types, including a virtual
-  // NULL end marker at the end of each.
-  for (int src_idx = src_beg, dst_idx = dst_beg;
-       (src_idx <= src_end && dst_idx <= dst_end);
-       src_idx++, dst_idx++) {
-    oop src_type = (src_idx == src_end) ? oop(NULL) : src_ptypes->obj_at(src_idx);
-    oop dst_type = (dst_idx == dst_end) ? oop(NULL) : dst_ptypes->obj_at(dst_idx);
-    bool fix_null_src_type = false;
-
-    // Perform requested edits.
-    if (ins_idx == src_idx) {
-      // note that the inserted guy is never affected by a change or deletion
-      ins_idx = -1;
-      src_type = insert_type;
-      fix_null_src_type = true;
-      --src_idx;                // back up to process src type on next loop
-      src_idx = src_end;
-    } else {
-      // note that the changed guy can be immediately deleted
-      if (chg_idx == src_idx) {
-        chg_idx = -1;
-        assert(src_idx < src_end, "oob");
-        src_type = change_type;
-        fix_null_src_type = true;
-      }
-      if (del_idx == src_idx) {
-        del_idx = -1;
-        assert(src_idx < src_end, "oob");
-        --dst_idx;
-        continue;               // rerun loop after skipping this position
-      }
-    }
-
-    if (src_type == NULL && fix_null_src_type)
-      // explicit null in this case matches any dest reference
-      src_type = (java_lang_Class::is_primitive(dst_type) ? object_java_mirror() : dst_type);
-
-    // Compare the two argument types.
-    if (src_type != dst_type) {
-      if (src_type == NULL)  return "not enough arguments";
-      if (dst_type == NULL)  return "too many arguments";
-      err = check_argument_type_change(src_type, dst_type, dst_idx, raw);
-      if (err != NULL)  return err;
-    }
-  }
-
-  // Now compare return types also.
-  oop src_rtype = java_lang_invoke_MethodType::rtype(src_mtype);
-  oop dst_rtype = java_lang_invoke_MethodType::rtype(dst_mtype);
-  if (src_rtype != dst_rtype) {
-    err = check_return_type_change(dst_rtype, src_rtype, raw); // note reversal!
-    if (err != NULL)  return err;
-  }
-
-  assert(err == NULL, "");
-  return NULL;  // all is well
-}
-
-
-const char* MethodHandles::check_argument_type_change(BasicType src_type,
-                                                      klassOop src_klass,
-                                                      BasicType dst_type,
-                                                      klassOop dst_klass,
-                                                      int argnum,
-                                                      bool raw) {
-  const char* err = NULL;
-  const bool for_return = (argnum < 0);
-
-  // just in case:
-  if (src_type == T_ARRAY)  src_type = T_OBJECT;
-  if (dst_type == T_ARRAY)  dst_type = T_OBJECT;
-
-  // Produce some nice messages if VerifyMethodHandles is turned on:
-  if (!same_basic_type_for_arguments(src_type, dst_type, raw, for_return)) {
-    if (src_type == T_OBJECT) {
-      if (raw && is_java_primitive(dst_type))
-        return NULL;    // ref-to-prim discards ref and returns zero
-      err = (!for_return
-             ? "type mismatch: passing a %s for method argument #%d, which expects primitive %s"
-             : "type mismatch: returning a %s, but caller expects primitive %s");
-    } else if (dst_type == T_OBJECT) {
-      err = (!for_return
-             ? "type mismatch: passing a primitive %s for method argument #%d, which expects %s"
-             : "type mismatch: returning a primitive %s, but caller expects %s");
-    } else {
-      err = (!for_return
-             ? "type mismatch: passing a %s for method argument #%d, which expects %s"
-             : "type mismatch: returning a %s, but caller expects %s");
-    }
-  } else if (src_type == T_OBJECT && dst_type == T_OBJECT &&
-             class_cast_needed(src_klass, dst_klass)) {
-    if (!class_cast_needed(dst_klass, src_klass)) {
-      if (raw)
-        return NULL;    // reverse cast is OK; the MH target is trusted to enforce it
-      err = (!for_return
-             ? "cast required: passing a %s for method argument #%d, which expects %s"
-             : "cast required: returning a %s, but caller expects %s");
-    } else {
-      err = (!for_return
-             ? "reference mismatch: passing a %s for method argument #%d, which expects %s"
-             : "reference mismatch: returning a %s, but caller expects %s");
-    }
-  } else {
-    // passed the obstacle course
-    return NULL;
-  }
-
-  // format, format, format
-  const char* src_name = type2name(src_type);
-  const char* dst_name = type2name(dst_type);
-  if (src_name == NULL)  src_name = "unknown type";
-  if (dst_name == NULL)  dst_name = "unknown type";
-  if (src_type == T_OBJECT)
-    src_name = (src_klass != NULL) ? Klass::cast(src_klass)->external_name() : "an unresolved class";
-  if (dst_type == T_OBJECT)
-    dst_name = (dst_klass != NULL) ? Klass::cast(dst_klass)->external_name() : "an unresolved class";
-
-  size_t msglen = strlen(err) + strlen(src_name) + strlen(dst_name) + (argnum < 10 ? 1 : 11);
-  char* msg = NEW_RESOURCE_ARRAY(char, msglen + 1);
-  if (!for_return) {
-    assert(strstr(err, "%d") != NULL, "");
-    jio_snprintf(msg, msglen, err, src_name, argnum, dst_name);
-  } else {
-    assert(strstr(err, "%d") == NULL, "");
-    jio_snprintf(msg, msglen, err, src_name,         dst_name);
-  }
-  return msg;
-}
-
-// Compute the depth within the stack of the given argument, i.e.,
-// the combined size of arguments to the right of the given argument.
-// For the last argument (ptypes.length-1) this will be zero.
-// For the first argument (0) this will be the size of all
-// arguments but that one.  For the special number -1, this
-// will be the size of all arguments, including the first.
-// If the argument is neither -1 nor a valid argument index,
-// then return a negative number.  Otherwise, the result
-// is in the range [0..vmslots] inclusive.
-int MethodHandles::argument_slot(oop method_type, int arg) {
-  objArrayOop ptypes = java_lang_invoke_MethodType::ptypes(method_type);
-  int argslot = 0;
-  int len = ptypes->length();
-  if (arg < -1 || arg >= len)  return -99;
-  for (int i = len-1; i > arg; i--) {
-    BasicType bt = java_lang_Class::as_BasicType(ptypes->obj_at(i));
-    argslot += type2size[bt];
-  }
-  assert(argument_slot_to_argnum(method_type, argslot) == arg, "inverse works");
-  return argslot;
-}
-
-// Given a slot number, return the argument number.
-int MethodHandles::argument_slot_to_argnum(oop method_type, int query_argslot) {
-  objArrayOop ptypes = java_lang_invoke_MethodType::ptypes(method_type);
-  int argslot = 0;
-  int len = ptypes->length();
-  for (int i = len-1; i >= 0; i--) {
-    if (query_argslot == argslot)  return i;
-    BasicType bt = java_lang_Class::as_BasicType(ptypes->obj_at(i));
-    argslot += type2size[bt];
-  }
-  // return pseudo-arg deepest in stack:
-  if (query_argslot == argslot)  return -1;
-  return -99;                   // oob slot, or splitting a double-slot arg
-}
-
-methodHandle MethodHandles::dispatch_decoded_method(methodHandle m,
-                                                    KlassHandle receiver_limit,
-                                                    int decode_flags,
-                                                    KlassHandle receiver_klass,
-                                                    TRAPS) {
-  assert((decode_flags & ~_DMF_DIRECT_MASK) == 0, "must be direct method reference");
-  assert((decode_flags & _dmf_has_receiver) != 0, "must have a receiver or first reference argument");
-
-  if (!m->is_static() &&
-      (receiver_klass.is_null() || !receiver_klass->is_subtype_of(m->method_holder())))
-    // given type does not match class of method, or receiver is null!
-    // caller should have checked this, but let's be extra careful...
-    return methodHandle();
-
-  if (receiver_limit.not_null() &&
-      (receiver_klass.not_null() && !receiver_klass->is_subtype_of(receiver_limit())))
-    // given type is not limited to the receiver type
-    // note that a null receiver can match any reference value, for a static method
-    return methodHandle();
-
-  if (!(decode_flags & MethodHandles::_dmf_does_dispatch)) {
-    // pre-dispatched or static method (null receiver is OK for static)
-    return m;
-
-  } else if (receiver_klass.is_null()) {
-    // null receiver value; cannot dispatch
-    return methodHandle();
-
-  } else if (!(decode_flags & MethodHandles::_dmf_from_interface)) {
-    // perform virtual dispatch
-    int vtable_index = m->vtable_index();
-    guarantee(vtable_index >= 0, "valid vtable index");
-
-    // receiver_klass might be an arrayKlassOop but all vtables start at
-    // the same place. The cast is to avoid virtual call and assertion.
-    // See also LinkResolver::runtime_resolve_virtual_method.
-    instanceKlass* inst = (instanceKlass*)Klass::cast(receiver_klass());
-    DEBUG_ONLY(inst->verify_vtable_index(vtable_index));
-    methodOop m_oop = inst->method_at_vtable(vtable_index);
-    return methodHandle(THREAD, m_oop);
-
-  } else {
-    // perform interface dispatch
-    int itable_index = klassItable::compute_itable_index(m());
-    guarantee(itable_index >= 0, "valid itable index");
-    instanceKlass* inst = instanceKlass::cast(receiver_klass());
-    methodOop m_oop = inst->method_at_itable(m->method_holder(), itable_index, THREAD);
-    return methodHandle(THREAD, m_oop);
-  }
-}
-
-void MethodHandles::verify_DirectMethodHandle(Handle mh, methodHandle m, TRAPS) {
-  // Verify type.
-  Handle mtype(THREAD, java_lang_invoke_MethodHandle::type(mh()));
-  verify_method_type(m, mtype, false, KlassHandle(), CHECK);
-
-  // Verify vmslots.
-  if (java_lang_invoke_MethodHandle::vmslots(mh()) != m->size_of_parameters()) {
-    THROW_MSG(vmSymbols::java_lang_InternalError(), "bad vmslots in DMH");
-  }
-}
-
-void MethodHandles::init_DirectMethodHandle(Handle mh, methodHandle m, bool do_dispatch, TRAPS) {
-  // Check arguments.
-  if (mh.is_null() || m.is_null() ||
-      (!do_dispatch && m->is_abstract())) {
-    THROW(vmSymbols::java_lang_InternalError());
-  }
-
-  if (VerifyMethodHandles) {
-    // The privileged code which invokes this routine should not make
-    // a mistake about types, but it's better to verify.
-    verify_DirectMethodHandle(mh, m, CHECK);
-  }
-
-  // Finally, after safety checks are done, link to the target method.
-  // We will follow the same path as the latter part of
-  // InterpreterRuntime::resolve_invoke(), which first finds the method
-  // and then decides how to populate the constant pool cache entry
-  // that links the interpreter calls to the method.  We need the same
-  // bits, and will use the same calling sequence code.
-
-  int    vmindex = methodOopDesc::garbage_vtable_index;
-  Handle vmtarget;
-
-  instanceKlass::cast(m->method_holder())->link_class(CHECK);
-
-  MethodHandleEntry* me = NULL;
-  if (do_dispatch && Klass::cast(m->method_holder())->is_interface()) {
-    // We are simulating an invokeinterface instruction.
-    // (We might also be simulating an invokevirtual on a miranda method,
-    // but it is safe to treat it as an invokeinterface.)
-    assert(!m->can_be_statically_bound(), "no final methods on interfaces");
-    vmindex = klassItable::compute_itable_index(m());
-    assert(vmindex >= 0, "(>=0) == do_dispatch");
-    // Set up same bits as ConstantPoolCacheEntry::set_interface_call().
-    vmtarget = m->method_holder(); // the interface
-    me = MethodHandles::entry(MethodHandles::_invokeinterface_mh);
-  } else if (!do_dispatch || m->can_be_statically_bound()) {
-    // We are simulating an invokestatic or invokespecial instruction.
-    // Set up the method pointer, just like ConstantPoolCacheEntry::set_method().
-    vmtarget = m;
-    // this does not help dispatch, but it will make it possible to parse this MH:
-    vmindex  = methodOopDesc::nonvirtual_vtable_index;
-    assert(vmindex < 0, "(>=0) == do_dispatch");
-    if (!m->is_static()) {
-      me = MethodHandles::entry(MethodHandles::_invokespecial_mh);
-    } else {
-      me = MethodHandles::entry(MethodHandles::_invokestatic_mh);
-      // Part of the semantics of a static call is an initialization barrier.
-      // For a DMH, it is done now, when the handle is created.
-      Klass* k = Klass::cast(m->method_holder());
-      if (k->should_be_initialized()) {
-        k->initialize(CHECK);  // possible safepoint
-      }
-    }
-  } else {
-    // We are simulating an invokevirtual instruction.
-    // Set up the vtable index, just like ConstantPoolCacheEntry::set_method().
-    // The key logic is LinkResolver::runtime_resolve_virtual_method.
-    vmindex  = m->vtable_index();
-    vmtarget = m->method_holder();
-    me = MethodHandles::entry(MethodHandles::_invokevirtual_mh);
-  }
-
-  if (me == NULL) { THROW(vmSymbols::java_lang_InternalError()); }
-
-  java_lang_invoke_DirectMethodHandle::set_vmtarget(mh(), vmtarget());
-  java_lang_invoke_DirectMethodHandle::set_vmindex( mh(), vmindex);
-  DEBUG_ONLY(KlassHandle rlimit; int flags);
-  assert(MethodHandles::decode_method(mh(), rlimit, flags) == m,
-         "properly stored for later decoding");
-  DEBUG_ONLY(bool actual_do_dispatch = ((flags & _dmf_does_dispatch) != 0));
-  assert(!(actual_do_dispatch && !do_dispatch),
-         "do not perform dispatch if !do_dispatch specified");
-  assert(actual_do_dispatch == (vmindex >= 0), "proper later decoding of do_dispatch");
-  assert(decode_MethodHandle_stack_pushes(mh()) == 0, "DMH does not move stack");
-
-  // Done!
-  java_lang_invoke_MethodHandle::set_vmentry(mh(), me);
-}
-
-void MethodHandles::verify_BoundMethodHandle_with_receiver(Handle mh,
-                                                           methodHandle m,
-                                                           TRAPS) {
-  // Verify type.
-  KlassHandle bound_recv_type;
-  {
-    oop receiver = java_lang_invoke_BoundMethodHandle::argument(mh());
-    if (receiver != NULL)
-      bound_recv_type = KlassHandle(THREAD, receiver->klass());
-  }
-  Handle mtype(THREAD, java_lang_invoke_MethodHandle::type(mh()));
-  verify_method_type(m, mtype, true, bound_recv_type, CHECK);
-
-  int receiver_pos = m->size_of_parameters() - 1;
-
-  // Verify MH.vmargslot, which should point at the bound receiver.
-  verify_vmargslot(mh, -1, java_lang_invoke_BoundMethodHandle::vmargslot(mh()), CHECK);
-  //verify_vmslots(mh, CHECK);
-
-  // Verify vmslots.
-  if (java_lang_invoke_MethodHandle::vmslots(mh()) != receiver_pos) {
-    THROW_MSG(vmSymbols::java_lang_InternalError(), "bad vmslots in BMH (receiver)");
-  }
-}
-
-// Initialize a BMH with a receiver bound directly to a methodOop.
-void MethodHandles::init_BoundMethodHandle_with_receiver(Handle mh,
-                                                         methodHandle original_m,
-                                                         KlassHandle receiver_limit,
-                                                         int decode_flags,
-                                                         TRAPS) {
-  // Check arguments.
-  if (mh.is_null() || original_m.is_null()) {
-    THROW(vmSymbols::java_lang_InternalError());
-  }
-
-  KlassHandle receiver_klass;
-  {
-    oop receiver_oop = java_lang_invoke_BoundMethodHandle::argument(mh());
-    if (receiver_oop != NULL)
-      receiver_klass = KlassHandle(THREAD, receiver_oop->klass());
-  }
-  methodHandle m = dispatch_decoded_method(original_m,
-                                           receiver_limit, decode_flags,
-                                           receiver_klass,
-                                           CHECK);
-  if (m.is_null())      { THROW(vmSymbols::java_lang_InternalError()); }
-  if (m->is_abstract()) { THROW(vmSymbols::java_lang_AbstractMethodError()); }
-
-  int vmargslot = m->size_of_parameters() - 1;
-  assert(java_lang_invoke_BoundMethodHandle::vmargslot(mh()) == vmargslot, "");
-
-  if (VerifyMethodHandles) {
-    verify_BoundMethodHandle_with_receiver(mh, m, CHECK);
-  }
-
-  java_lang_invoke_BoundMethodHandle::set_vmtarget(mh(), m());
-
-  DEBUG_ONLY(KlassHandle junk1; int junk2);
-  assert(MethodHandles::decode_method(mh(), junk1, junk2) == m, "properly stored for later decoding");
-  assert(decode_MethodHandle_stack_pushes(mh()) == 1, "BMH pushes one stack slot");
-
-  // Done!
-  java_lang_invoke_MethodHandle::set_vmentry(mh(), MethodHandles::entry(MethodHandles::_bound_ref_direct_mh));
-}
-
-void MethodHandles::verify_BoundMethodHandle(Handle mh, Handle target, int argnum,
-                                             bool direct_to_method, TRAPS) {
-  ResourceMark rm;
-  Handle ptype_handle(THREAD,
-                           java_lang_invoke_MethodType::ptype(java_lang_invoke_MethodHandle::type(target()), argnum));
-  KlassHandle ptype_klass;
-  BasicType ptype = java_lang_Class::as_BasicType(ptype_handle(), &ptype_klass);
-  int slots_pushed = type2size[ptype];
-
-  oop argument = java_lang_invoke_BoundMethodHandle::argument(mh());
-
-  const char* err = NULL;
-
-  switch (ptype) {
-  case T_OBJECT:
-    if (argument != NULL)
-      // we must implicitly convert from the arg type to the outgoing ptype
-      err = check_argument_type_change(T_OBJECT, argument->klass(), ptype, ptype_klass(), argnum);
-    break;
-
-  case T_ARRAY: case T_VOID:
-    assert(false, "array, void do not appear here");
-  default:
-    if (ptype != T_INT && !is_subword_type(ptype)) {
-      err = "unexpected parameter type";
-      break;
-    }
-    // check subrange of Integer.value, if necessary
-    if (argument == NULL || argument->klass() != SystemDictionary::Integer_klass()) {
-      err = "bound integer argument must be of type java.lang.Integer";
-      break;
-    }
-    if (ptype != T_INT) {
-      int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_INT);
-      jint value = argument->int_field(value_offset);
-      int vminfo = adapter_unbox_subword_vminfo(ptype);
-      jint subword = truncate_subword_from_vminfo(value, vminfo);
-      if (value != subword) {
-        err = "bound subword value does not fit into the subword type";
-        break;
-      }
-    }
-    break;
-  case T_FLOAT:
-  case T_DOUBLE:
-  case T_LONG:
-    {
-      // we must implicitly convert from the unboxed arg type to the outgoing ptype
-      BasicType argbox = java_lang_boxing_object::basic_type(argument);
-      if (argbox != ptype) {
-        err = check_argument_type_change(T_OBJECT, (argument == NULL
-                                                    ? SystemDictionary::Object_klass()
-                                                    : argument->klass()),
-                                         ptype, ptype_klass(), argnum);
-        assert(err != NULL, "this must be an error");
-      }
-      break;
-    }
-  }
-
-  if (err == NULL) {
-    DEBUG_ONLY(int this_pushes = decode_MethodHandle_stack_pushes(mh()));
-    if (direct_to_method) {
-      assert(this_pushes == slots_pushed, "BMH pushes one or two stack slots");
-    } else {
-      int target_pushes = decode_MethodHandle_stack_pushes(target());
-      assert(this_pushes == slots_pushed + target_pushes, "BMH stack motion must be correct");
-    }
-  }
-
-  if (err == NULL) {
-    // Verify the rest of the method type.
-    err = check_method_type_insertion(java_lang_invoke_MethodHandle::type(mh()),
-                                      argnum, ptype_handle(),
-                                      java_lang_invoke_MethodHandle::type(target()));
-  }
-
-  if (err != NULL) {
-    THROW_MSG(vmSymbols::java_lang_InternalError(), err);
-  }
-}
-
-void MethodHandles::init_BoundMethodHandle(Handle mh, Handle target, int argnum, TRAPS) {
-  // Check arguments.
-  if (mh.is_null() || target.is_null() || !java_lang_invoke_MethodHandle::is_instance(target())) {
-    THROW(vmSymbols::java_lang_InternalError());
-  }
-
-  int argslot = java_lang_invoke_BoundMethodHandle::vmargslot(mh());
-
-  if (VerifyMethodHandles) {
-    int insert_after = argnum - 1;
-    verify_vmargslot(mh, insert_after, argslot, CHECK);
-    verify_vmslots(mh, CHECK);
-  }
-
-  // Get bound type and required slots.
-  BasicType ptype;
-  {
-    oop ptype_oop = java_lang_invoke_MethodType::ptype(java_lang_invoke_MethodHandle::type(target()), argnum);
-    ptype = java_lang_Class::as_BasicType(ptype_oop);
-  }
-  int slots_pushed = type2size[ptype];
-
-  // If (a) the target is a direct non-dispatched method handle,
-  // or (b) the target is a dispatched direct method handle and we
-  // are binding the receiver, cut out the middle-man.
-  // Do this by decoding the DMH and using its methodOop directly as vmtarget.
-  bool direct_to_method = false;
-  if (OptimizeMethodHandles &&
-      target->klass() == SystemDictionary::DirectMethodHandle_klass() &&
-      (argnum != 0 || java_lang_invoke_BoundMethodHandle::argument(mh()) != NULL) &&
-      (argnum == 0 || java_lang_invoke_DirectMethodHandle::vmindex(target()) < 0)) {
-    KlassHandle receiver_limit; int decode_flags = 0;
-    methodHandle m = decode_method(target(), receiver_limit, decode_flags);
-    if (m.is_null()) { THROW_MSG(vmSymbols::java_lang_InternalError(), "DMH failed to decode"); }
-    DEBUG_ONLY(int m_vmslots = m->size_of_parameters() - slots_pushed); // pos. of 1st arg.
-    assert(java_lang_invoke_BoundMethodHandle::vmslots(mh()) == m_vmslots, "type w/ m sig");
-    if (argnum == 0 && (decode_flags & _dmf_has_receiver) != 0) {
-      init_BoundMethodHandle_with_receiver(mh, m,
-                                           receiver_limit, decode_flags,
-                                           CHECK);
-      return;
-    }
-
-    // Even if it is not a bound receiver, we still might be able
-    // to bind another argument and still invoke the methodOop directly.
-    if (!(decode_flags & _dmf_does_dispatch)) {
-      direct_to_method = true;
-      java_lang_invoke_BoundMethodHandle::set_vmtarget(mh(), m());
-    }
-  }
-  if (!direct_to_method)
-    java_lang_invoke_BoundMethodHandle::set_vmtarget(mh(), target());
-
-  if (VerifyMethodHandles) {
-    verify_BoundMethodHandle(mh, target, argnum, direct_to_method, CHECK);
-  }
-
-  // Next question:  Is this a ref, int, or long bound value?
-  MethodHandleEntry* me = NULL;
-  if (ptype == T_OBJECT) {
-    if (direct_to_method)  me = MethodHandles::entry(_bound_ref_direct_mh);
-    else                   me = MethodHandles::entry(_bound_ref_mh);
-  } else if (slots_pushed == 2) {
-    if (direct_to_method)  me = MethodHandles::entry(_bound_long_direct_mh);
-    else                   me = MethodHandles::entry(_bound_long_mh);
-  } else if (slots_pushed == 1) {
-    if (direct_to_method)  me = MethodHandles::entry(_bound_int_direct_mh);
-    else                   me = MethodHandles::entry(_bound_int_mh);
-  } else {
-    assert(false, "");
-  }
-
-  // Done!
-  java_lang_invoke_MethodHandle::set_vmentry(mh(), me);
-}
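
The removed init_BoundMethodHandle picks its entry point purely from the slot
width of the bound value: references take the _bound_ref entries, two-slot
primitives (long/double) the _bound_long entries, and all one-slot primitives
the _bound_int entries.  A minimal standalone model of that dispatch; the enum
values and size table below are invented for the example (the VM has its own
BasicType and type2size definitions):

#include <cassert>
#include <cstdio>

enum BasicType { T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT, T_INT,
                 T_FLOAT, T_LONG, T_DOUBLE, T_OBJECT, T_TYPES };

// Stack slots per type: long and double take two, everything else one.
static const int type2size[T_TYPES] = { 1, 1, 1, 1, 1, 1, 2, 2, 1 };

enum Entry { BOUND_REF, BOUND_INT, BOUND_LONG };

Entry pick_bound_entry(BasicType ptype) {
  if (ptype == T_OBJECT)     return BOUND_REF;   // reference bound value
  if (type2size[ptype] == 2) return BOUND_LONG;  // long or double
  assert(type2size[ptype] == 1);                 // all remaining primitives
  return BOUND_INT;
}

int main() {
  std::printf("%d %d %d\n",
              pick_bound_entry(T_OBJECT),    // 0: ref entry
              pick_bound_entry(T_SHORT),     // 1: one-slot entry
              pick_bound_entry(T_DOUBLE));   // 2: two-slot entry
}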
-
-static void throw_InternalError_for_bad_conversion(int conversion, const char* err, TRAPS) {
-  char msg[200];
-  jio_snprintf(msg, sizeof(msg), "bad adapter (conversion=0x%08x): %s", conversion, err);
-  THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), msg);
-}
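
verify_AdapterMethodHandle below begins by unpacking the single jint
"conversion" word into conv_op, src/dest types, vminfo, and stack_move
subfields.  A hedged sketch of such a packed descriptor; the shift values and
field widths here (4-bit op and types, 8-bit vminfo, 12-bit signed stack move)
are assumptions chosen for the example, not the VM's actual CONV_* layout:

#include <cstdint>
#include <cstdio>

struct Conversion {
  unsigned op, src, dest, vminfo;
  int      stack_move;               // signed net change in stack slots
};

constexpr int OP_SHIFT = 28, SRC_SHIFT = 24, DEST_SHIFT = 20,
              VMINFO_SHIFT = 12, STACK_MOVE_MASK = 0xFFF;

uint32_t pack(const Conversion& c) {
  return (c.op << OP_SHIFT) | (c.src << SRC_SHIFT) | (c.dest << DEST_SHIFT) |
         (c.vminfo << VMINFO_SHIFT) |
         (uint32_t(c.stack_move) & STACK_MOVE_MASK);
}

Conversion unpack(uint32_t w) {
  Conversion c;
  c.op     = (w >> OP_SHIFT)     & 0xF;
  c.src    = (w >> SRC_SHIFT)    & 0xF;
  c.dest   = (w >> DEST_SHIFT)   & 0xF;
  c.vminfo = (w >> VMINFO_SHIFT) & 0xFF;
  int sm = int(w & STACK_MOVE_MASK);
  if (sm & 0x800) sm -= 0x1000;      // sign-extend the 12-bit field
  c.stack_move = sm;
  return c;
}

int main() {
  Conversion in = { 2, 4, 6, 0, +1 };    // e.g. a widening i2l-style adapter
  Conversion out = unpack(pack(in));     // round-trips losslessly
  std::printf("op=%u src=%u dest=%u vminfo=%u stack_move=%d\n",
              out.op, out.src, out.dest, out.vminfo, out.stack_move);
}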
-
-void MethodHandles::verify_AdapterMethodHandle(Handle mh, int argnum, TRAPS) {
-  ResourceMark rm;
-  jint conversion = java_lang_invoke_AdapterMethodHandle::conversion(mh());
-  int  argslot    = java_lang_invoke_AdapterMethodHandle::vmargslot(mh());
-
-  verify_vmargslot(mh, argnum, argslot, CHECK);
-  verify_vmslots(mh, CHECK);
-
-  jint conv_op    = adapter_conversion_op(conversion);
-  if (!conv_op_valid(conv_op)) {
-    throw_InternalError_for_bad_conversion(conversion, "unknown conversion op", THREAD);
-    return;
-  }
-  EntryKind ek = adapter_entry_kind(conv_op);
-
-  int stack_move = adapter_conversion_stack_move(conversion);
-  BasicType src  = adapter_conversion_src_type(conversion);
-  BasicType dest = adapter_conversion_dest_type(conversion);
-  int vminfo     = adapter_conversion_vminfo(conversion); // should be zero
-
-  Handle argument(THREAD,  java_lang_invoke_AdapterMethodHandle::argument(mh()));
-  Handle target(THREAD,    java_lang_invoke_AdapterMethodHandle::vmtarget(mh()));
-  Handle src_mtype(THREAD, java_lang_invoke_MethodHandle::type(mh()));
-  Handle dst_mtype(THREAD, java_lang_invoke_MethodHandle::type(target()));
-  Handle arg_mtype;
-
-  const char* err = NULL;
-
-  if (err == NULL) {
-    // Check that the correct argument is supplied, but only if it is required.
-    switch (ek) {
-    case _adapter_check_cast:     // target type of cast
-    case _adapter_ref_to_prim:    // wrapper type from which to unbox
-    case _adapter_spread_args:    // array type to spread from
-      if (!java_lang_Class::is_instance(argument())
-          || java_lang_Class::is_primitive(argument()))
-        { err = "adapter requires argument of type java.lang.Class"; break; }
-      if (ek == _adapter_spread_args) {
-        // Make sure it is a suitable collection type.  (Array, for now.)
-        Klass* ak = Klass::cast(java_lang_Class::as_klassOop(argument()));
-        if (!ak->oop_is_array())
-          { err = "spread adapter requires argument representing an array class"; break; }
-        BasicType et = arrayKlass::cast(ak->as_klassOop())->element_type();
-        if (et != dest && stack_move <= 0)
-          { err = "spread adapter requires array class argument of correct type"; break; }
-      }
-      break;
-    case _adapter_prim_to_ref:    // boxer MH to use
-    case _adapter_collect_args:   // method handle which collects the args
-    case _adapter_fold_args:      // method handle which collects the args
-      if (!java_lang_invoke_MethodHandle::is_instance(argument()))
-        { err = "MethodHandle adapter argument required"; break; }
-      arg_mtype = Handle(THREAD, java_lang_invoke_MethodHandle::type(argument()));
-      break;
-    default:
-      if (argument.not_null())
-        { err = "adapter has spurious argument"; break; }
-      break;
-    }
-  }
-
-  if (err == NULL) {
-    // Check that the src/dest types are supplied if needed.
-    // Also check relevant parameter or return types.
-    switch (ek) {
-    case _adapter_check_cast:
-      if (src != T_OBJECT || dest != T_OBJECT) {
-        err = "adapter requires object src/dest conversion subfields";
-      }
-      break;
-    case _adapter_prim_to_prim:
-      if (!is_java_primitive(src) || !is_java_primitive(dest) || src == dest) {
-        err = "adapter requires primitive src/dest conversion subfields"; break;
-      }
-      if ( (src == T_FLOAT || src == T_DOUBLE) && !(dest == T_FLOAT || dest == T_DOUBLE) ||
-          !(src == T_FLOAT || src == T_DOUBLE) &&  (dest == T_FLOAT || dest == T_DOUBLE)) {
-        err = "adapter cannot convert beween floating and fixed-point"; break;
-      }
-      break;
-    case _adapter_ref_to_prim:
-      if (src != T_OBJECT || !is_java_primitive(dest)
-          || argument() != Klass::cast(SystemDictionary::box_klass(dest))->java_mirror()) {
-        err = "adapter requires primitive dest conversion subfield"; break;
-      }
-      break;
-    case _adapter_prim_to_ref:
-      if (!is_java_primitive(src) || dest != T_OBJECT) {
-        err = "adapter requires primitive src conversion subfield"; break;
-      }
-      break;
-    case _adapter_swap_args:
-      {
-        if (!src || !dest) {
-          err = "adapter requires src/dest conversion subfields for swap"; break;
-        }
-        int src_size  = type2size[src];
-        if (src_size != type2size[dest]) {
-          err = "adapter requires equal sizes for src/dest"; break;
-        }
-        int src_slot   = argslot;
-        int dest_slot  = vminfo;
-        int src_arg    = argnum;
-        int dest_arg   = argument_slot_to_argnum(src_mtype(), dest_slot);
-        verify_vmargslot(mh, dest_arg, dest_slot, CHECK);
-        if (!(dest_slot >= src_slot + src_size) &&
-            !(src_slot >= dest_slot + src_size)) {
-          err = "source, destination slots must be distinct"; break;
-        } else if (!(src_slot > dest_slot)) {
-          err = "source of swap must be deeper in stack"; break;
-        }
-        err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype(), dest_arg),
-                                         java_lang_invoke_MethodType::ptype(dst_mtype(), src_arg),
-                                         dest_arg);
-        if (err == NULL)
-          err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype(), src_arg),
-                                           java_lang_invoke_MethodType::ptype(dst_mtype(), dest_arg),
-                                           src_arg);
-        break;
-      }
-    case _adapter_rot_args:
-      {
-        if (!src || !dest) {
-          err = "adapter requires src/dest conversion subfields for rotate"; break;
-        }
-        int src_slot   = argslot;
-        int limit_raw  = vminfo;
-        bool rot_down  = (src_slot < limit_raw);
-        int limit_bias = (rot_down ? MethodHandles::OP_ROT_ARGS_DOWN_LIMIT_BIAS : 0);
-        int limit_slot = limit_raw - limit_bias;
-        int src_arg    = argnum;
-        int limit_arg  = argument_slot_to_argnum(src_mtype(), limit_slot);
-        verify_vmargslot(mh, limit_arg, limit_slot, CHECK);
-        if (src_slot == limit_slot) {
-          err = "source, destination slots must be distinct"; break;
-        }
-        if (!rot_down) {  // rotate slots up == shift arguments left
-          // limit_slot is an inclusive lower limit
-          assert((src_slot > limit_slot) && (src_arg < limit_arg), "");
-          // rotate up: [limit_slot..src_slot-ss] --> [limit_slot+ss..src_slot]
-          // that is:   [src_arg+1..limit_arg] --> [src_arg..limit_arg-1]
-          for (int i = src_arg+1; i <= limit_arg && err == NULL; i++) {
-            err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype(), i),
-                                             java_lang_invoke_MethodType::ptype(dst_mtype(), i-1),
-                                             i);
-          }
-        } else { // rotate slots down == shift arguments right
-          // limit_slot is an exclusive upper limit
-          assert((src_slot < limit_slot - limit_bias) && (src_arg > limit_arg + limit_bias), "");
-          // rotate down: [src_slot+ss..limit_slot) --> [src_slot..limit_slot-ss)
-          // that is:     (limit_arg..src_arg-1] --> (dst_arg+1..src_arg]
-          for (int i = limit_arg+1; i <= src_arg-1 && err == NULL; i++) {
-            err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype(), i),
-                                             java_lang_invoke_MethodType::ptype(dst_mtype(), i+1),
-                                             i);
-          }
-        }
-        if (err == NULL) {
-          int dest_arg = (rot_down ? limit_arg+1 : limit_arg);
-          err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype(), src_arg),
-                                           java_lang_invoke_MethodType::ptype(dst_mtype(), dest_arg),
-                                           src_arg);
-        }
-      }
-      break;
-    case _adapter_spread_args:
-    case _adapter_collect_args:
-    case _adapter_fold_args:
-      {
-        bool is_spread = (ek == _adapter_spread_args);
-        bool is_fold   = (ek == _adapter_fold_args);
-        BasicType coll_type = is_spread ? src : dest;
-        BasicType elem_type = is_spread ? dest : src;
-        // coll_type is type of args in collected form (or T_VOID if none)
-        // elem_type is common type of args in spread form (or T_VOID if missing or heterogeneous)
-        if (coll_type == 0 || elem_type == 0) {
-          err = "adapter requires src/dest subfields for spread or collect"; break;
-        }
-        if (is_spread && coll_type != T_OBJECT) {
-          err = "spread adapter requires object type for argument bundle"; break;
-        }
-        Handle spread_mtype = (is_spread ? dst_mtype : src_mtype);
-        int spread_slot = argslot;
-        int spread_arg  = argnum;
-        int slots_pushed = stack_move / stack_move_unit();
-        int coll_slot_count = type2size[coll_type];
-        int spread_slot_count = (is_spread ? slots_pushed : -slots_pushed) + coll_slot_count;
-        if (is_fold)  spread_slot_count = argument_slot_count(arg_mtype());
-        if (!is_spread) {
-          int init_slots = argument_slot_count(src_mtype());
-          int coll_slots = argument_slot_count(arg_mtype());
-          if (spread_slot_count > init_slots ||
-              spread_slot_count != coll_slots) {
-            err = "collect adapter has inconsistent arg counts"; break;
-          }
-          int next_slots = argument_slot_count(dst_mtype());
-          int unchanged_slots_in  = (init_slots - spread_slot_count);
-          int unchanged_slots_out = (next_slots - coll_slot_count - (is_fold ? spread_slot_count : 0));
-          if (unchanged_slots_in != unchanged_slots_out) {
-            err = "collect adapter continuation has inconsistent arg counts"; break;
-          }
-        }
-      }
-      break;
-    default:
-      if (src != 0 || dest != 0) {
-        err = "adapter has spurious src/dest conversion subfields"; break;
-      }
-      break;
-    }
-  }
-
-  if (err == NULL) {
-    // Check the stack_move subfield.
-    // It must always report the net change in stack size, positive or negative.
-    int slots_pushed = stack_move / stack_move_unit();
-    switch (ek) {
-    case _adapter_prim_to_prim:
-    case _adapter_ref_to_prim:
-    case _adapter_prim_to_ref:
-      if (slots_pushed != type2size[dest] - type2size[src]) {
-        err = "wrong stack motion for primitive conversion";
-      }
-      break;
-    case _adapter_dup_args:
-      if (slots_pushed <= 0) {
-        err = "adapter requires conversion subfield slots_pushed > 0";
-      }
-      break;
-    case _adapter_drop_args:
-      if (slots_pushed >= 0) {
-        err = "adapter requires conversion subfield slots_pushed < 0";
-      }
-      break;
-    case _adapter_collect_args:
-    case _adapter_fold_args:
-      if (slots_pushed > 2) {
-        err = "adapter requires conversion subfield slots_pushed <= 2";
-      }
-      break;
-    case _adapter_spread_args:
-      if (slots_pushed < -1) {
-        err = "adapter requires conversion subfield slots_pushed >= -1";
-      }
-      break;
-    default:
-      if (stack_move != 0) {
-        err = "adapter has spurious stack_move conversion subfield";
-      }
-      break;
-    }
-    if (err == NULL && stack_move != slots_pushed * stack_move_unit()) {
-      err = "stack_move conversion subfield must be multiple of stack_move_unit";
-    }
-  }
-
-  if (err == NULL) {
-    // Make sure this adapter's stack pushing is accurately recorded.
-    int slots_pushed = stack_move / stack_move_unit();
-    int this_vmslots = java_lang_invoke_MethodHandle::vmslots(mh());
-    int target_vmslots = java_lang_invoke_MethodHandle::vmslots(target());
-    int target_pushes = decode_MethodHandle_stack_pushes(target());
-    if (slots_pushed != (target_vmslots - this_vmslots)) {
-      err = "stack_move inconsistent with previous and current MethodType vmslots";
-    } else {
-      int this_pushes = decode_MethodHandle_stack_pushes(mh());
-      if (slots_pushed + target_pushes != this_pushes) {
-        if (this_pushes == 0)
-          err = "adapter push count not initialized";
-        else
-          err = "adapter push count is wrong";
-      }
-    }
-
-    // While we're at it, check that the stack motion decoder works:
-    DEBUG_ONLY(int this_pushes = decode_MethodHandle_stack_pushes(mh()));
-    assert(this_pushes == slots_pushed + target_pushes, "AMH stack motion must be correct");
-  }
-
-  if (err == NULL && vminfo != 0) {
-    switch (ek) {
-    case _adapter_swap_args:
-    case _adapter_rot_args:
-    case _adapter_prim_to_ref:
-    case _adapter_collect_args:
-    case _adapter_fold_args:
-      break;                // OK
-    default:
-      err = "vminfo subfield is reserved to the JVM";
-    }
-  }
-
-  // Do additional ad hoc checks.
-  if (err == NULL) {
-    switch (ek) {
-    case _adapter_retype_only:
-      err = check_method_type_passthrough(src_mtype(), dst_mtype(), false);
-      break;
-
-    case _adapter_retype_raw:
-      err = check_method_type_passthrough(src_mtype(), dst_mtype(), true);
-      break;
-
-    case _adapter_check_cast:
-      {
-        // The actual value being checked must be a reference:
-        err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype(), argnum),
-                                         object_java_mirror(), argnum);
-        if (err != NULL)  break;
-
-        // The output of the cast must fit with the destination argument:
-        Handle cast_class = argument;
-        err = check_method_type_conversion(src_mtype(),
-                                           argnum, cast_class(),
-                                           dst_mtype());
-      }
-      break;
-
-      // %%% TO DO: continue in remaining cases to verify src/dst_mtype if VerifyMethodHandles
-    }
-  }
-
-  if (err != NULL) {
-    throw_InternalError_for_bad_conversion(conversion, err, THREAD);
-    return;
-  }
-
-}
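
The stack-motion rules just enforced reduce to three arithmetic checks:
stack_move must be a whole multiple of stack_move_unit, the quotient
(slots_pushed) must equal the vmslots difference between target and adapter,
and the adapter's total pushes must equal slots_pushed plus the target's.
A standalone model, assuming a unit of +1 (on a real VM the unit is signed to
match the stack's growth direction):

#include <cstdio>

constexpr int stack_move_unit = 1;   // assumption for this sketch

const char* check_stack_motion(int stack_move, int this_vmslots,
                               int target_vmslots, int target_pushes,
                               int this_pushes) {
  if (stack_move % stack_move_unit != 0)
    return "stack_move must be a multiple of stack_move_unit";
  int slots_pushed = stack_move / stack_move_unit;
  if (slots_pushed != target_vmslots - this_vmslots)
    return "stack_move inconsistent with MethodType vmslots";
  if (this_pushes != slots_pushed + target_pushes)
    return "adapter push count is wrong";
  return nullptr;                    // all three checks consistent
}

int main() {
  // An adapter pushing one slot onto a target that pushes none:
  const char* err = check_stack_motion(+1, 2, 3, 0, 1);
  std::printf("%s\n", err ? err : "ok");
}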
-
-void MethodHandles::init_AdapterMethodHandle(Handle mh, Handle target, int argnum, TRAPS) {
-  Handle argument   = java_lang_invoke_AdapterMethodHandle::argument(mh());
-  int    argslot    = java_lang_invoke_AdapterMethodHandle::vmargslot(mh());
-  jint   conversion = java_lang_invoke_AdapterMethodHandle::conversion(mh());
-  jint   conv_op    = adapter_conversion_op(conversion);
-
-  // adjust the adapter code to the internal EntryKind enumeration:
-  EntryKind ek_orig = adapter_entry_kind(conv_op);
-  EntryKind ek_opt  = ek_orig;  // may be optimized
-  EntryKind ek_try;             // temp
-
-  // Finalize the vmtarget field (Java initialized it to null).
-  if (!java_lang_invoke_MethodHandle::is_instance(target())) {
-    throw_InternalError_for_bad_conversion(conversion, "bad target", THREAD);
-    return;
-  }
-  java_lang_invoke_AdapterMethodHandle::set_vmtarget(mh(), target());
-
-  int stack_move = adapter_conversion_stack_move(conversion);
-  BasicType src  = adapter_conversion_src_type(conversion);
-  BasicType dest = adapter_conversion_dest_type(conversion);
-  int vminfo     = adapter_conversion_vminfo(conversion); // should be zero
-
-  int slots_pushed = stack_move / stack_move_unit();
-
-  if (VerifyMethodHandles) {
-    verify_AdapterMethodHandle(mh, argnum, CHECK);
-  }
-
-  const char* err = NULL;
-
-  if (!conv_op_supported(conv_op)) {
-    err = "adapter not yet implemented in the JVM";
-  }
-
-  // Now it's time to finish the case analysis and pick a MethodHandleEntry.
-  switch (ek_orig) {
-  case _adapter_retype_only:
-  case _adapter_retype_raw:
-  case _adapter_check_cast:
-  case _adapter_dup_args:
-  case _adapter_drop_args:
-    // these work fine via general case code
-    break;
-
-  case _adapter_prim_to_prim:
-    {
-      // Non-subword cases are {int,float,long,double} -> {int,float,long,double}.
-      // And, the {float,double} -> {int,long} cases must be handled by Java.
-      switch (type2size[src] *4+ type2size[dest]) {
-      case 1 *4+ 1:
-        assert(src == T_INT || is_subword_type(src), "source is not float");
-        // Subword-related cases are int -> {boolean,byte,char,short}.
-        ek_opt = _adapter_opt_i2i;
-        vminfo = adapter_prim_to_prim_subword_vminfo(dest);
-        break;
-      case 2 *4+ 1:
-        if (src == T_LONG && (dest == T_INT || is_subword_type(dest))) {
-          ek_opt = _adapter_opt_l2i;
-          vminfo = adapter_prim_to_prim_subword_vminfo(dest);
-        } else if (src == T_DOUBLE && dest == T_FLOAT) {
-          ek_opt = _adapter_opt_d2f;
-        } else {
-          goto throw_not_impl;        // runs user code, hence could block
-        }
-        break;
-      case 1 *4+ 2:
-        if ((src == T_INT || is_subword_type(src)) && dest == T_LONG) {
-          ek_opt = _adapter_opt_i2l;
-        } else if (src == T_FLOAT && dest == T_DOUBLE) {
-          ek_opt = _adapter_opt_f2d;
-        } else {
-          goto throw_not_impl;        // runs user code, hence could block
-        }
-        break;
-      default:
-        goto throw_not_impl;        // runs user code, hence could block
-        break;
-      }
-    }
-    break;
-
-  case _adapter_ref_to_prim:
-    {
-      switch (type2size[dest]) {
-      case 1:
-        ek_opt = _adapter_opt_unboxi;
-        vminfo = adapter_unbox_subword_vminfo(dest);
-        break;
-      case 2:
-        ek_opt = _adapter_opt_unboxl;
-        break;
-      default:
-        goto throw_not_impl;
-        break;
-      }
-    }
-    break;
-
-  case _adapter_prim_to_ref:
-    {
-      // vminfo will be the location to insert the return value
-      vminfo = argslot;
-      ek_opt = _adapter_opt_collect_ref;
-      ensure_vmlayout_field(target, CHECK);
-      // for MethodHandleWalk:
-      if (java_lang_invoke_AdapterMethodHandle::is_instance(argument()))
-        ensure_vmlayout_field(argument, CHECK);
-      if (!OptimizeMethodHandles)  break;
-      switch (type2size[src]) {
-      case 1:
-        ek_try = EntryKind(_adapter_opt_filter_S0_ref + argslot);
-        if (ek_try < _adapter_opt_collect_LAST &&
-            ek_adapter_opt_collect_slot(ek_try) == argslot) {
-          assert(ek_adapter_opt_collect_count(ek_try) == 1 &&
-                 ek_adapter_opt_collect_type(ek_try) == T_OBJECT, "");
-          ek_opt = ek_try;
-          break;
-        }
-        // else downgrade to variable slot:
-        ek_opt = _adapter_opt_collect_1_ref;
-        break;
-      case 2:
-        ek_try = EntryKind(_adapter_opt_collect_2_S0_ref + argslot);
-        if (ek_try < _adapter_opt_collect_LAST &&
-            ek_adapter_opt_collect_slot(ek_try) == argslot) {
-          assert(ek_adapter_opt_collect_count(ek_try) == 2 &&
-                 ek_adapter_opt_collect_type(ek_try) == T_OBJECT, "");
-          ek_opt = ek_try;
-          break;
-        }
-        // else downgrade to variable slot:
-        ek_opt = _adapter_opt_collect_2_ref;
-        break;
-      default:
-        goto throw_not_impl;
-        break;
-      }
-    }
-    break;
-
-  case _adapter_swap_args:
-  case _adapter_rot_args:
-    {
-      int swap_slots = type2size[src];
-      int src_slot   = argslot;
-      int dest_slot  = vminfo;
-      int rotate     = (ek_orig == _adapter_swap_args) ? 0 : (src_slot > dest_slot) ? 1 : -1;
-      switch (swap_slots) {
-      case 1:
-        ek_opt = (!rotate    ? _adapter_opt_swap_1 :
-                  rotate > 0 ? _adapter_opt_rot_1_up : _adapter_opt_rot_1_down);
-        break;
-      case 2:
-        ek_opt = (!rotate    ? _adapter_opt_swap_2 :
-                  rotate > 0 ? _adapter_opt_rot_2_up : _adapter_opt_rot_2_down);
-        break;
-      default:
-        goto throw_not_impl;
-        break;
-      }
-    }
-    break;
-
-  case _adapter_spread_args:
-    {
-      // vminfo will be the required length of the array
-      int array_size = (slots_pushed + 1) / (type2size[dest] == 2 ? 2 : 1);
-      vminfo = array_size;
-      // general case
-      switch (dest) {
-      case T_BOOLEAN : // fall through to T_BYTE:
-      case T_BYTE    : ek_opt = _adapter_opt_spread_byte;    break;
-      case T_CHAR    : ek_opt = _adapter_opt_spread_char;    break;
-      case T_SHORT   : ek_opt = _adapter_opt_spread_short;   break;
-      case T_INT     : ek_opt = _adapter_opt_spread_int;     break;
-      case T_LONG    : ek_opt = _adapter_opt_spread_long;    break;
-      case T_FLOAT   : ek_opt = _adapter_opt_spread_float;   break;
-      case T_DOUBLE  : ek_opt = _adapter_opt_spread_double;  break;
-      case T_OBJECT  : ek_opt = _adapter_opt_spread_ref;     break;
-      case T_VOID    : if (array_size != 0)  goto throw_not_impl;
-                       ek_opt = _adapter_opt_spread_ref;     break;
-      default        : goto throw_not_impl;
-      }
-      assert(array_size == 0 ||  // it doesn't matter what the spreader is
-             (ek_adapter_opt_spread_count(ek_opt) == -1 &&
-              (ek_adapter_opt_spread_type(ek_opt) == dest ||
-               (ek_adapter_opt_spread_type(ek_opt) == T_BYTE && dest == T_BOOLEAN))),
-             err_msg("dest=%d ek_opt=%d", dest, ek_opt));
-
-      if (array_size <= 0) {
-        // since the general case does not handle length 0, this case is required:
-        ek_opt = _adapter_opt_spread_0;
-        break;
-      }
-      if (dest == T_OBJECT) {
-        ek_try = EntryKind(_adapter_opt_spread_1_ref - 1 + array_size);
-        if (ek_try < _adapter_opt_spread_LAST &&
-            ek_adapter_opt_spread_count(ek_try) == array_size) {
-          assert(ek_adapter_opt_spread_type(ek_try) == dest, "");
-          ek_opt = ek_try;
-          break;
-        }
-      }
-      break;
-    }
-    break;
-
-  case _adapter_collect_args:
-    {
-      int elem_slots = argument_slot_count(java_lang_invoke_MethodHandle::type(argument()));
-      // vminfo will be the location to insert the return value
-      vminfo = argslot;
-      ensure_vmlayout_field(target, CHECK);
-      ensure_vmlayout_field(argument, CHECK);
-
-      // general case:
-      switch (dest) {
-      default       : if (!is_subword_type(dest))  goto throw_not_impl;
-                    // else fall through:
-      case T_INT    : ek_opt = _adapter_opt_collect_int;     break;
-      case T_LONG   : ek_opt = _adapter_opt_collect_long;    break;
-      case T_FLOAT  : ek_opt = _adapter_opt_collect_float;   break;
-      case T_DOUBLE : ek_opt = _adapter_opt_collect_double;  break;
-      case T_OBJECT : ek_opt = _adapter_opt_collect_ref;     break;
-      case T_VOID   : ek_opt = _adapter_opt_collect_void;    break;
-      }
-      assert(ek_adapter_opt_collect_slot(ek_opt) == -1 &&
-             ek_adapter_opt_collect_count(ek_opt) == -1 &&
-             (ek_adapter_opt_collect_type(ek_opt) == dest ||
-              ek_adapter_opt_collect_type(ek_opt) == T_INT && is_subword_type(dest)),
-             "");
-
-      if (dest == T_OBJECT && elem_slots == 1 && OptimizeMethodHandles) {
-        // filter operation on a ref
-        ek_try = EntryKind(_adapter_opt_filter_S0_ref + argslot);
-        if (ek_try < _adapter_opt_collect_LAST &&
-            ek_adapter_opt_collect_slot(ek_try) == argslot) {
-          assert(ek_adapter_opt_collect_count(ek_try) == elem_slots &&
-                 ek_adapter_opt_collect_type(ek_try) == dest, "");
-          ek_opt = ek_try;
-          break;
-        }
-        ek_opt = _adapter_opt_collect_1_ref;
-        break;
-      }
-
-      if (dest == T_OBJECT && elem_slots == 2 && OptimizeMethodHandles) {
-        // filter of two arguments
-        ek_try = EntryKind(_adapter_opt_collect_2_S0_ref + argslot);
-        if (ek_try < _adapter_opt_collect_LAST &&
-            ek_adapter_opt_collect_slot(ek_try) == argslot) {
-          assert(ek_adapter_opt_collect_count(ek_try) == elem_slots &&
-                 ek_adapter_opt_collect_type(ek_try) == dest, "");
-          ek_opt = ek_try;
-          break;
-        }
-        ek_opt = _adapter_opt_collect_2_ref;
-        break;
-      }
-
-      if (dest == T_OBJECT && OptimizeMethodHandles) {
-        // try to use a fixed length adapter
-        ek_try = EntryKind(_adapter_opt_collect_0_ref + elem_slots);
-        if (ek_try < _adapter_opt_collect_LAST &&
-            ek_adapter_opt_collect_count(ek_try) == elem_slots) {
-          assert(ek_adapter_opt_collect_slot(ek_try) == -1 &&
-                 ek_adapter_opt_collect_type(ek_try) == dest, "");
-          ek_opt = ek_try;
-          break;
-        }
-      }
-
-      break;
-    }
-
-  case _adapter_fold_args:
-    {
-      int elem_slots = argument_slot_count(java_lang_invoke_MethodHandle::type(argument()));
-      // vminfo will be the location to insert the return value
-      vminfo = argslot + elem_slots;
-      ensure_vmlayout_field(target, CHECK);
-      ensure_vmlayout_field(argument, CHECK);
-
-      switch (dest) {
-      default       : if (!is_subword_type(dest))  goto throw_not_impl;
-                    // else fall through:
-      case T_INT    : ek_opt = _adapter_opt_fold_int;     break;
-      case T_LONG   : ek_opt = _adapter_opt_fold_long;    break;
-      case T_FLOAT  : ek_opt = _adapter_opt_fold_float;   break;
-      case T_DOUBLE : ek_opt = _adapter_opt_fold_double;  break;
-      case T_OBJECT : ek_opt = _adapter_opt_fold_ref;     break;
-      case T_VOID   : ek_opt = _adapter_opt_fold_void;    break;
-      }
-      assert(ek_adapter_opt_collect_slot(ek_opt) == -1 &&
-             ek_adapter_opt_collect_count(ek_opt) == -1 &&
-             (ek_adapter_opt_collect_type(ek_opt) == dest ||
-              ek_adapter_opt_collect_type(ek_opt) == T_INT && is_subword_type(dest)),
-             "");
-
-      if (dest == T_OBJECT && elem_slots == 0 && OptimizeMethodHandles) {
-        // if there are no args, just pretend it's a collect
-        ek_opt = _adapter_opt_collect_0_ref;
-        break;
-      }
-
-      if (dest == T_OBJECT && OptimizeMethodHandles) {
-        // try to use a fixed length adapter
-        ek_try = EntryKind(_adapter_opt_fold_1_ref - 1 + elem_slots);
-        if (ek_try < _adapter_opt_fold_LAST &&
-            ek_adapter_opt_collect_count(ek_try) == elem_slots) {
-          assert(ek_adapter_opt_collect_slot(ek_try) == -1 &&
-                 ek_adapter_opt_collect_type(ek_try) == dest, "");
-          ek_opt = ek_try;
-          break;
-        }
-      }
-
-      break;
-    }
-
-  default:
-    // should have failed much earlier; must be a missing case here
-    assert(false, "incomplete switch");
-    // and fall through:
-
-  throw_not_impl:
-    if (err == NULL)
-      err = "unknown adapter type";
-    break;
-  }
-
-  if (err == NULL && (vminfo & CONV_VMINFO_MASK) != vminfo) {
-    // should not happen, since vminfo is used to encode arg/slot indexes < 255
-    err = "vminfo overflow";
-  }
-
-  if (err == NULL && !have_entry(ek_opt)) {
-    err = "adapter stub for this kind of method handle is missing";
-  }
-
-  if (err == NULL && ek_opt == ek_orig) {
-    switch (ek_opt) {
-    case _adapter_prim_to_prim:
-    case _adapter_ref_to_prim:
-    case _adapter_prim_to_ref:
-    case _adapter_swap_args:
-    case _adapter_rot_args:
-    case _adapter_collect_args:
-    case _adapter_fold_args:
-    case _adapter_spread_args:
-      // should be handled completely by optimized cases; see above
-      err = "init_AdapterMethodHandle should not issue this";
-      break;
-    }
-  }
-
-  if (err != NULL) {
-    throw_InternalError_for_bad_conversion(conversion, err_msg("%s: conv_op %d ek_opt %d", err, conv_op, ek_opt), THREAD);
-    return;
-  }
-
-  // Rebuild the conversion value; maybe parts of it were changed.
-  jint new_conversion = adapter_conversion(conv_op, src, dest, stack_move, vminfo);
-
-  // Finalize the conversion field.  (Note that it is final to Java code.)
-  java_lang_invoke_AdapterMethodHandle::set_conversion(mh(), new_conversion);
-
-  if (java_lang_invoke_CountingMethodHandle::is_instance(mh())) {
-    assert(ek_orig == _adapter_retype_only, "only one handled");
-    ek_opt = _adapter_opt_profiling;
-  }
-
-  // Done!
-  java_lang_invoke_MethodHandle::set_vmentry(mh(), entry(ek_opt));
-
-  // There should be enough memory barriers on exit from native methods
-  // to ensure that the MH is fully initialized to all threads before
-  // Java code can publish it in global data structures.
-}
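
One trick worth noting in init_AdapterMethodHandle: the prim-to-prim case
folds the two one-or-two-slot operand widths into a single switch key with
type2size[src] *4+ type2size[dest], so one switch covers every width
combination.  An illustrative standalone version; the type letters and the
size rule are assumptions for the example:

#include <cstdio>

// Long ('J') and double ('D') occupy two stack slots; the rest one.
int type_size(char t) { return (t == 'J' || t == 'D') ? 2 : 1; }

const char* classify(char src, char dest) {
  switch (type_size(src) * 4 + type_size(dest)) {
  case 1*4 + 1: return "1-slot -> 1-slot (e.g. int -> short)";
  case 2*4 + 1: return "2-slot -> 1-slot (e.g. long -> int)";
  case 1*4 + 2: return "1-slot -> 2-slot (e.g. int -> long)";
  default:      return "2-slot -> 2-slot (e.g. double -> long)";
  }
}

int main() {
  std::printf("%s\n%s\n", classify('I', 'J'), classify('J', 'I'));
}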
-
-void MethodHandles::ensure_vmlayout_field(Handle target, TRAPS) {
-  Handle mtype(THREAD, java_lang_invoke_MethodHandle::type(target()));
-  Handle mtform(THREAD, java_lang_invoke_MethodType::form(mtype()));
-  if (mtform.is_null()) { THROW(vmSymbols::java_lang_InternalError()); }
-  if (java_lang_invoke_MethodTypeForm::vmlayout_offset_in_bytes() > 0) {
-    if (java_lang_invoke_MethodTypeForm::vmlayout(mtform()) == NULL) {
-      // fill it in
-      Handle erased_mtype(THREAD, java_lang_invoke_MethodTypeForm::erasedType(mtform()));
-      TempNewSymbol erased_signature
-        = java_lang_invoke_MethodType::as_signature(erased_mtype(), /*intern:*/true, CHECK);
-      methodOop cookie
-        = SystemDictionary::find_method_handle_invoke(vmSymbols::invokeExact_name(),
-                                                      erased_signature,
-                                                      SystemDictionaryHandles::Object_klass(),
-                                                      THREAD);
-      java_lang_invoke_MethodTypeForm::init_vmlayout(mtform(), cookie);
-    }
-  }
-  assert(java_lang_invoke_MethodTypeForm::vmslots(mtform()) == argument_slot_count(mtype()), "must agree");
-}
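
ensure_vmlayout_field is a lazy fill-in: the vmlayout cookie is computed on
first use and then cached in the MethodTypeForm.  A single-threaded sketch of
the pattern with invented types (the real code must additionally worry about
safepoints and safe publication):

#include <cstdio>
#include <string>

struct Form {
  std::string  signature;            // input
  std::string* vmlayout = nullptr;   // cache, starts out null
};

const std::string& ensure_vmlayout(Form& f) {
  if (f.vmlayout == nullptr) {
    // fill it in (stand-in for the expensive signature lookup)
    f.vmlayout = new std::string("cookie:" + f.signature);
  }
  return *f.vmlayout;
}

int main() {
  Form f;
  f.signature = "(II)I";
  std::printf("%s\n", ensure_vmlayout(f).c_str());   // computed here
  std::printf("%s\n", ensure_vmlayout(f).c_str());   // served from the cache
}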
-
-#ifdef ASSERT
-
-extern "C"
-void print_method_handle(oop mh);
-
-static void stress_method_handle_walk_impl(Handle mh, TRAPS) {
-  if (StressMethodHandleWalk) {
-    // Exercise the MethodHandleWalk code in various ways and validate
-    // the resulting method oop.  Some of these produce output so they
-    // are guarded under Verbose.
-    ResourceMark rm;
-    HandleMark hm;
-    if (Verbose) {
-      print_method_handle(mh());
-    }
-    TempNewSymbol name = SymbolTable::new_symbol("invoke", CHECK);
-    Handle mt = java_lang_invoke_MethodHandle::type(mh());
-    TempNewSymbol signature = java_lang_invoke_MethodType::as_signature(mt(), true, CHECK);
-    MethodHandleCompiler mhc(mh, name, signature, 10000, false, CHECK);
-    methodHandle m = mhc.compile(CHECK);
-    if (Verbose) {
-      m->print_codes();
-    }
-    InterpreterOopMap mask;
-    OopMapCache::compute_one_oop_map(m, m->code_size() - 1, &mask);
-    // compile to object code if -Xcomp or WizardMode
-    if ((WizardMode ||
-         CompilationPolicy::must_be_compiled(m))
-        && !instanceKlass::cast(m->method_holder())->is_not_initialized()
-        && CompilationPolicy::can_be_compiled(m)) {
-      // Force compilation
-      CompileBroker::compile_method(m, InvocationEntryBci,
-                                    CompilationPolicy::policy()->initial_compile_level(),
-                                    methodHandle(), 0, "StressMethodHandleWalk",
-                                    CHECK);
-    }
-  }
-}
-
-static void stress_method_handle_walk(Handle mh, TRAPS) {
-  stress_method_handle_walk_impl(mh, THREAD);
-  if (HAS_PENDING_EXCEPTION) {
-    oop ex = PENDING_EXCEPTION;
-    CLEAR_PENDING_EXCEPTION;
-    tty->print("StressMethodHandleWalk: ");
-    java_lang_Throwable::print(ex, tty);
-    tty->cr();
-  }
-}
-#else
-
-static void stress_method_handle_walk(Handle mh, TRAPS) {}
-
-#endif
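
The block above pairs debug-only stress code with an empty product-build stub,
so call sites never need conditional compilation.  A tiny compilable sketch of
the pattern (ASSERT stands in for the VM's debug-build macro):

#include <cstdio>

#ifdef ASSERT
static void stress_walk(int handle) { std::printf("walking %d\n", handle); }
#else
static void stress_walk(int) {}      // no-op in product builds
#endif

int main() { stress_walk(7); }       // compiles identically either way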
-
 //
-// Here are the native methods on sun.invoke.MethodHandleImpl.
+// Here are the native methods in java.lang.invoke.MethodHandleNatives.
 // They are the private interface between this JVM and the HotSpot-specific
 // Java code that implements JSR 292 method handles.
 //
 // Note:  We use a JVM_ENTRY macro to define each of these, for this is the way
 // that intrinsic (non-JNI) native methods are defined in HotSpot.
 //
-
-// direct method handles for invokestatic or invokespecial
-// void init(DirectMethodHandle self, MemberName ref, boolean doDispatch, Class<?> caller);
-JVM_ENTRY(void, MHN_init_DMH(JNIEnv *env, jobject igcls, jobject mh_jh,
-                             jobject target_jh, jboolean do_dispatch, jobject caller_jh)) {
-  ResourceMark rm;              // for error messages
-
-  // This is the guy we are initializing:
-  if (mh_jh == NULL) { THROW_MSG(vmSymbols::java_lang_InternalError(), "self is null"); }
-  Handle mh(THREAD, JNIHandles::resolve_non_null(mh_jh));
-
-  // Early returns out of this method leave the DMH in an unfinished state.
-  assert(java_lang_invoke_MethodHandle::vmentry(mh()) == NULL, "must be safely null");
-
-  // which method are we really talking about?
-  if (target_jh == NULL) { THROW_MSG(vmSymbols::java_lang_InternalError(), "target is null"); }
-  Handle target(THREAD, JNIHandles::resolve_non_null(target_jh));
-  if (java_lang_invoke_MemberName::is_instance(target()) &&
-      java_lang_invoke_MemberName::vmindex(target()) == VM_INDEX_UNINITIALIZED) {
-    MethodHandles::resolve_MemberName(target, CHECK);
-  }
-
-  KlassHandle receiver_limit; int decode_flags = 0;
-  methodHandle m = MethodHandles::decode_method(target(), receiver_limit, decode_flags);
-  if (m.is_null()) { THROW_MSG(vmSymbols::java_lang_InternalError(), "no such method"); }
-
-  // The trusted Java code that calls this method should already have performed
-  // access checks on behalf of the given caller.  But, we can verify this.
-  if (VerifyMethodHandles && caller_jh != NULL) {
-    KlassHandle caller(THREAD, java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(caller_jh)));
-    // If this were a bytecode, the first access check would be against
-    // the "reference class" mentioned in the CONSTANT_Methodref.
-    // We don't know at this point which class that was, and if we
-    // check against m.method_holder we might get the wrong answer.
-    // So we just make sure to handle this check when the resolution
-    // happens, when we call resolve_MemberName.
-    //
-    // (A public class can inherit public members from private supers,
-    // and it would be wrong to check access against the private super
-    // if the original symbolic reference was against the public class.)
-    //
-    // If there were a bytecode, the next step would be to look up the method
-    // in the reference class, then check the method's access bits.
-    // Emulate LinkResolver::check_method_accessability.
-    klassOop resolved_klass = m->method_holder();
-    if (!Reflection::verify_field_access(caller->as_klassOop(),
-                                         resolved_klass, resolved_klass,
-                                         m->access_flags(),
-                                         true)) {
-      // %%% following cutout belongs in Reflection::verify_field_access?
-      bool same_pm = Reflection::is_same_package_member(caller->as_klassOop(),
-                                                        resolved_klass, THREAD);
-      if (!same_pm) {
-        THROW_MSG(vmSymbols::java_lang_InternalError(), m->name_and_sig_as_C_string());
-      }
-    }
-  }
-
-  MethodHandles::init_DirectMethodHandle(mh, m, (do_dispatch != JNI_FALSE), CHECK);
-  stress_method_handle_walk(mh, CHECK);
-}
-JVM_END
-
-// bound method handles
-JVM_ENTRY(void, MHN_init_BMH(JNIEnv *env, jobject igcls, jobject mh_jh,
-                             jobject target_jh, int argnum)) {
-  ResourceMark rm;              // for error messages
-
-  // This is the guy we are initializing:
-  if (mh_jh == NULL) { THROW_MSG(vmSymbols::java_lang_InternalError(), "self is null"); }
-  Handle mh(THREAD, JNIHandles::resolve_non_null(mh_jh));
-
-  // Early returns out of this method leave the BMH in an unfinished state.
-  assert(java_lang_invoke_MethodHandle::vmentry(mh()) == NULL, "must be safely null");
-
-  if (target_jh == NULL) { THROW_MSG(vmSymbols::java_lang_InternalError(), "target is null"); }
-  Handle target(THREAD, JNIHandles::resolve_non_null(target_jh));
-
-  if (!java_lang_invoke_MethodHandle::is_instance(target())) {
-    // Target object is a reflective method.  (%%% Do we need this alternate path?)
-    Untested("init_BMH of non-MH");
-    if (argnum != 0) { THROW(vmSymbols::java_lang_InternalError()); }
-    KlassHandle receiver_limit; int decode_flags = 0;
-    methodHandle m = MethodHandles::decode_method(target(), receiver_limit, decode_flags);
-    MethodHandles::init_BoundMethodHandle_with_receiver(mh, m,
-                                                       receiver_limit,
-                                                       decode_flags,
-                                                       CHECK);
-  } else {
-    // Build a BMH on top of a DMH or another BMH:
-    MethodHandles::init_BoundMethodHandle(mh, target, argnum, CHECK);
-  }
-
-  if (StressMethodHandleWalk) {
-    if (mh->klass() == SystemDictionary::BoundMethodHandle_klass())
-      stress_method_handle_walk(mh, CHECK);
-    // else don't, since the subclass has not yet initialized its own fields
-  }
-}
-JVM_END
-
-// adapter method handles
-JVM_ENTRY(void, MHN_init_AMH(JNIEnv *env, jobject igcls, jobject mh_jh,
-                             jobject target_jh, int argnum)) {
-  // This is the guy we are initializing:
-  if (mh_jh == NULL) { THROW_MSG(vmSymbols::java_lang_InternalError(), "self is null"); }
-  if (target_jh == NULL) { THROW_MSG(vmSymbols::java_lang_InternalError(), "target is null"); }
-  Handle mh(THREAD, JNIHandles::resolve_non_null(mh_jh));
-  Handle target(THREAD, JNIHandles::resolve_non_null(target_jh));
-
-  // Early returns out of this method leave the AMH in an unfinished state.
-  assert(java_lang_invoke_MethodHandle::vmentry(mh()) == NULL, "must be safely null");
-
-  MethodHandles::init_AdapterMethodHandle(mh, target, argnum, CHECK);
-  stress_method_handle_walk(mh, CHECK);
-}
-JVM_END
-
-// method type forms
-JVM_ENTRY(void, MHN_init_MT(JNIEnv *env, jobject igcls, jobject erased_jh)) {
-  if (erased_jh == NULL)  return;
-  if (TraceMethodHandles) {
-    tty->print("creating MethodType form ");
-    if (WizardMode || Verbose) {   // Warning: this calls Java code on the MH!
-      // call Object.toString()
-      Symbol* name = vmSymbols::toString_name();
-      Symbol* sig = vmSymbols::void_string_signature();
-      JavaCallArguments args(Handle(THREAD, JNIHandles::resolve_non_null(erased_jh)));
-      JavaValue result(T_OBJECT);
-      JavaCalls::call_virtual(&result, SystemDictionary::Object_klass(), name, sig,
-                              &args, CHECK);
-      Handle str(THREAD, (oop)result.get_jobject());
-      java_lang_String::print(str, tty);
-    }
-    tty->cr();
-  }
-}
-JVM_END
-
-// debugging and reflection
-JVM_ENTRY(jobject, MHN_getTarget(JNIEnv *env, jobject igcls, jobject mh_jh, jint format)) {
-  Handle mh(THREAD, JNIHandles::resolve(mh_jh));
-  if (!java_lang_invoke_MethodHandle::is_instance(mh())) {
-    THROW_NULL(vmSymbols::java_lang_IllegalArgumentException());
-  }
-  oop target = MethodHandles::encode_target(mh, format, CHECK_NULL);
-  return JNIHandles::make_local(THREAD, target);
-}
-JVM_END
-
 JVM_ENTRY(jint, MHN_getConstant(JNIEnv *env, jobject igcls, jint which)) {
   switch (which) {
-  case MethodHandles::GC_JVM_PUSH_LIMIT:
-    guarantee(MethodHandlePushLimit >= 2 && MethodHandlePushLimit <= 0xFF,
-              "MethodHandlePushLimit parameter must be in valid range");
-    return MethodHandlePushLimit;
-  case MethodHandles::GC_JVM_STACK_MOVE_UNIT:
-    // return number of words per slot, signed according to stack direction
-    return MethodHandles::stack_move_unit();
-  case MethodHandles::GC_CONV_OP_IMPLEMENTED_MASK:
-    return MethodHandles::adapter_conversion_ops_supported_mask();
   case MethodHandles::GC_COUNT_GWT:
 #ifdef COMPILER2
     return true;
@@ -2872,64 +942,54 @@
 JVM_END
 
 #ifndef PRODUCT
-#define EACH_NAMED_CON(template) \
-  /* hold back this one until JDK stabilizes */ \
-  /* template(MethodHandles,GC_JVM_PUSH_LIMIT) */  \
-  /* hold back this one until JDK stabilizes */ \
-  /* template(MethodHandles,GC_JVM_STACK_MOVE_UNIT) */ \
-  /* hold back this one until JDK stabilizes */ \
-  /* template(MethodHandles,GC_OP_ROT_ARGS_DOWN_LIMIT_BIAS) */ \
-    template(MethodHandles,ETF_HANDLE_OR_METHOD_NAME) \
-    template(MethodHandles,ETF_DIRECT_HANDLE) \
-    template(MethodHandles,ETF_METHOD_NAME) \
-    template(MethodHandles,ETF_REFLECT_METHOD) \
+#define EACH_NAMED_CON(template, requirement) \
+    template(MethodHandles,GC_COUNT_GWT) \
     template(java_lang_invoke_MemberName,MN_IS_METHOD) \
     template(java_lang_invoke_MemberName,MN_IS_CONSTRUCTOR) \
     template(java_lang_invoke_MemberName,MN_IS_FIELD) \
     template(java_lang_invoke_MemberName,MN_IS_TYPE) \
     template(java_lang_invoke_MemberName,MN_SEARCH_SUPERCLASSES) \
     template(java_lang_invoke_MemberName,MN_SEARCH_INTERFACES) \
-    template(java_lang_invoke_MemberName,VM_INDEX_UNINITIALIZED) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_RETYPE_ONLY) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_RETYPE_RAW) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_CHECK_CAST) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_PRIM_TO_PRIM) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_REF_TO_PRIM) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_PRIM_TO_REF) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_SWAP_ARGS) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_ROT_ARGS) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_DUP_ARGS) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_DROP_ARGS) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_COLLECT_ARGS) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_SPREAD_ARGS) \
-      /* hold back this one until JDK stabilizes */ \
-      /*template(java_lang_invoke_AdapterMethodHandle,CONV_OP_LIMIT)*/  \
-    template(java_lang_invoke_AdapterMethodHandle,CONV_OP_MASK) \
-    template(java_lang_invoke_AdapterMethodHandle,CONV_VMINFO_MASK) \
-    template(java_lang_invoke_AdapterMethodHandle,CONV_VMINFO_SHIFT) \
-    template(java_lang_invoke_AdapterMethodHandle,CONV_OP_SHIFT) \
-    template(java_lang_invoke_AdapterMethodHandle,CONV_DEST_TYPE_SHIFT) \
-    template(java_lang_invoke_AdapterMethodHandle,CONV_SRC_TYPE_SHIFT) \
-    template(java_lang_invoke_AdapterMethodHandle,CONV_STACK_MOVE_SHIFT) \
-    template(java_lang_invoke_AdapterMethodHandle,CONV_STACK_MOVE_MASK) \
+    template(java_lang_invoke_MemberName,MN_REFERENCE_KIND_SHIFT) \
+    template(java_lang_invoke_MemberName,MN_REFERENCE_KIND_MASK) \
+    template(MethodHandles,GC_LAMBDA_SUPPORT) \
     /*end*/
 
+#define IGNORE_REQ(req_expr) /* req_expr */
 #define ONE_PLUS(scope,value) 1+
-static const int con_value_count = EACH_NAMED_CON(ONE_PLUS) 0;
+static const int con_value_count = EACH_NAMED_CON(ONE_PLUS, IGNORE_REQ) 0;
 #define VALUE_COMMA(scope,value) scope::value,
-static const int con_values[con_value_count+1] = { EACH_NAMED_CON(VALUE_COMMA) 0 };
+static const int con_values[con_value_count+1] = { EACH_NAMED_CON(VALUE_COMMA, IGNORE_REQ) 0 };
 #define STRING_NULL(scope,value) #value "\0"
-static const char con_names[] = { EACH_NAMED_CON(STRING_NULL) };
+static const char con_names[] = { EACH_NAMED_CON(STRING_NULL, IGNORE_REQ) };
+
+static bool advertise_con_value(int which) {
+  if (which < 0)  return false;
+  bool ok = true;
+  int count = 0;
+#define INC_COUNT(scope,value) \
+  ++count;
+#define CHECK_REQ(req_expr) \
+  if (which < count)  return ok; \
+  ok = (req_expr);
+  EACH_NAMED_CON(INC_COUNT, CHECK_REQ);
+#undef INC_COUNT
+#undef CHECK_REQ
+  assert(count == con_value_count, "");
+  if (which < count)  return ok;
+  return false;
+}
 
 #undef ONE_PLUS
 #undef VALUE_COMMA
 #undef STRING_NULL
 #undef EACH_NAMED_CON
-#endif
+#endif // PRODUCT
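
The EACH_NAMED_CON machinery above is the classic X-macro pattern: one master
list is expanded several ways, so the constant count, value table, name table,
and the new requirement predicate can never drift out of sync.  A
self-contained sketch with invented constants:

#include <cstdio>

namespace Demo { enum { ALPHA = 10, BETA = 20, GAMMA = 30 }; }

#define EACH_CON(template_) \
    template_(Demo, ALPHA)  \
    template_(Demo, BETA)   \
    template_(Demo, GAMMA)  \
    /*end*/

#define ONE_PLUS(scope, value)    1 +
static const int  con_count = EACH_CON(ONE_PLUS) 0;
#define VALUE_COMMA(scope, value) scope::value,
static const int  con_values[con_count + 1] = { EACH_CON(VALUE_COMMA) 0 };
#define STRING_NULL(scope, value) #value "\0"
static const char con_names[]    = { EACH_CON(STRING_NULL) };

int main() {
  const char* name = con_names;
  for (int i = 0; i < con_count; i++) {
    std::printf("%s = %d\n", name, con_values[i]);
    while (*name) name++;            // advance past this name...
    name++;                          // ...and its '\0' separator
  }
}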
 
 JVM_ENTRY(jint, MHN_getNamedCon(JNIEnv *env, jobject igcls, jint which, jobjectArray box_jh)) {
 #ifndef PRODUCT
-  if (which >= 0 && which < con_value_count) {
+  if (advertise_con_value(which)) {
+    assert(which >= 0 && which < con_value_count, "");
     int con = con_values[which];
     objArrayHandle box(THREAD, (objArrayOop) JNIHandles::resolve(box_jh));
     if (box.not_null() && box->klass() == Universe::objectArrayKlassObj() && box->length() > 0) {
@@ -2965,13 +1025,14 @@
 JVM_END
 
 // void resolve(MemberName self, Class<?> caller)
-JVM_ENTRY(void, MHN_resolve_Mem(JNIEnv *env, jobject igcls, jobject mname_jh, jclass caller_jh)) {
-  if (mname_jh == NULL) { THROW_MSG(vmSymbols::java_lang_InternalError(), "mname is null"); }
+JVM_ENTRY(jobject, MHN_resolve_Mem(JNIEnv *env, jobject igcls, jobject mname_jh, jclass caller_jh)) {
+  if (mname_jh == NULL) { THROW_MSG_NULL(vmSymbols::java_lang_InternalError(), "mname is null"); }
   Handle mname(THREAD, JNIHandles::resolve_non_null(mname_jh));
 
   // The trusted Java code that calls this method should already have performed
   // access checks on behalf of the given caller.  But, we can verify this.
-  if (VerifyMethodHandles && caller_jh != NULL) {
+  if (VerifyMethodHandles && caller_jh != NULL &&
+      java_lang_invoke_MemberName::clazz(mname()) != NULL) {
     klassOop reference_klass = java_lang_Class::as_klassOop(java_lang_invoke_MemberName::clazz(mname()));
     if (reference_klass != NULL) {
       // Emulate LinkResolver::check_klass_accessability.
@@ -2979,15 +1040,97 @@
       if (!Reflection::verify_class_access(caller,
                                            reference_klass,
                                            true)) {
-        THROW_MSG(vmSymbols::java_lang_InternalError(), Klass::cast(reference_klass)->external_name());
+        THROW_MSG_NULL(vmSymbols::java_lang_InternalError(), Klass::cast(reference_klass)->external_name());
       }
     }
   }
 
-  MethodHandles::resolve_MemberName(mname, CHECK);
+  Handle resolved = MethodHandles::resolve_MemberName(mname, CHECK_NULL);
+  if (resolved.is_null()) {
+    int flags = java_lang_invoke_MemberName::flags(mname());
+    int ref_kind = (flags >> REFERENCE_KIND_SHIFT) & REFERENCE_KIND_MASK;
+    if (!MethodHandles::ref_kind_is_valid(ref_kind)) {
+      THROW_MSG_NULL(vmSymbols::java_lang_InternalError(), "obsolete MemberName format");
+    }
+    if ((flags & ALL_KINDS) == IS_FIELD) {
+      THROW_MSG_NULL(vmSymbols::java_lang_NoSuchFieldError(), "field resolution failed");
+    } else if ((flags & ALL_KINDS) == IS_METHOD ||
+               (flags & ALL_KINDS) == IS_CONSTRUCTOR) {
+      THROW_MSG_NULL(vmSymbols::java_lang_NoSuchMethodError(), "method resolution failed");
+    } else {
+      THROW_MSG_NULL(vmSymbols::java_lang_LinkageError(), "resolution failed");
+    }
+  }
+
+  return JNIHandles::make_local(THREAD, resolved());
 }
 JVM_END
 
+static jlong find_member_field_offset(oop mname, bool must_be_static, TRAPS) {
+  if (mname == NULL ||
+      java_lang_invoke_MemberName::vmtarget(mname) == NULL) {
+    THROW_MSG_0(vmSymbols::java_lang_InternalError(), "mname not resolved");
+  } else {
+    int flags = java_lang_invoke_MemberName::flags(mname);
+    if ((flags & IS_FIELD) != 0 &&
+        (must_be_static
+         ? (flags & JVM_ACC_STATIC) != 0
+         : (flags & JVM_ACC_STATIC) == 0)) {
+      int vmindex = java_lang_invoke_MemberName::vmindex(mname);
+      return (jlong) vmindex;
+    }
+  }
+  const char* msg = (must_be_static ? "static field required" : "non-static field required");
+  THROW_MSG_0(vmSymbols::java_lang_InternalError(), msg);
+  return 0;
+}
+
+JVM_ENTRY(jlong, MHN_objectFieldOffset(JNIEnv *env, jobject igcls, jobject mname_jh)) {
+  return find_member_field_offset(JNIHandles::resolve(mname_jh), false, THREAD);
+}
+JVM_END
+
+JVM_ENTRY(jlong, MHN_staticFieldOffset(JNIEnv *env, jobject igcls, jobject mname_jh)) {
+  return find_member_field_offset(JNIHandles::resolve(mname_jh), true, THREAD);
+}
+JVM_END
+
+JVM_ENTRY(jobject, MHN_staticFieldBase(JNIEnv *env, jobject igcls, jobject mname_jh)) {
+  // use the other function to perform sanity checks:
+  jlong ignore = find_member_field_offset(JNIHandles::resolve(mname_jh), true, CHECK_NULL);
+  oop clazz = java_lang_invoke_MemberName::clazz(JNIHandles::resolve_non_null(mname_jh));
+  return JNIHandles::make_local(THREAD, clazz);
+}
+JVM_END
+
+JVM_ENTRY(jobject, MHN_getMemberVMInfo(JNIEnv *env, jobject igcls, jobject mname_jh)) {
+  if (mname_jh == NULL)  return NULL;
+  Handle mname(THREAD, JNIHandles::resolve_non_null(mname_jh));
+  intptr_t vmindex  = java_lang_invoke_MemberName::vmindex(mname());
+  Handle   vmtarget = java_lang_invoke_MemberName::vmtarget(mname());
+  objArrayHandle result = oopFactory::new_objArray(SystemDictionary::Object_klass(), 2, CHECK_NULL);
+  jvalue vmindex_value; vmindex_value.j = (long)vmindex;
+  oop x = java_lang_boxing_object::create(T_LONG, &vmindex_value, CHECK_NULL);
+  result->obj_at_put(0, x);
+  x = NULL;
+  if (vmtarget.is_null() || vmtarget->is_instance()) {
+    x = vmtarget();
+  } else if (vmtarget->is_klass()) {
+    x = Klass::cast((klassOop) vmtarget())->java_mirror();
+  } else {
+    Handle mname2 = MethodHandles::new_MemberName(CHECK_NULL);
+    if (vmtarget->is_method())
+      x = MethodHandles::init_method_MemberName(mname2(), methodOop(vmtarget()), false, NULL);
+    else
+      x = MethodHandles::init_MemberName(mname2(), vmtarget());
+  }
+  result->obj_at_put(1, x);
+  return JNIHandles::make_local(env, result());
+}
+JVM_END
+
+
+
 //  static native int getMembers(Class<?> defc, String matchName, String matchSig,
 //          int matchFlags, Class<?> caller, int skip, MemberName[] results);
 JVM_ENTRY(jint, MHN_getMembers(JNIEnv *env, jobject igcls,
@@ -3053,60 +1196,6 @@
 }
 JVM_END
 
-methodOop MethodHandles::resolve_raise_exception_method(TRAPS) {
-  if (_raise_exception_method != NULL) {
-    // no need to do it twice
-    return raise_exception_method();
-  }
-  // LinkResolver::resolve_invokedynamic can reach this point
-  // because an invokedynamic has failed very early (7049415)
-  KlassHandle MHN_klass = SystemDictionaryHandles::MethodHandleNatives_klass();
-  if (MHN_klass.not_null()) {
-    TempNewSymbol raiseException_name = SymbolTable::new_symbol("raiseException", CHECK_NULL);
-    TempNewSymbol raiseException_sig = SymbolTable::new_symbol("(ILjava/lang/Object;Ljava/lang/Object;)V", CHECK_NULL);
-    methodOop raiseException_method  = instanceKlass::cast(MHN_klass->as_klassOop())
-                  ->find_method(raiseException_name, raiseException_sig);
-    if (raiseException_method != NULL && raiseException_method->is_static()) {
-      return raiseException_method;
-    }
-  }
-  // not found; let the caller deal with it
-  return NULL;
-}
-void MethodHandles::raise_exception(int code, oop actual, oop required, TRAPS) {
-  methodOop raiseException_method = resolve_raise_exception_method(CHECK);
-  if (raiseException_method != NULL &&
-      instanceKlass::cast(raiseException_method->method_holder())->is_not_initialized()) {
-    instanceKlass::cast(raiseException_method->method_holder())->initialize(CHECK);
-    // it had better be resolved by now, or maybe JSR 292 failed to load
-    raiseException_method = raise_exception_method();
-  }
-  if (raiseException_method == NULL) {
-    THROW_MSG(vmSymbols::java_lang_InternalError(), "no raiseException method");
-  }
-  JavaCallArguments args;
-  args.push_int(code);
-  args.push_oop(actual);
-  args.push_oop(required);
-  JavaValue result(T_VOID);
-  JavaCalls::call(&result, raiseException_method, &args, CHECK);
-}
-
-JVM_ENTRY(jobject, MH_invoke_UOE(JNIEnv *env, jobject igmh, jobjectArray igargs)) {
-    TempNewSymbol UOE_name = SymbolTable::new_symbol("java/lang/UnsupportedOperationException", CHECK_NULL);
-    THROW_MSG_NULL(UOE_name, "MethodHandle.invoke cannot be invoked reflectively");
-    return NULL;
-}
-JVM_END
-
-JVM_ENTRY(jobject, MH_invokeExact_UOE(JNIEnv *env, jobject igmh, jobjectArray igargs)) {
-    TempNewSymbol UOE_name = SymbolTable::new_symbol("java/lang/UnsupportedOperationException", CHECK_NULL);
-    THROW_MSG_NULL(UOE_name, "MethodHandle.invokeExact cannot be invoked reflectively");
-    return NULL;
-}
-JVM_END
-
-
 /// JVM_RegisterMethodHandleMethods
 
 #undef CS  // Solaris builds complain
@@ -3121,48 +1210,32 @@
 #define MT    JLINV"MethodType;"
 #define MH    JLINV"MethodHandle;"
 #define MEM   JLINV"MemberName;"
-#define AMH   JLINV"AdapterMethodHandle;"
-#define BMH   JLINV"BoundMethodHandle;"
-#define DMH   JLINV"DirectMethodHandle;"
 
 #define CC (char*)  /*cast a literal from (const char*)*/
 #define FN_PTR(f) CAST_FROM_FN_PTR(void*, &f)
 
 // These are the native methods on java.lang.invoke.MethodHandleNatives.
-static JNINativeMethod methods[] = {
-  // void init(MemberName self, AccessibleObject ref)
-  {CC"init",                      CC"("AMH""MH"I)V",                     FN_PTR(MHN_init_AMH)},
-  {CC"init",                      CC"("BMH""OBJ"I)V",                    FN_PTR(MHN_init_BMH)},
-  {CC"init",                      CC"("DMH""OBJ"Z"CLS")V",               FN_PTR(MHN_init_DMH)},
-  {CC"init",                      CC"("MT")V",                           FN_PTR(MHN_init_MT)},
+static JNINativeMethod required_methods_JDK8[] = {
   {CC"init",                      CC"("MEM""OBJ")V",                     FN_PTR(MHN_init_Mem)},
   {CC"expand",                    CC"("MEM")V",                          FN_PTR(MHN_expand_Mem)},
-  {CC"resolve",                   CC"("MEM""CLS")V",                     FN_PTR(MHN_resolve_Mem)},
-  {CC"getTarget",                 CC"("MH"I)"OBJ,                        FN_PTR(MHN_getTarget)},
+  {CC"resolve",                   CC"("MEM""CLS")"MEM,                   FN_PTR(MHN_resolve_Mem)},
   {CC"getConstant",               CC"(I)I",                              FN_PTR(MHN_getConstant)},
   //  static native int getNamedCon(int which, Object[] name)
   {CC"getNamedCon",               CC"(I["OBJ")I",                        FN_PTR(MHN_getNamedCon)},
   //  static native int getMembers(Class<?> defc, String matchName, String matchSig,
   //          int matchFlags, Class<?> caller, int skip, MemberName[] results);
-  {CC"getMembers",                CC"("CLS""STRG""STRG"I"CLS"I["MEM")I", FN_PTR(MHN_getMembers)}
-};
-
-static JNINativeMethod call_site_methods[] = {
+  {CC"getMembers",                CC"("CLS""STRG""STRG"I"CLS"I["MEM")I", FN_PTR(MHN_getMembers)},
+  {CC"objectFieldOffset",         CC"("MEM")J",                          FN_PTR(MHN_objectFieldOffset)},
   {CC"setCallSiteTargetNormal",   CC"("CS""MH")V",                       FN_PTR(MHN_setCallSiteTargetNormal)},
-  {CC"setCallSiteTargetVolatile", CC"("CS""MH")V",                       FN_PTR(MHN_setCallSiteTargetVolatile)}
-};
-
-static JNINativeMethod invoke_methods[] = {
-  // void init(MemberName self, AccessibleObject ref)
-  {CC"invoke",                    CC"(["OBJ")"OBJ,                       FN_PTR(MH_invoke_UOE)},
-  {CC"invokeExact",               CC"(["OBJ")"OBJ,                       FN_PTR(MH_invokeExact_UOE)}
+  {CC"setCallSiteTargetVolatile", CC"("CS""MH")V",                       FN_PTR(MHN_setCallSiteTargetVolatile)},
+  {CC"staticFieldOffset",         CC"("MEM")J",                          FN_PTR(MHN_staticFieldOffset)},
+  {CC"staticFieldBase",           CC"("MEM")"OBJ,                        FN_PTR(MHN_staticFieldBase)},
+  {CC"getMemberVMInfo",           CC"("MEM")"OBJ,                        FN_PTR(MHN_getMemberVMInfo)}
 };
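
// A minimal standalone sketch (illustrative, not part of this changeset) of
// how the descriptor macros above compose: adjacent C string literals
// concatenate, so each table entry's JNI signature is pasted together at
// compile time.  JLINV and MEM mirror the #defines above; CLS is assumed to
// be the java/lang/Class descriptor defined earlier in this file.
#include <cstdio>
#define JLINV "Ljava/lang/invoke/"
#define MEM   JLINV "MemberName;"
#define CLS   "Ljava/lang/Class;"
int main() {
  // Prints the signature registered for the JDK 8 "resolve" entry point:
  // (Ljava/lang/invoke/MemberName;Ljava/lang/Class;)Ljava/lang/invoke/MemberName;
  printf("%s\n", "(" MEM CLS ")" MEM);
  return 0;
}
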
 
 // This one function is exported, used by NativeLookup.
 
 JVM_ENTRY(void, JVM_RegisterMethodHandleMethods(JNIEnv *env, jclass MHN_class)) {
-  assert(MethodHandles::spot_check_entry_names(), "entry enum is OK");
-
   if (!EnableInvokeDynamic) {
     warning("JSR 292 is disabled in this JVM.  Use -XX:+UnlockDiagnosticVMOptions -XX:+EnableInvokeDynamic to enable.");
     return;  // bind nothing
@@ -3171,36 +1244,29 @@
   assert(!MethodHandles::enabled(), "must not be enabled");
   bool enable_MH = true;
 
-  {
+  jclass MH_class = NULL;
+  if (SystemDictionary::MethodHandle_klass() == NULL) {
+    enable_MH = false;
+  } else {
+    oop mirror = Klass::cast(SystemDictionary::MethodHandle_klass())->java_mirror();
+    MH_class = (jclass) JNIHandles::make_local(env, mirror);
+  }
+
+  int status;
+
+  if (enable_MH) {
     ThreadToNativeFromVM ttnfv(thread);
-    int status = env->RegisterNatives(MHN_class, methods, sizeof(methods)/sizeof(JNINativeMethod));
-    if (!env->ExceptionOccurred()) {
-      const char* L_MH_name = (JLINV "MethodHandle");
-      const char* MH_name = L_MH_name+1;
-      jclass MH_class = env->FindClass(MH_name);
-      status = env->RegisterNatives(MH_class, invoke_methods, sizeof(invoke_methods)/sizeof(JNINativeMethod));
-    }
-    if (env->ExceptionOccurred()) {
+
+    status = env->RegisterNatives(MHN_class, required_methods_JDK8, sizeof(required_methods_JDK8)/sizeof(JNINativeMethod));
+    if (status != JNI_OK || env->ExceptionOccurred()) {
       warning("JSR 292 method handle code is mismatched to this JVM.  Disabling support.");
       enable_MH = false;
       env->ExceptionClear();
     }
-
-    status = env->RegisterNatives(MHN_class, call_site_methods, sizeof(call_site_methods)/sizeof(JNINativeMethod));
-    if (env->ExceptionOccurred()) {
-      // Exception is okay until 7087357
-      env->ExceptionClear();
-    }
   }
 
-  if (enable_MH) {
-    methodOop raiseException_method = MethodHandles::resolve_raise_exception_method(CHECK);
-    if (raiseException_method != NULL) {
-      MethodHandles::set_raise_exception_method(raiseException_method);
-    } else {
-      warning("JSR 292 method handle code is mismatched to this JVM.  Disabling support.");
-      enable_MH = false;
-    }
+  if (TraceInvokeDynamic) {
+    tty->print_cr("MethodHandle support loaded (using LambdaForms)");
   }
 
   if (enable_MH) {
--- a/src/share/vm/prims/methodHandles.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/methodHandles.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -33,523 +33,36 @@
 
 class MacroAssembler;
 class Label;
-class MethodHandleEntry;
 
 class MethodHandles: AllStatic {
   // JVM support for MethodHandle, MethodType, and related types
   // in java.lang.invoke and sun.invoke.
   // See also  javaClasses for layouts java_lang_invoke_Method{Handle,Type,Type::Form}.
  public:
-  enum EntryKind {
-    _raise_exception,           // stub for error generation from other stubs
-    _invokestatic_mh,           // how a MH emulates invokestatic
-    _invokespecial_mh,          // ditto for the other invokes...
-    _invokevirtual_mh,
-    _invokeinterface_mh,
-    _bound_ref_mh,              // reference argument is bound
-    _bound_int_mh,              // int argument is bound (via an Integer or Float)
-    _bound_long_mh,             // long argument is bound (via a Long or Double)
-    _bound_ref_direct_mh,       // same as above, with direct linkage to methodOop
-    _bound_int_direct_mh,
-    _bound_long_direct_mh,
-
-    _adapter_mh_first,     // adapter sequence goes here...
-    _adapter_retype_only   = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_RETYPE_ONLY,
-    _adapter_retype_raw    = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_RETYPE_RAW,
-    _adapter_check_cast    = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_CHECK_CAST,
-    _adapter_prim_to_prim  = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_PRIM,
-    _adapter_ref_to_prim   = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_REF_TO_PRIM,
-    _adapter_prim_to_ref   = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF,
-    _adapter_swap_args     = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_SWAP_ARGS,
-    _adapter_rot_args      = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_ROT_ARGS,
-    _adapter_dup_args      = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_DUP_ARGS,
-    _adapter_drop_args     = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS,
-    _adapter_collect_args  = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS,
-    _adapter_spread_args   = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS,
-    _adapter_fold_args     = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS,
-    _adapter_unused_13     = _adapter_mh_first + 13,  //hole in the CONV_OP enumeration
-    _adapter_mh_last       = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::CONV_OP_LIMIT - 1,
-
-    // Optimized adapter types
-
-    // argument list reordering
-    _adapter_opt_swap_1,
-    _adapter_opt_swap_2,
-    _adapter_opt_rot_1_up,
-    _adapter_opt_rot_1_down,
-    _adapter_opt_rot_2_up,
-    _adapter_opt_rot_2_down,
-    // primitive single to single:
-    _adapter_opt_i2i,           // i2c, i2z, i2b, i2s
-    // primitive double to single:
-    _adapter_opt_l2i,
-    _adapter_opt_d2f,
-    // primitive single to double:
-    _adapter_opt_i2l,
-    _adapter_opt_f2d,
-    // conversion between floating point and integer type is handled by Java
-
-    // reference to primitive:
-    _adapter_opt_unboxi,
-    _adapter_opt_unboxl,
-
-    // %% Maybe tame the following with a VM_SYMBOLS_DO type macro?
-
-    // how a blocking adapter returns (platform-dependent)
-    _adapter_opt_return_ref,
-    _adapter_opt_return_int,
-    _adapter_opt_return_long,
-    _adapter_opt_return_float,
-    _adapter_opt_return_double,
-    _adapter_opt_return_void,
-    _adapter_opt_return_S0_ref,  // return ref to S=0 (last slot)
-    _adapter_opt_return_S1_ref,  // return ref to S=1 (2nd-to-last slot)
-    _adapter_opt_return_S2_ref,
-    _adapter_opt_return_S3_ref,
-    _adapter_opt_return_S4_ref,
-    _adapter_opt_return_S5_ref,
-    _adapter_opt_return_any,     // dynamically select r/i/l/f/d
-    _adapter_opt_return_FIRST = _adapter_opt_return_ref,
-    _adapter_opt_return_LAST  = _adapter_opt_return_any,
-
-    // spreading (array length cases 0, 1, ...)
-    _adapter_opt_spread_0,       // spread empty array to N=0 arguments
-    _adapter_opt_spread_1_ref,   // spread Object[] to N=1 argument
-    _adapter_opt_spread_2_ref,   // spread Object[] to N=2 arguments
-    _adapter_opt_spread_3_ref,   // spread Object[] to N=3 arguments
-    _adapter_opt_spread_4_ref,   // spread Object[] to N=4 arguments
-    _adapter_opt_spread_5_ref,   // spread Object[] to N=5 arguments
-    _adapter_opt_spread_ref,     // spread Object[] to N arguments
-    _adapter_opt_spread_byte,    // spread byte[] or boolean[] to N arguments
-    _adapter_opt_spread_char,    // spread char[], etc., to N arguments
-    _adapter_opt_spread_short,   // spread short[], etc., to N arguments
-    _adapter_opt_spread_int,     // spread int[], short[], etc., to N arguments
-    _adapter_opt_spread_long,    // spread long[] to N arguments
-    _adapter_opt_spread_float,   // spread float[] to N arguments
-    _adapter_opt_spread_double,  // spread double[] to N arguments
-    _adapter_opt_spread_FIRST = _adapter_opt_spread_0,
-    _adapter_opt_spread_LAST  = _adapter_opt_spread_double,
-
-    // blocking filter/collect conversions
-    // These collect N arguments and replace them (at slot S) by a return value
-    // which is passed to the final target, along with the unaffected arguments.
-    // collect_{N}_{T} collects N arguments at any position into a T value
-    // collect_{N}_S{S}_{T} collects N arguments at slot S into a T value
-    // collect_{T} collects any number of arguments at any position
-    // filter_S{S}_{T} is the same as collect_1_S{S}_{T} (a unary collection)
-    // (collect_2 is also usable as a filter, with long or double arguments)
-    _adapter_opt_collect_ref,    // combine N arguments, replace with a reference
-    _adapter_opt_collect_int,    // combine N arguments, replace with an int, short, etc.
-    _adapter_opt_collect_long,   // combine N arguments, replace with a long
-    _adapter_opt_collect_float,  // combine N arguments, replace with a float
-    _adapter_opt_collect_double, // combine N arguments, replace with a double
-    _adapter_opt_collect_void,   // combine N arguments, replace with nothing
-    // if there is a small fixed number to push, do so without a loop:
-    _adapter_opt_collect_0_ref,  // collect N=0 arguments, insert a reference
-    _adapter_opt_collect_1_ref,  // collect N=1 argument, replace with a reference
-    _adapter_opt_collect_2_ref,  // combine N=2 arguments, replace with a reference
-    _adapter_opt_collect_3_ref,  // combine N=3 arguments, replace with a reference
-    _adapter_opt_collect_4_ref,  // combine N=4 arguments, replace with a reference
-    _adapter_opt_collect_5_ref,  // combine N=5 arguments, replace with a reference
-    // filters are an important special case because they never move arguments:
-    _adapter_opt_filter_S0_ref,  // filter N=1 argument at S=0, replace with a reference
-    _adapter_opt_filter_S1_ref,  // filter N=1 argument at S=1, replace with a reference
-    _adapter_opt_filter_S2_ref,  // filter N=1 argument at S=2, replace with a reference
-    _adapter_opt_filter_S3_ref,  // filter N=1 argument at S=3, replace with a reference
-    _adapter_opt_filter_S4_ref,  // filter N=1 argument at S=4, replace with a reference
-    _adapter_opt_filter_S5_ref,  // filter N=1 argument at S=5, replace with a reference
-    // these move arguments, but they are important for boxing
-    _adapter_opt_collect_2_S0_ref,  // combine last N=2 arguments, replace with a reference
-    _adapter_opt_collect_2_S1_ref,  // combine N=2 arguments at S=1, replace with a reference
-    _adapter_opt_collect_2_S2_ref,  // combine N=2 arguments at S=2, replace with a reference
-    _adapter_opt_collect_2_S3_ref,  // combine N=2 arguments at S=3, replace with a reference
-    _adapter_opt_collect_2_S4_ref,  // combine N=2 arguments at S=4, replace with a reference
-    _adapter_opt_collect_2_S5_ref,  // combine N=2 arguments at S=5, replace with a reference
-    _adapter_opt_collect_FIRST = _adapter_opt_collect_ref,
-    _adapter_opt_collect_LAST  = _adapter_opt_collect_2_S5_ref,
-
-    // blocking folding conversions
-    // these are like collects, but retain all the N arguments for the final target
-    //_adapter_opt_fold_0_ref,   // same as _adapter_opt_collect_0_ref
-    // fold_{N}_{T} processes N arguments at any position into a T value, which it inserts
-    // fold_{T} processes any number of arguments at any position
-    _adapter_opt_fold_ref,       // process N arguments, prepend a reference
-    _adapter_opt_fold_int,       // process N arguments, prepend an int, short, etc.
-    _adapter_opt_fold_long,      // process N arguments, prepend a long
-    _adapter_opt_fold_float,     // process N arguments, prepend a float
-    _adapter_opt_fold_double,    // process N arguments, prepend a double
-    _adapter_opt_fold_void,      // process N arguments, but leave the list unchanged
-    _adapter_opt_fold_1_ref,     // process N=1 argument, prepend a reference
-    _adapter_opt_fold_2_ref,     // process N=2 arguments, prepend a reference
-    _adapter_opt_fold_3_ref,     // process N=3 arguments, prepend a reference
-    _adapter_opt_fold_4_ref,     // process N=4 arguments, prepend a reference
-    _adapter_opt_fold_5_ref,     // process N=5 arguments, prepend a reference
-    _adapter_opt_fold_FIRST = _adapter_opt_fold_ref,
-    _adapter_opt_fold_LAST  = _adapter_opt_fold_5_ref,
-
-    _adapter_opt_profiling,
-
-    _EK_LIMIT,
-    _EK_FIRST = 0
-  };
-
  public:
   static bool enabled()                         { return _enabled; }
   static void set_enabled(bool z);
 
  private:
-  enum {  // import java_lang_invoke_AdapterMethodHandle::CONV_OP_*
-    CONV_OP_LIMIT         = java_lang_invoke_AdapterMethodHandle::CONV_OP_LIMIT,
-    CONV_OP_MASK          = java_lang_invoke_AdapterMethodHandle::CONV_OP_MASK,
-    CONV_TYPE_MASK        = java_lang_invoke_AdapterMethodHandle::CONV_TYPE_MASK,
-    CONV_VMINFO_MASK      = java_lang_invoke_AdapterMethodHandle::CONV_VMINFO_MASK,
-    CONV_VMINFO_SHIFT     = java_lang_invoke_AdapterMethodHandle::CONV_VMINFO_SHIFT,
-    CONV_OP_SHIFT         = java_lang_invoke_AdapterMethodHandle::CONV_OP_SHIFT,
-    CONV_DEST_TYPE_SHIFT  = java_lang_invoke_AdapterMethodHandle::CONV_DEST_TYPE_SHIFT,
-    CONV_SRC_TYPE_SHIFT   = java_lang_invoke_AdapterMethodHandle::CONV_SRC_TYPE_SHIFT,
-    CONV_STACK_MOVE_SHIFT = java_lang_invoke_AdapterMethodHandle::CONV_STACK_MOVE_SHIFT,
-    CONV_STACK_MOVE_MASK  = java_lang_invoke_AdapterMethodHandle::CONV_STACK_MOVE_MASK
-  };
-
   static bool _enabled;
-  static MethodHandleEntry* _entries[_EK_LIMIT];
-  static const char*        _entry_names[_EK_LIMIT+1];
-  static jobject            _raise_exception_method;
-  static address            _adapter_return_handlers[CONV_TYPE_MASK+1];
 
   // Adapters.
   static MethodHandlesAdapterBlob* _adapter_code;
 
-  static bool ek_valid(EntryKind ek)            { return (uint)ek < (uint)_EK_LIMIT; }
-  static bool conv_op_valid(int op)             { return (uint)op < (uint)CONV_OP_LIMIT; }
-
- public:
-  static bool    have_entry(EntryKind ek)       { return ek_valid(ek) && _entries[ek] != NULL; }
-  static MethodHandleEntry* entry(EntryKind ek) { assert(ek_valid(ek), "initialized");
-                                                  return _entries[ek]; }
-  static const char* entry_name(EntryKind ek)   { assert(ek_valid(ek), "oob");
-                                                  return _entry_names[ek]; }
-  static EntryKind adapter_entry_kind(int op)   { assert(conv_op_valid(op), "oob");
-                                                  return EntryKind(_adapter_mh_first + op); }
-
-  static void init_entry(EntryKind ek, MethodHandleEntry* me) {
-    assert(ek_valid(ek), "oob");
-    assert(_entries[ek] == NULL, "no double initialization");
-    _entries[ek] = me;
-  }
-
-  // Some adapter helper functions.
-  static EntryKind ek_original_kind(EntryKind ek) {
-    if (ek <= _adapter_mh_last)  return ek;
-    switch (ek) {
-    case _adapter_opt_swap_1:
-    case _adapter_opt_swap_2:
-      return _adapter_swap_args;
-    case _adapter_opt_rot_1_up:
-    case _adapter_opt_rot_1_down:
-    case _adapter_opt_rot_2_up:
-    case _adapter_opt_rot_2_down:
-      return _adapter_rot_args;
-    case _adapter_opt_i2i:
-    case _adapter_opt_l2i:
-    case _adapter_opt_d2f:
-    case _adapter_opt_i2l:
-    case _adapter_opt_f2d:
-      return _adapter_prim_to_prim;
-    case _adapter_opt_unboxi:
-    case _adapter_opt_unboxl:
-      return _adapter_ref_to_prim;
-    }
-    if (ek >= _adapter_opt_spread_FIRST && ek <= _adapter_opt_spread_LAST)
-      return _adapter_spread_args;
-    if (ek >= _adapter_opt_collect_FIRST && ek <= _adapter_opt_collect_LAST)
-      return _adapter_collect_args;
-    if (ek >= _adapter_opt_fold_FIRST && ek <= _adapter_opt_fold_LAST)
-      return _adapter_fold_args;
-    if (ek >= _adapter_opt_return_FIRST && ek <= _adapter_opt_return_LAST)
-      return _adapter_opt_return_any;
-    if (ek == _adapter_opt_profiling)
-      return _adapter_retype_only;
-    assert(false, "oob");
-    return _EK_LIMIT;
-  }
-
-  static bool ek_supported(MethodHandles::EntryKind ek);
-
-  static BasicType ek_bound_mh_arg_type(EntryKind ek) {
-    switch (ek) {
-    case _bound_int_mh         : // fall-thru
-    case _bound_int_direct_mh  : return T_INT;
-    case _bound_long_mh        : // fall-thru
-    case _bound_long_direct_mh : return T_LONG;
-    default                    : return T_OBJECT;
-    }
-  }
-
-  static int ek_adapter_opt_swap_slots(EntryKind ek) {
-    switch (ek) {
-    case _adapter_opt_swap_1        : return  1;
-    case _adapter_opt_swap_2        : return  2;
-    case _adapter_opt_rot_1_up      : return  1;
-    case _adapter_opt_rot_1_down    : return  1;
-    case _adapter_opt_rot_2_up      : return  2;
-    case _adapter_opt_rot_2_down    : return  2;
-    default : ShouldNotReachHere();   return -1;
-    }
-  }
-
-  static int ek_adapter_opt_swap_mode(EntryKind ek) {
-    switch (ek) {
-    case _adapter_opt_swap_1       : return  0;
-    case _adapter_opt_swap_2       : return  0;
-    case _adapter_opt_rot_1_up     : return  1;
-    case _adapter_opt_rot_1_down   : return -1;
-    case _adapter_opt_rot_2_up     : return  1;
-    case _adapter_opt_rot_2_down   : return -1;
-    default : ShouldNotReachHere();  return  0;
-    }
-  }
-
-  static int ek_adapter_opt_collect_count(EntryKind ek) {
-    assert(ek >= _adapter_opt_collect_FIRST && ek <= _adapter_opt_collect_LAST ||
-           ek >= _adapter_opt_fold_FIRST    && ek <= _adapter_opt_fold_LAST, "");
-    switch (ek) {
-    case _adapter_opt_collect_0_ref    : return  0;
-    case _adapter_opt_filter_S0_ref    :
-    case _adapter_opt_filter_S1_ref    :
-    case _adapter_opt_filter_S2_ref    :
-    case _adapter_opt_filter_S3_ref    :
-    case _adapter_opt_filter_S4_ref    :
-    case _adapter_opt_filter_S5_ref    :
-    case _adapter_opt_fold_1_ref       :
-    case _adapter_opt_collect_1_ref    : return  1;
-    case _adapter_opt_collect_2_S0_ref :
-    case _adapter_opt_collect_2_S1_ref :
-    case _adapter_opt_collect_2_S2_ref :
-    case _adapter_opt_collect_2_S3_ref :
-    case _adapter_opt_collect_2_S4_ref :
-    case _adapter_opt_collect_2_S5_ref :
-    case _adapter_opt_fold_2_ref       :
-    case _adapter_opt_collect_2_ref    : return  2;
-    case _adapter_opt_fold_3_ref       :
-    case _adapter_opt_collect_3_ref    : return  3;
-    case _adapter_opt_fold_4_ref       :
-    case _adapter_opt_collect_4_ref    : return  4;
-    case _adapter_opt_fold_5_ref       :
-    case _adapter_opt_collect_5_ref    : return  5;
-    default                            : return -1;  // sentinel value for "variable"
-    }
-  }
-
-  static int ek_adapter_opt_collect_slot(EntryKind ek) {
-    assert(ek >= _adapter_opt_collect_FIRST && ek <= _adapter_opt_collect_LAST ||
-           ek >= _adapter_opt_fold_FIRST    && ek <= _adapter_opt_fold_LAST, "");
-    switch (ek) {
-    case _adapter_opt_collect_2_S0_ref  :
-    case _adapter_opt_filter_S0_ref     : return 0;
-    case _adapter_opt_collect_2_S1_ref  :
-    case _adapter_opt_filter_S1_ref     : return 1;
-    case _adapter_opt_collect_2_S2_ref  :
-    case _adapter_opt_filter_S2_ref     : return 2;
-    case _adapter_opt_collect_2_S3_ref  :
-    case _adapter_opt_filter_S3_ref     : return 3;
-    case _adapter_opt_collect_2_S4_ref  :
-    case _adapter_opt_filter_S4_ref     : return 4;
-    case _adapter_opt_collect_2_S5_ref  :
-    case _adapter_opt_filter_S5_ref     : return 5;
-    default                             : return -1;  // sentinel value for "variable"
-    }
-  }
-
-  static BasicType ek_adapter_opt_collect_type(EntryKind ek) {
-    assert(ek >= _adapter_opt_collect_FIRST && ek <= _adapter_opt_collect_LAST ||
-           ek >= _adapter_opt_fold_FIRST    && ek <= _adapter_opt_fold_LAST, "");
-    switch (ek) {
-    case _adapter_opt_fold_int          :
-    case _adapter_opt_collect_int       : return T_INT;
-    case _adapter_opt_fold_long         :
-    case _adapter_opt_collect_long      : return T_LONG;
-    case _adapter_opt_fold_float        :
-    case _adapter_opt_collect_float     : return T_FLOAT;
-    case _adapter_opt_fold_double       :
-    case _adapter_opt_collect_double    : return T_DOUBLE;
-    case _adapter_opt_fold_void         :
-    case _adapter_opt_collect_void      : return T_VOID;
-    default                             : return T_OBJECT;
-    }
-  }
-
-  static int ek_adapter_opt_return_slot(EntryKind ek) {
-    assert(ek >= _adapter_opt_return_FIRST && ek <= _adapter_opt_return_LAST, "");
-    switch (ek) {
-    case _adapter_opt_return_S0_ref : return 0;
-    case _adapter_opt_return_S1_ref : return 1;
-    case _adapter_opt_return_S2_ref : return 2;
-    case _adapter_opt_return_S3_ref : return 3;
-    case _adapter_opt_return_S4_ref : return 4;
-    case _adapter_opt_return_S5_ref : return 5;
-    default                         : return -1;  // sentinel value for "variable"
-    }
-  }
-
-  static BasicType ek_adapter_opt_return_type(EntryKind ek) {
-    assert(ek >= _adapter_opt_return_FIRST && ek <= _adapter_opt_return_LAST, "");
-    switch (ek) {
-    case _adapter_opt_return_int    : return T_INT;
-    case _adapter_opt_return_long   : return T_LONG;
-    case _adapter_opt_return_float  : return T_FLOAT;
-    case _adapter_opt_return_double : return T_DOUBLE;
-    case _adapter_opt_return_void   : return T_VOID;
-    case _adapter_opt_return_any    : return T_CONFLICT;  // sentinel value for "variable"
-    default                         : return T_OBJECT;
-    }
-  }
-
-  static int ek_adapter_opt_spread_count(EntryKind ek) {
-    assert(ek >= _adapter_opt_spread_FIRST && ek <= _adapter_opt_spread_LAST, "");
-    switch (ek) {
-    case _adapter_opt_spread_0     : return  0;
-    case _adapter_opt_spread_1_ref : return  1;
-    case _adapter_opt_spread_2_ref : return  2;
-    case _adapter_opt_spread_3_ref : return  3;
-    case _adapter_opt_spread_4_ref : return  4;
-    case _adapter_opt_spread_5_ref : return  5;
-    default                        : return -1;  // sentinel value for "variable"
-    }
-  }
-
-  static BasicType ek_adapter_opt_spread_type(EntryKind ek) {
-    assert(ek >= _adapter_opt_spread_FIRST && ek <= _adapter_opt_spread_LAST, "");
-    switch (ek) {
-    // (there is no _adapter_opt_spread_boolean; we use byte)
-    case _adapter_opt_spread_byte   : return T_BYTE;
-    case _adapter_opt_spread_char   : return T_CHAR;
-    case _adapter_opt_spread_short  : return T_SHORT;
-    case _adapter_opt_spread_int    : return T_INT;
-    case _adapter_opt_spread_long   : return T_LONG;
-    case _adapter_opt_spread_float  : return T_FLOAT;
-    case _adapter_opt_spread_double : return T_DOUBLE;
-    default                         : return T_OBJECT;
-    }
-  }
-
-  static methodOop raise_exception_method() {
-    oop rem = JNIHandles::resolve(_raise_exception_method);
-    assert(rem == NULL || rem->is_method(), "");
-    return (methodOop) rem;
-  }
-  static void set_raise_exception_method(methodOop rem) {
-    assert(_raise_exception_method == NULL, "");
-    _raise_exception_method = JNIHandles::make_global(Handle(rem));
-  }
-  static methodOop resolve_raise_exception_method(TRAPS);
-  // call raise_exception_method from C code:
-  static void raise_exception(int code, oop actual, oop required, TRAPS);
-
-  static jint adapter_conversion(int conv_op, BasicType src, BasicType dest,
-                                 int stack_move = 0, int vminfo = 0) {
-    assert(conv_op_valid(conv_op), "oob");
-    jint conv = ((conv_op      << CONV_OP_SHIFT)
-                 | (src        << CONV_SRC_TYPE_SHIFT)
-                 | (dest       << CONV_DEST_TYPE_SHIFT)
-                 | (stack_move << CONV_STACK_MOVE_SHIFT)
-                 | (vminfo     << CONV_VMINFO_SHIFT)
-                 );
-    assert(adapter_conversion_op(conv) == conv_op, "decode conv_op");
-    assert(adapter_conversion_src_type(conv) == src, "decode src");
-    assert(adapter_conversion_dest_type(conv) == dest, "decode dest");
-    assert(adapter_conversion_stack_move(conv) == stack_move, "decode stack_move");
-    assert(adapter_conversion_vminfo(conv) == vminfo, "decode vminfo");
-    return conv;
-  }
-  static int adapter_conversion_op(jint conv) {
-    return ((conv >> CONV_OP_SHIFT) & 0xF);
-  }
-  static BasicType adapter_conversion_src_type(jint conv) {
-    return (BasicType)((conv >> CONV_SRC_TYPE_SHIFT) & 0xF);
-  }
-  static BasicType adapter_conversion_dest_type(jint conv) {
-    return (BasicType)((conv >> CONV_DEST_TYPE_SHIFT) & 0xF);
-  }
-  static int adapter_conversion_stack_move(jint conv) {
-    return (conv >> CONV_STACK_MOVE_SHIFT);
-  }
-  static int adapter_conversion_vminfo(jint conv) {
-    return (conv >> CONV_VMINFO_SHIFT) & CONV_VMINFO_MASK;
-  }
-
-  // Bit mask of conversion_op values.  May vary by platform.
-  static int adapter_conversion_ops_supported_mask();
-
-  static bool conv_op_supported(int conv_op) {
-    assert(conv_op_valid(conv_op), "");
-    return ((adapter_conversion_ops_supported_mask() & nth_bit(conv_op)) != 0);
-  }
-
-  // Offset in words that the interpreter stack pointer moves when an argument is pushed.
-  // The stack_move value must always be a multiple of this.
-  static int stack_move_unit() {
-    return frame::interpreter_frame_expression_stack_direction() * Interpreter::stackElementWords;
-  }
-
-  // Adapter frame traversal.  (Implementation-specific.)
-  static frame ricochet_frame_sender(const frame& fr, RegisterMap* reg_map);
-  static void ricochet_frame_oops_do(const frame& fr, OopClosure* blk, const RegisterMap* reg_map);
-
-  enum { CONV_VMINFO_SIGN_FLAG = 0x80 };
-  // Shift values for prim-to-prim conversions.
-  static int adapter_prim_to_prim_subword_vminfo(BasicType dest) {
-    if (dest == T_BOOLEAN) return (BitsPerInt - 1);  // boolean is 1 bit
-    if (dest == T_CHAR)    return (BitsPerInt - BitsPerShort);
-    if (dest == T_BYTE)    return (BitsPerInt - BitsPerByte ) | CONV_VMINFO_SIGN_FLAG;
-    if (dest == T_SHORT)   return (BitsPerInt - BitsPerShort) | CONV_VMINFO_SIGN_FLAG;
-    return 0;                   // case T_INT
-  }
-  // Shift values for unboxing a primitive.
-  static int adapter_unbox_subword_vminfo(BasicType dest) {
-    if (dest == T_BOOLEAN) return (BitsPerInt - BitsPerByte );  // implemented as 1 byte
-    if (dest == T_CHAR)    return (BitsPerInt - BitsPerShort);
-    if (dest == T_BYTE)    return (BitsPerInt - BitsPerByte ) | CONV_VMINFO_SIGN_FLAG;
-    if (dest == T_SHORT)   return (BitsPerInt - BitsPerShort) | CONV_VMINFO_SIGN_FLAG;
-    return 0;                   // case T_INT
-  }
-  // Here is the transformation the i2i adapter must perform:
-  static int truncate_subword_from_vminfo(jint value, int vminfo) {
-    int shift = vminfo & ~CONV_VMINFO_SIGN_FLAG;
-    jint tem = value << shift;
-    if ((vminfo & CONV_VMINFO_SIGN_FLAG) != 0) {
-      return (jint)tem >> shift;
-    } else {
-      return (juint)tem >> shift;
-    }
-  }
-
-  static inline address from_compiled_entry(EntryKind ek);
-  static inline address from_interpreted_entry(EntryKind ek);
-
-  // helpers for decode_method.
-  static methodOop    decode_methodOop(methodOop m, int& decode_flags_result);
-  static methodHandle decode_vmtarget(oop vmtarget, int vmindex, oop mtype, KlassHandle& receiver_limit_result, int& decode_flags_result);
-  static methodHandle decode_MemberName(oop mname, KlassHandle& receiver_limit_result, int& decode_flags_result);
-  static methodHandle decode_MethodHandle(oop mh, KlassHandle& receiver_limit_result, int& decode_flags_result);
-  static methodHandle decode_DirectMethodHandle(oop mh, KlassHandle& receiver_limit_result, int& decode_flags_result);
-  static methodHandle decode_BoundMethodHandle(oop mh, KlassHandle& receiver_limit_result, int& decode_flags_result);
-  static methodHandle decode_AdapterMethodHandle(oop mh, KlassHandle& receiver_limit_result, int& decode_flags_result);
-
-  // Find out how many stack slots an mh pushes or pops.
-  // The result is *not* reported as a multiple of stack_move_unit();
-  // It is a signed net number of pushes (a difference in vmslots).
-  // To compare with a stack_move value, first multiply by stack_move_unit().
-  static int decode_MethodHandle_stack_pushes(oop mh);
-
  public:
   // working with member names
-  static void resolve_MemberName(Handle mname, TRAPS); // compute vmtarget/vmindex from name/type
+  static Handle resolve_MemberName(Handle mname, TRAPS); // compute vmtarget/vmindex from name/type
   static void expand_MemberName(Handle mname, int suppress, TRAPS);  // expand defc/name/type if missing
   static Handle new_MemberName(TRAPS);  // must be followed by init_MemberName
-  static void init_MemberName(oop mname_oop, oop target); // compute vmtarget/vmindex from target
-  static void init_MemberName(oop mname_oop, methodOop m, bool do_dispatch = true);
-  static void init_MemberName(oop mname_oop, klassOop field_holder, AccessFlags mods, int offset);
+  static oop init_MemberName(oop mname_oop, oop target_oop); // compute vmtarget/vmindex from target
+  static oop init_method_MemberName(oop mname_oop, methodOop m, bool do_dispatch,
+                                    klassOop receiver_limit);
+  static oop init_field_MemberName(oop mname_oop, klassOop field_holder,
+                                   AccessFlags mods, oop type, oop name,
+                                   intptr_t offset, bool is_setter = false);
+  static Handle init_method_MemberName(oop mname_oop, CallInfo& info, TRAPS);
+  static Handle init_field_MemberName(oop mname_oop, FieldAccessInfo& info, TRAPS);
+  static int method_ref_kind(methodOop m, bool do_dispatch_if_possible = true);
   static int find_MemberNames(klassOop k, Symbol* name, Symbol* sig,
                               int mflags, klassOop caller,
                               int skip, objArrayOop results);
@@ -559,169 +72,113 @@
   // Generate MethodHandles adapters.
   static void generate_adapters();
 
-  // Called from InterpreterGenerator and MethodHandlesAdapterGenerator.
-  static address generate_method_handle_interpreter_entry(MacroAssembler* _masm);
-  static void generate_method_handle_stub(MacroAssembler* _masm, EntryKind ek);
-
-  // argument list parsing
-  static int argument_slot(oop method_type, int arg);
-  static int argument_slot_count(oop method_type) { return argument_slot(method_type, -1); }
-  static int argument_slot_to_argnum(oop method_type, int argslot);
+  // Called from MethodHandlesAdapterGenerator.
+  static address generate_method_handle_interpreter_entry(MacroAssembler* _masm, vmIntrinsics::ID iid);
+  static void generate_method_handle_dispatch(MacroAssembler* _masm,
+                                              vmIntrinsics::ID iid,
+                                              Register receiver_reg,
+                                              Register member_reg,
+                                              bool for_compiler_entry);
 
-  // Runtime support
-  enum {                        // bit-encoded flags from decode_method or decode_vmref
-    _dmf_has_receiver   = 0x01, // target method has leading reference argument
-    _dmf_does_dispatch  = 0x02, // method handle performs virtual or interface dispatch
-    _dmf_from_interface = 0x04, // peforms interface dispatch
-    _DMF_DIRECT_MASK    = (_dmf_from_interface*2 - _dmf_has_receiver),
-    _dmf_binds_method   = 0x08,
-    _dmf_binds_argument = 0x10,
-    _DMF_BOUND_MASK     = (_dmf_binds_argument*2 - _dmf_binds_method),
-    _dmf_adapter_lsb    = 0x20,
-    _DMF_ADAPTER_MASK   = (_dmf_adapter_lsb << CONV_OP_LIMIT) - _dmf_adapter_lsb
-  };
-  static methodHandle decode_method(oop x, KlassHandle& receiver_limit_result, int& decode_flags_result);
-  enum {
-    // format of query to getConstant:
-    GC_JVM_PUSH_LIMIT = 0,
-    GC_JVM_STACK_MOVE_UNIT = 1,
-    GC_CONV_OP_IMPLEMENTED_MASK = 2,
-    GC_OP_ROT_ARGS_DOWN_LIMIT_BIAS = 3,
-    GC_COUNT_GWT = 4,
+  // Queries
+  static bool is_signature_polymorphic(vmIntrinsics::ID iid) {
+    return (iid >= vmIntrinsics::FIRST_MH_SIG_POLY &&
+            iid <= vmIntrinsics::LAST_MH_SIG_POLY);
+  }
 
-    // format of result from getTarget / encode_target:
-    ETF_HANDLE_OR_METHOD_NAME = 0, // all available data (immediate MH or method)
-    ETF_DIRECT_HANDLE         = 1, // ultimate method handle (will be a DMH, may be self)
-    ETF_METHOD_NAME           = 2, // ultimate method as MemberName
-    ETF_REFLECT_METHOD        = 3, // ultimate method as java.lang.reflect object (sans refClass)
-    ETF_FORCE_DIRECT_HANDLE   = 64,
-    ETF_COMPILE_DIRECT_HANDLE = 65,
-
-    // ad hoc constants
-    OP_ROT_ARGS_DOWN_LIMIT_BIAS = -1
-  };
-  static int get_named_constant(int which, Handle name_box, TRAPS);
-  static oop encode_target(Handle mh, int format, TRAPS); // report vmtarget (to Java code)
-  static bool class_cast_needed(klassOop src, klassOop dst);
-
-  static instanceKlassHandle resolve_instance_klass(oop    java_mirror_oop, TRAPS);
-  static instanceKlassHandle resolve_instance_klass(jclass java_mirror_jh,  TRAPS) {
-    return resolve_instance_klass(JNIHandles::resolve(java_mirror_jh), THREAD);
+  static bool is_signature_polymorphic_intrinsic(vmIntrinsics::ID iid) {
+    assert(is_signature_polymorphic(iid), "");
+    // Most sig-poly methods are intrinsics which do not require an
+    // appeal to Java for adapter code.
+    return (iid != vmIntrinsics::_invokeGeneric);
   }
 
- private:
-  // These checkers operate on a pair of whole MethodTypes:
-  static const char* check_method_type_change(oop src_mtype, int src_beg, int src_end,
-                                              int insert_argnum, oop insert_type,
-                                              int change_argnum, oop change_type,
-                                              int delete_argnum,
-                                              oop dst_mtype, int dst_beg, int dst_end,
-                                              bool raw = false);
-  static const char* check_method_type_insertion(oop src_mtype,
-                                                 int insert_argnum, oop insert_type,
-                                                 oop dst_mtype) {
-    oop no_ref = NULL;
-    return check_method_type_change(src_mtype, 0, -1,
-                                    insert_argnum, insert_type,
-                                    -1, no_ref, -1, dst_mtype, 0, -1);
+  static bool is_signature_polymorphic_static(vmIntrinsics::ID iid) {
+    assert(is_signature_polymorphic(iid), "");
+    return (iid >= vmIntrinsics::FIRST_MH_STATIC &&
+            iid <= vmIntrinsics::LAST_MH_SIG_POLY);
+  }
+
+  static bool has_member_arg(vmIntrinsics::ID iid) {
+    assert(is_signature_polymorphic(iid), "");
+    return (iid >= vmIntrinsics::_linkToVirtual &&
+            iid <= vmIntrinsics::_linkToInterface);
   }
-  static const char* check_method_type_conversion(oop src_mtype,
-                                                  int change_argnum, oop change_type,
-                                                  oop dst_mtype) {
-    oop no_ref = NULL;
-    return check_method_type_change(src_mtype, 0, -1, -1, no_ref,
-                                    change_argnum, change_type,
-                                    -1, dst_mtype, 0, -1);
+  static bool has_member_arg(Symbol* klass, Symbol* name) {
+    if ((klass == vmSymbols::java_lang_invoke_MethodHandle()) &&
+        is_signature_polymorphic_name(name)) {
+      vmIntrinsics::ID iid = signature_polymorphic_name_id(name);
+      return has_member_arg(iid);
+    }
+    return false;
   }
-  static const char* check_method_type_passthrough(oop src_mtype, oop dst_mtype, bool raw) {
-    oop no_ref = NULL;
-    return check_method_type_change(src_mtype, 0, -1,
-                                    -1, no_ref, -1, no_ref, -1,
-                                    dst_mtype, 0, -1, raw);
+
+  static Symbol* signature_polymorphic_intrinsic_name(vmIntrinsics::ID iid);
+  static int signature_polymorphic_intrinsic_ref_kind(vmIntrinsics::ID iid);
+
+  static vmIntrinsics::ID signature_polymorphic_name_id(klassOop klass, Symbol* name);
+  static vmIntrinsics::ID signature_polymorphic_name_id(Symbol* name);
+  static bool is_signature_polymorphic_name(Symbol* name) {
+    return signature_polymorphic_name_id(name) != vmIntrinsics::_none;
+  }
+  static bool is_method_handle_invoke_name(klassOop klass, Symbol* name);
+  static bool is_signature_polymorphic_name(klassOop klass, Symbol* name) {
+    return signature_polymorphic_name_id(klass, name) != vmIntrinsics::_none;
   }
 
-  // These checkers operate on pairs of argument or return types:
-  static const char* check_argument_type_change(BasicType src_type, klassOop src_klass,
-                                                BasicType dst_type, klassOop dst_klass,
-                                                int argnum, bool raw = false);
+  enum {
+    // format of query to getConstant:
+    GC_COUNT_GWT = 4,
+    GC_LAMBDA_SUPPORT = 5
+  };
+  static int get_named_constant(int which, Handle name_box, TRAPS);
+
+public:
+  static Symbol* lookup_signature(oop type_str, bool polymorphic, TRAPS);  // use TempNewSymbol
+  static Symbol* lookup_basic_type_signature(Symbol* sig, bool keep_last_arg, TRAPS);  // use TempNewSymbol
+  static Symbol* lookup_basic_type_signature(Symbol* sig, TRAPS) {
+    return lookup_basic_type_signature(sig, false, THREAD);
+  }
+  static bool is_basic_type_signature(Symbol* sig);
+
+  static Symbol* lookup_method_type(Symbol* msig, Handle mtype, TRAPS);
+
+  static void print_as_method_type_on(outputStream* st, Symbol* sig) {
+    print_as_basic_type_signature_on(st, sig, true, true);
+  }
+  static void print_as_basic_type_signature_on(outputStream* st, Symbol* sig, bool keep_arrays = false, bool keep_basic_names = false);
 
-  static const char* check_argument_type_change(oop src_type, oop dst_type,
-                                                int argnum, bool raw = false) {
-    klassOop src_klass = NULL, dst_klass = NULL;
-    BasicType src_bt = java_lang_Class::as_BasicType(src_type, &src_klass);
-    BasicType dst_bt = java_lang_Class::as_BasicType(dst_type, &dst_klass);
-    return check_argument_type_change(src_bt, src_klass,
-                                      dst_bt, dst_klass, argnum, raw);
+  // decoding CONSTANT_MethodHandle constants
+  enum { JVM_REF_MIN = JVM_REF_getField, JVM_REF_MAX = JVM_REF_invokeInterface };
+  static bool ref_kind_is_valid(int ref_kind) {
+    return (ref_kind >= JVM_REF_MIN && ref_kind <= JVM_REF_MAX);
+  }
+  static bool ref_kind_is_field(int ref_kind) {
+    assert(ref_kind_is_valid(ref_kind), "");
+    return (ref_kind <= JVM_REF_putStatic);
+  }
+  static bool ref_kind_is_getter(int ref_kind) {
+    assert(ref_kind_is_valid(ref_kind), "");
+    return (ref_kind <= JVM_REF_getStatic);
   }
-
-  static const char* check_return_type_change(oop src_type, oop dst_type, bool raw = false) {
-    return check_argument_type_change(src_type, dst_type, -1, raw);
+  static bool ref_kind_is_setter(int ref_kind) {
+    return ref_kind_is_field(ref_kind) && !ref_kind_is_getter(ref_kind);
+  }
+  static bool ref_kind_is_method(int ref_kind) {
+    return !ref_kind_is_field(ref_kind) && (ref_kind != JVM_REF_newInvokeSpecial);
   }
-
-  static const char* check_return_type_change(BasicType src_type, klassOop src_klass,
-                                              BasicType dst_type, klassOop dst_klass) {
-    return check_argument_type_change(src_type, src_klass, dst_type, dst_klass, -1);
+  static bool ref_kind_has_receiver(int ref_kind) {
+    assert(ref_kind_is_valid(ref_kind), "");
+    return (ref_kind & 1) != 0;
+  }
+  static bool ref_kind_is_static(int ref_kind) {
+    return !ref_kind_has_receiver(ref_kind) && (ref_kind != JVM_REF_newInvokeSpecial);
+  }
+  static bool ref_kind_does_dispatch(int ref_kind) {
+    return (ref_kind == JVM_REF_invokeVirtual ||
+            ref_kind == JVM_REF_invokeInterface);
   }
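
// Standalone sketch of the parity trick behind ref_kind_has_receiver above:
// in the JVMS MethodHandle-resolution numbering (getField=1 .. invokeInterface=9),
// every odd reference kind takes a receiver.  The enum mirrors that table.
#include <cassert>
enum { REF_getField = 1, REF_getStatic = 2, REF_putField = 3, REF_putStatic = 4,
       REF_invokeVirtual = 5, REF_invokeStatic = 6, REF_invokeSpecial = 7,
       REF_newInvokeSpecial = 8, REF_invokeInterface = 9 };
static bool has_receiver(int ref_kind) { return (ref_kind & 1) != 0; }
int main() {
  assert(has_receiver(REF_invokeVirtual));   // 5 is odd: receiver on stack
  assert(!has_receiver(REF_invokeStatic));   // 6 is even: no receiver
  assert(has_receiver(REF_getField) && !has_receiver(REF_getStatic));
  return 0;
}
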
 
-  static const char* check_method_receiver(methodOop m, klassOop passed_recv_type);
-
-  // These verifiers can block, and will throw an error if the checking fails:
-  static void verify_vmslots(Handle mh, TRAPS);
-  static void verify_vmargslot(Handle mh, int argnum, int argslot, TRAPS);
-
-  static void verify_method_type(methodHandle m, Handle mtype,
-                                 bool has_bound_oop,
-                                 KlassHandle bound_oop_type,
-                                 TRAPS);
-
-  static void verify_method_signature(methodHandle m, Handle mtype,
-                                      int first_ptype_pos,
-                                      KlassHandle insert_ptype, TRAPS);
-
-  static void verify_DirectMethodHandle(Handle mh, methodHandle m, TRAPS);
-  static void verify_BoundMethodHandle(Handle mh, Handle target, int argnum,
-                                       bool direct_to_method, TRAPS);
-  static void verify_BoundMethodHandle_with_receiver(Handle mh, methodHandle m, TRAPS);
-  static void verify_AdapterMethodHandle(Handle mh, int argnum, TRAPS);
-
- public:
-
-  // Fill in the fields of a DirectMethodHandle mh.  (MH.type must be pre-filled.)
-  static void init_DirectMethodHandle(Handle mh, methodHandle method, bool do_dispatch, TRAPS);
-
-  // Fill in the fields of a BoundMethodHandle mh.  (MH.type, BMH.argument must be pre-filled.)
-  static void init_BoundMethodHandle(Handle mh, Handle target, int argnum, TRAPS);
-  static void init_BoundMethodHandle_with_receiver(Handle mh,
-                                                   methodHandle original_m,
-                                                   KlassHandle receiver_limit,
-                                                   int decode_flags,
-                                                   TRAPS);
-
-  // Fill in the fields of an AdapterMethodHandle mh.  (MH.type must be pre-filled.)
-  static void init_AdapterMethodHandle(Handle mh, Handle target, int argnum, TRAPS);
-  static void ensure_vmlayout_field(Handle target, TRAPS);
-
-#ifdef ASSERT
-  static bool spot_check_entry_names();
-#endif
-
- private:
-  static methodHandle dispatch_decoded_method(methodHandle m,
-                                              KlassHandle receiver_limit,
-                                              int decode_flags,
-                                              KlassHandle receiver_klass,
-                                              TRAPS);
-
-public:
-  static bool is_float_fixed_reinterpretation_cast(BasicType src, BasicType dst);
-  static bool same_basic_type_for_arguments(BasicType src, BasicType dst,
-                                            bool raw = false,
-                                            bool for_return = false);
-  static bool same_basic_type_for_returns(BasicType src, BasicType dst, bool raw = false) {
-    return same_basic_type_for_arguments(src, dst, raw, true);
-  }
-
-  static Symbol* convert_to_signature(oop type_str, bool polymorphic, TRAPS);
 
 #ifdef TARGET_ARCH_x86
 # include "methodHandles_x86.hpp"
@@ -738,62 +195,10 @@
 #ifdef TARGET_ARCH_ppc
 # include "methodHandles_ppc.hpp"
 #endif
-};
 
 
-// Access methods for the "entry" field of a java.lang.invoke.MethodHandle.
-// The field is primarily a jump target for compiled calls.
-// However, we squirrel away some nice pointers for other uses,
-// just before the jump target.
-// Aspects of a method handle entry:
-//  - from_compiled_entry - stub used when compiled code calls the MH
-//  - from_interpreted_entry - stub used when the interpreter calls the MH
-//  - type_checking_entry - stub for runtime casting between MHForm siblings (NYI)
-class MethodHandleEntry {
- public:
-  class Data {
-    friend class MethodHandleEntry;
-    size_t              _total_size; // size including Data and code stub
-    MethodHandleEntry*  _type_checking_entry;
-    address             _from_interpreted_entry;
-    MethodHandleEntry* method_entry() { return (MethodHandleEntry*)(this + 1); }
-  };
-
-  Data*     data()                              { return (Data*)this - 1; }
-
-  address   start_address()                     { return (address) data(); }
-  address   end_address()                       { return start_address() + data()->_total_size; }
-
-  address   from_compiled_entry()               { return (address) this; }
-
-  address   from_interpreted_entry()            { return data()->_from_interpreted_entry; }
-  void  set_from_interpreted_entry(address e)   { data()->_from_interpreted_entry = e; }
-
-  MethodHandleEntry* type_checking_entry()           { return data()->_type_checking_entry; }
-  void set_type_checking_entry(MethodHandleEntry* e) { data()->_type_checking_entry = e; }
-
-  void set_end_address(address end_addr) {
-    size_t total_size = end_addr - start_address();
-    assert(total_size > 0 && total_size < 0x1000, "reasonable end address");
-    data()->_total_size = total_size;
-  }
-
-  // Compiler support:
-  static int from_interpreted_entry_offset_in_bytes() {
-    return (int)( offset_of(Data, _from_interpreted_entry) - sizeof(Data) );
-  }
-  static int type_checking_entry_offset_in_bytes() {
-    return (int)( offset_of(Data, _from_interpreted_entry) - sizeof(Data) );
-  }
-
-  static address            start_compiled_entry(MacroAssembler* _masm,
-                                                 address interpreted_entry = NULL);
-  static MethodHandleEntry* finish_compiled_entry(MacroAssembler* masm, address start_addr);
 };
 
-address MethodHandles::from_compiled_entry(EntryKind ek) { return entry(ek)->from_compiled_entry(); }
-address MethodHandles::from_interpreted_entry(EntryKind ek) { return entry(ek)->from_interpreted_entry(); }
-
 
 //------------------------------------------------------------------------------
 // MethodHandlesAdapterGenerator
--- a/src/share/vm/prims/nativeLookup.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/nativeLookup.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -121,6 +121,7 @@
   void JNICALL JVM_RegisterUnsafeMethods(JNIEnv *env, jclass unsafecls);
   void JNICALL JVM_RegisterMethodHandleMethods(JNIEnv *env, jclass unsafecls);
   void JNICALL JVM_RegisterPerfMethods(JNIEnv *env, jclass perfclass);
+  void JNICALL JVM_RegisterWhiteBoxMethods(JNIEnv *env, jclass wbclass);
 }
 
 #define CC (char*)  /* cast a literal from (const char*) */
@@ -133,7 +134,8 @@
 
   { CC"Java_sun_misc_Unsafe_registerNatives",                      NULL, FN_PTR(JVM_RegisterUnsafeMethods)       },
   { CC"Java_java_lang_invoke_MethodHandleNatives_registerNatives", NULL, FN_PTR(JVM_RegisterMethodHandleMethods) },
-  { CC"Java_sun_misc_Perf_registerNatives",                        NULL, FN_PTR(JVM_RegisterPerfMethods)         }
+  { CC"Java_sun_misc_Perf_registerNatives",                        NULL, FN_PTR(JVM_RegisterPerfMethods)         },
+  { CC"Java_sun_hotspot_WhiteBox_registerNatives",                 NULL, FN_PTR(JVM_RegisterWhiteBoxMethods)     },
 };
 
 static address lookup_special_native(char* jni_name) {
@@ -379,7 +381,10 @@
 
 address NativeLookup::lookup(methodHandle method, bool& in_base_library, TRAPS) {
   if (!method->has_native_function()) {
-    address entry = lookup_base(method, in_base_library, CHECK_NULL);
+    address entry =
+        method->intrinsic_id() == vmIntrinsics::_invokeGeneric ?
+            SharedRuntime::native_method_throw_unsupported_operation_exception_entry() :
+            lookup_base(method, in_base_library, CHECK_NULL);
     method->set_native_function(entry,
       methodOopDesc::native_bind_event_is_interesting);
     // -verbose:jni printing
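
// Illustrative standalone analogue (function name hypothetical) of the special
// case above: an _invokeGeneric method must never be bound to a real JNI
// implementation, so its native entry simply throws.  The message matches the
// one formerly raised by the removed MH_invoke_UOE handler.
#include <jni.h>
static void JNICALL throw_unsupported_operation(JNIEnv* env, jobject /*mh*/) {
  jclass uoe = env->FindClass("java/lang/UnsupportedOperationException");
  if (uoe != NULL) {
    env->ThrowNew(uoe, "MethodHandle.invoke cannot be invoked reflectively");
  }
}
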
--- a/src/share/vm/prims/unsafe.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/prims/unsafe.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -178,17 +178,6 @@
     v = *(oop*)index_oop_from_field_offset_long(p, offset);                 \
   }
 
-#define GET_OOP_FIELD_VOLATILE(obj, offset, v) \
-  oop p = JNIHandles::resolve(obj);   \
-  volatile oop v;                     \
-  if (UseCompressedOops) {            \
-    volatile narrowOop n = *(volatile narrowOop*)index_oop_from_field_offset_long(p, offset); \
-    v = oopDesc::decode_heap_oop(n);                               \
-  } else {                            \
-    v = *(volatile oop*)index_oop_from_field_offset_long(p, offset);       \
-  } \
-  OrderAccess::acquire();
-
 
 // Get/SetObject must be special-cased, since it works with handles.
 
@@ -296,28 +285,21 @@
 
 UNSAFE_ENTRY(jobject, Unsafe_GetObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset))
   UnsafeWrapper("Unsafe_GetObjectVolatile");
-  GET_OOP_FIELD_VOLATILE(obj, offset, v)
+  oop p = JNIHandles::resolve(obj);
+  void* addr = index_oop_from_field_offset_long(p, offset);
+  volatile oop v;
+  if (UseCompressedOops) {
+    volatile narrowOop n = *(volatile narrowOop*) addr;
+    v = oopDesc::decode_heap_oop(n);
+  } else {
+    v = *(volatile oop*) addr;
+  }
+  OrderAccess::acquire();
   return JNIHandles::make_local(env, v);
 UNSAFE_END
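
// Portable sketch of the ordering contract above, using std::atomic rather
// than HotSpot's OrderAccess: a volatile get is a load with acquire semantics,
// so no later memory access can be reordered before it.
#include <atomic>
template <typename T>
T load_acquire(const std::atomic<T>& slot) {
  return slot.load(std::memory_order_acquire);
}
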
 
 UNSAFE_ENTRY(void, Unsafe_SetObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h))
   UnsafeWrapper("Unsafe_SetObjectVolatile");
-  {
-    // Catch VolatileCallSite.target stores (via
-    // CallSite.setTargetVolatile) and check call site dependencies.
-    oop p = JNIHandles::resolve(obj);
-    if ((offset == java_lang_invoke_CallSite::target_offset_in_bytes()) && p->is_a(SystemDictionary::CallSite_klass())) {
-      Handle call_site    (THREAD, p);
-      Handle method_handle(THREAD, JNIHandles::resolve(x_h));
-      assert(call_site    ->is_a(SystemDictionary::CallSite_klass()),     "must be");
-      assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "must be");
-      {
-        // Walk all nmethods depending on this call site.
-        MutexLocker mu(Compile_lock, thread);
-        Universe::flush_dependents_on(call_site(), method_handle());
-      }
-    }
-  }
   oop x = JNIHandles::resolve(x_h);
   oop p = JNIHandles::resolve(obj);
   void* addr = index_oop_from_field_offset_long(p, offset);
@@ -596,7 +578,7 @@
     return 0;
   }
   sz = round_to(sz, HeapWordSize);
-  void* x = os::malloc(sz);
+  void* x = os::malloc(sz, mtInternal);
   if (x == NULL) {
     THROW_0(vmSymbols::java_lang_OutOfMemoryError());
   }
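
// Standalone analogue (simplified; the real code uses os::malloc and the
// NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY macros) of the pattern this hunk converts
// to: each C-heap allocation now carries a memory-type tag such as mtInternal
// so Native Memory Tracking can attribute it, and the free passes the same tag.
#include <cstdlib>
enum MemoryType { mtNone, mtInternal, mtClass };   // illustrative subset
static void* tracked_malloc(std::size_t size, MemoryType tag) {
  (void)tag;        // a real tracker would record (size, tag) here
  return std::malloc(size);
}
static void tracked_free(void* p, MemoryType tag) {
  (void)tag;        // ...and decrement the same tag's counter here
  std::free(p);
}
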
@@ -616,7 +598,7 @@
     return 0;
   }
   sz = round_to(sz, HeapWordSize);
-  void* x = (p == NULL) ? os::malloc(sz) : os::realloc(p, sz);
+  void* x = (p == NULL) ? os::malloc(sz, mtInternal) : os::realloc(p, sz, mtInternal);
   if (x == NULL) {
     THROW_0(vmSymbols::java_lang_OutOfMemoryError());
   }
@@ -779,16 +761,33 @@
   return JNIHandles::make_local(env, JNIHandles::resolve_non_null(clazz));
 UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_EnsureClassInitialized(JNIEnv *env, jobject unsafe, jobject clazz))
+UNSAFE_ENTRY(void, Unsafe_EnsureClassInitialized(JNIEnv *env, jobject unsafe, jobject clazz)) {
   UnsafeWrapper("Unsafe_EnsureClassInitialized");
   if (clazz == NULL) {
     THROW(vmSymbols::java_lang_NullPointerException());
   }
   oop mirror = JNIHandles::resolve_non_null(clazz);
-  instanceKlass* k = instanceKlass::cast(java_lang_Class::as_klassOop(mirror));
-  if (k != NULL) {
+
+  klassOop klass = java_lang_Class::as_klassOop(mirror);
+  if (klass != NULL && Klass::cast(klass)->should_be_initialized()) {
+    instanceKlass* k = instanceKlass::cast(klass);
     k->initialize(CHECK);
   }
+}
+UNSAFE_END
+
+UNSAFE_ENTRY(jboolean, Unsafe_ShouldBeInitialized(JNIEnv *env, jobject unsafe, jobject clazz)) {
+  UnsafeWrapper("Unsafe_ShouldBeInitialized");
+  if (clazz == NULL) {
+    THROW_(vmSymbols::java_lang_NullPointerException(), false);
+  }
+  oop mirror = JNIHandles::resolve_non_null(clazz);
+  klassOop klass = java_lang_Class::as_klassOop(mirror);
+  if (klass != NULL && Klass::cast(klass)->should_be_initialized()) {
+    return true;
+  }
+  return false;
+}
 UNSAFE_END
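
// Standalone sketch (types hypothetical) of the check-then-act pair these two
// entry points form: shouldBeInitialized lets the Java side defer setup work,
// and ensureClassInitialized runs initialization only while it is pending.
#include <cstddef>
struct FakeKlass {
  bool initialized;
  bool should_be_initialized() const { return !initialized; }
  void initialize()                  { initialized = true; }
};
static void ensure_initialized(FakeKlass* k) {
  if (k != NULL && k->should_be_initialized()) {
    k->initialize();   // a second call finds nothing pending and is a no-op
  }
}
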
 
 static void getBaseAndScale(int& base, int& scale, jclass acls, TRAPS) {
@@ -877,7 +876,7 @@
         return 0;
     }
 
-    body = NEW_C_HEAP_ARRAY(jbyte, length);
+    body = NEW_C_HEAP_ARRAY(jbyte, length, mtInternal);
 
     if (body == 0) {
         throw_new(env, "OutOfMemoryError");
@@ -893,7 +892,7 @@
         uint len = env->GetStringUTFLength(name);
         int unicode_len = env->GetStringLength(name);
         if (len >= sizeof(buf)) {
-            utfName = NEW_C_HEAP_ARRAY(char, len + 1);
+            utfName = NEW_C_HEAP_ARRAY(char, len + 1, mtInternal);
             if (utfName == NULL) {
                 throw_new(env, "OutOfMemoryError");
                 goto free_body;
@@ -913,10 +912,10 @@
     result = JVM_DefineClass(env, utfName, loader, body, length, pd);
 
     if (utfName && utfName != buf)
-        FREE_C_HEAP_ARRAY(char, utfName);
+        FREE_C_HEAP_ARRAY(char, utfName, mtInternal);
 
  free_body:
-    FREE_C_HEAP_ARRAY(jbyte, body);
+    FREE_C_HEAP_ARRAY(jbyte, body, mtInternal);
     return result;
   }
 }
@@ -1011,7 +1010,7 @@
 
   jint length = typeArrayOop(JNIHandles::resolve_non_null(data))->length();
   jint word_length = (length + sizeof(HeapWord)-1) / sizeof(HeapWord);
-  HeapWord* body = NEW_C_HEAP_ARRAY(HeapWord, word_length);
+  HeapWord* body = NEW_C_HEAP_ARRAY(HeapWord, word_length, mtInternal);
   if (body == NULL) {
     THROW_0(vmSymbols::java_lang_OutOfMemoryError());
   }
@@ -1095,7 +1094,7 @@
 
   // try/finally clause:
   if (temp_alloc != NULL) {
-    FREE_C_HEAP_ARRAY(HeapWord, temp_alloc);
+    FREE_C_HEAP_ARRAY(HeapWord, temp_alloc, mtInternal);
   }
 
   return (jclass) res_jh;
@@ -1584,6 +1583,10 @@
     {CC"defineAnonymousClass", CC"("DAC_Args")"CLS,      FN_PTR(Unsafe_DefineAnonymousClass)},
 };
 
+JNINativeMethod lform_methods[] = {
+    {CC"shouldBeInitialized",CC"("CLS")Z",               FN_PTR(Unsafe_ShouldBeInitialized)},
+};
+
 #undef CC
 #undef FN_PTR
 
@@ -1654,6 +1657,15 @@
         env->ExceptionClear();
       }
     }
+    if (EnableInvokeDynamic) {
+      env->RegisterNatives(unsafecls, lform_methods, sizeof(lform_methods)/sizeof(JNINativeMethod));
+      if (env->ExceptionOccurred()) {
+        if (PrintMiscellaneous && (Verbose || WizardMode)) {
+          tty->print_cr("Warning:  SDK 1.7 LambdaForm support in Unsafe not found.");
+        }
+        env->ExceptionClear();
+      }
+    }
     int status = env->RegisterNatives(unsafecls, methods, sizeof(methods)/sizeof(JNINativeMethod));
     if (env->ExceptionOccurred()) {
       if (PrintMiscellaneous && (Verbose || WizardMode)) {
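
// Hedged sketch of the optional-natives idiom used for lform_methods above
// (helper name hypothetical): attempt the binding, and treat failure as
// "feature absent" rather than a hard error, so older class libraries that
// predate shouldBeInitialized keep working.
#include <jni.h>
static bool register_optional(JNIEnv* env, jclass cls,
                              const JNINativeMethod* m, jint count) {
  jint status = env->RegisterNatives(cls, m, count);
  if (status != JNI_OK || env->ExceptionCheck()) {
    env->ExceptionClear();   // tolerate absence; the caller leaves the feature off
    return false;
  }
  return true;
}
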
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/prims/wbtestmethods/parserTests.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "classfile/symbolTable.hpp"
+
+#include "prims/jni.h"
+#include "prims/whitebox.hpp"
+#include "prims/wbtestmethods/parserTests.hpp"
+#include "runtime/interfaceSupport.hpp"
+
+#include "memory/oopFactory.hpp"
+
+#include "services/diagnosticArgument.hpp"
+#include "services/diagnosticFramework.hpp"
+
+// There is no way of knowing beforehand an upper bound on the length
+// of the string representation of the value of an argument.
+#define VALUE_MAXLEN 256
+
+// DiagnosticFramework test utility methods
+
+/*
+ * The DiagnosticArgumentType class contains an enum that identifies which
+ * type this argument represents (JLONG, BOOLEAN, etc.).
+ * This method returns a char* representation of that enum value.
+ */
+static const char* lookup_diagnosticArgumentEnum(const char* field_name, oop object) {
+  Thread* THREAD = Thread::current();
+  const char* enum_sig = "Lsun/hotspot/parser/DiagnosticCommand$DiagnosticArgumentType;";
+  TempNewSymbol enumSigSymbol = SymbolTable::lookup(enum_sig, (int) strlen(enum_sig), THREAD);
+  int offset = WhiteBox::offset_for_field(field_name, object, enumSigSymbol);
+  oop enumOop = object->obj_field(offset);
+
+  const char* ret = WhiteBox::lookup_jstring("name", enumOop);
+  return ret;
+}
+
+/*
+ * Takes an oop to a DiagnosticArgumentType instance,
+ * reads the fields from it, and fills in a native
+ * DCmdParser with this information.
+ */
+static void fill_in_parser(DCmdParser* parser, oop argument)
+{
+  const char* name = WhiteBox::lookup_jstring("name", argument);
+  const char* desc = WhiteBox::lookup_jstring("desc", argument);
+  const char* default_value = WhiteBox::lookup_jstring("defaultValue", argument);
+  bool mandatory = WhiteBox::lookup_bool("mandatory", argument);
+  const char*  type = lookup_diagnosticArgumentEnum("type", argument);
+
+   if (strcmp(type, "STRING") == 0) {
+     DCmdArgument<char*>* argument = new DCmdArgument<char*>(
+     name, desc,
+     "STRING", mandatory, default_value);
+     parser->add_dcmd_option(argument);
+   } else if (strcmp(type, "NANOTIME") == 0) {
+     DCmdArgument<NanoTimeArgument>* argument = new DCmdArgument<NanoTimeArgument>(
+     name, desc,
+     "NANOTIME", mandatory, default_value);
+     parser->add_dcmd_option(argument);
+   } else if (strcmp(type, "JLONG") == 0) {
+     DCmdArgument<jlong>* argument = new DCmdArgument<jlong>(
+     name, desc,
+     "JLONG", mandatory, default_value);
+     parser->add_dcmd_option(argument);
+   } else if (strcmp(type, "BOOLEAN") == 0) {
+     DCmdArgument<bool>* argument = new DCmdArgument<bool>(
+     name, desc,
+     "BOOLEAN", mandatory, default_value);
+     parser->add_dcmd_option(argument);
+   } else if (strcmp(type, "MEMORYSIZE") == 0) {
+     DCmdArgument<MemorySizeArgument>* argument = new DCmdArgument<MemorySizeArgument>(
+     name, desc,
+     "MEMORY SIZE", mandatory, default_value);
+     parser->add_dcmd_option(argument);
+   } else if (strcmp(type, "STRINGARRAY") == 0) {
+     DCmdArgument<StringArrayArgument*>* argument = new DCmdArgument<StringArrayArgument*>(
+     name, desc,
+     "STRING SET", mandatory);
+     parser->add_dcmd_option(argument);
+   }
+}
+
+/*
+ * Fills in a Java object array with the names of the parsed command line
+ * options alternating with the values parsed for them:
+ * { name, value, name, value, ... }
+ * This can then be checked from Java.
+ */
+WB_ENTRY(jobjectArray, WB_ParseCommandLine(JNIEnv* env, jobject o, jstring j_cmdline, jobjectArray arguments))
+  ResourceMark rm;
+  DCmdParser parser;
+
+  const char* c_cmdline = java_lang_String::as_utf8_string(JNIHandles::resolve(j_cmdline));
+  objArrayOop argumentArray = objArrayOop(JNIHandles::resolve_non_null(arguments));
+
+  int length = argumentArray->length();
+
+  for (int i = 0; i < length; i++) {
+    oop argument_oop = argumentArray->obj_at(i);
+    fill_in_parser(&parser, argument_oop);
+  }
+
+  CmdLine cmdline(c_cmdline, strlen(c_cmdline), true);
+  parser.parse(&cmdline, ',', CHECK_NULL);
+
+  klassOop k = SystemDictionary::Object_klass();
+  objArrayOop returnvalue_array = oopFactory::new_objArray(k, parser.num_arguments() * 2, CHECK_NULL);
+
+  GrowableArray<const char *>* parsedArgNames = parser.argument_name_array();
+
+  for (int i = 0; i < parser.num_arguments(); i++) {
+    oop parsedName = java_lang_String::create_oop_from_str(parsedArgNames->at(i), CHECK_NULL);
+    returnvalue_array->obj_at_put(i*2, parsedName);
+    GenDCmdArgument* arg = parser.lookup_dcmd_option(parsedArgNames->at(i), strlen(parsedArgNames->at(i)));
+    char buf[VALUE_MAXLEN];
+    arg->value_as_str(buf, sizeof(buf));
+    oop parsedValue = java_lang_String::create_oop_from_str(buf, CHECK_NULL);
+    returnvalue_array->obj_at_put(i*2+1, parsedValue);
+  }
+
+  return (jobjectArray) JNIHandles::make_local(returnvalue_array);
+
+WB_END
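
A minimal sketch, not part of this changeset, of how the DCmdParser machinery used above is driven; the option name "interval" and its default are hypothetical, and a TRAPS context is assumed so CHECK can propagate errors:

    // Register one JLONG option, then parse a command line the same way
    // WB_ParseCommandLine does, with ',' as the option delimiter.
    DCmdParser parser;
    DCmdArgument<jlong>* interval = new DCmdArgument<jlong>(
        "interval", "interval in ms", "JLONG", false /* optional */, "0");
    parser.add_dcmd_option(interval);

    const char* line = "interval=100";
    CmdLine cmdline(line, strlen(line), true /* no command name */);
    parser.parse(&cmdline, ',', CHECK);
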
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/prims/wbtestmethods/parserTests.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef SHARE_VM_PRIMS_WBTESTMETHODS_PARSERTESTS_H
+#define SHARE_VM_PRIMS_WBTESTMETHODS_PARSERTESTS_H
+
+#include "prims/jni.h"
+#include "prims/whitebox.hpp"
+
+WB_METHOD_DECLARE WB_ParseCommandLine(JNIEnv* env, jobject o, jstring args, jobjectArray arguments);
+
+#endif //SHARE_VM_PRIMS_WBTESTMETHODS_PARSERTESTS_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/prims/whitebox.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "memory/universe.hpp"
+#include "oops/oop.inline.hpp"
+
+#include "classfile/symbolTable.hpp"
+
+#include "prims/whitebox.hpp"
+#include "prims/wbtestmethods/parserTests.hpp"
+
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/os.hpp"
+#include "utilities/debug.hpp"
+
+#ifndef SERIALGC
+#include "gc_implementation/g1/concurrentMark.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/heapRegionRemSet.hpp"
+#endif // !SERIALGC
+
+bool WhiteBox::_used = false;
+
+WB_ENTRY(jlong, WB_GetObjectAddress(JNIEnv* env, jobject o, jobject obj))
+  return (jlong)(void*)JNIHandles::resolve(obj);
+WB_END
+
+WB_ENTRY(jint, WB_GetHeapOopSize(JNIEnv* env, jobject o))
+  return heapOopSize;
+WB_END
+
+#ifndef SERIALGC
+WB_ENTRY(jboolean, WB_G1IsHumongous(JNIEnv* env, jobject o, jobject obj))
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  oop result = JNIHandles::resolve(obj);
+  const HeapRegion* hr = g1->heap_region_containing(result);
+  return hr->isHumongous();
+WB_END
+
+WB_ENTRY(jlong, WB_G1NumFreeRegions(JNIEnv* env, jobject o))
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  size_t nr = g1->free_regions();
+  return (jlong)nr;
+WB_END
+
+WB_ENTRY(jboolean, WB_G1InConcurrentMark(JNIEnv* env, jobject o))
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  ConcurrentMark* cm = g1->concurrent_mark();
+  return cm->concurrent_marking_in_progress();
+WB_END
+
+WB_ENTRY(jint, WB_G1RegionSize(JNIEnv* env, jobject o))
+  return (jint)HeapRegion::GrainBytes;
+WB_END
+#endif // !SERIALGC
+
+// Some convenience methods for dealing with objects from Java
+int WhiteBox::offset_for_field(const char* field_name, oop object,
+    Symbol* signature_symbol) {
+  assert(field_name != NULL && strlen(field_name) > 0, "Field name not valid");
+  Thread* THREAD = Thread::current();
+
+  // Get the class of our object
+  klassOop arg_klass = object->klass();
+  // Turn it into an instanceKlass
+  instanceKlass* ik = instanceKlass::cast(arg_klass);
+
+  // Create a symbol for the field name to look up in the class
+  TempNewSymbol name_symbol = SymbolTable::lookup(field_name, (int) strlen(field_name),
+      THREAD);
+
+  // To be filled in with the offset of the field we're looking for
+  fieldDescriptor fd;
+
+  klassOop res = ik->find_field(name_symbol, signature_symbol, &fd);
+  if (res == NULL) {
+    tty->print_cr("Invalid layout of %s at %s", ik->external_name(),
+        name_symbol->as_C_string());
+    fatal("Invalid layout of preloaded class");
+  }
+
+  // Fetch the offset of the field we've found
+  int dest_offset = fd.offset();
+
+  return dest_offset;
+}
+
+
+const char* WhiteBox::lookup_jstring(const char* field_name, oop object) {
+  int offset = offset_for_field(field_name, object,
+      vmSymbols::string_signature());
+  oop string = object->obj_field(offset);
+  if (string == NULL) {
+    return NULL;
+  }
+  const char* ret = java_lang_String::as_utf8_string(string);
+  return ret;
+}
+
+bool WhiteBox::lookup_bool(const char* field_name, oop object) {
+  int offset =
+      offset_for_field(field_name, object, vmSymbols::bool_signature());
+  bool ret = (object->bool_field(offset) == JNI_TRUE);
+  return ret;
+}
+
+
+#define CC (char*)
+
+static JNINativeMethod methods[] = {
+  {CC"getObjectAddress",   CC"(Ljava/lang/Object;)J", (void*)&WB_GetObjectAddress  },
+  {CC"getHeapOopSize",     CC"()I",                   (void*)&WB_GetHeapOopSize    },
+  {CC "parseCommandLine",
+      CC "(Ljava/lang/String;[Lsun/hotspot/parser/DiagnosticCommand;)[Ljava/lang/Object;",
+      (void*) &WB_ParseCommandLine
+  },
+#ifndef SERIALGC
+  {CC"g1InConcurrentMark", CC"()Z",                   (void*)&WB_G1InConcurrentMark},
+  {CC"g1IsHumongous",      CC"(Ljava/lang/Object;)Z", (void*)&WB_G1IsHumongous     },
+  {CC"g1NumFreeRegions",   CC"()J",                   (void*)&WB_G1NumFreeRegions  },
+  {CC"g1RegionSize",       CC"()I",                   (void*)&WB_G1RegionSize      },
+#endif // !SERIALGC
+};
+
+#undef CC
+
+JVM_ENTRY(void, JVM_RegisterWhiteBoxMethods(JNIEnv* env, jclass wbclass))
+  {
+    if (WhiteBoxAPI) {
+      // Make sure that wbclass is loaded by the null classloader
+      instanceKlassHandle ikh = instanceKlassHandle(JNIHandles::resolve(wbclass)->klass());
+      Handle loader(ikh->class_loader());
+      if (loader.is_null()) {
+        ThreadToNativeFromVM ttnfv(thread); // can't be in VM when we call JNI
+        jint result = env->RegisterNatives(wbclass, methods, sizeof(methods)/sizeof(methods[0]));
+        if (result == 0) {
+          WhiteBox::set_used();
+        }
+      }
+    }
+  }
+JVM_END
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/prims/whitebox.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_PRIMS_WHITEBOX_HPP
+#define SHARE_VM_PRIMS_WHITEBOX_HPP
+
+#include "prims/jni.h"
+
+#include "memory/allocation.hpp"
+#include "oops/oopsHierarchy.hpp"
+
+// Entry macro to transition from JNI to VM state.
+
+#define WB_ENTRY(result_type, header) JNI_ENTRY(result_type, header)
+#define WB_END JNI_END
+#define WB_METHOD_DECLARE extern "C" jobjectArray JNICALL
+
+class WhiteBox : public AllStatic {
+ private:
+  static bool _used;
+ public:
+  static bool used()     { return _used; }
+  static void set_used() { _used = true; }
+  static int offset_for_field(const char* field_name, oop object,
+    Symbol* signature_symbol);
+  static const char* lookup_jstring(const char* field_name, oop object);
+  static bool lookup_bool(const char* field_name, oop object);
+};
+
+
+
+#endif // SHARE_VM_PRIMS_WHITEBOX_HPP
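
A sketch of the pattern a new entry point would follow with the macros declared above; the method name isWhiteBoxUsed is hypothetical:

    // In whitebox.cpp: the body runs after the JNI-to-VM transition that
    // WB_ENTRY expands to (it is an alias for JNI_ENTRY).
    WB_ENTRY(jboolean, WB_IsWhiteBoxUsed(JNIEnv* env, jobject o))
      return WhiteBox::used();
    WB_END

    // ...plus a matching row in the methods[] table of whitebox.cpp:
    // {CC"isWhiteBoxUsed", CC"()Z", (void*)&WB_IsWhiteBoxUsed},
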
--- a/src/share/vm/runtime/arguments.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -35,6 +35,7 @@
 #include "runtime/globals_extension.hpp"
 #include "runtime/java.hpp"
 #include "services/management.hpp"
+#include "services/memTracker.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/taskqueue.hpp"
 #ifdef TARGET_OS_FAMILY_linux
@@ -368,7 +369,7 @@
 inline void SysClassPath::reset_item_at(int index) {
   assert(index < _scp_nitems && index != _scp_base, "just checking");
   if (_items[index] != NULL) {
-    FREE_C_HEAP_ARRAY(char, _items[index]);
+    FREE_C_HEAP_ARRAY(char, _items[index], mtInternal);
     _items[index] = NULL;
   }
 }
@@ -400,11 +401,11 @@
       expanded_path = add_jars_to_path(expanded_path, path);
       path = end;
     } else {
-      char* dirpath = NEW_C_HEAP_ARRAY(char, tmp_end - path + 1);
+      char* dirpath = NEW_C_HEAP_ARRAY(char, tmp_end - path + 1, mtInternal);
       memcpy(dirpath, path, tmp_end - path);
       dirpath[tmp_end - path] = '\0';
       expanded_path = add_jars_to_path(expanded_path, dirpath);
-      FREE_C_HEAP_ARRAY(char, dirpath);
+      FREE_C_HEAP_ARRAY(char, dirpath, mtInternal);
       path = tmp_end + 1;
     }
   }
@@ -435,7 +436,7 @@
   assert(total_len > 0, "empty sysclasspath not allowed");
 
   // Copy the _items to a single string.
-  char* cp = NEW_C_HEAP_ARRAY(char, total_len);
+  char* cp = NEW_C_HEAP_ARRAY(char, total_len, mtInternal);
   char* cp_tmp = cp;
   for (i = 0; i < _scp_nitems; ++i) {
     if (_items[i] != NULL) {
@@ -456,7 +457,7 @@
   assert(str != NULL, "just checking");
   if (path == NULL) {
     size_t len = strlen(str) + 1;
-    cp = NEW_C_HEAP_ARRAY(char, len);
+    cp = NEW_C_HEAP_ARRAY(char, len, mtInternal);
     memcpy(cp, str, len);                       // copy the trailing null
   } else {
     const char separator = *os::path_separator();
@@ -465,15 +466,15 @@
     size_t len = old_len + str_len + 2;
 
     if (prepend) {
-      cp = NEW_C_HEAP_ARRAY(char, len);
+      cp = NEW_C_HEAP_ARRAY(char, len, mtInternal);
       char* cp_tmp = cp;
       memcpy(cp_tmp, str, str_len);
       cp_tmp += str_len;
       *cp_tmp = separator;
       memcpy(++cp_tmp, path, old_len + 1);      // copy the trailing null
-      FREE_C_HEAP_ARRAY(char, path);
+      FREE_C_HEAP_ARRAY(char, path, mtInternal);
     } else {
-      cp = REALLOC_C_HEAP_ARRAY(char, path, len);
+      cp = REALLOC_C_HEAP_ARRAY(char, path, len, mtInternal);
       char* cp_tmp = cp + old_len;
       *cp_tmp = separator;
       memcpy(++cp_tmp, str, str_len + 1);       // copy the trailing null
@@ -495,7 +496,7 @@
 
   /* Scan the directory for jars/zips, appending them to path. */
   struct dirent *entry;
-  char *dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(directory));
+  char *dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(directory), mtInternal);
   while ((entry = os::readdir(dir, (dirent *) dbuf)) != NULL) {
     const char* name = entry->d_name;
     const char* ext = name + strlen(name) - 4;
@@ -503,13 +504,13 @@
       (os::file_name_strcmp(ext, ".jar") == 0 ||
        os::file_name_strcmp(ext, ".zip") == 0);
     if (isJarOrZip) {
-      char* jarpath = NEW_C_HEAP_ARRAY(char, directory_len + 2 + strlen(name));
+      char* jarpath = NEW_C_HEAP_ARRAY(char, directory_len + 2 + strlen(name), mtInternal);
       sprintf(jarpath, "%s%s%s", directory, dir_sep, name);
       path = add_to_path(path, jarpath, false);
-      FREE_C_HEAP_ARRAY(char, jarpath);
+      FREE_C_HEAP_ARRAY(char, jarpath, mtInternal);
     }
   }
-  FREE_C_HEAP_ARRAY(char, dbuf);
+  FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
   os::closedir(dir);
   return path;
 }
@@ -631,7 +632,7 @@
 static bool set_string_flag(char* name, const char* value, FlagValueOrigin origin) {
   if (!CommandLineFlags::ccstrAtPut(name, &value, origin))  return false;
   // Contract:  CommandLineFlags always returns a pointer that needs freeing.
-  FREE_C_HEAP_ARRAY(char, value);
+  FREE_C_HEAP_ARRAY(char, value, mtInternal);
   return true;
 }
 
@@ -647,7 +648,7 @@
   } else if (new_len == 0) {
     value = old_value;
   } else {
-    char* buf = NEW_C_HEAP_ARRAY(char, old_len + 1 + new_len + 1);
+    char* buf = NEW_C_HEAP_ARRAY(char, old_len + 1 + new_len + 1, mtInternal);
     // each new setting adds another LINE to the switch:
     sprintf(buf, "%s\n%s", old_value, new_value);
     value = buf;
@@ -655,10 +656,10 @@
   }
   (void) CommandLineFlags::ccstrAtPut(name, &value, origin);
   // CommandLineFlags always returns a pointer that needs freeing.
-  FREE_C_HEAP_ARRAY(char, value);
+  FREE_C_HEAP_ARRAY(char, value, mtInternal);
   if (free_this_too != NULL) {
     // CommandLineFlags made its own copy, so I must delete my own temp. buffer.
-    FREE_C_HEAP_ARRAY(char, free_this_too);
+    FREE_C_HEAP_ARRAY(char, free_this_too, mtInternal);
   }
   return true;
 }
@@ -735,9 +736,9 @@
   // expand the array and add arg to the last element
   (*count)++;
   if (*bldarray == NULL) {
-    *bldarray = NEW_C_HEAP_ARRAY(char*, *count);
+    *bldarray = NEW_C_HEAP_ARRAY(char*, *count, mtInternal);
   } else {
-    *bldarray = REALLOC_C_HEAP_ARRAY(char*, *bldarray, *count);
+    *bldarray = REALLOC_C_HEAP_ARRAY(char*, *bldarray, *count, mtInternal);
   }
   (*bldarray)[index] = strdup(arg);
 }
@@ -917,13 +918,13 @@
   char* value = (char *)ns;
 
   size_t key_len = (eq == NULL) ? strlen(prop) : (eq - prop);
-  key = AllocateHeap(key_len + 1, "add_property");
+  key = AllocateHeap(key_len + 1, mtInternal);
   strncpy(key, prop, key_len);
   key[key_len] = '\0';
 
   if (eq != NULL) {
     size_t value_len = strlen(prop) - key_len - 1;
-    value = AllocateHeap(value_len + 1, "add_property");
+    value = AllocateHeap(value_len + 1, mtInternal);
     strncpy(value, &prop[key_len + 1], value_len + 1);
   }
 
@@ -1915,7 +1916,7 @@
       (ExplicitGCInvokesConcurrent ||
        ExplicitGCInvokesConcurrentAndUnloadsClasses)) {
     jio_fprintf(defaultStream::error_stream(),
-                "error: +ExplictGCInvokesConcurrent[AndUnloadsClasses] conflicts"
+                "error: +ExplicitGCInvokesConcurrent[AndUnloadsClasses] conflicts"
                 " with -UseAsyncConcMarkSweepGC");
     status = false;
   }
@@ -2058,12 +2059,25 @@
     const char* altclasses_jar = "alt-rt.jar";
     size_t altclasses_path_len = strlen(get_meta_index_dir()) + 1 +
                                  strlen(altclasses_jar);
-    char* altclasses_path = NEW_C_HEAP_ARRAY(char, altclasses_path_len);
+    char* altclasses_path = NEW_C_HEAP_ARRAY(char, altclasses_path_len, mtInternal);
     strcpy(altclasses_path, get_meta_index_dir());
     strcat(altclasses_path, altclasses_jar);
     scp.add_suffix_to_prefix(altclasses_path);
     scp_assembly_required = true;
-    FREE_C_HEAP_ARRAY(char, altclasses_path);
+    FREE_C_HEAP_ARRAY(char, altclasses_path, mtInternal);
+  }
+
+  if (WhiteBoxAPI) {
+    // Append wb.jar to bootclasspath if enabled
+    const char* wb_jar = "wb.jar";
+    size_t wb_path_len = strlen(get_meta_index_dir()) + 1 +
+                         strlen(wb_jar);
+    char* wb_path = NEW_C_HEAP_ARRAY(char, wb_path_len, mtInternal);
+    strcpy(wb_path, get_meta_index_dir());
+    strcat(wb_path, wb_jar);
+    scp.add_suffix(wb_path);
+    scp_assembly_required = true;
+    FREE_C_HEAP_ARRAY(char, wb_path, mtInternal);
   }
 
   // Parse _JAVA_OPTIONS environment variable (if present) (mimics classic VM)
@@ -2148,13 +2162,13 @@
       if (tail != NULL) {
         const char* pos = strchr(tail, ':');
         size_t len = (pos == NULL) ? strlen(tail) : pos - tail;
-        char* name = (char*)memcpy(NEW_C_HEAP_ARRAY(char, len + 1), tail, len);
+        char* name = (char*)memcpy(NEW_C_HEAP_ARRAY(char, len + 1, mtInternal), tail, len);
         name[len] = '\0';
 
         char *options = NULL;
         if(pos != NULL) {
           size_t len2 = strlen(pos+1) + 1; // options start after ':'.  Final zero must be copied.
-          options = (char*)memcpy(NEW_C_HEAP_ARRAY(char, len2), pos+1, len2);
+          options = (char*)memcpy(NEW_C_HEAP_ARRAY(char, len2, mtInternal), pos+1, len2);
         }
 #ifdef JVMTI_KERNEL
         if ((strcmp(name, "hprof") == 0) || (strcmp(name, "jdwp") == 0)) {
@@ -2169,12 +2183,12 @@
       if(tail != NULL) {
         const char* pos = strchr(tail, '=');
         size_t len = (pos == NULL) ? strlen(tail) : pos - tail;
-        char* name = strncpy(NEW_C_HEAP_ARRAY(char, len + 1), tail, len);
+        char* name = strncpy(NEW_C_HEAP_ARRAY(char, len + 1, mtInternal), tail, len);
         name[len] = '\0';
 
         char *options = NULL;
         if(pos != NULL) {
-          options = strcpy(NEW_C_HEAP_ARRAY(char, strlen(pos + 1) + 1), pos + 1);
+          options = strcpy(NEW_C_HEAP_ARRAY(char, strlen(pos + 1) + 1, mtInternal), pos + 1);
         }
 #ifdef JVMTI_KERNEL
         if ((strcmp(name, "hprof") == 0) || (strcmp(name, "jdwp") == 0)) {
@@ -2187,7 +2201,7 @@
     // -javaagent
     } else if (match_option(option, "-javaagent:", &tail)) {
       if(tail != NULL) {
-        char *options = strcpy(NEW_C_HEAP_ARRAY(char, strlen(tail) + 1), tail);
+        char *options = strcpy(NEW_C_HEAP_ARRAY(char, strlen(tail) + 1, mtInternal), tail);
         add_init_agent("instrument", options, false);
       }
     // -Xnoclassgc
@@ -2695,6 +2709,17 @@
         return JNI_EINVAL;
       }
       FLAG_SET_CMDLINE(uintx, ConcGCThreads, conc_threads);
+    } else if (match_option(option, "-XX:MaxDirectMemorySize=", &tail)) {
+      julong max_direct_memory_size = 0;
+      ArgsRange errcode = parse_memory_size(tail, &max_direct_memory_size, 0);
+      if (errcode != arg_in_range) {
+        jio_fprintf(defaultStream::error_stream(),
+                    "Invalid maximum direct memory size: %s\n",
+                    option->optionString);
+        describe_range_error(errcode);
+        return JNI_EINVAL;
+      }
+      FLAG_SET_CMDLINE(uintx, MaxDirectMemorySize, max_direct_memory_size);
     } else if (match_option(option, "-XX:", &tail)) { // -XX:xxxx
       // Skip -XX:Flags= since that case has already been handled
       if (strncmp(tail, "Flags=", strlen("Flags=")) != 0) {
@@ -2945,7 +2970,7 @@
   char *end = strrchr(jvm_path, *os::file_separator());
   if (end != NULL) *end = '\0';
   char *shared_archive_path = NEW_C_HEAP_ARRAY(char, strlen(jvm_path) +
-                                        strlen(os::file_separator()) + 20);
+      strlen(os::file_separator()) + 20, mtInternal);
   if (shared_archive_path == NULL) return JNI_ENOMEM;
   strcpy(shared_archive_path, jvm_path);
   strcat(shared_archive_path, os::file_separator());
@@ -2986,6 +3011,10 @@
       CommandLineFlags::printFlags(tty, false);
       vm_exit(0);
     }
+    if (match_option(option, "-XX:NativeMemoryTracking", &tail)) {
+      MemTracker::init_tracking_options(tail);
+    }
+
 
 #ifndef PRODUCT
     if (match_option(option, "-XX:+PrintFlagsWithComments", &tail)) {
@@ -3058,15 +3087,6 @@
   }
 #endif // PRODUCT
 
-  // Transitional
-  if (EnableMethodHandles || AnonymousClasses) {
-    if (!EnableInvokeDynamic && !FLAG_IS_DEFAULT(EnableInvokeDynamic)) {
-      warning("EnableMethodHandles and AnonymousClasses are obsolete.  Keeping EnableInvokeDynamic disabled.");
-    } else {
-      EnableInvokeDynamic = true;
-    }
-  }
-
   // JSR 292 is not supported before 1.7
   if (!JDK_Version::is_gte_jdk17x_version()) {
     if (EnableInvokeDynamic) {
@@ -3095,6 +3115,14 @@
     PrintGC = true;
   }
 
+  if (!JDK_Version::is_gte_jdk18x_version()) {
+    // To avoid changing the log format for JDK 7 update releases, this
+    // flag is only true by default in JDK 8 and above.
+    if (FLAG_IS_DEFAULT(PrintGCCause)) {
+      FLAG_SET_DEFAULT(PrintGCCause, false);
+    }
+  }
+
   // Set object alignment values.
   set_object_alignment();
 
@@ -3326,7 +3354,7 @@
     }
   }
   // Add one for null terminator.
-  char *props = AllocateHeap(length + 1, "get_kernel_properties");
+  char *props = AllocateHeap(length + 1, mtInternal);
   if (length != 0) {
     int pos = 0;
     for (prop = _system_properties; prop != NULL; prop = prop->next()) {
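
The recurring change in this file, as a standalone sketch: each C-heap allocation now carries an NMT memory type, and the matching free names the same type:

    // Allocate, fill, and free a tagged C-heap buffer; mtInternal lets
    // Native Memory Tracking attribute the bytes to a VM subsystem.
    char* buf = NEW_C_HEAP_ARRAY(char, 64, mtInternal);
    jio_snprintf(buf, 64, "%s", "example");
    FREE_C_HEAP_ARRAY(char, buf, mtInternal);
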
--- a/src/share/vm/runtime/arguments.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/arguments.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -44,7 +44,7 @@
 
 // Element describing System and User (-Dkey=value flags) defined property.
 
-class SystemProperty: public CHeapObj {
+class SystemProperty: public CHeapObj<mtInternal> {
  private:
   char*           _key;
   char*           _value;
@@ -63,7 +63,7 @@
       if (_value != NULL) {
         FreeHeap(_value);
       }
-      _value = AllocateHeap(strlen(value)+1);
+      _value = AllocateHeap(strlen(value)+1, mtInternal);
       if (_value != NULL) {
         strcpy(_value, value);
       }
@@ -80,7 +80,7 @@
       if (_value != NULL) {
         len += strlen(_value);
       }
-      sp = AllocateHeap(len+2);
+      sp = AllocateHeap(len+2, mtInternal);
       if (sp != NULL) {
         if (_value != NULL) {
           strcpy(sp, _value);
@@ -100,13 +100,13 @@
     if (key == NULL) {
       _key = NULL;
     } else {
-      _key = AllocateHeap(strlen(key)+1);
+      _key = AllocateHeap(strlen(key)+1, mtInternal);
       strcpy(_key, key);
     }
     if (value == NULL) {
       _value = NULL;
     } else {
-      _value = AllocateHeap(strlen(value)+1);
+      _value = AllocateHeap(strlen(value)+1, mtInternal);
       strcpy(_value, value);
     }
     _next = NULL;
@@ -116,7 +116,7 @@
 
 
 // For use by -agentlib, -agentpath and -Xrun
-class AgentLibrary : public CHeapObj {
+class AgentLibrary : public CHeapObj<mtInternal> {
   friend class AgentLibraryList;
  private:
   char*           _name;
@@ -136,12 +136,12 @@
 
   // Constructor
   AgentLibrary(const char* name, const char* options, bool is_absolute_path, void* os_lib) {
-    _name = AllocateHeap(strlen(name)+1);
+    _name = AllocateHeap(strlen(name)+1, mtInternal);
     strcpy(_name, name);
     if (options == NULL) {
       _options = NULL;
     } else {
-      _options = AllocateHeap(strlen(options)+1);
+      _options = AllocateHeap(strlen(options)+1, mtInternal);
       strcpy(_options, options);
     }
     _is_absolute_path = is_absolute_path;
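
The CHeapObj template change above recurs through the rest of the changeset; a minimal sketch of a class opting into a tagged C-heap lifetime (the class name is hypothetical):

    // Instances created with plain 'new' land on the C heap and are
    // accounted to mtInternal by Native Memory Tracking.
    class OptionRecord : public CHeapObj<mtInternal> {
     public:
      int _id;
      OptionRecord(int id) : _id(id) {}
    };

    OptionRecord* rec = new OptionRecord(42);
    delete rec;  // ordinary delete releases the tagged allocation
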
--- a/src/share/vm/runtime/biasedLocking.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/biasedLocking.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -687,8 +687,8 @@
   // monitors in a prepass and, if they are biased, preserve their
   // mark words here. This should be a relatively small set of objects
   // especially compared to the number of objects in the heap.
-  _preserved_mark_stack = new (ResourceObj::C_HEAP) GrowableArray<markOop>(10, true);
-  _preserved_oop_stack = new (ResourceObj::C_HEAP) GrowableArray<Handle>(10, true);
+  _preserved_mark_stack = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<markOop>(10, true);
+  _preserved_oop_stack = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<Handle>(10, true);
 
   ResourceMark rm;
   Thread* cur = Thread::current();
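
For contrast, a sketch of the two GrowableArray allocation modes implied by the change above; only the C-heap form takes the new memory-type argument:

    // Default placement: backed by the current thread's resource area.
    GrowableArray<int>* ra_array = new GrowableArray<int>(10);

    // C-heap placement: the placement-new arguments pick the arena and
    // the NMT tag; the trailing 'true' requests C-heap element storage.
    GrowableArray<int>* ch_array =
        new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(10, true);
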
--- a/src/share/vm/runtime/compilationPolicy.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/compilationPolicy.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -37,7 +37,7 @@
 class CompileTask;
 class CompileQueue;
 
-class CompilationPolicy : public CHeapObj {
+class CompilationPolicy : public CHeapObj<mtCompiler> {
   static CompilationPolicy* _policy;
   // Accumulated time
   static elapsedTimer       _accumulated_time;
--- a/src/share/vm/runtime/deoptimization.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/deoptimization.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -101,7 +101,7 @@
   _number_of_frames          = number_of_frames;
   _frame_sizes               = frame_sizes;
   _frame_pcs                 = frame_pcs;
-  _register_block            = NEW_C_HEAP_ARRAY(intptr_t, RegisterMap::reg_count * 2);
+  _register_block            = NEW_C_HEAP_ARRAY(intptr_t, RegisterMap::reg_count * 2, mtCompiler);
   _return_type               = return_type;
   _initial_info              = 0;
   // PD (x86 only)
@@ -114,9 +114,9 @@
 
 
 Deoptimization::UnrollBlock::~UnrollBlock() {
-  FREE_C_HEAP_ARRAY(intptr_t, _frame_sizes);
-  FREE_C_HEAP_ARRAY(intptr_t, _frame_pcs);
-  FREE_C_HEAP_ARRAY(intptr_t, _register_block);
+  FREE_C_HEAP_ARRAY(intptr_t, _frame_sizes, mtCompiler);
+  FREE_C_HEAP_ARRAY(intptr_t, _frame_pcs, mtCompiler);
+  FREE_C_HEAP_ARRAY(intptr_t, _register_block, mtCompiler);
 }
 
 
@@ -358,9 +358,9 @@
 
   // Compute the vframes' sizes.  Note that frame_sizes[] entries are ordered from outermost to innermost
   // virtual activation, which is the reverse of the elements in the vframes array.
-  intptr_t* frame_sizes = NEW_C_HEAP_ARRAY(intptr_t, number_of_frames);
+  intptr_t* frame_sizes = NEW_C_HEAP_ARRAY(intptr_t, number_of_frames, mtCompiler);
   // +1 because we always have an interpreter return address for the final slot.
-  address* frame_pcs = NEW_C_HEAP_ARRAY(address, number_of_frames + 1);
+  address* frame_pcs = NEW_C_HEAP_ARRAY(address, number_of_frames + 1, mtCompiler);
   int popframe_extra_args = 0;
   // Create an interpreter return address for the stub to use as its return
   // address so the skeletal frames are perfectly walkable
@@ -388,7 +388,7 @@
   if (deopt_sender.is_interpreted_frame()) {
     methodHandle method = deopt_sender.interpreter_frame_method();
     Bytecode_invoke cur = Bytecode_invoke_check(method, deopt_sender.interpreter_frame_bci());
-    if (cur.is_method_handle_invoke()) {
+    if (cur.is_invokedynamic() || cur.is_invokehandle()) {
       // Method handle invokes may involve fairly arbitrary chains of
       // calls so it's impossible to know how much actual space the
       // caller has for locals.
--- a/src/share/vm/runtime/deoptimization.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/deoptimization.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -129,7 +129,7 @@
 
   // UnrollBlock is returned by fetch_unroll_info() to the deoptimization handler (blob).
  // This is only a CHeapObj to ease debugging after a deopt failure
-  class UnrollBlock : public CHeapObj {
+  class UnrollBlock : public CHeapObj<mtCompiler> {
    private:
     int       _size_of_deoptimized_frame; // Size, in bytes, of current deoptimized frame
     int       _caller_adjustment;         // Adjustment, in bytes, to caller's SP by initial interpreted frame
--- a/src/share/vm/runtime/dtraceJSDT.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/dtraceJSDT.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -63,7 +63,7 @@
   static jboolean is_supported();
 };
 
-class RegisteredProbes : public CHeapObj {
+class RegisteredProbes : public CHeapObj<mtInternal> {
  private:
   nmethod** _nmethods;      // all the probe methods
   size_t    _count;         // number of probe methods
@@ -72,7 +72,7 @@
  public:
   RegisteredProbes(size_t count) {
     _count = count;
-    _nmethods = NEW_C_HEAP_ARRAY(nmethod*, count);
+    _nmethods = NEW_C_HEAP_ARRAY(nmethod*, count, mtInternal);
   }
 
   ~RegisteredProbes() {
@@ -81,7 +81,7 @@
       _nmethods[i]->make_not_entrant();
       _nmethods[i]->method()->clear_code();
     }
-    FREE_C_HEAP_ARRAY(nmethod*, _nmethods);
+    FREE_C_HEAP_ARRAY(nmethod*, _nmethods, mtInternal);
     _nmethods = NULL;
     _count = 0;
   }
--- a/src/share/vm/runtime/fieldDescriptor.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/fieldDescriptor.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -28,6 +28,7 @@
 #include "memory/resourceArea.hpp"
 #include "memory/universe.inline.hpp"
 #include "oops/instanceKlass.hpp"
+#include "oops/fieldStreams.hpp"
 #include "runtime/fieldDescriptor.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/signature.hpp"
@@ -37,6 +38,24 @@
   return instanceKlass::cast(_cp->pool_holder())->class_loader();
 }
 
+Symbol* fieldDescriptor::generic_signature() const {
+  if (!has_generic_signature()) {
+    return NULL;
+  }
+
+  int idx = 0;
+  instanceKlass* ik = instanceKlass::cast(field_holder());
+  for (AllFieldStream fs(ik); !fs.done(); fs.next()) {
+    if (idx == _index) {
+      return fs.generic_signature();
+    } else {
+      idx ++;
+    }
+  }
+  assert(false, "should never happen");
+  return NULL;
+}
+
 typeArrayOop fieldDescriptor::annotations() const {
   instanceKlass* ik = instanceKlass::cast(field_holder());
   objArrayOop md = ik->fields_annotations();
--- a/src/share/vm/runtime/fieldDescriptor.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/fieldDescriptor.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -67,7 +67,7 @@
   oop loader() const;
   // Offset (in words) of field from start of instanceOop / klassOop
   int offset() const                   { return field()->offset(); }
-  Symbol* generic_signature() const    { return field()->generic_signature(_cp); }
+  Symbol* generic_signature() const;
   int index() const                    { return _index; }
   typeArrayOop annotations() const;
 
@@ -100,6 +100,7 @@
   bool is_field_access_watched() const    { return access_flags().is_field_access_watched(); }
   bool is_field_modification_watched() const
                                           { return access_flags().is_field_modification_watched(); }
+  bool has_generic_signature() const      { return access_flags().field_has_generic_signature(); }
 
   void set_is_field_access_watched(const bool value) {
     _access_flags.set_is_field_access_watched(value);
@@ -115,6 +116,7 @@
   void initialize(klassOop k, int index);
 
   // Print
+  void print() { print_on(tty); }
   void print_on(outputStream* st) const         PRODUCT_RETURN;
   void print_on_for(outputStream* st, oop obj)  PRODUCT_RETURN;
 };
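
The new fieldDescriptor::generic_signature() walks the holder's fields with an AllFieldStream until it reaches this descriptor's index; a sketch of the same iteration pattern, where some_klass is a placeholder:

    // Visit every field of an instanceKlass via the field stream.
    instanceKlass* ik = instanceKlass::cast(some_klass);
    for (AllFieldStream fs(ik); !fs.done(); fs.next()) {
      Symbol* name = fs.name();       // field name
      Symbol* sig  = fs.signature();  // field type signature
    }
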
--- a/src/share/vm/runtime/fprofiler.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/fprofiler.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -70,12 +70,12 @@
 ThreadProfiler::ThreadProfiler() {
   // Space for the ProfilerNodes
   const int area_size = 1 * ProfilerNodeSize * 1024;
-  area_bottom = AllocateHeap(area_size, "fprofiler");
+  area_bottom = AllocateHeap(area_size, mtInternal);
   area_top    = area_bottom;
   area_limit  = area_bottom + area_size;
 
   // ProfilerNode pointer table
-  table = NEW_C_HEAP_ARRAY(ProfilerNode*, table_size);
+  table = NEW_C_HEAP_ARRAY(ProfilerNode*, table_size, mtInternal);
   initialize();
   engaged = false;
 }
@@ -157,7 +157,7 @@
 void PCRecorder::init() {
   MutexLockerEx lm(CodeCache_lock, Mutex::_no_safepoint_check_flag);
   int s = size();
-  counters = NEW_C_HEAP_ARRAY(int, s);
+  counters = NEW_C_HEAP_ARRAY(int, s, mtInternal);
   for (int index = 0; index < s; index++) {
     counters[index] = 0;
   }
@@ -337,11 +337,13 @@
       char c = (char) n->byte_at(i);
       st->print("%c", c);
     }
-    if( Verbose ) {
+    if (Verbose || WizardMode) {
       // Disambiguate overloaded methods
       Symbol* sig = m->signature();
       sig->print_symbol_on(st);
-    }
+    } else if (MethodHandles::is_signature_polymorphic(m->intrinsic_id()))
+      // compare with methodOopDesc::print_short_name
+      MethodHandles::print_as_basic_type_signature_on(st, m->signature(), true);
   }
 
   virtual void print(outputStream* st, int total_ticks) {
@@ -850,7 +852,7 @@
   if (Threads_lock->try_lock()) {
     {  // Threads_lock scope
       maxthreads = Threads::number_of_threads();
-      threadsList = NEW_C_HEAP_ARRAY(JavaThread *, maxthreads);
+      threadsList = NEW_C_HEAP_ARRAY(JavaThread *, maxthreads, mtInternal);
       suspendedthreadcount = 0;
       for (JavaThread* tp = Threads::first(); tp != NULL; tp = tp->next()) {
         if (tp->is_Compiler_thread()) {
@@ -1195,8 +1197,8 @@
 
 void FlatProfiler::allocate_table() {
   { // Bytecode table
-    bytecode_ticks      = NEW_C_HEAP_ARRAY(int, Bytecodes::number_of_codes);
-    bytecode_ticks_stub = NEW_C_HEAP_ARRAY(int, Bytecodes::number_of_codes);
+    bytecode_ticks      = NEW_C_HEAP_ARRAY(int, Bytecodes::number_of_codes, mtInternal);
+    bytecode_ticks_stub = NEW_C_HEAP_ARRAY(int, Bytecodes::number_of_codes, mtInternal);
     for(int index = 0; index < Bytecodes::number_of_codes; index++) {
       bytecode_ticks[index]      = 0;
       bytecode_ticks_stub[index] = 0;
@@ -1205,7 +1207,7 @@
 
   if (ProfilerRecordPC) PCRecorder::init();
 
-  interval_data         = NEW_C_HEAP_ARRAY(IntervalData, interval_print_size);
+  interval_data         = NEW_C_HEAP_ARRAY(IntervalData, interval_print_size, mtInternal);
   FlatProfiler::interval_reset();
 }
 
--- a/src/share/vm/runtime/fprofiler.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/fprofiler.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -121,7 +121,7 @@
 };
 #endif // FPROF_KERNEL
 
-class ThreadProfiler: public CHeapObj {
+class ThreadProfiler: public CHeapObj<mtInternal> {
 public:
   ThreadProfiler()    KERNEL_RETURN;
   ~ThreadProfiler()   KERNEL_RETURN;
--- a/src/share/vm/runtime/frame.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/frame.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -170,11 +170,9 @@
 }
 
 // type testers
-bool frame::is_ricochet_frame() const {
-  RicochetBlob* rcb = SharedRuntime::ricochet_blob();
-  return (_cb == rcb && rcb != NULL && rcb->returns_to_bounce_addr(_pc));
+bool frame::is_ignored_frame() const {
+  return false;  // FIXME: some LambdaForm frames should be ignored
 }
-
 bool frame::is_deoptimized_frame() const {
   assert(_deopt_state != unknown, "not answerable");
   return _deopt_state == is_deoptimized;
@@ -348,17 +346,12 @@
 frame frame::real_sender(RegisterMap* map) const {
   frame result = sender(map);
   while (result.is_runtime_frame() ||
-         result.is_ricochet_frame()) {
+         result.is_ignored_frame()) {
     result = result.sender(map);
   }
   return result;
 }
 
-frame frame::sender_for_ricochet_frame(RegisterMap* map) const {
-  assert(is_ricochet_frame(), "");
-  return MethodHandles::ricochet_frame_sender(*this, map);
-}
-
 // Note: called by profiler - NOT for current thread
 frame frame::profile_find_Java_sender_frame(JavaThread *thread) {
 // If we don't recognize this frame, walk back up the stack until we do
@@ -541,7 +534,6 @@
 const char* frame::print_name() const {
   if (is_native_frame())      return "Native";
   if (is_interpreted_frame()) return "Interpreted";
-  if (is_ricochet_frame())    return "Ricochet";
   if (is_compiled_frame()) {
     if (is_deoptimized_frame()) return "Deoptimized";
     return "Compiled";
@@ -728,8 +720,6 @@
       st->print("v  ~RuntimeStub::%s", ((RuntimeStub *)_cb)->name());
     } else if (_cb->is_deoptimization_stub()) {
       st->print("v  ~DeoptimizationBlob");
-    } else if (_cb->is_ricochet_stub()) {
-      st->print("v  ~RichochetBlob");
     } else if (_cb->is_exception_stub()) {
       st->print("v  ~ExceptionBlob");
     } else if (_cb->is_safepoint_stub()) {
@@ -993,9 +983,6 @@
 
 void frame::oops_code_blob_do(OopClosure* f, CodeBlobClosure* cf, const RegisterMap* reg_map) {
   assert(_cb != NULL, "sanity check");
-  if (_cb == SharedRuntime::ricochet_blob()) {
-    oops_ricochet_do(f, reg_map);
-  }
   if (_cb->oop_maps() != NULL) {
     OopMapSet::oops_do(this, reg_map, f);
 
@@ -1014,11 +1001,6 @@
     cf->do_code_blob(_cb);
 }
 
-void frame::oops_ricochet_do(OopClosure* f, const RegisterMap* map) {
-  assert(is_ricochet_frame(), "");
-  MethodHandles::ricochet_frame_oops_do(*this, f, map);
-}
-
 class CompiledArgumentOopFinder: public SignatureInfo {
  protected:
   OopClosure*     _f;
@@ -1087,7 +1069,7 @@
   // First consult the ADLC on where it puts parameter 0 for this signature.
   VMReg reg = SharedRuntime::name_for_receiver();
   oop r = *caller.oopmapreg_to_location(reg, reg_map);
-  assert( Universe::heap()->is_in_or_null(r), "bad receiver" );
+  assert(Universe::heap()->is_in_or_null(r), err_msg("bad receiver: " INTPTR_FORMAT " (" INTX_FORMAT ")", (intptr_t) r, (intptr_t) r));
   return r;
 }
 
@@ -1407,8 +1389,6 @@
     values.describe(-1, info_address,
                     FormatBuffer<1024>("#%d nmethod " INTPTR_FORMAT " for native method %s", frame_no,
                                        nm, nm->method()->name_and_sig_as_C_string()), 2);
-  } else if (is_ricochet_frame()) {
-      values.describe(-1, info_address, err_msg("#%d ricochet frame", frame_no), 2);
   } else {
     // provide default info if not handled before
     char *info = (char *) "special frame";
--- a/src/share/vm/runtime/frame.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/frame.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -135,7 +135,7 @@
   bool is_interpreted_frame()    const;
   bool is_java_frame()           const;
   bool is_entry_frame()          const;             // Java frame called from C?
-  bool is_ricochet_frame()       const;
+  bool is_ignored_frame()        const;
   bool is_native_frame()         const;
   bool is_runtime_frame()        const;
   bool is_compiled_frame()       const;
@@ -176,7 +176,6 @@
   // Helper methods for better factored code in frame::sender
   frame sender_for_compiled_frame(RegisterMap* map) const;
   frame sender_for_entry_frame(RegisterMap* map) const;
-  frame sender_for_ricochet_frame(RegisterMap* map) const;
   frame sender_for_interpreter_frame(RegisterMap* map) const;
   frame sender_for_native_frame(RegisterMap* map) const;
 
@@ -415,7 +414,6 @@
   // Oops-do's
   void oops_compiled_arguments_do(Symbol* signature, bool has_receiver, const RegisterMap* reg_map, OopClosure* f);
   void oops_interpreted_do(OopClosure* f, const RegisterMap* map, bool query_oop_map_cache = true);
-  void oops_ricochet_do(OopClosure* f, const RegisterMap* map);
 
  private:
   void oops_interpreted_arguments_do(Symbol* signature, bool has_receiver, OopClosure* f);
--- a/src/share/vm/runtime/globals.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/globals.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -43,7 +43,6 @@
 #include "shark/shark_globals.hpp"
 #endif
 
-
 RUNTIME_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, \
               MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, \
               MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_EXPERIMENTAL_FLAG, \
@@ -55,6 +54,10 @@
                  MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, \
                  MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG)
 
+ARCH_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PRODUCT_FLAG, \
+           MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_EXPERIMENTAL_FLAG, \
+           MATERIALIZE_NOTPRODUCT_FLAG)
+
 MATERIALIZE_FLAGS_EXT
 
 
@@ -81,7 +84,8 @@
   }
 }
 
-// Get custom message for this locked flag, if any.
+// Get custom message for this locked flag, or return NULL if
+// none is available.
 void Flag::get_locked_message(char* buf, int buflen) const {
   get_locked_message_ext(buf, buflen);
 }
@@ -147,6 +151,8 @@
     st->print("-XX:%s=" UINTX_FORMAT, name, get_uintx());
   } else if (is_uint64_t()) {
     st->print("-XX:%s=" UINT64_FORMAT, name, get_uint64_t());
+  } else if (is_double()) {
+    st->print("-XX:%s=%f", name, get_double());
   } else if (is_ccstr()) {
     st->print("-XX:%s=", name);
     const char* cp = get_ccstr();
@@ -209,7 +215,6 @@
   #define C1_NOTPRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, doc, "{C1 notproduct}", DEFAULT },
 #endif
 
-
 #define C2_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) "{C2 product}", DEFAULT },
 #define C2_PD_PRODUCT_FLAG_STRUCT(type, name, doc)     { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) "{C2 pd product}", DEFAULT },
 #define C2_DIAGNOSTIC_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) "{C2 diagnostic}", DEFAULT },
@@ -224,6 +229,17 @@
   #define C2_NOTPRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, doc, "{C2 notproduct}", DEFAULT },
 #endif
 
+#define ARCH_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) "{ARCH product}", DEFAULT },
+#define ARCH_DIAGNOSTIC_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) "{ARCH diagnostic}", DEFAULT },
+#define ARCH_EXPERIMENTAL_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) "{ARCH experimental}", DEFAULT },
+#ifdef PRODUCT
+  #define ARCH_DEVELOP_FLAG_STRUCT(type, name, value, doc) /* flag is constant */
+  #define ARCH_NOTPRODUCT_FLAG_STRUCT(type, name, value, doc)
+#else
+  #define ARCH_DEVELOP_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, doc, "{ARCH}", DEFAULT },
+  #define ARCH_NOTPRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, doc, "{ARCH notproduct}", DEFAULT },
+#endif
+
 #define SHARK_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) "{Shark product}", DEFAULT },
 #define SHARK_PD_PRODUCT_FLAG_STRUCT(type, name, doc)     { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) "{Shark pd product}", DEFAULT },
 #define SHARK_DIAGNOSTIC_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) "{Shark diagnostic}", DEFAULT },
@@ -252,6 +268,7 @@
 #ifdef SHARK
  SHARK_FLAGS(SHARK_DEVELOP_FLAG_STRUCT, SHARK_PD_DEVELOP_FLAG_STRUCT, SHARK_PRODUCT_FLAG_STRUCT, SHARK_PD_PRODUCT_FLAG_STRUCT, SHARK_DIAGNOSTIC_FLAG_STRUCT, SHARK_NOTPRODUCT_FLAG_STRUCT)
 #endif
+ ARCH_FLAGS(ARCH_DEVELOP_FLAG_STRUCT, ARCH_PRODUCT_FLAG_STRUCT, ARCH_DIAGNOSTIC_FLAG_STRUCT, ARCH_EXPERIMENTAL_FLAG_STRUCT, ARCH_NOTPRODUCT_FLAG_STRUCT)
  FLAGTABLE_EXT
  {0, NULL, NULL}
 };
@@ -462,13 +479,13 @@
   ccstr old_value = result->get_ccstr();
   char* new_value = NULL;
   if (*value != NULL) {
-    new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1);
+    new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1, mtInternal);
     strcpy(new_value, *value);
   }
   result->set_ccstr(new_value);
   if (result->origin == DEFAULT && old_value != NULL) {
     // Prior value is NOT heap allocated, but was a literal constant.
-    char* old_value_to_free = NEW_C_HEAP_ARRAY(char, strlen(old_value)+1);
+    char* old_value_to_free = NEW_C_HEAP_ARRAY(char, strlen(old_value)+1, mtInternal);
     strcpy(old_value_to_free, old_value);
     old_value = old_value_to_free;
   }
@@ -482,12 +499,12 @@
   Flag* faddr = address_of_flag(flag);
   guarantee(faddr != NULL && faddr->is_ccstr(), "wrong flag type");
   ccstr old_value = faddr->get_ccstr();
-  char* new_value = NEW_C_HEAP_ARRAY(char, strlen(value)+1);
+  char* new_value = NEW_C_HEAP_ARRAY(char, strlen(value)+1, mtInternal);
   strcpy(new_value, value);
   faddr->set_ccstr(new_value);
   if (faddr->origin != DEFAULT && old_value != NULL) {
     // Prior value is heap allocated so free it.
-    FREE_C_HEAP_ARRAY(char, old_value);
+    FREE_C_HEAP_ARRAY(char, old_value, mtInternal);
   }
   faddr->origin = origin;
 }
@@ -508,7 +525,7 @@
   while (flagTable[length].name != NULL) length++;
 
   // Sort
-  Flag** array = NEW_C_HEAP_ARRAY(Flag*, length);
+  Flag** array = NEW_C_HEAP_ARRAY(Flag*, length, mtInternal);
   for (int index = 0; index < length; index++) {
     array[index] = &flagTable[index];
   }
@@ -522,7 +539,7 @@
     }
   }
   out->cr();
-  FREE_C_HEAP_ARRAY(Flag*, array);
+  FREE_C_HEAP_ARRAY(Flag*, array, mtInternal);
 }
 
 #ifndef PRODUCT
@@ -544,7 +561,7 @@
   while (flagTable[length].name != NULL) length++;
 
   // Sort
-  Flag** array = NEW_C_HEAP_ARRAY(Flag*, length);
+  Flag** array = NEW_C_HEAP_ARRAY(Flag*, length, mtInternal);
   for (int index = 0; index < length; index++) {
     array[index] = &flagTable[index];
   }
@@ -557,5 +574,5 @@
       array[i]->print_on(out, withComments);
     }
   }
-  FREE_C_HEAP_ARRAY(Flag*, array);
+  FREE_C_HEAP_ARRAY(Flag*, array, mtInternal);
 }
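
A sketch of the ccstr write path implemented above: ccstrAtPut installs a heap copy of the new value and hands back the previous one, which the caller must free (the flag name is illustrative; the freeing contract is the one noted in arguments.cpp):

    // Set a string flag; the old value comes back through the pointer.
    ccstr value = "/tmp/dump";
    if (CommandLineFlags::ccstrAtPut((char*)"HeapDumpPath", &value, COMMAND_LINE)) {
      FREE_C_HEAP_ARRAY(char, value, mtInternal);
    }
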
--- a/src/share/vm/runtime/globals.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/globals.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -190,7 +190,6 @@
 
 #endif // no compilers
 
-
 // string type aliases used only in this file
 typedef const char* ccstr;
 typedef const char* ccstrlist;   // represents string arguments which accumulate
@@ -531,12 +530,6 @@
   product(intx, UseSSE, 99,                                                 \
           "Highest supported SSE instructions set on x86/x64")              \
                                                                             \
-  product(intx, UseAVX, 99,                                                 \
-          "Highest supported AVX instructions set on x86/x64")              \
-                                                                            \
-  product(intx, UseVIS, 99,                                                 \
-          "Highest supported VIS instructions set on Sparc")                \
-                                                                            \
   product(uintx, LargePageSizeInBytes, 0,                                   \
           "Large page size (0 to let VM choose the page size")              \
                                                                             \
@@ -573,10 +566,6 @@
   product(bool, PrintVMQWaitTime, false,                                    \
           "Prints out the waiting time in VM operation queue")              \
                                                                             \
-  develop(bool, BailoutToInterpreterForThrows, false,                       \
-          "Compiled methods which throws/catches exceptions will be "       \
-          "deopt and intp.")                                                \
-                                                                            \
   develop(bool, NoYieldsInMicrolock, false,                                 \
           "Disable yields in microlock")                                    \
                                                                             \
@@ -619,9 +608,6 @@
           "inline Object::hashCode() native that is known to be part "      \
           "of base library DLL")                                            \
                                                                             \
-  develop(bool, InlineObjectCopy, true,                                     \
-          "inline Object.clone and Arrays.copyOf[Range] intrinsics")        \
-                                                                            \
   develop(bool, InlineNatives, true,                                        \
           "inline natives that are known to be part of base library DLL")   \
                                                                             \
@@ -631,37 +617,12 @@
   develop(bool, InlineClassNatives, true,                                   \
           "inline Class.isInstance, etc")                                   \
                                                                             \
-  develop(bool, InlineAtomicLong, true,                                     \
-          "inline sun.misc.AtomicLong")                                     \
-                                                                            \
   develop(bool, InlineThreadNatives, true,                                  \
           "inline Thread.currentThread, etc")                               \
                                                                             \
-  develop(bool, InlineReflectionGetCallerClass, true,                       \
-          "inline sun.reflect.Reflection.getCallerClass(), known to be part "\
-          "of base library DLL")                                            \
-                                                                            \
   develop(bool, InlineUnsafeOps, true,                                      \
           "inline memory ops (native methods) from sun.misc.Unsafe")        \
                                                                             \
-  develop(bool, ConvertCmpD2CmpF, true,                                     \
-          "Convert cmpD to cmpF when one input is constant in float range") \
-                                                                            \
-  develop(bool, ConvertFloat2IntClipping, true,                             \
-          "Convert float2int clipping idiom to integer clipping")           \
-                                                                            \
-  develop(bool, SpecialStringCompareTo, true,                               \
-          "special version of string compareTo")                            \
-                                                                            \
-  develop(bool, SpecialStringIndexOf, true,                                 \
-          "special version of string indexOf")                              \
-                                                                            \
-  develop(bool, SpecialStringEquals, true,                                  \
-          "special version of string equals")                               \
-                                                                            \
-  develop(bool, SpecialArraysEquals, true,                                  \
-          "special version of Arrays.equals(char[],char[])")                \
-                                                                            \
   product(bool, CriticalJNINatives, true,                                   \
           "check for critical JNI entry points")                            \
                                                                             \
@@ -671,9 +632,6 @@
   product(bool, UseSSE42Intrinsics, false,                                  \
           "SSE4.2 versions of intrinsics")                                  \
                                                                             \
-  product(bool, UseCondCardMark, false,                                     \
-          "Check for already marked card before updating card table")       \
-                                                                            \
   develop(bool, TraceCallFixup, false,                                      \
           "traces all call fixups")                                         \
                                                                             \
@@ -760,9 +718,6 @@
   develop(bool, ForceFloatExceptions, trueInDebug,                          \
           "Force exceptions on FP stack under/overflow")                    \
                                                                             \
-  develop(bool, SoftMatchFailure, trueInProduct,                            \
-          "If the DFA fails to match a node, print a message and bail out") \
-                                                                            \
   develop(bool, VerifyStackAtCalls, false,                                  \
           "Verify that the stack pointer is unchanged after calls")         \
                                                                             \
@@ -833,6 +788,9 @@
   product(bool, PrintGCApplicationStoppedTime, false,                       \
           "Print the time the application has been stopped")                \
                                                                             \
+  diagnostic(bool, VerboseVerification, false,                              \
+             "Display detailed verification details")                       \
+                                                                            \
   notproduct(uintx, ErrorHandlerTest, 0,                                    \
           "If > 0, provokes an error after VM initialization; the value"    \
           "determines which error to provoke.  See test_error_handler()"    \
@@ -899,6 +857,9 @@
   develop(bool, UseFakeTimers, false,                                       \
           "Tells whether the VM should use system time or a fake timer")    \
                                                                             \
+  product(ccstr, NativeMemoryTracking, "off",                               \
+          "Native memory tracking options")                                 \
+                                                                            \
   diagnostic(bool, LogCompilation, false,                                   \
           "Log compilation activity in detail to hotspot.log or LogFile")   \
                                                                             \
@@ -913,15 +874,6 @@
              "1: allow scavenging from the code cache; "                    \
              "2: emit as many constants as the compiler can see")           \
                                                                             \
-  diagnostic(bool, TraceOSRBreakpoint, false,                               \
-             "Trace OSR Breakpoint ")                                       \
-                                                                            \
-  diagnostic(bool, TraceCompileTriggered, false,                            \
-             "Trace compile triggered")                                     \
-                                                                            \
-  diagnostic(bool, TraceTriggers, false,                                    \
-             "Trace triggers")                                              \
-                                                                            \
   product(bool, AlwaysRestoreFPU, false,                                    \
           "Restore the FPU control word after every JNI call (expensive)")  \
                                                                             \
@@ -931,6 +883,9 @@
   diagnostic(bool, PrintAdapterHandlers, false,                             \
           "Print code generated for i2c/c2i adapters")                      \
                                                                             \
+  diagnostic(bool, VerifyAdapterCalls, trueInDebug,                         \
+          "Verify that i2c/c2i adapters are called properly")               \
+                                                                            \
   develop(bool, VerifyAdapterSharing, false,                                \
           "Verify that the code for shared adapters is the equivalent")     \
                                                                             \
@@ -1032,9 +987,6 @@
   develop(bool, UsePrivilegedStack, true,                                   \
           "Enable the security JVM functions")                              \
                                                                             \
-  develop(bool, IEEEPrecision, true,                                        \
-          "Enables IEEE precision (for INTEL only)")                        \
-                                                                            \
   develop(bool, ProtectionDomainVerification, true,                         \
           "Verifies protection domain before resolution in system "         \
           "dictionary")                                                     \
@@ -1104,8 +1056,6 @@
           "(Unsafe,Unstable) "                                              \
           " Controls emission of inline sync fast-path code")               \
                                                                             \
-  product(intx, AlwaysInflate, 0, "(Unstable) Force inflation")             \
-                                                                            \
   product(intx, MonitorBound, 0, "Bound Monitor population")                \
                                                                             \
   product(bool, MonitorInUseLists, false, "Track Monitors for Deflation")   \
@@ -1113,9 +1063,6 @@
   product(intx, Atomics, 0,                                                 \
           "(Unsafe,Unstable) Diagnostic - Controls emission of atomics")    \
                                                                             \
-  product(intx, FenceInstruction, 0,                                        \
-          "(Unsafe,Unstable) Experimental")                                 \
-                                                                            \
   product(intx, SyncFlags, 0, "(Unsafe,Unstable) Experimental Sync flags" ) \
                                                                             \
   product(intx, SyncVerbose, 0, "(Unstable)" )                              \
@@ -1145,10 +1092,6 @@
           "call thr_setconcurrency at thread create time to avoid "         \
           "LWP starvation on MP systems (For Solaris Only)")                \
                                                                             \
-  develop(bool, UpdateHotSpotCompilerFileOnError, true,                     \
-          "Should the system attempt to update the compiler file when "     \
-          "an error occurs?")                                               \
-                                                                            \
   product(bool, ReduceSignalUsage, false,                                   \
           "Reduce the use of OS signals in Java and/or the VM")             \
                                                                             \
@@ -1183,15 +1126,6 @@
           "Use alternate signals instead of SIGUSR1 & SIGUSR2 for VM "      \
           "internal signals (Solaris only)")                                \
                                                                             \
-  product(bool, UseSpinning, false,                                         \
-          "Use spinning in monitor inflation and before entry")             \
-                                                                            \
-  product(bool, PreSpinYield, false,                                        \
-          "Yield before inner spinning loop")                               \
-                                                                            \
-  product(bool, PostSpinYield, true,                                        \
-          "Yield after inner spinning loop")                                \
-                                                                            \
   product(bool, AllowJNIEnvProxy, false,                                    \
           "Allow JNIEnv proxies for jdbx")                                  \
                                                                             \
@@ -1220,39 +1154,9 @@
   product(bool, LazyBootClassLoader, true,                                  \
           "Enable/disable lazy opening of boot class path entries")         \
                                                                             \
-  diagnostic(bool, UseIncDec, true,                                         \
-          "Use INC, DEC instructions on x86")                               \
-                                                                            \
-  product(bool, UseNewLongLShift, false,                                    \
-          "Use optimized bitwise shift left")                               \
-                                                                            \
-  product(bool, UseStoreImmI16, true,                                       \
-          "Use store immediate 16-bits value instruction on x86")           \
-                                                                            \
-  product(bool, UseAddressNop, false,                                       \
-          "Use '0F 1F [addr]' NOP instructions on x86 cpus")                \
-                                                                            \
-  product(bool, UseXmmLoadAndClearUpper, true,                              \
-          "Load low part of XMM register and clear upper part")             \
-                                                                            \
-  product(bool, UseXmmRegToRegMoveAll, false,                               \
-          "Copy all XMM register bits when moving value between registers") \
-                                                                            \
-  product(bool, UseXmmI2D, false,                                           \
-          "Use SSE2 CVTDQ2PD instruction to convert Integer to Double")     \
-                                                                            \
-  product(bool, UseXmmI2F, false,                                           \
-          "Use SSE2 CVTDQ2PS instruction to convert Integer to Float")      \
-                                                                            \
   product(bool, UseXMMForArrayCopy, false,                                  \
           "Use SSE2 MOVQ instruction for Arraycopy")                        \
                                                                             \
-  product(bool, UseUnalignedLoadStores, false,                              \
-          "Use SSE2 MOVDQU instruction for Arraycopy")                      \
-                                                                            \
-  product(bool, UseCBCond, false,                                           \
-          "Use compare and branch instruction on SPARC")                    \
-                                                                            \
   product(intx, FieldsAllocationStyle, 1,                                   \
           "0 - type based with oops first, 1 - with oops last, "            \
           "2 - oops in super and sub classes are together")                 \
@@ -1382,9 +1286,6 @@
   develop(bool, TraceStartupTime, false,                                    \
           "Trace setup time")                                               \
                                                                             \
-  product(ccstr, HPILibPath, NULL,                                          \
-          "Specify alternate path to HPI library")                          \
-                                                                            \
   develop(bool, TraceProtectionDomainVerification, false,                   \
           "Trace protection domain verifcation")                            \
                                                                             \
@@ -1400,10 +1301,6 @@
   product(bool, TraceMonitorInflation, false,                               \
           "Trace monitor inflation in JVM")                                 \
                                                                             \
-  /* assembler */                                                           \
-  product(bool, Use486InstrsOnly, false,                                    \
-          "Use 80486 Compliant instruction subset")                         \
-                                                                            \
   /* gc */                                                                  \
                                                                             \
   product(bool, UseSerialGC, false,                                         \
@@ -1462,9 +1359,6 @@
   develop(uintx, ParallelOldGCSplitInterval, 3,                             \
           "How often to provoke splitting a young gen space")               \
                                                                             \
-  develop(bool, TraceRegionTasksQueuing, false,                             \
-          "Trace the queuing of the region tasks")                          \
-                                                                            \
   product(uintx, ConcGCThreads, 0,                                          \
           "Number of threads concurrent gc will use")                       \
                                                                             \
@@ -1616,10 +1510,6 @@
           "The gain in the feedback loop for on-the-fly PLAB resizing"      \
           " during a scavenge")                                             \
                                                                             \
-  product(uintx, CMSOldPLABReactivityCeiling, 10,                           \
-          "The clamping of the gain in the feedback loop for on-the-fly"    \
-          " PLAB resizing during a scavenge")                               \
-                                                                            \
   product(bool, AlwaysPreTouch, false,                                      \
           "It forces all freshly committed pages to be pre-touched.")       \
                                                                             \
@@ -1627,12 +1517,6 @@
           "The maximum size of young gen chosen by default per GC worker "  \
           "thread available")                                               \
                                                                             \
-  product(bool, GCOverheadReporting, false,                                 \
-         "Enables the GC overhead reporting facility")                      \
-                                                                            \
-  product(intx, GCOverheadReportingPeriodMS, 100,                           \
-          "Reporting period for conc GC overhead reporting, in ms ")        \
-                                                                            \
   product(bool, CMSIncrementalMode, false,                                  \
           "Whether CMS GC should operate in \"incremental\" mode")          \
                                                                             \
@@ -2012,9 +1896,6 @@
   experimental(uintx, WorkStealingSpinToYieldRatio, 10,                     \
           "Ratio of hard spins to calls to yield")                          \
                                                                             \
-  product(uintx, PreserveMarkStackSize, 1024,                               \
-          "Size for stack used in promotion failure handling")              \
-                                                                            \
   develop(uintx, ObjArrayMarkingStride, 512,                                \
           "Number of ObjArray elements to push onto the marking stack"      \
           "before pushing a continuation entry")                            \
@@ -2039,18 +1920,6 @@
   product(bool, TLABStats, true,                                            \
           "Print various TLAB related information")                         \
                                                                             \
-  product(bool, UseBlockZeroing, false,                                     \
-          "Use special cpu instructions for block zeroing")                 \
-                                                                            \
-  product(intx, BlockZeroingLowLimit, 2048,                                 \
-          "Minimum size in bytes when block zeroing will be used")          \
-                                                                            \
-  product(bool, UseBlockCopy, false,                                        \
-          "Use special cpu instructions for block copy")                    \
-                                                                            \
-  product(intx, BlockCopyLowLimit, 2048,                                    \
-          "Minimum size in bytes when block copy will be used")             \
-                                                                            \
   product(bool, PrintRevisitStats, false,                                   \
           "Print revisit (klass and MDO) stack related information")        \
                                                                             \
@@ -2243,9 +2112,6 @@
   product(intx, PrefetchFieldsAhead, -1,                                    \
           "How many fields ahead to prefetch in oop scan (<= 0 means off)") \
                                                                             \
-  develop(bool, UsePrefetchQueue, true,                                     \
-          "Use the prefetch queue during PS promotion")                     \
-                                                                            \
   diagnostic(bool, VerifyBeforeExit, trueInDebug,                           \
           "Verify system before exiting")                                   \
                                                                             \
@@ -2481,27 +2347,9 @@
   develop(bool, CITraceTypeFlow, false,                                     \
           "detailed per-bytecode tracing of ciTypeFlow analysis")           \
                                                                             \
-  develop(intx, CICloneLoopTestLimit, 100,                                  \
-          "size limit for blocks heuristically cloned in ciTypeFlow")       \
-                                                                            \
   develop(intx, OSROnlyBCI, -1,                                             \
           "OSR only at this bci.  Negative values mean exclude that bci")   \
                                                                             \
-  /* temp diagnostics */                                                    \
-                                                                            \
-  diagnostic(bool, TraceRedundantCompiles, false,                           \
-          "Have compile broker print when a request already in the queue is"\
-          " requested again")                                               \
-                                                                            \
-  diagnostic(bool, InitialCompileFast, false,                               \
-          "Initial compile at CompLevel_fast_compile")                      \
-                                                                            \
-  diagnostic(bool, InitialCompileReallyFast, false,                         \
-          "Initial compile at CompLevel_really_fast_compile (no profile)")  \
-                                                                            \
-  diagnostic(bool, FullProfileOnReInterpret, true,                          \
-          "On re-interpret unc-trap compile next at CompLevel_fast_compile")\
-                                                                            \
   /* compiler */                                                            \
                                                                             \
   product(intx, CICompilerCount, CI_COMPILER_COUNT,                         \
@@ -2515,12 +2363,6 @@
           "proper StackOverflow handling; disable only to measure cost "    \
           "of stackbanging)")                                               \
                                                                             \
-  develop(bool, Use24BitFPMode, true,                                       \
-          "Set 24-bit FPU mode on a per-compile basis ")                    \
-                                                                            \
-  develop(bool, Use24BitFP, true,                                           \
-          "use FP instructions that produce 24-bit precise results")        \
-                                                                            \
   develop(bool, UseStrictFP, true,                                          \
           "use strict fp if modifier strictfp is set")                      \
                                                                             \
@@ -2552,9 +2394,6 @@
           "print the break down of clean up tasks performed during"         \
           " safepoint")                                                     \
                                                                             \
-  develop(bool, InlineAccessors, true,                                      \
-          "inline accessor methods (get/set)")                              \
-                                                                            \
   product(bool, Inline, true,                                               \
           "enable inlining")                                                \
                                                                             \
@@ -2567,33 +2406,15 @@
   product(bool, UseTypeProfile, true,                                       \
           "Check interpreter profile for historically monomorphic calls")   \
                                                                             \
-  product(intx, TypeProfileMajorReceiverPercent, 90,                        \
-          "% of major receiver type to all profiled receivers")             \
-                                                                            \
   notproduct(bool, TimeCompiler, false,                                     \
           "time the compiler")                                              \
                                                                             \
-  notproduct(bool, TimeCompiler2, false,                                    \
-          "detailed time the compiler (requires +TimeCompiler)")            \
-                                                                            \
   diagnostic(bool, PrintInlining, false,                                    \
           "prints inlining optimizations")                                  \
                                                                             \
-  diagnostic(bool, PrintIntrinsics, false,                                  \
-          "prints attempted and successful inlining of intrinsics")         \
-                                                                            \
-  product(bool, UseCountLeadingZerosInstruction, false,                     \
-          "Use count leading zeros instruction")                            \
-                                                                            \
   product(bool, UsePopCountInstruction, false,                              \
           "Use population count instruction")                               \
                                                                             \
-  diagnostic(ccstrlist, DisableIntrinsic, "",                               \
-          "do not expand intrinsics whose (internal) names appear here")    \
-                                                                            \
-  develop(bool, StressReflectiveCode, false,                                \
-          "Use inexact types at allocations, etc., to test reflection")     \
-                                                                            \
   develop(bool, EagerInitialization, false,                                 \
           "Eagerly initialize classes if possible")                         \
                                                                             \
@@ -2603,10 +2424,6 @@
   develop(bool, PrintMethodFlushing, false,                                 \
           "print the nmethods being flushed")                               \
                                                                             \
-  notproduct(bool, LogMultipleMutexLocking, false,                          \
-          "log locking and unlocking of mutexes (only if multiple locks "   \
-          "are held)")                                                      \
-                                                                            \
   develop(bool, UseRelocIndex, false,                                       \
          "use an index to speed random access to relocations")              \
                                                                             \
@@ -2616,9 +2433,6 @@
   diagnostic(bool, DebugNonSafepoints, trueInDebug,                         \
          "Generate extra debugging info for non-safepoints in nmethods")    \
                                                                             \
-  diagnostic(bool, DebugInlinedCalls, true,                                 \
-         "If false, restricts profiled locations to the root method only")  \
-                                                                            \
   product(bool, PrintVMOptions, false,                                      \
          "Print flags that appeared on the command line")                   \
                                                                             \
@@ -2695,9 +2509,6 @@
   notproduct(bool, IgnoreLockingAssertions, false,                          \
           "disable locking assertions (for speed)")                         \
                                                                             \
-  notproduct(bool, VerifyLoopOptimizations, false,                          \
-          "verify major loop optimizations")                                \
-                                                                            \
   product(bool, RangeCheckElimination, true,                                \
           "Split loop iterations to eliminate range checks")                \
                                                                             \
@@ -2707,12 +2518,6 @@
   develop(bool, TypeProfileCasts,  true,                                    \
           "treat casts like calls for purposes of type profiling")          \
                                                                             \
-  develop(bool, MonomorphicArrayCheck, true,                                \
-          "Uncommon-trap array store checks that require full type check")  \
-                                                                            \
-  diagnostic(bool, ProfileDynamicTypes, true,                               \
-          "do extra type profiling and use it more aggressively")           \
-                                                                            \
   develop(bool, DelayCompilationDuringStartup, true,                        \
           "Delay invoking the compiler until main application class is "    \
           "loaded")                                                         \
@@ -2727,19 +2532,9 @@
   notproduct(intx, CompileTheWorldSafepointInterval, 100,                   \
           "Force a safepoint every n compiles so sweeper can keep up")      \
                                                                             \
-  develop(bool, TraceIterativeGVN, false,                                   \
-          "Print progress during Iterative Global Value Numbering")         \
-                                                                            \
   develop(bool, FillDelaySlots, true,                                       \
           "Fill delay slots (on SPARC only)")                               \
                                                                             \
-  develop(bool, VerifyIterativeGVN, false,                                  \
-          "Verify Def-Use modifications during sparse Iterative Global "    \
-          "Value Numbering")                                                \
-                                                                            \
-  notproduct(bool, TracePhaseCCP, false,                                    \
-          "Print progress during Conditional Constant Propagation")         \
-                                                                            \
   develop(bool, TimeLivenessAnalysis, false,                                \
           "Time computation of bytecode liveness analysis")                 \
                                                                             \
@@ -2752,22 +2547,9 @@
   notproduct(bool, CollectIndexSetStatistics, false,                        \
           "Collect information about IndexSets")                            \
                                                                             \
-  develop(bool, PrintDominators, false,                                     \
-          "Print out dominator trees for GVN")                              \
-                                                                            \
   develop(bool, UseLoopSafepoints, true,                                    \
           "Generate Safepoint nodes in every loop")                         \
                                                                             \
-  notproduct(bool, TraceCISCSpill, false,                                   \
-          "Trace allocators use of cisc spillable instructions")            \
-                                                                            \
-  notproduct(bool, TraceSpilling, false,                                    \
-          "Trace spilling")                                                 \
-                                                                            \
-  product(bool, SplitIfBlocks, true,                                        \
-          "Clone compares and control flow through merge points to fold "   \
-          "some branches")                                                  \
-                                                                            \
   develop(intx, FastAllocateSizeLimit, 128*K,                               \
           /* Note:  This value is zero mod 1<<13 for a cheap sparc set. */  \
           "Inline allocations larger than this in doublewords must go slow")\
@@ -2824,15 +2606,6 @@
   develop(bool, UseFastSignatureHandlers, true,                             \
           "Use fast signature handlers for native calls")                   \
                                                                             \
-  develop(bool, UseV8InstrsOnly, false,                                     \
-          "Use SPARC-V8 Compliant instruction subset")                      \
-                                                                            \
-  product(bool, UseNiagaraInstrs, false,                                    \
-          "Use Niagara-efficient instruction subset")                       \
-                                                                            \
-  develop(bool, UseCASForSwap, false,                                       \
-          "Do not use swap instructions, but only CAS (in a loop) on SPARC")\
-                                                                            \
   product(bool, UseLoopCounter, true,                                       \
           "Increment invocation counter on backward branch")                \
                                                                             \
@@ -2849,9 +2622,6 @@
   notproduct(bool, TraceOnStackReplacement, false,                          \
           "Trace on stack replacement")                                     \
                                                                             \
-  develop(bool, PoisonOSREntry, true,                                       \
-           "Detect abnormal calls to OSR code")                             \
-                                                                            \
   product_pd(bool, PreferInterpreterNativeStubs,                            \
           "Use always interpreter stubs for native methods invoked via "    \
           "interpreter")                                                    \
@@ -2894,9 +2664,6 @@
   develop(bool, TraceFrequencyInlining, false,                              \
           "Trace frequency based inlining")                                 \
                                                                             \
-  notproduct(bool, TraceTypeProfile, false,                                 \
-          "Trace type profile")                                             \
-                                                                            \
   develop_pd(bool, InlineIntrinsics,                                        \
            "Inline intrinsics that can be statically resolved")             \
                                                                             \
@@ -2984,15 +2751,6 @@
   product(intx,  AllocatePrefetchInstr, 0,                                  \
           "Prefetch instruction to prefetch ahead of allocation pointer")   \
                                                                             \
-  product(intx,  ReadPrefetchInstr, 0,                                      \
-          "Prefetch instruction to prefetch ahead")                         \
-                                                                            \
-  product(uintx,  ArraycopySrcPrefetchDistance, 0,                          \
-          "Distance to prefetch source array in arracopy")                  \
-                                                                            \
-  product(uintx,  ArraycopyDstPrefetchDistance, 0,                          \
-          "Distance to prefetch destination array in arracopy")             \
-                                                                            \
   /* deoptimization */                                                      \
   develop(bool, TraceDeoptimization, false,                                 \
           "Trace deoptimization")                                           \
@@ -3083,9 +2841,6 @@
   product(intx, MinInliningThreshold, 250,                                  \
           "min. invocation count a method needs to have to be inlined")     \
                                                                             \
-  develop(intx, AlignEntryCode, 4,                                          \
-          "aligns entry code to specified value (in bytes)")                \
-                                                                            \
   develop(intx, MethodHistogramCutoff, 100,                                 \
           "cutoff value for method invoc. histogram (+CountCalls)")         \
                                                                             \
@@ -3125,9 +2880,6 @@
           "Minimum sleep() interval (milliseconds) when "                   \
           "ConvertSleepToYield is off (used for SOLARIS)")                  \
                                                                             \
-  product(intx, EventLogLength,  2000,                                      \
-          "maximum nof events in event log")                                \
-                                                                            \
   develop(intx, ProfilerPCTickThreshold,    15,                             \
           "Number of ticks in a PC buckets to be a hotspot")                \
                                                                             \
@@ -3166,9 +2918,6 @@
   product(intx, PerBytecodeTrapLimit,  4,                                   \
           "Limit on traps (of one kind) at a particular BCI")               \
                                                                             \
-  develop(intx, FreqCountInvocations,  1,                                   \
-          "Scaling factor for branch frequencies (deprecated)")             \
-                                                                            \
   develop(intx, InlineFrequencyRatio,    20,                                \
           "Ratio of call site execution to caller method invocation")       \
                                                                             \
@@ -3182,29 +2931,12 @@
   develop(intx, InlineThrowMaxSize,   200,                                  \
           "Force inlining of throwing methods smaller than this")           \
                                                                             \
-  product(intx, AliasLevel,     3,                                          \
-          "0 for no aliasing, 1 for oop/field/static/array split, "         \
-          "2 for class split, 3 for unique instances")                      \
-                                                                            \
-  develop(bool, VerifyAliases, false,                                       \
-          "perform extra checks on the results of alias analysis")          \
-                                                                            \
   develop(intx, ProfilerNodeSize,  1024,                                    \
           "Size in K to allocate for the Profile Nodes of each thread")     \
                                                                             \
-  develop(intx, V8AtomicOperationUnderLockSpinCount,    50,                 \
-          "Number of times to spin wait on a v8 atomic operation lock")     \
-                                                                            \
-  product(intx, ReadSpinIterations,   100,                                  \
-          "Number of read attempts before a yield (spin inner loop)")       \
-                                                                            \
   product_pd(intx, PreInflateSpin,                                          \
           "Number of times to spin wait before inflation")                  \
                                                                             \
-  product(intx, PreBlockSpin,    10,                                        \
-          "Number of times to spin in an inflated lock before going to "    \
-          "an OS lock")                                                     \
-                                                                            \
   /* gc parameters */                                                       \
   product(uintx, InitialHeapSize, 0,                                        \
           "Initial heap size (in bytes); zero means OldSize + NewSize")     \
@@ -3288,9 +3020,6 @@
   diagnostic(intx, VerifyGCLevel,     0,                                    \
           "Generation level at which to start +VerifyBefore/AfterGC")       \
                                                                             \
-  develop(uintx, ExitAfterGCNum,   0,                                       \
-          "If non-zero, exit after this GC.")                               \
-                                                                            \
   product(intx, MaxTenuringThreshold,    15,                                \
           "Maximum value for tenuring threshold")                           \
                                                                             \
@@ -3464,10 +3193,6 @@
           "(non-negative value throws OOM after this many CI accesses "     \
           "in each compile)")                                               \
                                                                             \
-  develop(intx, CIFireOOMAtDelay, -1,                                       \
-          "Wait for this many CI accesses to occur in all compiles before " \
-          "beginning to throw OutOfMemoryErrors in each compile")           \
-                                                                            \
   notproduct(bool, CIObjectFactoryVerify, false,                            \
           "enable potentially expensive verification in ciObjectFactory")   \
                                                                             \
@@ -3661,9 +3386,6 @@
   product(bool, PrintTieredEvents, false,                                   \
           "Print tiered events notifications")                              \
                                                                             \
-  product(bool, StressTieredRuntime, false,                                 \
-          "Alternate client and server compiler on compile requests")       \
-                                                                            \
   product_pd(intx, OnStackReplacePercentage,                                \
           "NON_TIERED number of method invocations/branches (expressed as %"\
           "of CompileThreshold) before (re-)compiling OSR code")            \
@@ -3709,7 +3431,7 @@
                                                                             \
   /* Properties for Java libraries  */                                      \
                                                                             \
-  product(intx, MaxDirectMemorySize, -1,                                    \
+  product(uintx, MaxDirectMemorySize, 0,                                    \
           "Maximum total size of NIO direct-buffer allocations")            \
                                                                             \
   /* temporary developer defined flags  */                                  \
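
On the MaxDirectMemorySize change just above: the "not set" sentinel moves from intx -1 to uintx 0, presumably so the value always parses as an unsigned size. Left at 0, the class libraries choose the direct-buffer limit themselves (conventionally derived from the maximum heap size); an explicit cap is still passed the usual way, e.g. -XX:MaxDirectMemorySize=256m (illustrative value).
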
@@ -3810,7 +3532,7 @@
   product(uintx, SharedReadOnlySize,   10*M,                                \
           "Size of read-only space in permanent generation (in bytes)")     \
                                                                             \
-  product(uintx, SharedMiscDataSize,    NOT_LP64(4*M) LP64_ONLY(5*M),       \
+  product(uintx, SharedMiscDataSize, NOT_LP64(4*M) LP64_ONLY(5*M) NOT_PRODUCT(+1*M),       \
           "Size of the shared data area adjacent to the heap (in bytes)")   \
                                                                             \
   product(uintx, SharedMiscCodeSize,    4*M,                                \
@@ -3833,15 +3555,6 @@
           "support JSR 292 (method handles, invokedynamic, "                \
           "anonymous classes")                                              \
                                                                             \
-  product(bool, AnonymousClasses, false,                                    \
-          "support sun.misc.Unsafe.defineAnonymousClass (deprecated)")      \
-                                                                            \
-  experimental(bool, EnableMethodHandles, false,                            \
-          "support method handles (deprecated)")                            \
-                                                                            \
-  diagnostic(intx, MethodHandlePushLimit, 3,                                \
-          "number of additional stack slots a method handle may push")      \
-                                                                            \
   diagnostic(bool, PrintMethodHandleStubs, false,                           \
           "Print generated stub code for method handles")                   \
                                                                             \
@@ -3851,19 +3564,12 @@
   diagnostic(bool, VerifyMethodHandles, trueInDebug,                        \
           "perform extra checks when constructing method handles")          \
                                                                             \
-  diagnostic(bool, OptimizeMethodHandles, true,                             \
-          "when constructing method handles, try to improve them")          \
-                                                                            \
-  develop(bool, StressMethodHandleWalk, false,                              \
-          "Process all method handles with MethodHandleWalk")               \
+  diagnostic(bool, ShowHiddenFrames, false,                                 \
+          "show method handle implementation frames (usually hidden)")      \
                                                                             \
   experimental(bool, TrustFinalNonStaticFields, false,                      \
           "trust final non-static declarations for constant folding")       \
                                                                             \
-  experimental(bool, AllowInvokeGeneric, false,                             \
-          "accept MethodHandle.invoke and MethodHandle.invokeGeneric "      \
-          "as equivalent methods")                                          \
-                                                                            \
   develop(bool, TraceInvokeDynamic, false,                                  \
           "trace internal invoke dynamic operations")                       \
                                                                             \
@@ -3902,7 +3608,13 @@
   product(bool, UseVMInterruptibleIO, false,                                \
           "(Unstable, Solaris-specific) Thread interrupt before or with "   \
           "EINTR for I/O operations results in OS_INTRPT. The default value"\
-          " of this flag is true for JDK 6 and earlier")
+          " of this flag is true for JDK 6 and earlier")                    \
+                                                                            \
+  diagnostic(bool, WhiteBoxAPI, false,                                      \
+          "Enable internal testing APIs")                                   \
+                                                                            \
+  product(bool, PrintGCCause, true,                                         \
+          "Include GC cause in GC logging")
 
 /*
  *  Macros for factoring of globals
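
Both flags added in the hunk above are visible from the command line. WhiteBoxAPI is diagnostic, so a test run unlocks it and puts the WhiteBox support classes on the boot class path (a sketch; the jar name and main class are hypothetical):

    java -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:wb.jar WBTest

PrintGCCause defaults to true, so -XX:-PrintGCCause suppresses the GC-cause annotation in GC log output.
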
@@ -3957,6 +3669,8 @@
 
 RUNTIME_OS_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG)
 
+ARCH_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
+
 // Extensions
 
 #include "runtime/globals_ext.hpp"
--- a/src/share/vm/runtime/globals_extension.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/globals_extension.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -66,7 +66,6 @@
   #define C1_NOTPRODUCT_FLAG_MEMBER(type, name, value, doc)    FLAG_MEMBER(name),
 #endif
 
-
 #define C2_PRODUCT_FLAG_MEMBER(type, name, value, doc)         FLAG_MEMBER(name),
 #define C2_PD_PRODUCT_FLAG_MEMBER(type, name, doc)             FLAG_MEMBER(name),
 #define C2_DIAGNOSTIC_FLAG_MEMBER(type, name, value, doc)      FLAG_MEMBER(name),
@@ -81,6 +80,17 @@
   #define C2_NOTPRODUCT_FLAG_MEMBER(type, name, value, doc)    FLAG_MEMBER(name),
 #endif
 
+#define ARCH_PRODUCT_FLAG_MEMBER(type, name, value, doc)         FLAG_MEMBER(name),
+#define ARCH_DIAGNOSTIC_FLAG_MEMBER(type, name, value, doc)      FLAG_MEMBER(name),
+#define ARCH_EXPERIMENTAL_FLAG_MEMBER(type, name, value, doc)    FLAG_MEMBER(name),
+#ifdef PRODUCT
+  #define ARCH_DEVELOP_FLAG_MEMBER(type, name, value, doc)       /* flag is constant */
+  #define ARCH_NOTPRODUCT_FLAG_MEMBER(type, name, value, doc)
+#else
+  #define ARCH_DEVELOP_FLAG_MEMBER(type, name, value, doc)       FLAG_MEMBER(name),
+  #define ARCH_NOTPRODUCT_FLAG_MEMBER(type, name, value, doc)    FLAG_MEMBER(name),
+#endif
+
 typedef enum {
  RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_EXPERIMENTAL_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, RUNTIME_PRODUCT_RW_FLAG_MEMBER, RUNTIME_LP64_PRODUCT_FLAG_MEMBER)
  RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER)
@@ -93,6 +103,7 @@
 #ifdef COMPILER2
  C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, C2_DIAGNOSTIC_FLAG_MEMBER, C2_EXPERIMENTAL_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER)
 #endif
+ ARCH_FLAGS(ARCH_DEVELOP_FLAG_MEMBER, ARCH_PRODUCT_FLAG_MEMBER, ARCH_DIAGNOSTIC_FLAG_MEMBER, ARCH_EXPERIMENTAL_FLAG_MEMBER, ARCH_NOTPRODUCT_FLAG_MEMBER)
  COMMANDLINEFLAG_EXT
  NUM_CommandLineFlag
 } CommandLineFlag;
@@ -134,7 +145,6 @@
 #define RUNTIME_LP64_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)    /* flag is constant */
 #endif // _LP64
 
-
 #define C2_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)         FLAG_MEMBER_WITH_TYPE(name,type),
 #define C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, doc)             FLAG_MEMBER_WITH_TYPE(name,type),
 #define C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)      FLAG_MEMBER_WITH_TYPE(name,type),
@@ -149,6 +159,17 @@
   #define C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)    FLAG_MEMBER_WITH_TYPE(name,type),
 #endif
 
+#define ARCH_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)         FLAG_MEMBER_WITH_TYPE(name,type),
+#define ARCH_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)      FLAG_MEMBER_WITH_TYPE(name,type),
+#define ARCH_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)      FLAG_MEMBER_WITH_TYPE(name,type),
+#ifdef PRODUCT
+  #define ARCH_DEVELOP_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)       /* flag is constant */
+  #define ARCH_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)
+#else
+  #define ARCH_DEVELOP_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)       FLAG_MEMBER_WITH_TYPE(name,type),
+  #define ARCH_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)    FLAG_MEMBER_WITH_TYPE(name,type),
+#endif
+
 typedef enum {
  RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE,
                RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE,
@@ -193,6 +214,11 @@
           C2_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE,
           C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
 #endif
+ ARCH_FLAGS(ARCH_DEVELOP_FLAG_MEMBER_WITH_TYPE,
+          ARCH_PRODUCT_FLAG_MEMBER_WITH_TYPE,
+          ARCH_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE,
+          ARCH_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE,
+          ARCH_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
  COMMANDLINEFLAGWITHTYPE_EXT
  NUM_CommandLineFlagWithType
 } CommandLineFlagWithType;
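
The ARCH_* member macros added above expand the same flag table into enum members, one FLAG_MEMBER(name) per row, exactly as the C1/C2 variants do. A toy sketch of that second expansion pass (illustrative names again):

    #include <cstdio>

    #define MINI_FLAGS(f)                                  \
      f(bool, UseMiniGC,   true, "toy collector switch")   \
      f(int,  MiniThreads, 4,    "toy worker count")

    // Expand each row to an enum member, mirroring FLAG_MEMBER(name).
    #define MINI_FLAG_MEMBER(type, name, value, doc) Flag_##name,

    typedef enum {
      MINI_FLAGS(MINI_FLAG_MEMBER)
      NUM_MiniFlags                 // trailing count, like NUM_CommandLineFlag
    } MiniFlag;

    int main() {
      std::printf("toy flag count: %d\n", (int)NUM_MiniFlags);
      return 0;
    }
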
--- a/src/share/vm/runtime/handles.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/handles.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -111,7 +111,7 @@
   _chunk = _area->_chunk;
   _hwm   = _area->_hwm;
   _max   = _area->_max;
-  NOT_PRODUCT(_size_in_bytes = _area->_size_in_bytes;)
+  _size_in_bytes = _area->_size_in_bytes;
   debug_only(_area->_handle_mark_nesting++);
   assert(_area->_handle_mark_nesting > 0, "must stack allocate HandleMarks");
   debug_only(Atomic::inc(&_nof_handlemarks);)
@@ -159,7 +159,7 @@
   area->_chunk = _chunk;
   area->_hwm = _hwm;
   area->_max = _max;
-  NOT_PRODUCT(area->set_size_in_bytes(_size_in_bytes);)
+  area->set_size_in_bytes(_size_in_bytes);
 #ifdef ASSERT
   // clear out first chunk (to detect allocation bugs)
   if (ZapVMHandleArea) {
--- a/src/share/vm/runtime/handles.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/handles.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -238,7 +238,6 @@
 
 //------------------------------------------------------------------------------------------------------------------------
 // Thread local handle area
-
 class HandleArea: public Arena {
   friend class HandleMark;
   friend class NoHandleMark;
@@ -312,7 +311,7 @@
   HandleArea *_area;            // saved handle area
   Chunk *_chunk;                // saved arena chunk
   char *_hwm, *_max;            // saved arena info
-  NOT_PRODUCT(size_t _size_in_bytes;) // size of handle area
+  size_t _size_in_bytes;        // size of handle area
   // Link to previous active HandleMark in thread
   HandleMark* _previous_handle_mark;
 
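For readers following the handles.* hunks: a HandleMark snapshots the thread's handle area on construction and rewinds it on destruction, and the change above simply makes the saved _size_in_bytes field unconditional rather than debug-only. A standalone toy of the save/rewind idea (a flat buffer, not HotSpot's chunked Arena):

    #include <cassert>
    #include <cstddef>

    // Toy arena with a high-water mark.
    struct ToyArena {
      static const std::size_t kCap = 4096;
      char        buf[kCap];
      std::size_t hwm = 0;                    // allocation high-water mark
      void* alloc(std::size_t n) {
        assert(hwm + n <= kCap);
        void* p = buf + hwm;
        hwm += n;
        return p;
      }
    };

    // RAII mark: save position on entry, rewind on exit.
    class ToyMark {
      ToyArena&   _area;
      std::size_t _saved_hwm;                 // saved unconditionally, as in the patch
     public:
      explicit ToyMark(ToyArena& a) : _area(a), _saved_hwm(a.hwm) {}
      ~ToyMark() { _area.hwm = _saved_hwm; }  // pop everything since the mark
    };

    int main() {
      ToyArena arena;
      {
        ToyMark mark(arena);
        arena.alloc(128);                     // temporary handles...
      }                                       // ...released here
      assert(arena.hwm == 0);
      return 0;
    }
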
--- a/src/share/vm/runtime/handles.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/handles.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -85,7 +85,7 @@
   area->_chunk = _chunk;
   area->_hwm = _hwm;
   area->_max = _max;
-  NOT_PRODUCT(area->set_size_in_bytes(_size_in_bytes);)
+  area->set_size_in_bytes(_size_in_bytes);
   debug_only(area->_handle_mark_nesting--);
 }
 
--- a/src/share/vm/runtime/interfaceSupport.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/interfaceSupport.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -436,6 +436,7 @@
 #define VM_LEAF_BASE(result_type, header)                            \
   TRACE_CALL(result_type, header)                                    \
   debug_only(NoHandleMark __hm;)                                     \
+  os::verify_stack_alignment();                                      \
   /* begin of body */
 
 
@@ -445,6 +446,7 @@
   TRACE_CALL(result_type, header)                                    \
   HandleMarkCleaner __hm(thread);                                    \
   Thread* THREAD = thread;                                           \
+  os::verify_stack_alignment();                                      \
   /* begin of body */
 
 
@@ -454,6 +456,7 @@
   TRACE_CALL(result_type, header)                                    \
   debug_only(NoHandleMark __hm;)                                     \
   Thread* THREAD = thread;                                           \
+  os::verify_stack_alignment();                                      \
   /* begin of body */
 
 
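The os::verify_stack_alignment() call added to each VM entry macro above runs a platform-specific stack check on every transition into the VM. A rough standalone sketch of the kind of predicate involved; the 16-byte figure is the x86-64 calling-convention requirement, and the real hook is per-platform (and may be a no-op):

    #include <cstdint>

    // Is the given stack address aligned as the ABI requires?
    static bool is_stack_aligned(const void* sp, std::uintptr_t alignment = 16) {
      return (reinterpret_cast<std::uintptr_t>(sp) & (alignment - 1)) == 0;
    }

    int main() {
      int probe;
      // The address of a local only approximates the stack pointer,
      // so this merely reports rather than asserting.
      return is_stack_aligned(&probe) ? 0 : 1;
    }
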
--- a/src/share/vm/runtime/java.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/java.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -384,7 +384,7 @@
     typedef void (*__exit_proc)(void);
 }
 
-class ExitProc : public CHeapObj {
+class ExitProc : public CHeapObj<mtInternal> {
  private:
   __exit_proc _proc;
   // void (*_proc)(void);
@@ -660,6 +660,7 @@
 }
 
 JDK_Version JDK_Version::_current;
+const char* JDK_Version::_runtime_name;
 
 void JDK_Version::initialize() {
   jdk_version_info info;
--- a/src/share/vm/runtime/java.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/java.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -74,6 +74,7 @@
  private:
 
   static JDK_Version _current;
+  static const char* _runtime_name;
 
   // In this class, we promote the minor version of release to be the
   // major version for releases >= 5 in anticipation of the JDK doing the
@@ -181,6 +182,13 @@
 
   void to_string(char* buffer, size_t buflen) const;
 
+  static const char* runtime_name() {
+    return _runtime_name;
+  }
+  static void set_runtime_name(const char* name) {
+    _runtime_name = name;
+  }
+
   // Convenience methods for queries on the current major/minor version
   static bool is_jdk12x_version() {
     return current().compare_major(2) == 0;
@@ -206,6 +214,10 @@
     return current().compare_major(7) == 0;
   }
 
+  static bool is_jdk18x_version() {
+    return current().compare_major(8) == 0;
+  }
+
   static bool is_gte_jdk13x_version() {
     return current().compare_major(3) >= 0;
   }
@@ -225,6 +237,10 @@
   static bool is_gte_jdk17x_version() {
     return current().compare_major(7) >= 0;
   }
+
+  static bool is_gte_jdk18x_version() {
+    return current().compare_major(8) >= 0;
+  }
 };
 
 #endif // SHARE_VM_RUNTIME_JAVA_HPP
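
A side note on the java.cpp/java.hpp pair above: the .cpp hunk adds "const char* JDK_Version::_runtime_name;" because a static data member declared inside a class still needs exactly one out-of-class definition. A minimal standalone illustration of the same accessor pattern (toy class name):

    #include <cstdio>

    class Version {
      static const char* _runtime_name;            // declaration only
     public:
      static const char* runtime_name()           { return _runtime_name; }
      static void set_runtime_name(const char* n) { _runtime_name = n; }
    };

    const char* Version::_runtime_name = 0;        // the one definition

    int main() {
      Version::set_runtime_name("Toy Runtime Environment");
      std::printf("%s\n", Version::runtime_name());
      return 0;
    }
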
--- a/src/share/vm/runtime/jniHandles.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/jniHandles.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -109,7 +109,7 @@
 
 // JNI handle blocks holding local/global JNI handles
 
-class JNIHandleBlock : public CHeapObj {
+class JNIHandleBlock : public CHeapObj<mtInternal> {
   friend class VMStructs;
   friend class CppInterpreter;
 
--- a/src/share/vm/runtime/monitorChunk.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/monitorChunk.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -29,7 +29,7 @@
 
 MonitorChunk::MonitorChunk(int number_on_monitors) {
   _number_of_monitors = number_on_monitors;
-  _monitors           = NEW_C_HEAP_ARRAY(BasicObjectLock, number_on_monitors);
+  _monitors           = NEW_C_HEAP_ARRAY(BasicObjectLock, number_on_monitors, mtInternal);
   _next               = NULL;
 }
 
--- a/src/share/vm/runtime/monitorChunk.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/monitorChunk.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -30,7 +30,7 @@
 // Data structure for holding monitors for one activation during
 // deoptimization.
 
-class MonitorChunk: public CHeapObj {
+class MonitorChunk: public CHeapObj<mtInternal> {
  private:
   int              _number_of_monitors;
   BasicObjectLock* _monitors;
--- a/src/share/vm/runtime/mutex.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/mutex.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -84,7 +84,7 @@
 // The default length of monitor name is chosen to be 64 to avoid false sharing.
 static const int MONITOR_NAME_LEN = 64;
 
-class Monitor : public CHeapObj {
+class Monitor : public CHeapObj<mtInternal> {
 
  public:
   // A special lock: Is a lock where you are guaranteed not to block while you are
--- a/src/share/vm/runtime/os.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/os.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -45,6 +45,7 @@
 #include "runtime/os.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "services/attachListener.hpp"
+#include "services/memTracker.hpp"
 #include "services/threadService.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/events.hpp"
@@ -433,9 +434,9 @@
 
 // --------------------- heap allocation utilities ---------------------
 
-char *os::strdup(const char *str) {
+char *os::strdup(const char *str, MEMFLAGS flags) {
   size_t size = strlen(str);
-  char *dup_str = (char *)malloc(size + 1);
+  char *dup_str = (char *)malloc(size + 1, flags);
   if (dup_str == NULL) return NULL;
   strcpy(dup_str, str);
   return dup_str;
@@ -559,7 +560,7 @@
 }
 #endif
 
-void* os::malloc(size_t size) {
+void* os::malloc(size_t size, MEMFLAGS memflags, address caller) {
   NOT_PRODUCT(inc_stat_counter(&num_mallocs, 1));
   NOT_PRODUCT(inc_stat_counter(&alloc_bytes, size));
 
@@ -571,6 +572,7 @@
 
   NOT_PRODUCT(if (MallocVerifyInterval > 0) check_heap());
   u_char* ptr = (u_char*)::malloc(size + space_before + space_after);
+
 #ifdef ASSERT
   if (ptr == NULL) return NULL;
   if (MallocCushion) {
@@ -589,18 +591,29 @@
   }
   debug_only(if (paranoid) verify_block(memblock));
   if (PrintMalloc && tty != NULL) tty->print_cr("os::malloc " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, memblock);
+
+  // we do not track MallocCushion memory
+  if (MemTracker::is_on()) {
+    MemTracker::record_malloc((address)memblock, size, memflags, caller == 0 ? CALLER_PC : caller);
+  }
+
   return memblock;
 }
 
 
-void* os::realloc(void *memblock, size_t size) {
+void* os::realloc(void *memblock, size_t size, MEMFLAGS memflags, address caller) {
 #ifndef ASSERT
   NOT_PRODUCT(inc_stat_counter(&num_mallocs, 1));
   NOT_PRODUCT(inc_stat_counter(&alloc_bytes, size));
-  return ::realloc(memblock, size);
+  void* ptr = ::realloc(memblock, size);
+  if (ptr != NULL && MemTracker::is_on()) {
+    MemTracker::record_realloc((address)memblock, (address)ptr, size, memflags,
+     caller == 0 ? CALLER_PC : caller);
+  }
+  return ptr;
 #else
   if (memblock == NULL) {
-    return malloc(size);
+    return malloc(size, memflags, (caller == 0 ? CALLER_PC : caller));
   }
   if ((intptr_t)memblock == (intptr_t)MallocCatchPtr) {
     tty->print_cr("os::realloc caught " PTR_FORMAT, memblock);
@@ -610,7 +623,7 @@
   NOT_PRODUCT(if (MallocVerifyInterval > 0) check_heap());
   if (size == 0) return NULL;
   // always move the block
-  void* ptr = malloc(size);
+  void* ptr = malloc(size, memflags, caller == 0 ? CALLER_PC : caller);
   if (PrintMalloc) tty->print_cr("os::remalloc " SIZE_FORMAT " bytes, " PTR_FORMAT " --> " PTR_FORMAT, size, memblock, ptr);
   // Copy to new memory if malloc didn't fail
   if ( ptr != NULL ) {
@@ -627,7 +640,7 @@
 }
 
 
-void  os::free(void *memblock) {
+void  os::free(void *memblock, MEMFLAGS memflags) {
   NOT_PRODUCT(inc_stat_counter(&num_frees, 1));
 #ifdef ASSERT
   if (memblock == NULL) return;
@@ -660,6 +673,8 @@
     fprintf(stderr, "os::free " PTR_FORMAT "\n", (uintptr_t)memblock);
   }
 #endif
+  MemTracker::record_free((address)memblock, memflags);
+
   ::free((char*)memblock - space_before);
 }
 
@@ -807,7 +822,7 @@
       // the interpreter is generated into a buffer blob
       InterpreterCodelet* i = Interpreter::codelet_containing(addr);
       if (i != NULL) {
-        st->print_cr(INTPTR_FORMAT " is an Interpreter codelet", addr);
+        st->print_cr(INTPTR_FORMAT " is at code_begin+%d in an Interpreter codelet", addr, (int)(addr - i->code_begin()));
         i->print_on(st);
         return;
       }
@@ -818,14 +833,15 @@
       }
       //
       if (AdapterHandlerLibrary::contains(b)) {
-        st->print_cr(INTPTR_FORMAT " is an AdapterHandler", addr);
+        st->print_cr(INTPTR_FORMAT " is at code_begin+%d in an AdapterHandler", addr, (int)(addr - b->code_begin()));
         AdapterHandlerLibrary::print_handler_on(st, b);
       }
       // the stubroutines are generated into a buffer blob
       StubCodeDesc* d = StubCodeDesc::desc_for(addr);
       if (d != NULL) {
+        st->print_cr(INTPTR_FORMAT " is at begin+%d in a stub", addr, (int)(addr - d->begin()));
         d->print_on(st);
-        if (verbose) st->cr();
+        st->cr();
         return;
       }
       if (StubRoutines::contains(addr)) {
@@ -840,26 +856,25 @@
       }
       VtableStub* v = VtableStubs::stub_containing(addr);
       if (v != NULL) {
+        st->print_cr(INTPTR_FORMAT " is at entry_point+%d in a vtable stub", addr, (int)(addr - v->entry_point()));
         v->print_on(st);
+        st->cr();
         return;
       }
     }
-    if (verbose && b->is_nmethod()) {
+    nmethod* nm = b->as_nmethod_or_null();
+    if (nm != NULL) {
       ResourceMark rm;
-      st->print("%#p: Compiled ", addr);
-      ((nmethod*)b)->method()->print_value_on(st);
-      st->print("  = (CodeBlob*)" INTPTR_FORMAT, b);
-      st->cr();
+      st->print(INTPTR_FORMAT " is at entry_point+%d in (nmethod*)" INTPTR_FORMAT,
+                addr, (int)(addr - nm->entry_point()), nm);
+      if (verbose) {
+        st->print(" for ");
+        nm->method()->print_value_on(st);
+      }
+      nm->print_nmethod(verbose);
       return;
     }
-    st->print(INTPTR_FORMAT " ", b);
-    if ( b->is_nmethod()) {
-      if (b->is_zombie()) {
-        st->print_cr("is zombie nmethod");
-      } else if (b->is_not_entrant()) {
-        st->print_cr("is non-entrant nmethod");
-      }
-    }
+    st->print_cr(INTPTR_FORMAT " is at code_begin+%d in ", addr, (int)(addr - b->code_begin()));
     b->print_on(st);
     return;
   }
@@ -1048,7 +1063,7 @@
         ++formatted_path_len;
     }
 
-    char* formatted_path = NEW_C_HEAP_ARRAY(char, formatted_path_len + 1);
+    char* formatted_path = NEW_C_HEAP_ARRAY(char, formatted_path_len + 1, mtInternal);
     if (formatted_path == NULL) {
         return NULL;
     }
@@ -1127,7 +1142,7 @@
     return NULL;
   }
   const char psepchar = *os::path_separator();
-  char* inpath = (char*)NEW_C_HEAP_ARRAY(char, strlen(path) + 1);
+  char* inpath = (char*)NEW_C_HEAP_ARRAY(char, strlen(path) + 1, mtInternal);
   if (inpath == NULL) {
     return NULL;
   }
@@ -1140,7 +1155,7 @@
     p++;
     p = strchr(p, psepchar);
   }
-  char** opath = (char**) NEW_C_HEAP_ARRAY(char*, count);
+  char** opath = (char**) NEW_C_HEAP_ARRAY(char*, count, mtInternal);
   if (opath == NULL) {
     return NULL;
   }
@@ -1153,7 +1168,7 @@
       return NULL;
     }
     // allocate the string and add terminator storage
-    char* s  = (char*)NEW_C_HEAP_ARRAY(char, len + 1);
+    char* s  = (char*)NEW_C_HEAP_ARRAY(char, len + 1, mtInternal);
     if (s == NULL) {
       return NULL;
     }
@@ -1162,7 +1177,7 @@
     opath[i] = s;
     p += len + 1;
   }
-  FREE_C_HEAP_ARRAY(char, inpath);
+  FREE_C_HEAP_ARRAY(char, inpath, mtInternal);
   *n = count;
   return opath;
 }
@@ -1366,3 +1381,97 @@
 
   return (int) i;
 }
+
+bool os::create_stack_guard_pages(char* addr, size_t bytes) {
+  return os::pd_create_stack_guard_pages(addr, bytes);
+}
+
+
+char* os::reserve_memory(size_t bytes, char* addr, size_t alignment_hint) {
+  char* result = pd_reserve_memory(bytes, addr, alignment_hint);
+  if (result != NULL && MemTracker::is_on()) {
+    MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC);
+  }
+
+  return result;
+}
+char* os::attempt_reserve_memory_at(size_t bytes, char* addr) {
+  char* result = pd_attempt_reserve_memory_at(bytes, addr);
+  if (result != NULL && MemTracker::is_on()) {
+    MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC);
+  }
+  return result;
+}
+
+void os::split_reserved_memory(char *base, size_t size,
+                                 size_t split, bool realloc) {
+  pd_split_reserved_memory(base, size, split, realloc);
+}
+
+bool os::commit_memory(char* addr, size_t bytes, bool executable) {
+  bool res = pd_commit_memory(addr, bytes, executable);
+  if (res && MemTracker::is_on()) {
+    MemTracker::record_virtual_memory_commit((address)addr, bytes, CALLER_PC);
+  }
+  return res;
+}
+
+bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
+                              bool executable) {
+  bool res = os::pd_commit_memory(addr, size, alignment_hint, executable);
+  if (res && MemTracker::is_on()) {
+    MemTracker::record_virtual_memory_commit((address)addr, size, CALLER_PC);
+  }
+  return res;
+}
+
+bool os::uncommit_memory(char* addr, size_t bytes) {
+  bool res = pd_uncommit_memory(addr, bytes);
+  if (res) {
+    MemTracker::record_virtual_memory_uncommit((address)addr, bytes);
+  }
+  return res;
+}
+
+bool os::release_memory(char* addr, size_t bytes) {
+  bool res = pd_release_memory(addr, bytes);
+  if (res) {
+    MemTracker::record_virtual_memory_release((address)addr, bytes);
+  }
+  return res;
+}
+
+
+char* os::map_memory(int fd, const char* file_name, size_t file_offset,
+                           char *addr, size_t bytes, bool read_only,
+                           bool allow_exec) {
+  char* result = pd_map_memory(fd, file_name, file_offset, addr, bytes, read_only, allow_exec);
+  if (result != NULL && MemTracker::is_on()) {
+    MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC);
+  }
+  return result;
+}
+
+char* os::remap_memory(int fd, const char* file_name, size_t file_offset,
+                             char *addr, size_t bytes, bool read_only,
+                             bool allow_exec) {
+  return pd_remap_memory(fd, file_name, file_offset, addr, bytes,
+                    read_only, allow_exec);
+}
+
+bool os::unmap_memory(char *addr, size_t bytes) {
+  bool result = pd_unmap_memory(addr, bytes);
+  if (result) {
+    MemTracker::record_virtual_memory_release((address)addr, bytes);
+  }
+  return result;
+}
+
+void os::free_memory(char *addr, size_t bytes, size_t alignment_hint) {
+  pd_free_memory(addr, bytes, alignment_hint);
+}
+
+void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
+  pd_realign_memory(addr, bytes, alignment_hint);
+}
+
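
All of the wrappers above follow one pattern: the public os:: function delegates to the platform-dependent pd_ primitive and, on success, reports the virtual-memory transition to MemTracker. A hedged round-trip sketch using only these wrappers (sizes and error handling are illustrative; M is HotSpot's megabyte constant):

    size_t len = 4 * M;
    char* base = os::reserve_memory(len);         // record_virtual_memory_reserve
    if (base != NULL) {
      if (os::commit_memory(base, len, false)) {  // record_virtual_memory_commit
        // ... use [base, base + len) ...
        os::uncommit_memory(base, len);           // record_virtual_memory_uncommit
      }
      os::release_memory(base, len);              // record_virtual_memory_release
    }
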
--- a/src/share/vm/runtime/os.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/os.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -99,6 +99,28 @@
     _page_sizes[1] = 0; // sentinel
   }
 
+  static char*  pd_reserve_memory(size_t bytes, char* addr = 0,
+                               size_t alignment_hint = 0);
+  static char*  pd_attempt_reserve_memory_at(size_t bytes, char* addr);
+  static void   pd_split_reserved_memory(char *base, size_t size,
+                                      size_t split, bool realloc);
+  static bool   pd_commit_memory(char* addr, size_t bytes, bool executable = false);
+  static bool   pd_commit_memory(char* addr, size_t size, size_t alignment_hint,
+                              bool executable = false);
+  static bool   pd_uncommit_memory(char* addr, size_t bytes);
+  static bool   pd_release_memory(char* addr, size_t bytes);
+
+  static char*  pd_map_memory(int fd, const char* file_name, size_t file_offset,
+                           char *addr, size_t bytes, bool read_only = false,
+                           bool allow_exec = false);
+  static char*  pd_remap_memory(int fd, const char* file_name, size_t file_offset,
+                             char *addr, size_t bytes, bool read_only,
+                             bool allow_exec);
+  static bool   pd_unmap_memory(char *addr, size_t bytes);
+  static void   pd_free_memory(char *addr, size_t bytes, size_t alignment_hint);
+  static void   pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint);
+
+
  public:
   static void init(void);                      // Called before command line parsing
   static jint init_2(void);                    // Called after command line parsing
@@ -236,8 +258,7 @@
   static char*  attempt_reserve_memory_at(size_t bytes, char* addr);
   static void   split_reserved_memory(char *base, size_t size,
                                       size_t split, bool realloc);
-  static bool   commit_memory(char* addr, size_t bytes,
-                              bool executable = false);
+  static bool   commit_memory(char* addr, size_t bytes, bool executable = false);
   static bool   commit_memory(char* addr, size_t size, size_t alignment_hint,
                               bool executable = false);
   static bool   uncommit_memory(char* addr, size_t bytes);
@@ -250,6 +271,7 @@
   static bool   guard_memory(char* addr, size_t bytes);
   static bool   unguard_memory(char* addr, size_t bytes);
   static bool   create_stack_guard_pages(char* addr, size_t bytes);
+  static bool   pd_create_stack_guard_pages(char* addr, size_t bytes);
   static bool   remove_stack_guard_pages(char* addr, size_t bytes);
 
   static char*  map_memory(int fd, const char* file_name, size_t file_offset,
@@ -404,6 +426,8 @@
   static address current_stack_base();
   static size_t current_stack_size();
 
+  static void verify_stack_alignment() PRODUCT_RETURN;
+
   static int message_box(const char* title, const char* message);
   static char* do_you_want_to_debug(const char* message);
 
@@ -571,12 +595,15 @@
   static void* thread_local_storage_at(int index);
   static void  free_thread_local_storage(int index);
 
+  // Stack walk
+  static address get_caller_pc(int n = 0);
+
   // General allocation (must be MT-safe)
-  static void* malloc  (size_t size);
-  static void* realloc (void *memblock, size_t size);
-  static void  free    (void *memblock);
+  static void* malloc  (size_t size, MEMFLAGS flags, address caller_pc = 0);
+  static void* realloc (void *memblock, size_t size, MEMFLAGS flags, address caller_pc = 0);
+  static void  free    (void *memblock, MEMFLAGS flags = mtNone);
   static bool  check_heap(bool force = false);      // verify C heap integrity
-  static char* strdup(const char *);  // Like strdup
+  static char* strdup(const char *, MEMFLAGS flags = mtInternal);  // Like strdup
 
 #ifndef PRODUCT
   static julong num_mallocs;         // # of calls to malloc/realloc
@@ -638,6 +665,10 @@
   // On Windows this will create an actual minidump, on Linux/Solaris it will simply check core dump limits
   static void check_or_create_dump(void* exceptionRecord, void* contextRecord, char* buffer, size_t bufferSize);
 
+  // Get the default path to the core file
+  // Returns the length of the string
+  static int get_core_path(char* buffer, size_t bufferSize);
+
   // JVMTI & JVM monitoring and management support
   // The thread_cpu_time() and current_thread_cpu_time() are only
   // supported if is_thread_cpu_time_supported() returns true.
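
Two small additions in this header are easy to miss: os::strdup now takes a MEMFLAGS category (defaulting to mtInternal), and os::get_core_path reports where a core file would land, returning the string length. A hypothetical caller sketch:

    char path[JVM_MAXPATHLEN];
    int len = os::get_core_path(path, sizeof(path));
    if (len > 0) {
      char* copy = os::strdup(path);   // tracked as mtInternal by default
      // ... report the location ...
      os::free(copy, mtInternal);
    }
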
--- a/src/share/vm/runtime/osThread.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/osThread.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -58,10 +58,9 @@
 // the main thread into its own Thread at will.
 
 
-class OSThread: public CHeapObj {
+class OSThread: public CHeapObj<mtThread> {
   friend class VMStructs;
  private:
-  //void* _start_proc;            // Thread start routine
   OSThreadStartFunc _start_proc;  // Thread start routine
   void* _start_parm;              // Thread start routine parameter
   volatile ThreadState _state;    // Thread state *hint*
@@ -77,10 +76,7 @@
   void set_state(ThreadState state)                { _state = state; }
   ThreadState get_state()                          { return _state; }
 
-  // Constructor
   OSThread(OSThreadStartFunc start_proc, void* start_parm);
-
-  // Destructor
   ~OSThread();
 
   // Accessors
@@ -113,6 +109,19 @@
 # include "osThread_bsd.hpp"
 #endif
 
+ public:
+  static ByteSize thread_id_offset()              { return byte_offset_of(OSThread, _thread_id); }
+  static size_t thread_id_size()                  { return sizeof(thread_id_t); }
+
+  thread_id_t thread_id() const                   { return _thread_id; }
+
+  void set_thread_id(thread_id_t id)              { _thread_id = id; }
+
+ private:
+  // _thread_id is kernel thread id (similar to LWP id on Solaris). Each
+  // thread has a unique thread_id (BsdThreads or NPTL). It can be used
+  // to access /proc.
+  thread_id_t _thread_id;
 };
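
The static thread_id_offset()/thread_id_size() queries exist so generated code and the serviceability agent can locate the kernel thread id without going through C++; ordinary VM code just uses the accessors. A sketch (the 'thread' variable is assumed, and thread_id_t is platform-defined, hence the cast for printing):

    OSThread* os_thr = thread->osthread();
    tty->print_cr("kernel thread id: %d", (int)os_thr->thread_id());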
 
 
--- a/src/share/vm/runtime/park.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/park.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -141,7 +141,7 @@
 // although Niagara's hash function should help.
 
 void * ParkEvent::operator new (size_t sz) {
-  return (void *) ((intptr_t (CHeapObj::operator new (sz + 256)) + 256) & -256) ;
+  return (void *) ((intptr_t (AllocateHeap(sz + 256, mtInternal, CALLER_PC)) + 256) & -256) ;
 }
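
The replacement operator new now allocates through AllocateHeap, so NMT sees ParkEvents, while keeping the original 256-byte alignment trick: over-allocate by 256 bytes, then round up with & -256 (which is ~0xFF). A worked example of the arithmetic:

    // If AllocateHeap returns p = 0x1010:
    //   p + 256       = 0x1110
    //   0x1110 & -256 = 0x1100   // low 8 bits cleared: 256-byte aligned
    // 0x1100 - 0x1010 = 0xF0 < 256, so the aligned pointer still lies inside
    // the sz + 256 bytes that were allocated.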
 
 void ParkEvent::operator delete (void * a) {
--- a/src/share/vm/runtime/perfData.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/perfData.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -81,7 +81,7 @@
 
   const char* prefix = PerfDataManager::ns_to_string(ns);
 
-  _name = NEW_C_HEAP_ARRAY(char, strlen(name) + strlen(prefix) + 2);
+  _name = NEW_C_HEAP_ARRAY(char, strlen(name) + strlen(prefix) + 2, mtInternal);
   assert(_name != NULL && strlen(name) != 0, "invalid name");
 
   if (ns == NULL_NS) {
@@ -111,10 +111,10 @@
 
 PerfData::~PerfData() {
   if (_name != NULL) {
-    FREE_C_HEAP_ARRAY(char, _name);
+    FREE_C_HEAP_ARRAY(char, _name, mtInternal);
   }
   if (is_on_c_heap()) {
-    FREE_C_HEAP_ARRAY(PerfDataEntry, _pdep);
+    FREE_C_HEAP_ARRAY(PerfDataEntry, _pdep, mtInternal);
   }
 }
 
@@ -137,7 +137,7 @@
   if (psmp == NULL) {
     // out of PerfMemory memory resources. allocate on the C heap
     // to avoid vm termination.
-    psmp = NEW_C_HEAP_ARRAY(char, size);
+    psmp = NEW_C_HEAP_ARRAY(char, size, mtInternal);
     _on_c_heap = true;
   }
 
@@ -559,12 +559,12 @@
 
 PerfDataList::PerfDataList(int length) {
 
-  _set = new(ResourceObj::C_HEAP) PerfDataArray(length, true);
+  _set = new(ResourceObj::C_HEAP, mtInternal) PerfDataArray(length, true);
 }
 
 PerfDataList::PerfDataList(PerfDataList* p) {
 
-  _set = new(ResourceObj::C_HEAP) PerfDataArray(p->length(), true);
+  _set = new(ResourceObj::C_HEAP, mtInternal) PerfDataArray(p->length(), true);
 
   _set->appendAll(p->get_impl());
 }
--- a/src/share/vm/runtime/perfData.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/perfData.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -240,7 +240,7 @@
  * be removed from the product in the future.
  *
  */
-class PerfData : public CHeapObj {
+class PerfData : public CHeapObj<mtInternal> {
 
   friend class StatSampler;      // for access to protected void sample()
   friend class PerfDataManager;  // for access to protected destructor
@@ -342,7 +342,7 @@
  * invoke the take_sample() method and write the value returned to its
  * appropriate location in the PerfData memory region.
  */
-class PerfLongSampleHelper : public CHeapObj {
+class PerfLongSampleHelper : public CHeapObj<mtInternal> {
   public:
     virtual jlong take_sample() = 0;
 };
@@ -591,7 +591,7 @@
  * some other implementation, as long as that implementation provides
  * a mechanism to iterate over the container by index.
  */
-class PerfDataList : public CHeapObj {
+class PerfDataList : public CHeapObj<mtInternal> {
 
   private:
 
--- a/src/share/vm/runtime/perfMemory.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/perfMemory.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -112,7 +112,7 @@
       warning("Could not create PerfData Memory region, reverting to malloc");
     }
 
-    _prologue = NEW_C_HEAP_OBJ(PerfDataPrologue);
+    _prologue = NEW_C_HEAP_OBJ(PerfDataPrologue, mtInternal);
   }
   else {
 
@@ -244,10 +244,10 @@
   if (PerfDataSaveFile != NULL) {
     // dest_file_name stores the validated file name if file_name
     // contains %p which will be replaced by pid.
-    dest_file = NEW_C_HEAP_ARRAY(char, JVM_MAXPATHLEN);
+    dest_file = NEW_C_HEAP_ARRAY(char, JVM_MAXPATHLEN, mtInternal);
     if(!Arguments::copy_expand_pid(PerfDataSaveFile, strlen(PerfDataSaveFile),
                                    dest_file, JVM_MAXPATHLEN)) {
-      FREE_C_HEAP_ARRAY(char, dest_file);
+      FREE_C_HEAP_ARRAY(char, dest_file, mtInternal);
       if (PrintMiscellaneous && Verbose) {
         warning("Invalid performance data file path name specified, "\
                 "fall back to a default name");
@@ -257,7 +257,7 @@
     }
   }
   // create the name of the file for retaining the instrumentation memory.
-  dest_file = NEW_C_HEAP_ARRAY(char, PERFDATA_FILENAME_LEN);
+  dest_file = NEW_C_HEAP_ARRAY(char, PERFDATA_FILENAME_LEN, mtInternal);
   jio_snprintf(dest_file, PERFDATA_FILENAME_LEN,
                "%s_%d", PERFDATA_NAME, os::current_process_id());
 
--- a/src/share/vm/runtime/reflection.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/reflection.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -36,7 +36,6 @@
 #include "oops/objArrayKlass.hpp"
 #include "oops/objArrayOop.hpp"
 #include "prims/jvm.h"
-#include "prims/methodHandleWalk.hpp"
 #include "runtime/arguments.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/javaCalls.hpp"
@@ -502,11 +501,6 @@
       under_host_klass(accessee_ik, accessor))
     return true;
 
-  // Adapter frames can access anything.
-  if (MethodHandleCompiler::klass_is_method_handle_adapter_holder(accessor))
-    // This is an internal adapter frame from the MethodHandleCompiler.
-    return true;
-
   if (RelaxAccessControlCheck ||
       (accessor_ik->major_version() < JAVA_1_5_VERSION &&
        accessee_ik->major_version() < JAVA_1_5_VERSION)) {
@@ -591,14 +585,11 @@
 // Caller is responsible for figuring out in advance which case must be true.
 void Reflection::check_for_inner_class(instanceKlassHandle outer, instanceKlassHandle inner,
                                        bool inner_is_member, TRAPS) {
-  const int inner_class_info_index = 0;
-  const int outer_class_info_index = 1;
-
-  typeArrayHandle    icls (THREAD, outer->inner_classes());
+  InnerClassesIterator iter(outer);
   constantPoolHandle cp   (THREAD, outer->constants());
-  for(int i = 0; i < icls->length(); i += 4) {
-     int ioff = icls->ushort_at(i + inner_class_info_index);
-     int ooff = icls->ushort_at(i + outer_class_info_index);
+  for (; !iter.done(); iter.next()) {
+     int ioff = iter.inner_class_info_index();
+     int ooff = iter.outer_class_info_index();
 
      if (inner_is_member && ioff != 0 && ooff != 0) {
         klassOop o = cp->klass_at(ooff, CHECK);
@@ -832,7 +823,7 @@
   java_lang_reflect_Field::set_modifiers(rh(), fd->access_flags().as_int() & JVM_RECOGNIZED_FIELD_MODIFIERS);
   java_lang_reflect_Field::set_override(rh(), false);
   if (java_lang_reflect_Field::has_signature_field() &&
-      fd->generic_signature() != NULL) {
+      fd->has_generic_signature()) {
     Symbol*  gs = fd->generic_signature();
     Handle sig = java_lang_String::create_from_symbol(gs, CHECK_NULL);
     java_lang_reflect_Field::set_signature(rh(), sig());
--- a/src/share/vm/runtime/reflectionUtils.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/reflectionUtils.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -59,7 +59,7 @@
 
 
 GrowableArray<FilteredField*> *FilteredFieldsMap::_filtered_fields =
-  new (ResourceObj::C_HEAP) GrowableArray<FilteredField*>(3,true);
+  new (ResourceObj::C_HEAP, mtInternal) GrowableArray<FilteredField*>(3,true);
 
 
 void FilteredFieldsMap::initialize() {
--- a/src/share/vm/runtime/relocator.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/relocator.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -392,16 +392,16 @@
 // The width of instruction at "pc" is changing by "delta".  Adjust the
 // exception table, if any, of "rc->mb".
 void Relocator::adjust_exception_table(int bci, int delta) {
-  typeArrayOop table = method()->exception_table();
-  for (int index = 0; index < table->length(); index +=4) {
-    if (table->int_at(index) > bci) {
-      table->int_at_put(index+0, table->int_at(index+0) + delta);
-      table->int_at_put(index+1, table->int_at(index+1) + delta);
-    } else if (bci < table->int_at(index+1)) {
-      table->int_at_put(index+1, table->int_at(index+1) + delta);
+  ExceptionTable table(_method());
+  for (int index = 0; index < table.length(); index ++) {
+    if (table.start_pc(index) > bci) {
+      table.set_start_pc(index, table.start_pc(index) + delta);
+      table.set_end_pc(index, table.end_pc(index) + delta);
+    } else if (bci < table.end_pc(index)) {
+      table.set_end_pc(index, table.end_pc(index) + delta);
     }
-    if (table->int_at(index+2) > bci)
-      table->int_at_put(index+2, table->int_at(index+2) + delta);
+    if (table.handler_pc(index) > bci)
+      table.set_handler_pc(index, table.handler_pc(index) + delta);
   }
 }
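
The rewrite drops the raw typeArrayOop layout (four ints per entry, addressed by magic offsets) in favor of the ExceptionTable accessor class, which names each column. A hedged sketch of the iteration pattern ('mh' is a hypothetical methodHandle):

    ExceptionTable table(mh());
    for (int i = 0; i < table.length(); i++) {
      int start   = table.start_pc(i);
      int end     = table.end_pc(i);
      int handler = table.handler_pc(i);
      int type    = table.catch_type_index(i);  // 0 means catch-all
      // ... inspect or rewrite the entry ...
    }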
 
@@ -465,13 +465,12 @@
 void Relocator::adjust_stack_map_table(int bci, int delta) {
   if (method()->has_stackmap_table()) {
     typeArrayOop data = method()->stackmap_data();
-    // The data in the array is a classfile representation of the stackmap
-    // table attribute, less the initial u2 tag and u4 attribute_length fields.
-    stack_map_table_attribute* attr = stack_map_table_attribute::at(
-        (address)data->byte_at_addr(0) - (sizeof(u2) + sizeof(u4)));
+    // The data in the array is a classfile representation of the stackmap table
+    stack_map_table* sm_table =
+        stack_map_table::at((address)data->byte_at_addr(0));
 
-    int count = attr->number_of_entries();
-    stack_map_frame* frame = attr->entries();
+    int count = sm_table->number_of_entries();
+    stack_map_frame* frame = sm_table->entries();
     int bci_iter = -1;
     bool offset_adjusted = false; // only need to adjust one offset
 
@@ -486,7 +485,7 @@
           frame->set_offset_delta(new_offset_delta);
         } else {
           assert(frame->is_same_frame() ||
-                 frame->is_same_frame_1_stack_item_frame(),
+                 frame->is_same_locals_1_stack_item_frame(),
                  "Frame must be one of the compressed forms");
           // The new delta exceeds the capacity of the 'same_frame' or
           // 'same_frame_1_stack_item_frame' frame types.  We need to
@@ -513,7 +512,7 @@
           if (frame->is_same_frame()) {
             same_frame_extended::create_at(frame_addr, new_offset_delta);
           } else {
-            same_frame_1_stack_item_extended::create_at(
+            same_locals_1_stack_item_extended::create_at(
               frame_addr, new_offset_delta, NULL);
             // the verification_info_type should already be at the right spot
           }
--- a/src/share/vm/runtime/safepoint.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/safepoint.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -48,6 +48,7 @@
 #include "runtime/stubRoutines.hpp"
 #include "runtime/sweeper.hpp"
 #include "runtime/synchronizer.hpp"
+#include "services/memTracker.hpp"
 #include "services/runtimeService.hpp"
 #include "utilities/events.hpp"
 #ifdef TARGET_ARCH_x86
@@ -546,6 +547,10 @@
   if (UseGCLogFileRotation) {
     gclog_or_tty->rotate_log();
   }
+
+  if (MemTracker::is_on()) {
+    MemTracker::sync();
+  }
 }
 
 
@@ -1157,7 +1162,7 @@
     stats_array_size = PrintSafepointStatisticsCount;
   }
   _safepoint_stats = (SafepointStats*)os::malloc(stats_array_size
-                                                 * sizeof(SafepointStats));
+                                                 * sizeof(SafepointStats), mtInternal);
   guarantee(_safepoint_stats != NULL,
             "not enough memory for safepoint instrumentation data");
 
--- a/src/share/vm/runtime/safepoint.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/safepoint.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -190,7 +190,7 @@
 };
 
 // State class for a thread suspended at a safepoint
-class ThreadSafepointState: public CHeapObj {
+class ThreadSafepointState: public CHeapObj<mtInternal> {
  public:
   // These states are maintained by VM thread while threads are being brought
   // to a safepoint.  After SafepointSynchronize::end(), they are reset to
--- a/src/share/vm/runtime/sharedRuntime.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -88,8 +88,6 @@
 RuntimeStub*        SharedRuntime::_resolve_static_call_blob;
 
 DeoptimizationBlob* SharedRuntime::_deopt_blob;
-RicochetBlob*       SharedRuntime::_ricochet_blob;
-
 SafepointBlob*      SharedRuntime::_polling_page_safepoint_handler_blob;
 SafepointBlob*      SharedRuntime::_polling_page_return_handler_blob;
 
@@ -109,7 +107,6 @@
   _polling_page_safepoint_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), false);
   _polling_page_return_handler_blob    = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), true);
 
-  generate_ricochet_blob();
   generate_deopt_blob();
 
 #ifdef COMPILER2
@@ -117,33 +114,6 @@
 #endif // COMPILER2
 }
 
-//----------------------------generate_ricochet_blob---------------------------
-void SharedRuntime::generate_ricochet_blob() {
-  if (!EnableInvokeDynamic)  return;  // leave it as a null
-
-  // allocate space for the code
-  ResourceMark rm;
-  // setup code generation tools
-  CodeBuffer buffer("ricochet_blob", 256 LP64_ONLY(+ 256), 256);  // XXX x86 LP64L: 512, 512
-  MacroAssembler* masm = new MacroAssembler(&buffer);
-
-  int bounce_offset = -1, exception_offset = -1, frame_size_in_words = -1;
-  MethodHandles::RicochetFrame::generate_ricochet_blob(masm, &bounce_offset, &exception_offset, &frame_size_in_words);
-
-  // -------------
-  // make sure all code is generated
-  masm->flush();
-
-  // failed to generate?
-  if (bounce_offset < 0 || exception_offset < 0 || frame_size_in_words < 0) {
-    assert(false, "bad ricochet blob");
-    return;
-  }
-
-  _ricochet_blob = RicochetBlob::create(&buffer, bounce_offset, exception_offset, frame_size_in_words);
-}
-
-
 #include <math.h>
 
 #ifndef USDT2
@@ -527,10 +497,6 @@
   if (Interpreter::contains(return_address)) {
     return Interpreter::rethrow_exception_entry();
   }
-  // Ricochet frame unwind code
-  if (SharedRuntime::ricochet_blob() != NULL && SharedRuntime::ricochet_blob()->returns_to_bounce_addr(return_address)) {
-    return SharedRuntime::ricochet_blob()->exception_addr();
-  }
 
   guarantee(blob == NULL || !blob->is_runtime_stub(), "caller should have skipped stub");
   guarantee(!VtableStubs::contains(return_address), "NULL exceptions in vtables should have been handled already!");
@@ -768,13 +734,6 @@
   throw_and_post_jvmti_exception(thread, exception);
 JRT_END
 
-JRT_ENTRY(void, SharedRuntime::throw_WrongMethodTypeException(JavaThread* thread, oopDesc* required, oopDesc* actual))
-  assert(thread == JavaThread::current() && required->is_oop() && actual->is_oop(), "bad args");
-  ResourceMark rm;
-  char* message = SharedRuntime::generate_wrong_method_type_message(thread, required, actual);
-  throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_invoke_WrongMethodTypeException(), message);
-JRT_END
-
 address SharedRuntime::continuation_for_implicit_exception(JavaThread* thread,
                                                            address pc,
                                                            SharedRuntime::ImplicitExceptionKind exception_kind)
@@ -857,6 +816,12 @@
             return StubRoutines::throw_NullPointerException_at_call_entry();
           }
 
+          if (nm->method()->is_method_handle_intrinsic()) {
+            // exception happened inside MH dispatch code, similar to a vtable stub
+            Events::log_exception(thread, "NullPointerException in MH adapter " INTPTR_FORMAT, pc);
+            return StubRoutines::throw_NullPointerException_at_call_entry();
+          }
+
 #ifndef PRODUCT
           _implicit_null_throws++;
 #endif
@@ -909,11 +874,20 @@
 }
 JNI_END
 
+JNI_ENTRY(void, throw_unsupported_operation_exception(JNIEnv* env, ...))
+{
+  THROW(vmSymbols::java_lang_UnsupportedOperationException());
+}
+JNI_END
 
 address SharedRuntime::native_method_throw_unsatisfied_link_error_entry() {
   return CAST_FROM_FN_PTR(address, &throw_unsatisfied_link_error);
 }
 
+address SharedRuntime::native_method_throw_unsupported_operation_exception_entry() {
+  return CAST_FROM_FN_PTR(address, &throw_unsupported_operation_exception);
+}
+
 
 #ifndef PRODUCT
 JRT_ENTRY(intptr_t, SharedRuntime::trace_bytecode(JavaThread* thread, intptr_t preserve_this_value, intptr_t tos, intptr_t tos2))
@@ -1045,16 +1019,17 @@
   assert(!vfst.at_end(), "Java frame must exist");
 
   // Find caller and bci from vframe
-  methodHandle caller (THREAD, vfst.method());
-  int          bci    = vfst.bci();
+  methodHandle caller(THREAD, vfst.method());
+  int          bci   = vfst.bci();
 
   // Find bytecode
   Bytecode_invoke bytecode(caller, bci);
-  bc = bytecode.java_code();
+  bc = bytecode.invoke_code();
   int bytecode_index = bytecode.index();
 
   // Find receiver for non-static call
-  if (bc != Bytecodes::_invokestatic) {
+  if (bc != Bytecodes::_invokestatic &&
+      bc != Bytecodes::_invokedynamic) {
     // This register map must be update since we need to find the receiver for
     // compiled frames. The receiver might be in a register.
     RegisterMap reg_map2(thread);
@@ -1075,25 +1050,32 @@
   }
 
   // Resolve method. This is parameterized by bytecode.
-  constantPoolHandle constants (THREAD, caller->constants());
-  assert (receiver.is_null() || receiver->is_oop(), "wrong receiver");
+  constantPoolHandle constants(THREAD, caller->constants());
+  assert(receiver.is_null() || receiver->is_oop(), "wrong receiver");
   LinkResolver::resolve_invoke(callinfo, receiver, constants, bytecode_index, bc, CHECK_(nullHandle));
 
 #ifdef ASSERT
   // Check that the receiver klass is of the right subtype and that it is initialized for virtual calls
   if (bc != Bytecodes::_invokestatic && bc != Bytecodes::_invokedynamic) {
     assert(receiver.not_null(), "should have thrown exception");
-    KlassHandle receiver_klass (THREAD, receiver->klass());
+    KlassHandle receiver_klass(THREAD, receiver->klass());
     klassOop rk = constants->klass_ref_at(bytecode_index, CHECK_(nullHandle));
                             // klass is already loaded
-    KlassHandle static_receiver_klass (THREAD, rk);
-    assert(receiver_klass->is_subtype_of(static_receiver_klass()), "actual receiver must be subclass of static receiver klass");
+    KlassHandle static_receiver_klass(THREAD, rk);
+    // Method handle invokes might have been optimized to a direct call
+    // so don't check for the receiver class.
+    // FIXME this weakens the assert too much
+    methodHandle callee = callinfo.selected_method();
+    assert(receiver_klass->is_subtype_of(static_receiver_klass()) ||
+           callee->is_method_handle_intrinsic() ||
+           callee->is_compiled_lambda_form(),
+           "actual receiver must be subclass of static receiver klass");
     if (receiver_klass->oop_is_instance()) {
       if (instanceKlass::cast(receiver_klass())->is_not_initialized()) {
         tty->print_cr("ERROR: Klass not yet initialized!!");
         receiver_klass.print();
       }
-      assert (!instanceKlass::cast(receiver_klass())->is_not_initialized(), "receiver_klass must be initialized");
+      assert(!instanceKlass::cast(receiver_klass())->is_not_initialized(), "receiver_klass must be initialized");
     }
   }
 #endif
@@ -1186,8 +1168,10 @@
                                      call_info, CHECK_(methodHandle()));
   methodHandle callee_method = call_info.selected_method();
 
-  assert((!is_virtual && invoke_code == Bytecodes::_invokestatic) ||
-         ( is_virtual && invoke_code != Bytecodes::_invokestatic), "inconsistent bytecode");
+  assert((!is_virtual && invoke_code == Bytecodes::_invokestatic ) ||
+         (!is_virtual && invoke_code == Bytecodes::_invokehandle ) ||
+         (!is_virtual && invoke_code == Bytecodes::_invokedynamic) ||
+         ( is_virtual && invoke_code != Bytecodes::_invokestatic ), "inconsistent bytecode");
 
 #ifndef PRODUCT
   // tracing/debugging/statistics
@@ -1202,16 +1186,17 @@
       (is_optimized) ? "optimized " : "", (is_virtual) ? "virtual" : "static",
       Bytecodes::name(invoke_code));
     callee_method->print_short_name(tty);
-    tty->print_cr(" code: " INTPTR_FORMAT, callee_method->code());
+    tty->print_cr(" at pc: " INTPTR_FORMAT " to code: " INTPTR_FORMAT, caller_frame.pc(), callee_method->code());
   }
 #endif
 
-  // JSR 292
+  // JSR 292 key invariant:
   // If the resolved method is a MethodHandle invoke target the call
-  // site must be a MethodHandle call site.
-  if (callee_method->is_method_handle_invoke()) {
-    assert(caller_nm->is_method_handle_return(caller_frame.pc()), "must be MH call site");
-  }
+  // site must be a MethodHandle call site, because the lambda form might tail-call
+  // leaving the stack in a state unknown to either caller or callee
+  // TODO detune for now but we might need it again
+//  assert(!callee_method->is_compiled_lambda_form() ||
+//         caller_nm->is_method_handle_return(caller_frame.pc()), "must be MH call site");
 
   // Compute entry points. This might require generation of C2I converter
   // frames, so we cannot be holding any locks here. Furthermore, the
@@ -1284,7 +1269,6 @@
   assert(stub_frame.is_runtime_frame(), "sanity check");
   frame caller_frame = stub_frame.sender(&reg_map);
   assert(!caller_frame.is_interpreted_frame() && !caller_frame.is_entry_frame(), "unexpected frame");
-  assert(!caller_frame.is_ricochet_frame(), "unexpected frame");
 #endif /* ASSERT */
 
   methodHandle callee_method;
@@ -1320,21 +1304,9 @@
   address   sender_pc = caller_frame.pc();
   CodeBlob* sender_cb = caller_frame.cb();
   nmethod*  sender_nm = sender_cb->as_nmethod_or_null();
-  bool is_mh_invoke_via_adapter = false;  // Direct c2c call or via adapter?
-  if (sender_nm != NULL && sender_nm->is_method_handle_return(sender_pc)) {
-    // If the callee_target is set, then we have come here via an i2c
-    // adapter.
-    methodOop callee = thread->callee_target();
-    if (callee != NULL) {
-      assert(callee->is_method(), "sanity");
-      is_mh_invoke_via_adapter = true;
-    }
-  }
 
   if (caller_frame.is_interpreted_frame() ||
-      caller_frame.is_entry_frame()       ||
-      caller_frame.is_ricochet_frame()    ||
-      is_mh_invoke_via_adapter) {
+      caller_frame.is_entry_frame()) {
     methodOop callee = thread->callee_target();
     guarantee(callee != NULL && callee->is_method(), "bad handshake");
     thread->set_vm_result(callee);
@@ -1677,12 +1649,6 @@
   // Get the return PC for the passed caller PC.
   address return_pc = caller_pc + frame::pc_return_offset;
 
-  // Don't fixup method handle call sites as the executed method
-  // handle adapters are doing the required MethodHandle chain work.
-  if (nm->is_method_handle_return(return_pc)) {
-    return;
-  }
-
   // There is a benign race here. We could be attempting to patch to a compiled
   // entry point at the same time the callee is being deoptimized. If that is
   // the case then entry_point may in fact point to a c2i and we'd patch the
@@ -1788,97 +1754,6 @@
   return generate_class_cast_message(objName, targetKlass->external_name());
 }
 
-char* SharedRuntime::generate_wrong_method_type_message(JavaThread* thread,
-                                                        oopDesc* required,
-                                                        oopDesc* actual) {
-  if (TraceMethodHandles) {
-    tty->print_cr("WrongMethodType thread="PTR_FORMAT" req="PTR_FORMAT" act="PTR_FORMAT"",
-                  thread, required, actual);
-  }
-  assert(EnableInvokeDynamic, "");
-  oop singleKlass = wrong_method_type_is_for_single_argument(thread, required);
-  char* message = NULL;
-  if (singleKlass != NULL) {
-    const char* objName = "argument or return value";
-    if (actual != NULL) {
-      // be flexible about the junk passed in:
-      klassOop ak = (actual->is_klass()
-                     ? (klassOop)actual
-                     : actual->klass());
-      objName = Klass::cast(ak)->external_name();
-    }
-    Klass* targetKlass = Klass::cast(required->is_klass()
-                                     ? (klassOop)required
-                                     : java_lang_Class::as_klassOop(required));
-    message = generate_class_cast_message(objName, targetKlass->external_name());
-  } else {
-    // %%% need to get the MethodType string, without messing around too much
-    const char* desc = NULL;
-    // Get a signature from the invoke instruction
-    const char* mhName = "method handle";
-    const char* targetType = "the required signature";
-    int targetArity = -1, mhArity = -1;
-    vframeStream vfst(thread, true);
-    if (!vfst.at_end()) {
-      Bytecode_invoke call(vfst.method(), vfst.bci());
-      methodHandle target;
-      {
-        EXCEPTION_MARK;
-        target = call.static_target(THREAD);
-        if (HAS_PENDING_EXCEPTION) { CLEAR_PENDING_EXCEPTION; }
-      }
-      if (target.not_null()
-          && target->is_method_handle_invoke()
-          && required == target->method_handle_type()) {
-        targetType = target->signature()->as_C_string();
-        targetArity = ArgumentCount(target->signature()).size();
-      }
-    }
-    KlassHandle kignore; int dmf_flags = 0;
-    methodHandle actual_method = MethodHandles::decode_method(actual, kignore, dmf_flags);
-    if ((dmf_flags & ~(MethodHandles::_dmf_has_receiver |
-                       MethodHandles::_dmf_does_dispatch |
-                       MethodHandles::_dmf_from_interface)) != 0)
-      actual_method = methodHandle();  // MH does extra binds, drops, etc.
-    bool has_receiver = ((dmf_flags & MethodHandles::_dmf_has_receiver) != 0);
-    if (actual_method.not_null()) {
-      mhName = actual_method->signature()->as_C_string();
-      mhArity = ArgumentCount(actual_method->signature()).size();
-      if (!actual_method->is_static())  mhArity += 1;
-    } else if (java_lang_invoke_MethodHandle::is_instance(actual)) {
-      oopDesc* mhType = java_lang_invoke_MethodHandle::type(actual);
-      mhArity = java_lang_invoke_MethodType::ptype_count(mhType);
-      stringStream st;
-      java_lang_invoke_MethodType::print_signature(mhType, &st);
-      mhName = st.as_string();
-    }
-    if (targetArity != -1 && targetArity != mhArity) {
-      if (has_receiver && targetArity == mhArity-1)
-        desc = " cannot be called without a receiver argument as ";
-      else
-        desc = " cannot be called with a different arity as ";
-    }
-    message = generate_class_cast_message(mhName, targetType,
-                                          desc != NULL ? desc :
-                                          " cannot be called as ");
-  }
-  if (TraceMethodHandles) {
-    tty->print_cr("WrongMethodType => message=%s", message);
-  }
-  return message;
-}
-
-oop SharedRuntime::wrong_method_type_is_for_single_argument(JavaThread* thr,
-                                                            oopDesc* required) {
-  if (required == NULL)  return NULL;
-  if (required->klass() == SystemDictionary::Class_klass())
-    return required;
-  if (required->is_klass())
-    return Klass::cast(klassOop(required))->java_mirror();
-  return NULL;
-}
-
-
 char* SharedRuntime::generate_class_cast_message(
     const char* objName, const char* targetKlassName, const char* desc) {
   size_t msglen = strlen(objName) + strlen(desc) + strlen(targetKlassName) + 1;
@@ -2117,10 +1992,19 @@
 
 // A simple wrapper class around the calling convention information
 // that allows sharing of adapters for the same calling convention.
-class AdapterFingerPrint : public CHeapObj {
+class AdapterFingerPrint : public CHeapObj<mtCode> {
  private:
+  enum {
+    _basic_type_bits = 4,
+    _basic_type_mask = right_n_bits(_basic_type_bits),
+    _basic_types_per_int = BitsPerInt / _basic_type_bits,
+    _compact_int_count = 3
+  };
+  // TO DO:  Consider integrating this with a more global scheme for compressing signatures.
+  // For now, 4 bits per components (plus T_VOID gaps after double/long) is not excessive.
+
   union {
-    int  _compact[3];
+    int  _compact[_compact_int_count];
     int* _fingerprint;
   } _value;
   int _length; // A negative length indicates the fingerprint is in the compact form,
@@ -2129,8 +2013,7 @@
   // Remap BasicTypes that are handled equivalently by the adapters.
   // These are correct for the current system but someday it might be
   // necessary to make this mapping platform dependent.
-  static BasicType adapter_encoding(BasicType in) {
-    assert((~0xf & in) == 0, "must fit in 4 bits");
+  static int adapter_encoding(BasicType in) {
     switch(in) {
       case T_BOOLEAN:
       case T_BYTE:
@@ -2141,6 +2024,8 @@
 
       case T_OBJECT:
       case T_ARRAY:
+        // In other words, we assume that any register good enough for
+        // an int or long is good enough for a managed pointer.
 #ifdef _LP64
         return T_LONG;
 #else
@@ -2165,8 +2050,9 @@
     // The fingerprint is based on the BasicType signature encoded
     // into an array of ints with eight entries per int.
     int* ptr;
-    int len = (total_args_passed + 7) >> 3;
-    if (len <= (int)(sizeof(_value._compact) / sizeof(int))) {
+    int len = (total_args_passed + (_basic_types_per_int-1)) / _basic_types_per_int;
+    if (len <= _compact_int_count) {
+      assert(_compact_int_count == 3, "else change next line");
       _value._compact[0] = _value._compact[1] = _value._compact[2] = 0;
       // Storing the signature encoded as signed chars hits about 98%
       // of the time.
@@ -2174,7 +2060,7 @@
       ptr = _value._compact;
     } else {
       _length = len;
-      _value._fingerprint = NEW_C_HEAP_ARRAY(int, _length);
+      _value._fingerprint = NEW_C_HEAP_ARRAY(int, _length, mtCode);
       ptr = _value._fingerprint;
     }
 
@@ -2182,10 +2068,12 @@
     int sig_index = 0;
     for (int index = 0; index < len; index++) {
       int value = 0;
-      for (int byte = 0; byte < 8; byte++) {
-        if (sig_index < total_args_passed) {
-          value = (value << 4) | adapter_encoding(sig_bt[sig_index++]);
-        }
+      for (int byte = 0; byte < _basic_types_per_int; byte++) {
+        int bt = ((sig_index < total_args_passed)
+                  ? adapter_encoding(sig_bt[sig_index++])
+                  : 0);
+        assert((bt & _basic_type_mask) == bt, "must fit in 4 bits");
+        value = (value << _basic_type_bits) | bt;
       }
       ptr[index] = value;
     }
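
Each argument type is first remapped by adapter_encoding() to a canonical 4-bit code, then eight codes are packed per 32-bit int, most significant nibble first; unused trailing slots stay 0, which is why most signatures fit the 3-int compact form. A worked example of the loop above, using HotSpot's BasicType values T_INT = 10 = 0xA and T_LONG = 11 = 0xB (and ignoring the T_VOID filler a real long would add):

    // Packing the encoding stream {T_INT, T_LONG} into one int:
    //   value = (0   << 4) | 0xA        // 0x0000000A
    //   value = (0xA << 4) | 0xB        // 0x000000AB
    //   ... six more shifts with bt == 0 pad the remaining slots ...
    //   ptr[0] = 0xAB000000
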
@@ -2193,7 +2081,7 @@
 
   ~AdapterFingerPrint() {
     if (_length > 0) {
-      FREE_C_HEAP_ARRAY(int, _value._fingerprint);
+      FREE_C_HEAP_ARRAY(int, _value._fingerprint, mtCode);
     }
   }
 
@@ -2235,6 +2123,7 @@
       return false;
     }
     if (_length < 0) {
+      assert(_compact_int_count == 3, "else change next line");
       return _value._compact[0] == other->_value._compact[0] &&
              _value._compact[1] == other->_value._compact[1] &&
              _value._compact[2] == other->_value._compact[2];
@@ -2251,7 +2140,7 @@
 
 
 // A hashtable mapping from AdapterFingerPrints to AdapterHandlerEntries
-class AdapterHandlerTable : public BasicHashtable {
+class AdapterHandlerTable : public BasicHashtable<mtCode> {
   friend class AdapterHandlerTableIterator;
 
  private:
@@ -2265,16 +2154,16 @@
 #endif
 
   AdapterHandlerEntry* bucket(int i) {
-    return (AdapterHandlerEntry*)BasicHashtable::bucket(i);
+    return (AdapterHandlerEntry*)BasicHashtable<mtCode>::bucket(i);
   }
 
  public:
   AdapterHandlerTable()
-    : BasicHashtable(293, sizeof(AdapterHandlerEntry)) { }
+    : BasicHashtable<mtCode>(293, sizeof(AdapterHandlerEntry)) { }
 
   // Create a new entry suitable for insertion in the table
   AdapterHandlerEntry* new_entry(AdapterFingerPrint* fingerprint, address i2c_entry, address c2i_entry, address c2i_unverified_entry) {
-    AdapterHandlerEntry* entry = (AdapterHandlerEntry*)BasicHashtable::new_entry(fingerprint->compute_hash());
+    AdapterHandlerEntry* entry = (AdapterHandlerEntry*)BasicHashtable<mtCode>::new_entry(fingerprint->compute_hash());
     entry->init(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
     return entry;
   }
@@ -2287,7 +2176,7 @@
 
   void free_entry(AdapterHandlerEntry* entry) {
     entry->deallocate();
-    BasicHashtable::free_entry(entry);
+    BasicHashtable<mtCode>::free_entry(entry);
   }
 
   // Find an entry with the same fingerprint if it exists
@@ -2531,13 +2420,17 @@
     entry->relocate(B->content_begin());
 #ifndef PRODUCT
     // debugging support
-    if (PrintAdapterHandlers) {
-      tty->cr();
-      tty->print_cr("i2c argument handler #%d for: %s %s (fingerprint = %s, %d bytes generated)",
+    if (PrintAdapterHandlers || PrintStubCode) {
+      entry->print_adapter_on(tty);
+      tty->print_cr("i2c argument handler #%d for: %s %s (%d bytes generated)",
                     _adapters->number_of_entries(), (method->is_static() ? "static" : "receiver"),
-                    method->signature()->as_C_string(), fingerprint->as_string(), insts_size );
+                    method->signature()->as_C_string(), insts_size);
       tty->print_cr("c2i argument handler starts at %p",entry->get_c2i_entry());
-      Disassembler::decode(entry->get_i2c_entry(), entry->get_i2c_entry() + insts_size);
+      if (Verbose || PrintStubCode) {
+        address first_pc = entry->base_address();
+        if (first_pc != NULL)
+          Disassembler::decode(first_pc, first_pc + insts_size);
+      }
     }
 #endif
 
@@ -2561,19 +2454,33 @@
   return entry;
 }
 
+address AdapterHandlerEntry::base_address() {
+  address base = _i2c_entry;
+  if (base == NULL)  base = _c2i_entry;
+  assert(base <= _c2i_entry || _c2i_entry == NULL, "");
+  assert(base <= _c2i_unverified_entry || _c2i_unverified_entry == NULL, "");
+  return base;
+}
+
 void AdapterHandlerEntry::relocate(address new_base) {
-    ptrdiff_t delta = new_base - _i2c_entry;
+  address old_base = base_address();
+  assert(old_base != NULL, "");
+  ptrdiff_t delta = new_base - old_base;
+  if (_i2c_entry != NULL)
     _i2c_entry += delta;
+  if (_c2i_entry != NULL)
     _c2i_entry += delta;
+  if (_c2i_unverified_entry != NULL)
     _c2i_unverified_entry += delta;
+  assert(base_address() == new_base, "");
 }
 
 
 void AdapterHandlerEntry::deallocate() {
   delete _fingerprint;
 #ifdef ASSERT
-  if (_saved_code) FREE_C_HEAP_ARRAY(unsigned char, _saved_code);
-  if (_saved_sig)  FREE_C_HEAP_ARRAY(Basictype, _saved_sig);
+  if (_saved_code) FREE_C_HEAP_ARRAY(unsigned char, _saved_code, mtCode);
+  if (_saved_sig)  FREE_C_HEAP_ARRAY(BasicType, _saved_sig, mtCode);
 #endif
 }
 
@@ -2583,11 +2490,11 @@
 // against other versions.  If the code is captured after relocation
 // then relative instructions won't be equivalent.
 void AdapterHandlerEntry::save_code(unsigned char* buffer, int length, int total_args_passed, BasicType* sig_bt) {
-  _saved_code = NEW_C_HEAP_ARRAY(unsigned char, length);
+  _saved_code = NEW_C_HEAP_ARRAY(unsigned char, length, mtCode);
   _code_length = length;
   memcpy(_saved_code, buffer, length);
   _total_args_passed = total_args_passed;
-  _saved_sig = NEW_C_HEAP_ARRAY(BasicType, _total_args_passed);
+  _saved_sig = NEW_C_HEAP_ARRAY(BasicType, _total_args_passed, mtCode);
   memcpy(_saved_sig, sig_bt, _total_args_passed * sizeof(BasicType));
 }
 
@@ -2614,7 +2521,9 @@
   ResourceMark rm;
   nmethod* nm = NULL;
 
-  assert(method->has_native_function(), "must have something valid to call!");
+  assert(method->is_native(), "must be native");
+  assert(method->is_method_handle_intrinsic() ||
+         method->has_native_function(), "must have something valid to call!");
 
   {
     // perform the work while holding the lock, but perform any printing outside the lock
@@ -2651,9 +2560,11 @@
       assert( i==total_args_passed, "" );
       BasicType ret_type = ss.type();
 
-      // Now get the compiled-Java layout as input arguments
-      int comp_args_on_stack;
-      comp_args_on_stack = SharedRuntime::java_calling_convention(sig_bt, regs, total_args_passed, false);
+      // Now get the compiled-Java layout as input (or output) arguments.
+      // NOTE: Stubs for compiled entry points of method handle intrinsics
+      // are just trampolines so the argument registers must be outgoing ones.
+      const bool is_outgoing = method->is_method_handle_intrinsic();
+      int comp_args_on_stack = SharedRuntime::java_calling_convention(sig_bt, regs, total_args_passed, is_outgoing);
 
       // Generate the compiled-to-native wrapper code
       nm = SharedRuntime::generate_native_wrapper(&_masm,
@@ -2893,7 +2804,7 @@
   int max_locals = moop->max_locals();
   // Allocate temp buffer, 1 word per local & 2 per active monitor
   int buf_size_words = max_locals + active_monitor_count*2;
-  intptr_t *buf = NEW_C_HEAP_ARRAY(intptr_t,buf_size_words);
+  intptr_t *buf = NEW_C_HEAP_ARRAY(intptr_t,buf_size_words, mtCode);
 
   // Copy the locals.  Order is preserved so that loading of longs works.
   // Since there's no GC I can copy the oops blindly.
@@ -2923,7 +2834,7 @@
 JRT_END
 
 JRT_LEAF(void, SharedRuntime::OSR_migration_end( intptr_t* buf) )
-  FREE_C_HEAP_ARRAY(intptr_t,buf);
+  FREE_C_HEAP_ARRAY(intptr_t,buf, mtCode);
 JRT_END
 
 bool AdapterHandlerLibrary::contains(CodeBlob* b) {
@@ -2939,18 +2850,22 @@
   AdapterHandlerTableIterator iter(_adapters);
   while (iter.has_next()) {
     AdapterHandlerEntry* a = iter.next();
-    if ( b == CodeCache::find_blob(a->get_i2c_entry()) ) {
+    if (b == CodeCache::find_blob(a->get_i2c_entry())) {
       st->print("Adapter for signature: ");
-      st->print_cr("%s i2c: " INTPTR_FORMAT " c2i: " INTPTR_FORMAT " c2iUV: " INTPTR_FORMAT,
-                   a->fingerprint()->as_string(),
-                   a->get_i2c_entry(), a->get_c2i_entry(), a->get_c2i_unverified_entry());
-
+      a->print_adapter_on(st);
       return;
     }
   }
   assert(false, "Should have found handler");
 }
 
+void AdapterHandlerEntry::print_adapter_on(outputStream* st) const {
+  st->print_cr("AHE@" INTPTR_FORMAT ": %s i2c: " INTPTR_FORMAT " c2i: " INTPTR_FORMAT " c2iUV: " INTPTR_FORMAT,
+               (intptr_t) this, fingerprint()->as_string(),
+               get_i2c_entry(), get_c2i_entry(), get_c2i_unverified_entry());
+
+}
+
 #ifndef PRODUCT
 
 void AdapterHandlerLibrary::print_statistics() {
--- a/src/share/vm/runtime/sharedRuntime.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/sharedRuntime.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -61,7 +61,6 @@
   static RuntimeStub*        _resolve_static_call_blob;
 
   static DeoptimizationBlob* _deopt_blob;
-  static RicochetBlob*       _ricochet_blob;
 
   static SafepointBlob*      _polling_page_safepoint_handler_blob;
   static SafepointBlob*      _polling_page_return_handler_blob;
@@ -187,7 +186,6 @@
   static void    throw_NullPointerException(JavaThread* thread);
   static void    throw_NullPointerException_at_call(JavaThread* thread);
   static void    throw_StackOverflowError(JavaThread* thread);
-  static void    throw_WrongMethodTypeException(JavaThread* thread, oopDesc* required, oopDesc* actual);
   static address continuation_for_implicit_exception(JavaThread* thread,
                                                      address faulting_pc,
                                                      ImplicitExceptionKind exception_kind);
@@ -223,16 +221,6 @@
     return _resolve_static_call_blob->entry_point();
   }
 
-  static RicochetBlob* ricochet_blob() {
-#ifdef X86
-    // Currently only implemented on x86
-    assert(!EnableInvokeDynamic || _ricochet_blob != NULL, "oops");
-#endif
-    return _ricochet_blob;
-  }
-
-  static void generate_ricochet_blob();
-
   static SafepointBlob* polling_page_return_handler_blob()     { return _polling_page_return_handler_blob; }
   static SafepointBlob* polling_page_safepoint_handler_blob()  { return _polling_page_safepoint_handler_blob; }
 
@@ -250,6 +238,7 @@
 
   // To be used as the entry point for unresolved native methods.
   static address native_method_throw_unsatisfied_link_error_entry();
+  static address native_method_throw_unsupported_operation_exception_entry();
 
   // bytecode tracing is only used by the TraceBytecodes
   static intptr_t trace_bytecode(JavaThread* thread, intptr_t preserve_this_value, intptr_t tos, intptr_t tos2) PRODUCT_RETURN0;
@@ -291,27 +280,6 @@
   static char* generate_class_cast_message(JavaThread* thr, const char* name);
 
   /**
-   * Fill in the message for a WrongMethodTypeException
-   *
-   * @param thr the current thread
-   * @param mtype (optional) expected method type (or argument class)
-   * @param mhandle (optional) actual method handle (or argument)
-   * @return the dynamically allocated exception message
-   *
-   * BCP for the frame on top of the stack must refer to an
-   * 'invokevirtual' op for a method handle, or an 'invokedyamic' op.
-   * The caller (or one of its callers) must use a ResourceMark
-   * in order to correctly free the result.
-   */
-  static char* generate_wrong_method_type_message(JavaThread* thr,
-                                                  oopDesc* mtype = NULL,
-                                                  oopDesc* mhandle = NULL);
-
-  /** Return non-null if the mtype is a klass or Class, not a MethodType. */
-  static oop wrong_method_type_is_for_single_argument(JavaThread* thr,
-                                                      oopDesc* mtype);
-
-  /**
    * Fill in the "X cannot be cast to a Y" message for ClassCastException
    *
    * @param name the name of the class of the object attempted to be cast
@@ -453,6 +421,10 @@
   // convention (handlizes oops, etc), transitions to native, makes the call,
   // returns to java state (possibly blocking), unhandlizes any result and
   // returns.
+  //
+  // The wrapper may contain special-case code if the given method is a
+  // JNI critical method or a compiled method handle adapter such as
+  // _invokeBasic or _linkToVirtual.
   static nmethod *generate_native_wrapper(MacroAssembler* masm,
                                           methodHandle method,
                                           int compile_id,
@@ -610,7 +582,7 @@
 // used by the adapters.  The code generation happens here because it's very
 // similar to what the adapters have to do.
 
-class AdapterHandlerEntry : public BasicHashtableEntry {
+class AdapterHandlerEntry : public BasicHashtableEntry<mtCode> {
   friend class AdapterHandlerTable;
 
  private:
@@ -647,16 +619,17 @@
   AdapterHandlerEntry();
 
  public:
-  address get_i2c_entry()            { return _i2c_entry; }
-  address get_c2i_entry()            { return _c2i_entry; }
-  address get_c2i_unverified_entry() { return _c2i_unverified_entry; }
+  address get_i2c_entry()            const { return _i2c_entry; }
+  address get_c2i_entry()            const { return _c2i_entry; }
+  address get_c2i_unverified_entry() const { return _c2i_unverified_entry; }
 
+  address base_address();
   void relocate(address new_base);
 
-  AdapterFingerPrint* fingerprint()  { return _fingerprint; }
+  AdapterFingerPrint* fingerprint() const { return _fingerprint; }
 
   AdapterHandlerEntry* next() {
-    return (AdapterHandlerEntry*)BasicHashtableEntry::next();
+    return (AdapterHandlerEntry*)BasicHashtableEntry<mtCode>::next();
   }
 
 #ifdef ASSERT
@@ -665,7 +638,8 @@
   bool compare_code(unsigned char* code, int length, int total_args_passed, BasicType* sig_bt);
 #endif
 
-  void print();
+  //virtual void print_on(outputStream* st) const;  DO NOT USE
+  void print_adapter_on(outputStream* st) const;
 };
 
 class AdapterHandlerLibrary: public AllStatic {
--- a/src/share/vm/runtime/signature.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/signature.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -396,6 +396,8 @@
   enum FailureMode { ReturnNull, CNFException, NCDFError };
   klassOop as_klass(Handle class_loader, Handle protection_domain, FailureMode failure_mode, TRAPS);
   oop as_java_mirror(Handle class_loader, Handle protection_domain, FailureMode failure_mode, TRAPS);
+  const jbyte* raw_bytes()  { return _signature->bytes() + _begin; }
+  int          raw_length() { return _end - _begin; }
 
   // return same as_symbol except allocation of new symbols is avoided.
   Symbol* as_symbol_or_null();
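
The new raw_bytes()/raw_length() pair exposes the current signature component as a pointer-plus-length view into the backing Symbol, so callers can inspect or hash it without the Symbol allocation that as_symbol() implies. A standalone sketch of the pattern; MiniSignatureStream is an illustrative stand-in, not the real SignatureStream.

    #include <cstdio>

    // Illustrative stand-in: a cursor over a method signature that exposes the
    // current component as a (pointer, length) slice instead of a fresh copy.
    struct MiniSignatureStream {
      const char* _signature;
      int _begin, _end;  // half-open range of the current component
      const char* raw_bytes()  const { return _signature + _begin; }
      int         raw_length() const { return _end - _begin; }
    };

    int main() {
      // Positioned on the String parameter of (ILjava/lang/String;)V.
      MiniSignatureStream ss = { "(ILjava/lang/String;)V", 2, 20 };
      printf("%.*s\n", ss.raw_length(), ss.raw_bytes());  // Ljava/lang/String;
      return 0;
    }
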
--- a/src/share/vm/runtime/stubCodeGenerator.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/stubCodeGenerator.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -36,7 +36,7 @@
 // Currently, code descriptors are simply chained in a linked list,
 // this may have to change if searching becomes too slow.
 
-class StubCodeDesc: public CHeapObj {
+class StubCodeDesc: public CHeapObj<mtCode> {
  protected:
   static StubCodeDesc* _list;                  // the list of all descriptors
   static int           _count;                 // length of list
--- a/src/share/vm/runtime/stubRoutines.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/stubRoutines.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -53,7 +53,6 @@
 address StubRoutines::_throw_IncompatibleClassChangeError_entry = NULL;
 address StubRoutines::_throw_NullPointerException_at_call_entry = NULL;
 address StubRoutines::_throw_StackOverflowError_entry           = NULL;
-address StubRoutines::_throw_WrongMethodTypeException_entry     = NULL;
 address StubRoutines::_handler_for_unsafe_access_entry          = NULL;
 jint    StubRoutines::_verify_oop_count                         = 0;
 address StubRoutines::_verify_oop_subroutine_entry              = NULL;
--- a/src/share/vm/runtime/stubRoutines.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/stubRoutines.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -130,7 +130,6 @@
   static address _throw_IncompatibleClassChangeError_entry;
   static address _throw_NullPointerException_at_call_entry;
   static address _throw_StackOverflowError_entry;
-  static address _throw_WrongMethodTypeException_entry;
   static address _handler_for_unsafe_access_entry;
 
   static address _atomic_xchg_entry;
@@ -225,6 +224,9 @@
       (_code2 != NULL && _code2->blob_contains(addr)) ;
   }
 
+  static CodeBlob* code1() { return _code1; }
+  static CodeBlob* code2() { return _code2; }
+
   // Debugging
   static jint    verify_oop_count()                        { return _verify_oop_count; }
   static jint*   verify_oop_count_addr()                   { return &_verify_oop_count; }
@@ -254,7 +256,6 @@
   static address throw_IncompatibleClassChangeError_entry(){ return _throw_IncompatibleClassChangeError_entry; }
   static address throw_NullPointerException_at_call_entry(){ return _throw_NullPointerException_at_call_entry; }
   static address throw_StackOverflowError_entry()          { return _throw_StackOverflowError_entry; }
-  static address throw_WrongMethodTypeException_entry()    { return _throw_WrongMethodTypeException_entry; }
 
   // Exceptions during unsafe access - should throw Java exception rather
   // than crash.
--- a/src/share/vm/runtime/sweeper.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/sweeper.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -228,7 +228,7 @@
 #ifdef ASSERT
     if (LogSweeper && _records == NULL) {
       // Create the ring buffer for the logging code
-      _records = NEW_C_HEAP_ARRAY(SweeperRecord, SweeperLogEntries);
+      _records = NEW_C_HEAP_ARRAY(SweeperRecord, SweeperLogEntries, mtGC);
       memset(_records, 0, sizeof(SweeperRecord) * SweeperLogEntries);
     }
 #endif
--- a/src/share/vm/runtime/task.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/task.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -35,7 +35,7 @@
 //   ...
 //   pf.disenroll();
 
-class PeriodicTask: public CHeapObj {
+class PeriodicTask: public CHeapObj<mtInternal> {
  public:
   // Useful constants.
   // The interval constants are used to ensure the declared interval
--- a/src/share/vm/runtime/thread.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/thread.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -73,6 +73,7 @@
 #include "runtime/vm_operations.hpp"
 #include "services/attachListener.hpp"
 #include "services/management.hpp"
+#include "services/memTracker.hpp"
 #include "services/threadService.hpp"
 #include "trace/traceEventTypes.hpp"
 #include "utilities/defaultStream.hpp"
@@ -159,6 +160,7 @@
 
 #endif // ndef DTRACE_ENABLED
 
+
 // Class hierarchy
 // - Thread
 //   - VMThread
@@ -168,13 +170,13 @@
 //     - CompilerThread
 
 // ======= Thread ========
-
 // Support for forcing alignment of thread objects for biased locking
-void* Thread::operator new(size_t size) {
+void* Thread::allocate(size_t size, bool throw_excpt, MEMFLAGS flags) {
   if (UseBiasedLocking) {
     const int alignment = markOopDesc::biased_lock_alignment;
     size_t aligned_size = size + (alignment - sizeof(intptr_t));
-    void* real_malloc_addr = CHeapObj::operator new(aligned_size);
+    void* real_malloc_addr = throw_excpt ? AllocateHeap(aligned_size, flags, CURRENT_PC)
+                                         : os::malloc(aligned_size, flags, CURRENT_PC);
     void* aligned_addr     = (void*) align_size_up((intptr_t) real_malloc_addr, alignment);
     assert(((uintptr_t) aligned_addr + (uintptr_t) size) <=
            ((uintptr_t) real_malloc_addr + (uintptr_t) aligned_size),
@@ -187,16 +189,17 @@
     ((Thread*) aligned_addr)->_real_malloc_address = real_malloc_addr;
     return aligned_addr;
   } else {
-    return CHeapObj::operator new(size);
+    return throw_excpt ? AllocateHeap(size, flags, CURRENT_PC)
+                       : os::malloc(size, flags, CURRENT_PC);
   }
 }
 
 void Thread::operator delete(void* p) {
   if (UseBiasedLocking) {
     void* real_malloc_addr = ((Thread*) p)->_real_malloc_address;
-    CHeapObj::operator delete(real_malloc_addr);
+    FreeHeap(real_malloc_addr, mtThread);
   } else {
-    CHeapObj::operator delete(p);
+    FreeHeap(p, mtThread);
   }
 }
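
The arithmetic in Thread::allocate is worth spelling out: malloc already returns storage aligned to at least sizeof(intptr_t), so padding the request by (alignment - sizeof(intptr_t)) guarantees that rounding the raw pointer up to the alignment still leaves room for the whole object, and the raw pointer is stashed so operator delete can free the original block. A standalone sketch of that scheme, with a plain constant standing in for markOopDesc::biased_lock_alignment:

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    static const uintptr_t alignment = 256;  // stand-in for markOopDesc::biased_lock_alignment

    static void* allocate_aligned(size_t size, void** real_malloc_addr) {
      // malloc is at least sizeof(intptr_t)-aligned, so this much slack suffices.
      size_t aligned_size = size + (alignment - sizeof(intptr_t));
      void* raw = ::malloc(aligned_size);
      uintptr_t aligned = ((uintptr_t)raw + alignment - 1) & ~(alignment - 1);
      // The aligned object must still fit inside the raw block.
      assert(aligned + size <= (uintptr_t)raw + aligned_size);
      *real_malloc_addr = raw;  // remembered so the delete path can free raw
      return (void*)aligned;
    }

    int main() {
      void* raw;
      void* obj = allocate_aligned(sizeof(int) * 100, &raw);
      assert(((uintptr_t)obj & (alignment - 1)) == 0);
      ::free(raw);  // free the remembered raw pointer, not obj
      return 0;
    }
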
 
@@ -214,8 +217,8 @@
 
   // allocated data structures
   set_osthread(NULL);
-  set_resource_area(new ResourceArea());
-  set_handle_area(new HandleArea(NULL));
+  set_resource_area(new (mtThread) ResourceArea());
+  set_handle_area(new (mtThread) HandleArea(NULL));
   set_active_handles(NULL);
   set_free_handle_block(NULL);
   set_last_handle_mark(NULL);
@@ -306,12 +309,17 @@
 
   // set up any platform-specific state.
   os::initialize_thread();
-
 }
 
 void Thread::record_stack_base_and_size() {
   set_stack_base(os::current_stack_base());
   set_stack_size(os::current_stack_size());
+
+  // Record the thread's native stack; the stack grows downward.
+  address vm_base = _stack_base - _stack_size;
+  MemTracker::record_virtual_memory_reserve(vm_base, _stack_size,
+    CURRENT_PC, this);
+  MemTracker::record_virtual_memory_type(vm_base, mtThreadStack);
 }
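
Because the native stack grows downward, _stack_base is the high end of the region, so the range NMT should see as reserved is [_stack_base - _stack_size, _stack_base). A tiny sketch of that arithmetic with purely illustrative numbers:

    #include <cstdio>

    int main() {
      // Illustrative values: a 1 MB stack whose base (high address) is 0x7f0000100000.
      unsigned long stack_base = 0x7f0000100000UL;
      unsigned long stack_size = 1024UL * 1024UL;
      unsigned long vm_base    = stack_base - stack_size;  // low end, what gets recorded
      printf("thread stack reserved: [%#lx, %#lx)\n", vm_base, stack_base);
      return 0;
    }
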
 
 
@@ -319,6 +327,9 @@
   // Reclaim the objectmonitors from the omFreeList of the moribund thread.
   ObjectSynchronizer::omFlush (this) ;
 
+  MemTracker::record_virtual_memory_release((_stack_base - _stack_size),
+    _stack_size, this);
+
   // deallocate data structures
   delete resource_area();
   // since the handle marks are using the handle area, we have to deallocated the root
@@ -996,6 +1007,29 @@
                                          vmSymbols::void_method_signature(), CHECK);
 }
 
+char java_runtime_name[128] = "";
+
+// extract the JRE name from sun.misc.Version.java_runtime_name
+static const char* get_java_runtime_name(TRAPS) {
+  klassOop k = SystemDictionary::find(vmSymbols::sun_misc_Version(),
+                                      Handle(), Handle(), CHECK_AND_CLEAR_NULL);
+  fieldDescriptor fd;
+  bool found = k != NULL &&
+               instanceKlass::cast(k)->find_local_field(vmSymbols::java_runtime_name_name(),
+                                                        vmSymbols::string_signature(), &fd);
+  if (found) {
+    oop name_oop = k->java_mirror()->obj_field(fd.offset());
+    if (name_oop == NULL)
+      return NULL;
+    const char* name = java_lang_String::as_utf8_string(name_oop,
+                                                        java_runtime_name,
+                                                        sizeof(java_runtime_name));
+    return name;
+  } else {
+    return NULL;
+  }
+}
+
 // General purpose hook into Java code, run once when the VM is initialized.
 // The Java library method itself may be changed independently from the VM.
 static void call_postVMInitHook(TRAPS) {
@@ -1105,14 +1139,14 @@
 
 NamedThread::~NamedThread() {
   if (_name != NULL) {
-    FREE_C_HEAP_ARRAY(char, _name);
+    FREE_C_HEAP_ARRAY(char, _name, mtThread);
     _name = NULL;
   }
 }
 
 void NamedThread::set_name(const char* format, ...) {
   guarantee(_name == NULL, "Only get to set name once.");
-  _name = NEW_C_HEAP_ARRAY(char, max_name_len);
+  _name = NEW_C_HEAP_ARRAY(char, max_name_len, mtThread);
   guarantee(_name != NULL, "alloc failure");
   va_list ap;
   va_start(ap, format);
@@ -1295,6 +1329,7 @@
   set_monitor_chunks(NULL);
   set_next(NULL);
   set_thread_state(_thread_new);
+  set_recorder(NULL);
   _terminated = _not_terminated;
   _privileged_stack_top = NULL;
   _array_for_gc = NULL;
@@ -1370,6 +1405,7 @@
     _jni_attach_state = _not_attaching_via_jni;
   }
   assert(_deferred_card_mark.is_empty(), "Default MemRegion ctor");
+  _safepoint_visible = false;
 }
 
 bool JavaThread::reguard_stack(address cur_sp) {
@@ -1432,7 +1468,7 @@
   thr_type = entry_point == &compiler_thread_entry ? os::compiler_thread :
                                                      os::java_thread;
   os::create_thread(this, thr_type, stack_sz);
-
+  _safepoint_visible = false;
   // The _osthread may be NULL here because we ran out of memory (too many threads active).
   // We need to throw and OutOfMemoryError - however we cannot do this here because the caller
   // may hold a lock and all locks must be unlocked before throwing the exception (throwing
@@ -1450,6 +1486,11 @@
       tty->print_cr("terminate thread %p", this);
   }
 
+  // Inform NMT that this JavaThread is exiting, so its memory
+  // recorder can be collected.
+  assert(!is_safepoint_visible(), "wrong state");
+  MemTracker::thread_exiting(this);
+
   // JSR166 -- return the parker to the free list
   Parker::Release(_parker);
   _parker = NULL ;
@@ -2892,7 +2933,7 @@
 void JavaThread::popframe_preserve_args(ByteSize size_in_bytes, void* start) {
   assert(_popframe_preserved_args == NULL, "should not wipe out old PopFrame preserved arguments");
   if (in_bytes(size_in_bytes) != 0) {
-    _popframe_preserved_args = NEW_C_HEAP_ARRAY(char, in_bytes(size_in_bytes));
+    _popframe_preserved_args = NEW_C_HEAP_ARRAY(char, in_bytes(size_in_bytes), mtThread);
     _popframe_preserved_args_size = in_bytes(size_in_bytes);
     Copy::conjoint_jbytes(start, _popframe_preserved_args, _popframe_preserved_args_size);
   }
@@ -2914,7 +2955,7 @@
 
 void JavaThread::popframe_free_preserved_args() {
   assert(_popframe_preserved_args != NULL, "should not free PopFrame preserved arguments twice");
-  FREE_C_HEAP_ARRAY(char, (char*) _popframe_preserved_args);
+  FREE_C_HEAP_ARRAY(char, (char*) _popframe_preserved_args, mtThread);
   _popframe_preserved_args = NULL;
   _popframe_preserved_args_size = 0;
 }
@@ -3163,6 +3204,14 @@
   jint os_init_2_result = os::init_2();
   if (os_init_2_result != JNI_OK) return os_init_2_result;
 
+  // Initialize TLS
+  ThreadLocalStorage::init();
+
+  // Bootstrap native memory tracking, so it can start recording memory
+  // activities before any worker thread is started. This is the first
+  // phase of bootstrapping; the VM is still running in single-threaded mode.
+  MemTracker::bootstrap_single_thread();
+
   // Initialize output stream logging
   ostream_init_log();
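
Taken together with the later hunks, this change pins down an ordering: TLS first, then NMT's single-threaded bootstrap before any worker thread exists, a second bootstrap step right before init_globals() goes multi-threaded, full start once the heap is verified, and shutdown before static destructors run. A sketch of that progression as a checked state machine; the phase names are illustrative, not NMT's internal states.

    #include <cassert>

    // Illustrative phase ordering only; the real states live in memTracker.hpp.
    enum NMTPhase { NMT_pre_init, NMT_single_thread, NMT_multi_thread, NMT_started, NMT_shutdown };
    static NMTPhase g_phase = NMT_pre_init;

    static void advance_to(NMTPhase next) {
      assert(next == g_phase + 1 && "NMT bootstrap phases must advance in order");
      g_phase = next;
    }

    int main() {
      advance_to(NMT_single_thread);  // create_vm: after TLS init, before worker threads
      advance_to(NMT_multi_thread);   // just before init_globals() spawns threads
      advance_to(NMT_started);        // after Universe::verify()
      advance_to(NMT_shutdown);       // destroy_vm: before static destructors run
      return 0;
    }
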
 
@@ -3182,9 +3231,6 @@
   _number_of_threads = 0;
   _number_of_non_daemon_threads = 0;
 
-  // Initialize TLS
-  ThreadLocalStorage::init();
-
   // Initialize global data structures and create system classes in heap
   vm_init_globals();
 
@@ -3216,6 +3262,9 @@
   // Initialize Java-Level synchronization subsystem
   ObjectMonitor::Initialize() ;
 
+  // Second phase of bootstrapping: the VM is about to enter multi-threaded mode.
+  MemTracker::bootstrap_multi_thread();
+
   // Initialize global modules
   jint status = init_globals();
   if (status != JNI_OK) {
@@ -3243,6 +3292,9 @@
     Universe::verify();   // make sure we're starting with a clean slate
   }
 
+  // Fully start NMT
+  MemTracker::start();
+
   // Create the VMThread
   { TraceTime timer("Start VMThread", TraceStartupTime);
     VMThread::create();
@@ -3352,6 +3404,9 @@
       // The VM creates & returns objects of this class. Make sure it's initialized.
       initialize_class(vmSymbols::java_lang_Class(), CHECK_0);
       call_initializeSystemClass(CHECK_0);
+
+      // get the Java runtime name after java.lang.System is initialized
+      JDK_Version::set_runtime_name(get_java_runtime_name(THREAD));
     } else {
       warning("java.lang.System not initialized");
     }
@@ -3468,13 +3523,13 @@
     create_vm_init_libraries();
   }
 
+  // Notify JVMTI agents that VM initialization is complete - nop if no agents.
+  JvmtiExport::post_vm_initialized();
+
   if (!TRACE_START()) {
     vm_exit_during_initialization(Handle(THREAD, PENDING_EXCEPTION));
   }
 
-  // Notify JVMTI agents that VM initialization is complete - nop if no agents.
-  JvmtiExport::post_vm_initialized();
-
   if (CleanChunkPoolAsync) {
     Chunk::start_chunk_pool_cleaner_task();
   }
@@ -3544,11 +3599,11 @@
       if (library == NULL) {
         const char *sub_msg = " in absolute path, with error: ";
         size_t len = strlen(msg) + strlen(name) + strlen(sub_msg) + strlen(ebuf) + 1;
-        char *buf = NEW_C_HEAP_ARRAY(char, len);
+        char *buf = NEW_C_HEAP_ARRAY(char, len, mtThread);
         jio_snprintf(buf, len, "%s%s%s%s", msg, name, sub_msg, ebuf);
         // If we can't find the agent, exit.
         vm_exit_during_initialization(buf, NULL);
-        FREE_C_HEAP_ARRAY(char, buf);
+        FREE_C_HEAP_ARRAY(char, buf, mtThread);
       }
     } else {
       // Try to load the agent from the standard dll directory
@@ -3562,7 +3617,7 @@
         const char *fmt   = "%s/bin/java %s -Dkernel.background.download=false"
                       " sun.jkernel.DownloadManager -download client_jvm";
         size_t length = strlen(props) + strlen(home) + strlen(fmt) + 1;
-        char *cmd = NEW_C_HEAP_ARRAY(char, length);
+        char *cmd = NEW_C_HEAP_ARRAY(char, length, mtThread);
         jio_snprintf(cmd, length, fmt, home, props);
         int status = os::fork_and_exec(cmd);
         FreeHeap(props);
@@ -3571,7 +3626,7 @@
           vm_exit_during_initialization("fork_and_exec failed: %s",
                                          strerror(errno));
         }
-        FREE_C_HEAP_ARRAY(char, cmd);
+        FREE_C_HEAP_ARRAY(char, cmd, mtThread);
         // when this comes back the instrument.dll should be where it belongs.
         library = os::dll_load(buffer, ebuf, sizeof ebuf);
       }
@@ -3583,11 +3638,11 @@
         if (library == NULL) {
           const char *sub_msg = " on the library path, with error: ";
           size_t len = strlen(msg) + strlen(name) + strlen(sub_msg) + strlen(ebuf) + 1;
-          char *buf = NEW_C_HEAP_ARRAY(char, len);
+          char *buf = NEW_C_HEAP_ARRAY(char, len, mtThread);
           jio_snprintf(buf, len, "%s%s%s%s", msg, name, sub_msg, ebuf);
           // If we can't find the agent, exit.
           vm_exit_during_initialization(buf, NULL);
-          FREE_C_HEAP_ARRAY(char, buf);
+          FREE_C_HEAP_ARRAY(char, buf, mtThread);
         }
       }
     }
@@ -3756,6 +3811,7 @@
 // and VM_Exit op at VM level.
 //
 // Shutdown sequence:
+//   + Shutdown native memory tracking if it is on
 //   + Wait until we are the last non-daemon thread to execute
 //     <-- every thing is still working at this moment -->
 //   + Call java.lang.Shutdown.shutdown(), which will invoke Java level
@@ -3801,6 +3857,10 @@
                          Mutex::_as_suspend_equivalent_flag);
   }
 
+  // Shut down NMT before exit; otherwise it runs into trouble when the
+  // system destroys static variables.
+  MemTracker::shutdown(MemTracker::NMT_normal);
+
   // Hang forever on exit if we are reporting an error.
   if (ShowMessageBoxOnError && is_error_reported()) {
     os::infinite_sleep();
@@ -3907,6 +3967,8 @@
     daemon = false;
   }
 
+  p->set_safepoint_visible(true);
+
   ThreadService::add_thread(p, daemon);
 
   // Possible GC point.
@@ -3952,6 +4014,10 @@
     // to do callbacks into the safepoint code. However, the safepoint code is not aware
     // of this thread since it is removed from the queue.
     p->set_terminated_value();
+
+    // Now this thread is no longer visible to safepoints.
+    p->set_safepoint_visible(false);
+
   } // unlock Threads_lock
 
   // Since Events::log uses a lock, we grab it outside the Threads_lock
--- a/src/share/vm/runtime/thread.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/thread.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -41,6 +41,7 @@
 #include "runtime/stubRoutines.hpp"
 #include "runtime/threadLocalStorage.hpp"
 #include "runtime/unhandledOops.hpp"
+#include "services/memRecorder.hpp"
 #include "trace/tracing.hpp"
 #include "utilities/exceptions.hpp"
 #include "utilities/top.hpp"
@@ -100,12 +101,16 @@
   //oop       _pending_exception;                // pending exception for current thread
   // const char* _exception_file;                   // file information for exception (debugging only)
   // int         _exception_line;                   // line information for exception (debugging only)
-
+ protected:
   // Support for forcing alignment of thread objects for biased locking
   void*       _real_malloc_address;
  public:
-  void* operator new(size_t size);
+  void* operator new(size_t size) { return allocate(size, true); }
+  void* operator new(size_t size, std::nothrow_t& nothrow_constant) { return allocate(size, false); }
   void  operator delete(void* p);
+
+ protected:
+  static void* allocate(size_t size, bool throw_excpt, MEMFLAGS flags = mtThread);
  private:
 
   // ***************************************************************
@@ -548,7 +553,6 @@
   virtual void print_on_error(outputStream* st, char* buf, int buflen) const;
 
   // Debug-only code
-
 #ifdef ASSERT
  private:
   // Deadlock detection support for Mutex locks. List of locks own by thread.
@@ -1027,9 +1031,15 @@
   bool do_not_unlock_if_synchronized()             { return _do_not_unlock_if_synchronized; }
   void set_do_not_unlock_if_synchronized(bool val) { _do_not_unlock_if_synchronized = val; }
 
+  // native memory tracking
+  inline MemRecorder* get_recorder() const          { return (MemRecorder*)_recorder; }
+  inline void         set_recorder(MemRecorder* rc) { _recorder = (volatile MemRecorder*)rc; }
+
+ private:
+  // per-thread memory recorder
+  volatile MemRecorder* _recorder;
 
   // Suspend/resume support for JavaThread
-
  private:
   void set_ext_suspended()       { set_suspend_flag (_ext_suspended);  }
   void clear_ext_suspended()     { clear_suspend_flag(_ext_suspended); }
@@ -1453,6 +1463,18 @@
      return result;
    }
 
+ // NMT (Native Memory Tracking) support.
+ // This flag helps NMT determine whether this JavaThread will be blocked
+ // at a safepoint. If not, ThreadCritical is needed to write memory records.
+ // A JavaThread is safepoint-visible only while it is on the Threads list:
+ // it becomes visible when added to the list and invisible once removed
+ // from the list.
+ public:
+  bool is_safepoint_visible() const { return _safepoint_visible; }
+  void set_safepoint_visible(bool visible) { _safepoint_visible = visible; }
+ private:
+  bool _safepoint_visible;
+
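
A sketch of how the flag is meant to be consumed: a safepoint-visible thread will park at safepoints, so NMT can write to that thread's private recorder without global locking, while an invisible thread must fall back to a serialized path (ThreadCritical in HotSpot). Everything below except the safepoint-visibility test itself is an illustrative stand-in.

    #include <cstddef>

    // Minimal stand-ins; only the safepoint-visibility check mirrors the hunk above.
    struct MiniRecorder {
      void record(void* addr, size_t size) { (void)addr; (void)size; /* buffer it */ }
    };
    struct MiniJavaThread {
      bool _safepoint_visible;
      MiniRecorder _recorder;
      bool is_safepoint_visible() const { return _safepoint_visible; }
      MiniRecorder* get_recorder() { return &_recorder; }
    };
    static MiniRecorder g_shared_recorder;  // stands in for the ThreadCritical path

    static void record_malloc(MiniJavaThread* jt, void* addr, size_t size) {
      if (jt != NULL && jt->is_safepoint_visible()) {
        // Will be blocked at safepoints, so its private recorder is race-free.
        jt->get_recorder()->record(addr, size);
      } else {
        // Not (or no longer) on the Threads list: take the serialized path,
        // which HotSpot guards with ThreadCritical.
        g_shared_recorder.record(addr, size);
      }
    }

    int main() {
      MiniJavaThread t = { true, MiniRecorder() };
      int x = 0;
      record_malloc(&t, &x, sizeof(x));
      return 0;
    }
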
   // Static operations
  public:
   // Returns the running thread as a JavaThread
--- a/src/share/vm/runtime/unhandledOops.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/unhandledOops.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -37,7 +37,7 @@
 
 UnhandledOops::UnhandledOops(Thread* thread) {
   _thread = thread;
-  _oop_list = new (ResourceObj::C_HEAP)
+  _oop_list = new (ResourceObj::C_HEAP, mtInternal)
                     GrowableArray<UnhandledOopEntry>(free_list_size, true);
   _level = 0;
 }
--- a/src/share/vm/runtime/vframe.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/vframe.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -410,8 +410,9 @@
               Klass::cast(method()->method_holder())
                  ->is_subclass_of(SystemDictionary::reflect_MethodAccessorImpl_klass())) {
       // This is an auxilary frame -- skip it
-    } else if (method()->is_method_handle_adapter()) {
-      // This is an internal adapter frame from the MethodHandleCompiler -- skip it
+    } else if (method()->is_method_handle_intrinsic() ||
+               method()->is_compiled_lambda_form()) {
+      // This is an internal adapter frame for method handles -- skip it
     } else {
       // This is non-excluded frame, we need to count it against the depth
       if (depth-- <= 0) {
--- a/src/share/vm/runtime/vframeArray.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/vframeArray.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "classfile/vmSymbols.hpp"
+#include "interpreter/bytecode.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/resourceArea.hpp"
@@ -443,7 +444,7 @@
   // Allocate the vframeArray
   vframeArray * result = (vframeArray*) AllocateHeap(sizeof(vframeArray) + // fixed part
                                                      sizeof(vframeArrayElement) * (chunk->length() - 1), // variable part
-                                                     "vframeArray::allocate");
+                                                     mtCompiler);
   result->_frames = chunk->length();
   result->_owner_thread = thread;
   result->_sender = sender;
@@ -510,7 +511,8 @@
   //  in the above picture.
 
   // Find the skeletal interpreter frames to unpack into
-  RegisterMap map(JavaThread::current(), false);
+  JavaThread* THREAD = JavaThread::current();
+  RegisterMap map(THREAD, false);
   // Get the youngest frame we will unpack (last to be unpacked)
   frame me = unpack_frame.sender(&map);
   int index;
@@ -520,29 +522,37 @@
     me = me.sender(&map);
   }
 
+  // Do the unpacking of interpreter frames; the frame at index 0 represents
+  // the top activation, so it has no callee.
+  // Unpack the frames from the oldest (frames() - 1) to the youngest (0).
   frame caller_frame = me;
-
-  // Do the unpacking of interpreter frames; the frame at index 0 represents the top activation, so it has no callee
-
-  // Unpack the frames from the oldest (frames() -1) to the youngest (0)
-
   for (index = frames() - 1; index >= 0 ; index--) {
-    int callee_parameters = index == 0 ? 0 : element(index-1)->method()->size_of_parameters();
-    int callee_locals     = index == 0 ? 0 : element(index-1)->method()->max_locals();
-    element(index)->unpack_on_stack(caller_actual_parameters,
-                                    callee_parameters,
-                                    callee_locals,
-                                    &caller_frame,
-                                    index == 0,
-                                    exec_mode);
+    vframeArrayElement* elem = element(index);  // caller
+    int callee_parameters, callee_locals;
+    if (index == 0) {
+      callee_parameters = callee_locals = 0;
+    } else {
+      methodHandle caller = elem->method();
+      methodHandle callee = element(index - 1)->method();
+      Bytecode_invoke inv(caller, elem->bci());
+      // invokedynamic instructions have no class reference, but they also
+      // never carry a MemberName appendix, so they can be excluded up front.
+      // NOTE: Use machinery here that avoids resolution of any kind.
+      const bool has_member_arg =
+          !inv.is_invokedynamic() && MethodHandles::has_member_arg(inv.klass(), inv.name());
+      callee_parameters = callee->size_of_parameters() + (has_member_arg ? 1 : 0);
+      callee_locals     = callee->max_locals();
+    }
+    elem->unpack_on_stack(caller_actual_parameters,
+                          callee_parameters,
+                          callee_locals,
+                          &caller_frame,
+                          index == 0,
+                          exec_mode);
     if (index == frames() - 1) {
-      Deoptimization::unwind_callee_save_values(element(index)->iframe(), this);
+      Deoptimization::unwind_callee_save_values(elem->iframe(), this);
     }
-    caller_frame = *element(index)->iframe();
+    caller_frame = *elem->iframe();
     caller_actual_parameters = callee_parameters;
   }
-
-
   deallocate_monitor_chunks();
 }
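
Concretely: for the linkTo* method handle intrinsics the JVM appends a trailing MemberName argument that is not part of the method's declared signature, so the skeletal interpreter frame needs one more parameter slot than size_of_parameters() reports, while invokedynamic call sites never take that extra slot. A toy version of the computation, with assumed inputs:

    #include <cassert>

    // Toy version of the callee_parameters logic above; has_member_arg mirrors
    // MethodHandles::has_member_arg (true for linkToVirtual, linkToStatic, ...).
    static int callee_parameter_slots(int declared_size_of_parameters,
                                      bool is_invokedynamic,
                                      bool has_member_arg) {
      bool extra = !is_invokedynamic && has_member_arg;
      return declared_size_of_parameters + (extra ? 1 : 0);
    }

    int main() {
      assert(callee_parameter_slots(3, false, true)  == 4);  // linkToVirtual: +1 MemberName
      assert(callee_parameter_slots(3, true,  false) == 3);  // invokedynamic: no extra slot
      assert(callee_parameter_slots(3, false, false) == 3);  // ordinary invoke
      return 0;
    }
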
 
--- a/src/share/vm/runtime/vframeArray.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/vframeArray.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -108,7 +108,7 @@
 // but it does make debugging easier even if we can't look
 // at the data in each vframeElement
 
-class vframeArray: public CHeapObj {
+class vframeArray: public CHeapObj<mtCompiler> {
   friend class VMStructs;
 
  private:
--- a/src/share/vm/runtime/vframe_hp.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/vframe_hp.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -154,7 +154,7 @@
   } else {
     // No deferred updates pending for this thread.
     // allocate in C heap
-    deferred =  new(ResourceObj::C_HEAP) GrowableArray<jvmtiDeferredLocalVariableSet*> (1, true);
+    deferred =  new(ResourceObj::C_HEAP, mtCompiler) GrowableArray<jvmtiDeferredLocalVariableSet*> (1, true);
     thread()->set_deferred_locals(deferred);
   }
   deferred->push(new jvmtiDeferredLocalVariableSet(method(), bci(), fr().id()));
@@ -323,7 +323,7 @@
   _bci = bci;
   _id = id;
   // Alway will need at least one, must be on C heap
-  _locals = new(ResourceObj::C_HEAP) GrowableArray<jvmtiDeferredLocalVariable*> (1, true);
+  _locals = new(ResourceObj::C_HEAP, mtCompiler) GrowableArray<jvmtiDeferredLocalVariable*> (1, true);
 }
 
 jvmtiDeferredLocalVariableSet::~jvmtiDeferredLocalVariableSet() {
--- a/src/share/vm/runtime/vframe_hp.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/vframe_hp.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -89,7 +89,7 @@
 // any updated locals.
 
 class jvmtiDeferredLocalVariable;
-class jvmtiDeferredLocalVariableSet : public CHeapObj {
+class jvmtiDeferredLocalVariableSet : public CHeapObj<mtCompiler> {
 private:
 
   methodOop _method;           // must be GC'd
@@ -119,7 +119,7 @@
 
 };
 
-class jvmtiDeferredLocalVariable : public CHeapObj {
+class jvmtiDeferredLocalVariable : public CHeapObj<mtCompiler> {
   public:
 
     jvmtiDeferredLocalVariable(int index, BasicType type, jvalue value);
--- a/src/share/vm/runtime/virtualspace.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/virtualspace.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -26,6 +26,7 @@
 #include "oops/markOop.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/virtualspace.hpp"
+#include "services/memTracker.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
 #endif
@@ -489,6 +490,10 @@
                 (UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
                  Universe::narrow_oop_use_implicit_null_checks()) ?
                   lcm(os::vm_page_size(), alignment) : 0) {
+  if (base() > 0) {
+    MemTracker::record_virtual_memory_type((address)base(), mtJavaHeap);
+  }
+
   // Only reserved space for the java heap should have a noaccess_prefix
   // if using compressed oops.
   protect_noaccess_prefix(size);
@@ -504,6 +509,10 @@
                 (UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
                  Universe::narrow_oop_use_implicit_null_checks()) ?
                   lcm(os::vm_page_size(), prefix_align) : 0) {
+  if (base() > 0) {
+    MemTracker::record_virtual_memory_type((address)base(), mtJavaHeap);
+  }
+
   protect_noaccess_prefix(prefix_size+suffix_size);
 }
 
@@ -513,6 +522,7 @@
                                      size_t rs_align,
                                      bool large) :
   ReservedSpace(r_size, rs_align, large, /*executable*/ true) {
+  MemTracker::record_virtual_memory_type((address)base(), mtCode);
 }
 
 // VirtualSpace
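
The pattern in these constructors is reserve-then-retag: the base ReservedSpace records a generic reservation, and each specialized space retags the same region (mtJavaHeap, mtCode) once its purpose is known. A toy tracker showing the two steps; real NMT keys regions by address range, not a single pointer.

    #include <cstdio>
    #include <map>

    enum MemType { mtNone, mtJavaHeap, mtCode };

    // Toy tracker keyed by base address; illustrative only.
    static std::map<void*, MemType> g_regions;

    static void record_reserve(void* base)            { g_regions[base] = mtNone; }
    static void record_type(void* base, MemType type) { g_regions[base] = type;   }

    int main() {
      static char space[4096];
      record_reserve(space);           // generic reservation in the base constructor
      record_type(space, mtJavaHeap);  // retagged once the subclass knows its purpose
      printf("region type = %d\n", (int)g_regions[(void*)space]);
      return 0;
    }
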
--- a/src/share/vm/runtime/vmStructs.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/vmStructs.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -44,7 +44,6 @@
 #include "code/vmreg.hpp"
 #include "compiler/oopMap.hpp"
 #include "compiler/compileBroker.hpp"
-#include "gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp"
 #include "gc_implementation/shared/immutableSpace.hpp"
 #include "gc_implementation/shared/markSweep.hpp"
 #include "gc_implementation/shared/mutableSpace.hpp"
@@ -55,6 +54,7 @@
 #include "memory/cardTableRS.hpp"
 #include "memory/compactPermGen.hpp"
 #include "memory/defNewGeneration.hpp"
+#include "memory/freeBlockDictionary.hpp"
 #include "memory/genCollectedHeap.hpp"
 #include "memory/generation.hpp"
 #include "memory/generationSpec.hpp"
@@ -235,7 +235,6 @@
 #ifndef REG_COUNT
   #define REG_COUNT 0
 #endif
-
 // whole purpose of this function is to work around bug c++/27724 in gcc 4.1.1
 // with optimization turned on it doesn't affect produced code
 static inline uint64_t cast_uint64_t(size_t x)
@@ -244,6 +243,16 @@
 }
 
 
+typedef HashtableEntry<intptr_t, mtInternal>  IntptrHashtableEntry;
+typedef Hashtable<intptr_t, mtInternal>       IntptrHashtable;
+typedef Hashtable<Symbol*, mtSymbol>          SymbolHashtable;
+typedef HashtableEntry<Symbol*, mtClass>      SymbolHashtableEntry;
+typedef Hashtable<oop, mtSymbol>              StringHashtable;
+typedef TwoOopHashtable<klassOop, mtClass>    klassOopTwoOopHashtable;
+typedef Hashtable<klassOop, mtClass>          klassOopHashtable;
+typedef HashtableEntry<klassOop, mtClass>     klassHashtableEntry;
+typedef TwoOopHashtable<Symbol*, mtClass>     SymbolTwoOopHashtable;
+
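
These typedefs are not cosmetic: the VM_STRUCTS entries are preprocessor macros, and a template-id such as Hashtable<intptr_t, mtInternal> contains a top-level comma that the preprocessor would split into two macro arguments. Hiding the comma behind a typedef is the standard workaround, as this minimal illustration shows:

    #include <cstdio>

    enum MEMFLAGS { mtInternal };
    template <typename T, MEMFLAGS F> struct Hashtable { T* _slots; };

    #define TYPE_SIZE(type) sizeof(type)

    // TYPE_SIZE(Hashtable<int, mtInternal>)   // error: macro receives 2 arguments
    typedef Hashtable<int, mtInternal> IntHashtable;

    int main() {
      printf("%zu\n", TYPE_SIZE(IntHashtable));  // fine: the comma is hidden
      return 0;
    }
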
 //--------------------------------------------------------------------------------
 // VM_STRUCTS
 //
@@ -292,8 +301,6 @@
   nonstatic_field(instanceKlass,               _method_ordering,                              typeArrayOop)                          \
   nonstatic_field(instanceKlass,               _local_interfaces,                             objArrayOop)                           \
   nonstatic_field(instanceKlass,               _transitive_interfaces,                        objArrayOop)                           \
-  nonstatic_field(instanceKlass,               _nof_implementors,                             int)                                   \
-  nonstatic_field(instanceKlass,               _implementors[0],                              klassOop)                              \
   nonstatic_field(instanceKlass,               _fields,                                       typeArrayOop)                          \
   nonstatic_field(instanceKlass,               _java_fields_count,                            u2)                                    \
   nonstatic_field(instanceKlass,               _constants,                                    constantPoolOop)                       \
@@ -360,7 +367,6 @@
   nonstatic_field(methodDataOopDesc,           _arg_stack,                                    intx)                                  \
   nonstatic_field(methodDataOopDesc,           _arg_returned,                                 intx)                                  \
   nonstatic_field(methodOopDesc,               _constMethod,                                  constMethodOop)                        \
-  nonstatic_field(methodOopDesc,               _constants,                                    constantPoolOop)                       \
   nonstatic_field(methodOopDesc,               _method_data,                                  methodDataOop)                         \
   nonstatic_field(methodOopDesc,               _interpreter_invocation_count,                 int)                                   \
   nonstatic_field(methodOopDesc,               _access_flags,                                 AccessFlags)                           \
@@ -380,9 +386,8 @@
   volatile_nonstatic_field(methodOopDesc,      _from_compiled_entry,                          address)                               \
   volatile_nonstatic_field(methodOopDesc,      _from_interpreted_entry,                       address)                               \
   volatile_nonstatic_field(constMethodOopDesc, _fingerprint,                                  uint64_t)                              \
-  nonstatic_field(constMethodOopDesc,          _method,                                       methodOop)                             \
+  nonstatic_field(constMethodOopDesc,          _constants,                                    constantPoolOop)                       \
   nonstatic_field(constMethodOopDesc,          _stackmap_data,                                typeArrayOop)                          \
-  nonstatic_field(constMethodOopDesc,          _exception_table,                              typeArrayOop)                          \
   nonstatic_field(constMethodOopDesc,          _constMethod_size,                             int)                                   \
   nonstatic_field(constMethodOopDesc,          _interpreter_kind,                             jbyte)                                 \
   nonstatic_field(constMethodOopDesc,          _flags,                                        jbyte)                                 \
@@ -419,6 +424,10 @@
   nonstatic_field(LocalVariableTableElement,   descriptor_cp_index,                           u2)                                    \
   nonstatic_field(LocalVariableTableElement,   signature_cp_index,                            u2)                                    \
   nonstatic_field(LocalVariableTableElement,   slot,                                          u2)                                    \
+  nonstatic_field(ExceptionTableElement,       start_pc,                                      u2)                                    \
+  nonstatic_field(ExceptionTableElement,       end_pc,                                        u2)                                    \
+  nonstatic_field(ExceptionTableElement,       handler_pc,                                    u2)                                    \
+  nonstatic_field(ExceptionTableElement,       catch_type_index,                              u2)                                    \
   nonstatic_field(BreakpointInfo,              _orig_bytecode,                                Bytecodes::Code)                       \
   nonstatic_field(BreakpointInfo,              _bci,                                          int)                                   \
   nonstatic_field(BreakpointInfo,              _name_index,                                   u2)                                    \
@@ -714,26 +723,26 @@
   /* HashtableBucket */                                                                                                              \
   /*******************/                                                                                                              \
                                                                                                                                      \
-  nonstatic_field(HashtableBucket,             _entry,                                        BasicHashtableEntry*)                  \
+  nonstatic_field(HashtableBucket<mtInternal>,  _entry,                                        BasicHashtableEntry<mtInternal>*)     \
                                                                                                                                      \
   /******************/                                                                                                               \
   /* HashtableEntry */                                                                                                               \
   /******************/                                                                                                               \
                                                                                                                                      \
-  nonstatic_field(BasicHashtableEntry,         _next,                                         BasicHashtableEntry*)                  \
-  nonstatic_field(BasicHashtableEntry,         _hash,                                         unsigned int)                          \
-  nonstatic_field(HashtableEntry<intptr_t>,    _literal,                                      intptr_t) \
+  nonstatic_field(BasicHashtableEntry<mtInternal>, _next,                                     BasicHashtableEntry<mtInternal>*)      \
+  nonstatic_field(BasicHashtableEntry<mtInternal>, _hash,                                     unsigned int)                          \
+  nonstatic_field(IntptrHashtableEntry,            _literal,                                  intptr_t)                              \
                                                                                                                                      \
   /*************/                                                                                                                    \
   /* Hashtable */                                                                                                                    \
   /*************/                                                                                                                    \
                                                                                                                                      \
-  nonstatic_field(BasicHashtable,              _table_size,                                   int)                                   \
-  nonstatic_field(BasicHashtable,              _buckets,                                      HashtableBucket*)                      \
-  nonstatic_field(BasicHashtable,              _free_list,                                    BasicHashtableEntry*)                  \
-  nonstatic_field(BasicHashtable,              _first_free_entry,                             char*)                                 \
-  nonstatic_field(BasicHashtable,              _end_block,                                    char*)                                 \
-  nonstatic_field(BasicHashtable,              _entry_size,                                   int)                                   \
+  nonstatic_field(BasicHashtable<mtInternal>, _table_size,                                   int)                                   \
+  nonstatic_field(BasicHashtable<mtInternal>, _buckets,                                      HashtableBucket<mtInternal>*)          \
+  nonstatic_field(BasicHashtable<mtInternal>, _free_list,                                    BasicHashtableEntry<mtInternal>*)      \
+  nonstatic_field(BasicHashtable<mtInternal>, _first_free_entry,                             char*)                                 \
+  nonstatic_field(BasicHashtable<mtInternal>, _end_block,                                    char*)                                 \
+  nonstatic_field(BasicHashtable<mtInternal>, _entry_size,                                   int)                                   \
                                                                                                                                      \
   /*******************/                                                                                                              \
   /* DictionaryEntry */                                                                                                              \
@@ -827,13 +836,6 @@
   /* CodeBlobs (NOTE: incomplete, but only a little) */                                                                              \
   /***************************************************/                                                                              \
                                                                                                                                      \
-  X86_ONLY(nonstatic_field(MethodHandles::RicochetFrame, _sender_pc,                                     address))                   \
-  X86_ONLY(nonstatic_field(MethodHandles::RicochetFrame, _exact_sender_sp,                              intptr_t*))                  \
-  X86_ONLY(nonstatic_field(MethodHandles::RicochetFrame, _sender_link,                                  intptr_t*))                  \
-  X86_ONLY(nonstatic_field(MethodHandles::RicochetFrame, _saved_args_base,                              intptr_t*))                  \
-                                                                                                                                     \
-     static_field(SharedRuntime,               _ricochet_blob,                                RicochetBlob*)                         \
-                                                                                                                                     \
   nonstatic_field(CodeBlob,                    _name,                                         const char*)                           \
   nonstatic_field(CodeBlob,                    _size,                                         int)                                   \
   nonstatic_field(CodeBlob,                    _header_size,                                  int)                                   \
@@ -878,11 +880,8 @@
   nonstatic_field(nmethod,             _compile_id,                                   int)                                   \
   nonstatic_field(nmethod,             _exception_cache,                              ExceptionCache*)                       \
   nonstatic_field(nmethod,             _marked_for_deoptimization,                    bool)                                  \
-                                                                                                                                     \
-  nonstatic_field(RicochetBlob,        _bounce_offset,                                int)                                           \
-  nonstatic_field(RicochetBlob,        _exception_offset,                             int)                                           \
-                                                                                                                                     \
-  unchecked_c2_static_field(Deoptimization,         _trap_reason_name,                   void*)                                         \
+                                                                                                                             \
+  unchecked_c2_static_field(Deoptimization,         _trap_reason_name,                   void*)                              \
                                                                                                                                      \
   /********************************/                                                                                                 \
   /* JavaCalls (NOTE: incomplete) */                                                                                                 \
@@ -1454,6 +1453,7 @@
                                                                           \
   declare_toplevel_type(CheckedExceptionElement)                          \
   declare_toplevel_type(LocalVariableTableElement)                        \
+  declare_toplevel_type(ExceptionTableElement)                            \
                                                                           \
   /******************************************/                            \
   /* Generation and space hierarchies       */                            \
@@ -1541,20 +1541,20 @@
   /* SymbolTable, SystemDictionary */                                     \
   /*********************************/                                     \
                                                                           \
-  declare_toplevel_type(BasicHashtable)                                   \
-    declare_type(Hashtable<intptr_t>, BasicHashtable)                     \
-  declare_type(SymbolTable, Hashtable<Symbol*>)                           \
-  declare_type(StringTable, Hashtable<oop>)                               \
-    declare_type(LoaderConstraintTable, Hashtable<klassOop>)              \
-    declare_type(TwoOopHashtable<klassOop>, Hashtable<klassOop>)          \
-    declare_type(Dictionary, TwoOopHashtable<klassOop>)                   \
-    declare_type(PlaceholderTable, TwoOopHashtable<Symbol*>)              \
-  declare_toplevel_type(BasicHashtableEntry)                              \
-  declare_type(HashtableEntry<intptr_t>, BasicHashtableEntry)             \
-    declare_type(DictionaryEntry, HashtableEntry<klassOop>)               \
-    declare_type(PlaceholderEntry, HashtableEntry<Symbol*>)               \
-    declare_type(LoaderConstraintEntry, HashtableEntry<klassOop>)         \
-  declare_toplevel_type(HashtableBucket)                                  \
+  declare_toplevel_type(BasicHashtable<mtInternal>)                       \
+    declare_type(IntptrHashtable, BasicHashtable<mtInternal>)             \
+  declare_type(SymbolTable, SymbolHashtable)                              \
+  declare_type(StringTable, StringHashtable)                              \
+    declare_type(LoaderConstraintTable, klassOopHashtable)                \
+    declare_type(klassOopTwoOopHashtable, klassOopHashtable)              \
+    declare_type(Dictionary, klassOopTwoOopHashtable)                     \
+    declare_type(PlaceholderTable, SymbolTwoOopHashtable)                 \
+  declare_toplevel_type(BasicHashtableEntry<mtInternal>)                  \
+  declare_type(IntptrHashtableEntry, BasicHashtableEntry<mtInternal>)     \
+    declare_type(DictionaryEntry, klassHashtableEntry)                    \
+    declare_type(PlaceholderEntry, SymbolHashtableEntry)                  \
+    declare_type(LoaderConstraintEntry, klassHashtableEntry)              \
+  declare_toplevel_type(HashtableBucket<mtInternal>)                      \
   declare_toplevel_type(SystemDictionary)                                 \
   declare_toplevel_type(vmSymbols)                                        \
   declare_toplevel_type(ProtectionDomainEntry)                            \
@@ -1623,7 +1623,6 @@
   /*************************************************************/         \
                                                                           \
   declare_toplevel_type(SharedRuntime)                                    \
-  X86_ONLY(declare_toplevel_type(MethodHandles::RicochetFrame))           \
                                                                           \
   declare_toplevel_type(CodeBlob)                                         \
   declare_type(BufferBlob,               CodeBlob)                        \
@@ -1634,7 +1633,6 @@
   declare_type(SingletonBlob,            CodeBlob)                        \
   declare_type(SafepointBlob,            SingletonBlob)                   \
   declare_type(DeoptimizationBlob,       SingletonBlob)                   \
-  declare_type(RicochetBlob,             SingletonBlob)                   \
   declare_c2_type(ExceptionBlob,         SingletonBlob)                   \
   declare_c2_type(UncommonTrapBlob,      CodeBlob)                        \
                                                                           \
@@ -1878,7 +1876,6 @@
   declare_c2_type(StoreNNode, StoreNode)                                  \
   declare_c2_type(StoreCMNode, StoreNode)                                 \
   declare_c2_type(LoadPLockedNode, LoadPNode)                             \
-  declare_c2_type(LoadLLockedNode, LoadLNode)                             \
   declare_c2_type(SCMemProjNode, ProjNode)                                \
   declare_c2_type(LoadStoreNode, Node)                                    \
   declare_c2_type(StorePConditionalNode, LoadStoreNode)                   \
@@ -1948,14 +1945,12 @@
   declare_c2_type(ReverseBytesLNode, Node)                                \
   declare_c2_type(VectorNode, Node)                                       \
   declare_c2_type(AddVBNode, VectorNode)                                  \
-  declare_c2_type(AddVCNode, VectorNode)                                  \
   declare_c2_type(AddVSNode, VectorNode)                                  \
   declare_c2_type(AddVINode, VectorNode)                                  \
   declare_c2_type(AddVLNode, VectorNode)                                  \
   declare_c2_type(AddVFNode, VectorNode)                                  \
   declare_c2_type(AddVDNode, VectorNode)                                  \
   declare_c2_type(SubVBNode, VectorNode)                                  \
-  declare_c2_type(SubVCNode, VectorNode)                                  \
   declare_c2_type(SubVSNode, VectorNode)                                  \
   declare_c2_type(SubVINode, VectorNode)                                  \
   declare_c2_type(SubVLNode, VectorNode)                                  \
@@ -1966,73 +1961,33 @@
   declare_c2_type(DivVFNode, VectorNode)                                  \
   declare_c2_type(DivVDNode, VectorNode)                                  \
   declare_c2_type(LShiftVBNode, VectorNode)                               \
-  declare_c2_type(LShiftVCNode, VectorNode)                               \
   declare_c2_type(LShiftVSNode, VectorNode)                               \
   declare_c2_type(LShiftVINode, VectorNode)                               \
-  declare_c2_type(URShiftVBNode, VectorNode)                              \
-  declare_c2_type(URShiftVCNode, VectorNode)                              \
-  declare_c2_type(URShiftVSNode, VectorNode)                              \
-  declare_c2_type(URShiftVINode, VectorNode)                              \
+  declare_c2_type(RShiftVBNode, VectorNode)                               \
+  declare_c2_type(RShiftVSNode, VectorNode)                               \
+  declare_c2_type(RShiftVINode, VectorNode)                               \
   declare_c2_type(AndVNode, VectorNode)                                   \
   declare_c2_type(OrVNode, VectorNode)                                    \
   declare_c2_type(XorVNode, VectorNode)                                   \
-  declare_c2_type(VectorLoadNode, LoadNode)                               \
-  declare_c2_type(Load16BNode, VectorLoadNode)                            \
-  declare_c2_type(Load8BNode, VectorLoadNode)                             \
-  declare_c2_type(Load4BNode, VectorLoadNode)                             \
-  declare_c2_type(Load8CNode, VectorLoadNode)                             \
-  declare_c2_type(Load4CNode, VectorLoadNode)                             \
-  declare_c2_type(Load2CNode, VectorLoadNode)                             \
-  declare_c2_type(Load8SNode, VectorLoadNode)                             \
-  declare_c2_type(Load4SNode, VectorLoadNode)                             \
-  declare_c2_type(Load2SNode, VectorLoadNode)                             \
-  declare_c2_type(Load4INode, VectorLoadNode)                             \
-  declare_c2_type(Load2INode, VectorLoadNode)                             \
-  declare_c2_type(Load2LNode, VectorLoadNode)                             \
-  declare_c2_type(Load4FNode, VectorLoadNode)                             \
-  declare_c2_type(Load2FNode, VectorLoadNode)                             \
-  declare_c2_type(Load2DNode, VectorLoadNode)                             \
-  declare_c2_type(VectorStoreNode, StoreNode)                             \
-  declare_c2_type(Store16BNode, VectorStoreNode)                          \
-  declare_c2_type(Store8BNode, VectorStoreNode)                           \
-  declare_c2_type(Store4BNode, VectorStoreNode)                           \
-  declare_c2_type(Store8CNode, VectorStoreNode)                           \
-  declare_c2_type(Store4CNode, VectorStoreNode)                           \
-  declare_c2_type(Store2CNode, VectorStoreNode)                           \
-  declare_c2_type(Store4INode, VectorStoreNode)                           \
-  declare_c2_type(Store2INode, VectorStoreNode)                           \
-  declare_c2_type(Store2LNode, VectorStoreNode)                           \
-  declare_c2_type(Store4FNode, VectorStoreNode)                           \
-  declare_c2_type(Store2FNode, VectorStoreNode)                           \
-  declare_c2_type(Store2DNode, VectorStoreNode)                           \
-  declare_c2_type(Replicate16BNode, VectorNode)                           \
-  declare_c2_type(Replicate8BNode, VectorNode)                            \
-  declare_c2_type(Replicate4BNode, VectorNode)                            \
-  declare_c2_type(Replicate8CNode, VectorNode)                            \
-  declare_c2_type(Replicate4CNode, VectorNode)                            \
-  declare_c2_type(Replicate2CNode, VectorNode)                            \
-  declare_c2_type(Replicate8SNode, VectorNode)                            \
-  declare_c2_type(Replicate4SNode, VectorNode)                            \
-  declare_c2_type(Replicate2SNode, VectorNode)                            \
-  declare_c2_type(Replicate4INode, VectorNode)                            \
-  declare_c2_type(Replicate2INode, VectorNode)                            \
-  declare_c2_type(Replicate2LNode, VectorNode)                            \
-  declare_c2_type(Replicate4FNode, VectorNode)                            \
-  declare_c2_type(Replicate2FNode, VectorNode)                            \
-  declare_c2_type(Replicate2DNode, VectorNode)                            \
+  declare_c2_type(LoadVectorNode, LoadNode)                               \
+  declare_c2_type(StoreVectorNode, StoreNode)                             \
+  declare_c2_type(ReplicateBNode, VectorNode)                             \
+  declare_c2_type(ReplicateSNode, VectorNode)                             \
+  declare_c2_type(ReplicateINode, VectorNode)                             \
+  declare_c2_type(ReplicateLNode, VectorNode)                             \
+  declare_c2_type(ReplicateFNode, VectorNode)                             \
+  declare_c2_type(ReplicateDNode, VectorNode)                             \
   declare_c2_type(PackNode, VectorNode)                                   \
   declare_c2_type(PackBNode, PackNode)                                    \
-  declare_c2_type(PackCNode, PackNode)                                    \
   declare_c2_type(PackSNode, PackNode)                                    \
   declare_c2_type(PackINode, PackNode)                                    \
   declare_c2_type(PackLNode, PackNode)                                    \
   declare_c2_type(PackFNode, PackNode)                                    \
   declare_c2_type(PackDNode, PackNode)                                    \
-  declare_c2_type(Pack2x1BNode, PackNode)                                 \
-  declare_c2_type(Pack2x2BNode, PackNode)                                 \
+  declare_c2_type(Pack2LNode, PackNode)                                   \
+  declare_c2_type(Pack2DNode, PackNode)                                   \
   declare_c2_type(ExtractNode, Node)                                      \
   declare_c2_type(ExtractBNode, ExtractNode)                              \
-  declare_c2_type(ExtractCNode, ExtractNode)                              \
   declare_c2_type(ExtractSNode, ExtractNode)                              \
   declare_c2_type(ExtractINode, ExtractNode)                              \
   declare_c2_type(ExtractLNode, ExtractNode)                              \
@@ -2180,6 +2135,7 @@
   /******************/                                                    \
                                                                           \
   declare_constant(UseTLAB)                                               \
+  declare_constant(EnableInvokeDynamic)                                   \
                                                                           \
   /**************/                                                        \
   /* Stack bias */                                                        \
@@ -2338,12 +2294,12 @@
   declare_constant(constMethodOopDesc::_has_linenumber_table)             \
   declare_constant(constMethodOopDesc::_has_checked_exceptions)           \
   declare_constant(constMethodOopDesc::_has_localvariable_table)          \
+  declare_constant(constMethodOopDesc::_has_exception_table)              \
                                                                           \
   /*************************************/                                 \
   /* instanceKlass enum                */                                 \
   /*************************************/                                 \
                                                                           \
-  declare_constant(instanceKlass::implementors_limit)                     \
                                                                           \
   /*************************************/                                 \
   /* FieldInfo FieldOffset enum        */                                 \
@@ -2355,7 +2311,6 @@
   declare_constant(FieldInfo::initval_index_offset)                       \
   declare_constant(FieldInfo::low_offset)                                 \
   declare_constant(FieldInfo::high_offset)                                \
-  declare_constant(FieldInfo::generic_signature_offset)                   \
   declare_constant(FieldInfo::field_slots)                                \
                                                                           \
   /************************************************/                      \
@@ -2381,7 +2336,7 @@
   declare_constant(instanceKlass::initialization_error)                   \
                                                                           \
   /*********************************/                                     \
-  /* Symbol* - symbol max length */                                     \
+  /* Symbol* - symbol max length */                                       \
   /*********************************/                                     \
                                                                           \
   declare_constant(Symbol::max_symbol_length)                             \
@@ -2394,21 +2349,16 @@
   declare_constant(constantPoolOopDesc::_indy_argc_offset)                \
   declare_constant(constantPoolOopDesc::_indy_argv_offset)                \
                                                                           \
-  /*********************************************/                         \
-  /* ConstantPoolCacheEntry FlagBitValues enum */                         \
-  /*********************************************/                         \
+  /********************************/                                      \
+  /* ConstantPoolCacheEntry enums */                                      \
+  /********************************/                                      \
                                                                           \
-  declare_constant(ConstantPoolCacheEntry::hotSwapBit)                    \
-  declare_constant(ConstantPoolCacheEntry::methodInterface)               \
-  declare_constant(ConstantPoolCacheEntry::volatileField)                 \
-  declare_constant(ConstantPoolCacheEntry::vfinalMethod)                  \
-  declare_constant(ConstantPoolCacheEntry::finalField)                    \
-                                                                          \
-  /******************************************/                            \
-  /* ConstantPoolCacheEntry FlagValues enum */                            \
-  /******************************************/                            \
-                                                                          \
-  declare_constant(ConstantPoolCacheEntry::tosBits)                       \
+  declare_constant(ConstantPoolCacheEntry::is_volatile_shift)             \
+  declare_constant(ConstantPoolCacheEntry::is_final_shift)                \
+  declare_constant(ConstantPoolCacheEntry::is_forced_virtual_shift)       \
+  declare_constant(ConstantPoolCacheEntry::is_vfinal_shift)               \
+  declare_constant(ConstantPoolCacheEntry::is_field_entry_shift)          \
+  declare_constant(ConstantPoolCacheEntry::tos_state_shift)               \
                                                                           \
   /***************************************/                               \
   /* java_lang_Thread::ThreadStatus enum */                               \
--- a/src/share/vm/runtime/vmThread.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/vmThread.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -304,7 +304,7 @@
     os::check_heap();
     // Silent verification so as not to pollute normal output,
     // unless we really asked for it.
-    Universe::verify(true, !(PrintGCDetails || Verbose));
+    Universe::verify(!(PrintGCDetails || Verbose));
   }
 
   CompileBroker::set_should_block();
--- a/src/share/vm/runtime/vmThread.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/vmThread.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -46,7 +46,7 @@
 // Encapsulates both queue management and
 // and priority policy
 //
-class VMOperationQueue : public CHeapObj {
+class VMOperationQueue : public CHeapObj<mtInternal> {
  private:
   enum Priorities {
      SafepointPriority, // Highest priority (operation executed at a safepoint)
--- a/src/share/vm/runtime/vm_operations.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/runtime/vm_operations.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -96,7 +96,7 @@
   template(JFRCheckpoint)                         \
   template(Exit)                                  \
 
-class VM_Operation: public CHeapObj {
+class VM_Operation: public CHeapObj<mtInternal> {
  public:
   enum Mode {
     _safepoint,       // blocking,        safepoint, vm_op C-heap allocated
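
The recurring change in these hunks — CHeapObj becoming CHeapObj<mtInternal>, and the C-heap allocation macros gaining a flag argument — is the Native Memory Tracking (NMT) convention introduced by this changeset: every C-heap allocation is tagged with the memory type it should be accounted under. A minimal sketch of the convention; MyTable is a made-up class, not part of this changeset:

#include "memory/allocation.hpp"

// Hypothetical class, for illustration only: the 'operator new' inherited
// from CHeapObj<mtInternal> books the allocation under the 'Internal' type.
class MyTable : public CHeapObj<mtInternal> {
 private:
  char* _buf;
 public:
  MyTable(size_t len) {
    // the allocation macros now carry the same memory-type flag
    _buf = NEW_C_HEAP_ARRAY(char, len, mtInternal);
  }
  ~MyTable() {
    FREE_C_HEAP_ARRAY(char, _buf, mtInternal);
  }
};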
--- a/src/share/vm/services/attachListener.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/attachListener.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -320,7 +320,7 @@
   }
   bool res = CommandLineFlags::ccstrAtPut((char*)name, &value, ATTACH_ON_DEMAND);
   if (res) {
-    FREE_C_HEAP_ARRAY(char, value);
+    FREE_C_HEAP_ARRAY(char, value, mtInternal);
   } else {
     out->print_cr("setting flag %s failed", name);
   }
--- a/src/share/vm/services/attachListener.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/attachListener.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -98,7 +98,7 @@
 };
 
 #ifndef SERVICES_KERNEL
-class AttachOperation: public CHeapObj {
+class AttachOperation: public CHeapObj<mtInternal> {
  public:
   enum {
     name_length_max = 16,       // maximum length of  name
--- a/src/share/vm/services/diagnosticArgument.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/diagnosticArgument.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -43,6 +43,47 @@
   set_is_set(true);
 }
 
+void GenDCmdArgument::to_string(jlong l, char* buf, size_t len) {
+  jio_snprintf(buf, len, INT64_FORMAT, l);
+}
+
+void GenDCmdArgument::to_string(bool b, char* buf, size_t len) {
+  jio_snprintf(buf, len, "%s", b ? "true" : "false");
+}
+
+void GenDCmdArgument::to_string(NanoTimeArgument n, char* buf, size_t len) {
+  jio_snprintf(buf, len, INT64_FORMAT, n._nanotime);
+}
+
+void GenDCmdArgument::to_string(MemorySizeArgument m, char* buf, size_t len) {
+  jio_snprintf(buf, len, INT64_FORMAT, m._size);
+}
+
+void GenDCmdArgument::to_string(char* c, char* buf, size_t len) {
+  jio_snprintf(buf, len, "%s", c);
+}
+
+void GenDCmdArgument::to_string(StringArrayArgument* f, char* buf, size_t len) {
+  int length = f->array()->length();
+  size_t written = 0;
+  buf[0] = 0;
+  for (int i = 0; i < length; i++) {
+    char* next_str = f->array()->at(i);
+    size_t next_size = strlen(next_str);
+    // Check if there's room left to write the next element (leaving space
+    // for the terminating NUL)
+    if (written + next_size >= len) {
+      return;
+    }
+    // Actually write the element
+    strcat(buf, next_str);
+    written += next_size;
+    // Append a separating comma if there's room for it and the NUL
+    if (i < length-1 && len - written > 1) {
+      strcat(buf, ",");
+      written++;
+    }
+  }
+}
+
 template <> void DCmdArgument<jlong>::parse_value(const char* str,
                                                   size_t len, TRAPS) {
     if (str == NULL || sscanf(str, INT64_FORMAT, &_value) != 1) {
@@ -99,7 +140,7 @@
   if (str == NULL) {
     _value = NULL;
   } else {
-    _value = NEW_C_HEAP_ARRAY(char, len+1);
+    _value = NEW_C_HEAP_ARRAY(char, len+1, mtInternal);
     strncpy(_value, str, len);
     _value[len] = 0;
   }
@@ -118,7 +159,7 @@
 
 template <> void DCmdArgument<char*>::destroy_value() {
   if (_value != NULL) {
-    FREE_C_HEAP_ARRAY(char, _value);
+    FREE_C_HEAP_ARRAY(char, _value, mtInternal);
     set_value(NULL);
   }
 }
--- a/src/share/vm/services/diagnosticArgument.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/diagnosticArgument.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -31,17 +31,17 @@
 #include "runtime/thread.hpp"
 #include "utilities/exceptions.hpp"
 
-class StringArrayArgument : public CHeapObj {
+class StringArrayArgument : public CHeapObj<mtInternal> {
 private:
   GrowableArray<char*>* _array;
 public:
   StringArrayArgument() {
-    _array = new(ResourceObj::C_HEAP)GrowableArray<char *>(32, true);
+    _array = new(ResourceObj::C_HEAP, mtInternal)GrowableArray<char *>(32, true);
     assert(_array != NULL, "Sanity check");
   }
   void add(const char* str, size_t len) {
     if (str != NULL) {
-      char* ptr = NEW_C_HEAP_ARRAY(char, len+1);
+      char* ptr = NEW_C_HEAP_ARRAY(char, len+1, mtInternal);
       strncpy(ptr, str, len);
       ptr[len] = 0;
       _array->append(ptr);
@@ -53,7 +53,7 @@
   ~StringArrayArgument() {
     for (int i=0; i<_array->length(); i++) {
       if(_array->at(i) != NULL) { // Safety check
-        FREE_C_HEAP_ARRAY(char, _array->at(i));
+        FREE_C_HEAP_ARRAY(char, _array->at(i), mtInternal);
       }
     }
     delete _array;
@@ -110,12 +110,20 @@
   virtual void init_value(TRAPS) = 0;
   virtual void reset(TRAPS) = 0;
   virtual void cleanup() = 0;
+  virtual void value_as_str(char* buf, size_t len) = 0;
   void set_next(GenDCmdArgument* arg) {
     _next = arg;
   }
   GenDCmdArgument* next() {
     return _next;
   }
+
+  void to_string(jlong l, char* buf, size_t len);
+  void to_string(bool b, char* buf, size_t len);
+  void to_string(char* c, char* buf, size_t len);
+  void to_string(NanoTimeArgument n, char* buf, size_t len);
+  void to_string(MemorySizeArgument f, char* buf, size_t len);
+  void to_string(StringArrayArgument* s, char* buf, size_t len);
 };
 
 template <class ArgType> class DCmdArgument: public GenDCmdArgument {
@@ -143,6 +151,7 @@
   void parse_value(const char* str, size_t len, TRAPS);
   void init_value(TRAPS);
   void destroy_value();
+  void value_as_str(char* buf, size_t len) { to_string(_value, buf, len); }
 };
 
 #endif  /* SHARE_VM_SERVICES_DIAGNOSTICARGUMENT_HPP */
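
The to_string() overloads added above give every argument type a uniform textual form, surfaced through the new value_as_str() virtual. A hedged sketch of how a caller might render an argument's current value; print_argument and its buffer size are illustrative, not part of this changeset:

// Sketch only: 'arg' is any argument registered with a DCmdParser.
static void print_argument(outputStream* out, GenDCmdArgument* arg) {
  char buf[256];
  arg->value_as_str(buf, sizeof(buf));  // dispatches to the matching to_string()
  out->print_cr("%s = %s", arg->name(), buf);
}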
--- a/src/share/vm/services/diagnosticCommand.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/diagnosticCommand.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -48,7 +48,7 @@
            "With no argument this will show a list of available commands. "
            "'help all' will show help for all commands.";
   }
-  static const char* impact() { return "Low: "; }
+  static const char* impact() { return "Low"; }
   static int num_arguments();
   virtual void execute(TRAPS);
 };
@@ -60,7 +60,7 @@
   static const char* description() {
     return "Print JVM version information.";
   }
-  static const char* impact() { return "Low: "; }
+  static const char* impact() { return "Low"; }
   static int num_arguments() { return 0; }
   virtual void execute(TRAPS);
 };
@@ -72,7 +72,7 @@
   static const char* description() {
     return "Print the command line used to start this VM instance.";
   }
-  static const char* impact() { return "Low: "; }
+  static const char* impact() { return "Low"; }
   static int num_arguments() { return 0; }
   virtual void execute(TRAPS) {
     Arguments::print_on(_output);
@@ -88,7 +88,7 @@
       return "Print system properties.";
     }
     static const char* impact() {
-      return "Low: ";
+      return "Low";
     }
     static int num_arguments() { return 0; }
     virtual void execute(TRAPS);
@@ -105,7 +105,7 @@
     return "Print VM flag options and their current values.";
   }
   static const char* impact() {
-    return "Low: ";
+    return "Low";
   }
   static int num_arguments();
   virtual void execute(TRAPS);
@@ -121,7 +121,7 @@
     return "Print VM uptime.";
   }
   static const char* impact() {
-    return "Low: ";
+    return "Low";
   }
   static int num_arguments();
   virtual void execute(TRAPS);
--- a/src/share/vm/services/diagnosticFramework.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/diagnosticFramework.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -75,11 +75,13 @@
   }
   // extracting first item, argument or option name
   _key_addr = &_buffer[_cursor];
+  bool arg_had_quotes = false;
   while (_cursor <= _len - 1 && _buffer[_cursor] != '=' && _buffer[_cursor] != _delim) {
     // argument can be surrounded by single or double quotes
     if (_buffer[_cursor] == '\"' || _buffer[_cursor] == '\'') {
       _key_addr++;
       char quote = _buffer[_cursor];
+      arg_had_quotes = true;
       while (_cursor < _len - 1) {
         _cursor++;
         if (_buffer[_cursor] == quote && _buffer[_cursor - 1] != '\\') {
@@ -95,16 +97,22 @@
     _cursor++;
   }
   _key_len = &_buffer[_cursor] - _key_addr;
+  if (arg_had_quotes) {
+    // if the argument was quoted, we need to step past the last quote here
+    _cursor++;
+  }
   // check if the argument has the <key>=<value> format
   if (_cursor <= _len -1 && _buffer[_cursor] == '=') {
     _cursor++;
     _value_addr = &_buffer[_cursor];
+    bool value_had_quotes = false;
     // extract the value
     while (_cursor <= _len - 1 && _buffer[_cursor] != _delim) {
       // value can be surrounded by simple or double quotes
       if (_buffer[_cursor] == '\"' || _buffer[_cursor] == '\'') {
         _value_addr++;
         char quote = _buffer[_cursor];
+        value_had_quotes = true;
         while (_cursor < _len - 1) {
           _cursor++;
           if (_buffer[_cursor] == quote && _buffer[_cursor - 1] != '\\') {
@@ -120,6 +128,10 @@
       _cursor++;
     }
     _value_len = &_buffer[_cursor] - _value_addr;
+    if (value_had_quotes) {
+      // if the value was quoted, we need to step past the last quote here
+      _cursor++;
+    }
   } else {
     _value_addr = NULL;
     _value_len = 0;
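
The two symmetric fixes above matter because, once a quoted token has been scanned, the cursor is still on the closing quote; without the extra increment, the quote would be re-read as the start of the next token. A worked example, assuming a space delimiter:

//   input:  filename='my file.txt' -all
//           ^key     ^value        ^next option
// After scanning the quoted value, the cursor rests on the closing quote.
// With value_had_quotes set, the cursor steps past it, so the delimiter test
// sees the space and tokenization resumes cleanly at "-all".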
@@ -185,8 +197,17 @@
         arg->read_value(iter.key_addr(), iter.key_length(), CHECK);
         next_argument = next_argument->next();
       } else {
-        THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
-          "Unknown argument in diagnostic command");
+        const size_t buflen    = 120;
+        const size_t argbuflen = 30;
+        char buf[buflen];
+        char argbuf[argbuflen];
+        size_t len = MIN2<size_t>(iter.key_length(), argbuflen - 1);
+
+        strncpy(argbuf, iter.key_addr(), len);
+        argbuf[len] = '\0';
+        jio_snprintf(buf, buflen - 1, "Unknown argument '%s' in diagnostic command.", argbuf);
+
+        THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), buf);
       }
     }
     cont = iter.next(CHECK);
@@ -207,19 +228,21 @@
 }
 
 void DCmdParser::check(TRAPS) {
+  const size_t buflen = 256;
+  char buf[buflen];
   GenDCmdArgument* arg = _arguments_list;
   while (arg != NULL) {
     if (arg->is_mandatory() && !arg->has_value()) {
-      THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
-              "Missing argument for diagnostic command");
+      jio_snprintf(buf, buflen - 1, "The argument '%s' is mandatory.", arg->name());
+      THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), buf);
     }
     arg = arg->next();
   }
   arg = _options;
   while (arg != NULL) {
     if (arg->is_mandatory() && !arg->has_value()) {
-      THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
-              "Missing option for diagnostic command");
+      jio_snprintf(buf, buflen - 1, "The option '%s' is mandatory.", arg->name());
+      THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), buf);
     }
     arg = arg->next();
   }
--- a/src/share/vm/services/diagnosticFramework.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/diagnosticFramework.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -238,6 +238,16 @@
   static const char* name() { return "No Name";}
   static const char* description() { return "No Help";}
   static const char* disabled_message() { return "Diagnostic command currently disabled"; }
+  // The impact() method returns a description of the intrusiveness of the diagnostic
+  // command on the Java Virtual Machine behavior. The rationale for this method is that some
+  // diagnostic commands can seriously disrupt the behavior of the Java Virtual Machine
+  // (for instance a Thread Dump for an application with several tens of thousands of threads,
+  // or a Heap Dump with a 40GB+ heap) while other diagnostic commands have no serious
+  // impact on the JVM (for instance, getting the command line arguments or the JVM version).
+  // The recommended format for the description is <impact level>: [longer description],
+  // where the impact level is chosen from {Low, Medium, High}. The optional
+  // longer description can provide more specific details, like the fact that Thread Dump
+  // impact depends on the number of threads.
   static const char* impact() { return "Low: No impact"; }
   static int num_arguments() { return 0; }
   outputStream* output() { return _output; }
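
A minimal sketch of a command following the recommended format; the class and its strings are illustrative, not part of this changeset, and the constructor shape assumes the (outputStream*, bool) signature used by DCmdFactoryImpl:

// Hypothetical command showing the "<impact level>: [longer description]" format.
class ThreadDumpDCmd : public DCmd {
 public:
  ThreadDumpDCmd(outputStream* output, bool heap) : DCmd(output) { }
  static const char* name() { return "Thread.print"; }
  static const char* impact() {
    return "Medium: Depends on the number of threads.";
  }
  static int num_arguments() { return 0; }
  virtual void execute(TRAPS) { /* walk threads and print stacks */ }
};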
@@ -250,7 +260,7 @@
     bool has_arg = iter.next(CHECK);
     if (has_arg) {
       THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
-                "Unknown argument in diagnostic command");
+                "The argument list of this diagnostic command should be empty.");
     }
   }
   virtual void execute(TRAPS) { }
@@ -310,7 +320,7 @@
 // manages the status of the diagnostic command (hidden, enabled). A DCmdFactory
 // has to be registered to make the diagnostic command available (see
 // management.cpp)
-class DCmdFactory: public CHeapObj {
+class DCmdFactory: public CHeapObj<mtInternal> {
 private:
   static Mutex*       _dcmdFactory_lock;
   // Pointer to the next factory in the singly-linked list of registered
@@ -368,7 +378,7 @@
     DCmdFactory(DCmdClass::num_arguments(), enabled, hidden) { }
   // Returns a C-heap allocated instance
   virtual DCmd* create_Cheap_instance(outputStream* output) {
-    return new (ResourceObj::C_HEAP) DCmdClass(output, true);
+    return new (ResourceObj::C_HEAP, mtInternal) DCmdClass(output, true);
   }
   // Returns a resourceArea allocated instance
   virtual DCmd* create_resource_instance(outputStream* output) {
--- a/src/share/vm/services/gcNotifier.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/gcNotifier.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -45,7 +45,7 @@
   // GC may occur between now and the creation of the notification
   int num_pools = MemoryService::num_memory_pools();
   // stat is deallocated inside GCNotificationRequest
-  GCStatInfo* stat = new(ResourceObj::C_HEAP) GCStatInfo(num_pools);
+  GCStatInfo* stat = new(ResourceObj::C_HEAP, mtGC) GCStatInfo(num_pools);
   mgr->get_last_gc_stat(stat);
   GCNotificationRequest *request = new GCNotificationRequest(os::javaTimeMillis(),mgr,action,cause,stat);
   addRequest(request);
@@ -163,8 +163,8 @@
   constructor_args.push_oop(gcInfo_instance);
   constructor_args.push_oop(getGcInfoBuilder(gcManager,THREAD));
   constructor_args.push_long(gcStatInfo->gc_index());
-  constructor_args.push_long(gcStatInfo->start_time());
-  constructor_args.push_long(gcStatInfo->end_time());
+  constructor_args.push_long(Management::ticks_to_ms(gcStatInfo->start_time()));
+  constructor_args.push_long(Management::ticks_to_ms(gcStatInfo->end_time()));
   constructor_args.push_oop(usage_before_gc_ah);
   constructor_args.push_oop(usage_after_gc_ah);
   constructor_args.push_oop(extra_array);
--- a/src/share/vm/services/gcNotifier.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/gcNotifier.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -30,7 +30,7 @@
 #include "services/memoryService.hpp"
 #include "services/memoryManager.hpp"
 
-class GCNotificationRequest : public CHeapObj {
+class GCNotificationRequest : public CHeapObj<mtInternal> {
   friend class GCNotifier;
   GCNotificationRequest *next;
   jlong timestamp;
--- a/src/share/vm/services/heapDumper.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/heapDumper.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -436,7 +436,7 @@
   // sufficient memory then reduce size until we can allocate something.
   _size = io_buffer_size;
   do {
-    _buffer = (char*)os::malloc(_size);
+    _buffer = (char*)os::malloc(_size, mtInternal);
     if (_buffer == NULL) {
       _size = _size >> 1;
     }
@@ -1405,7 +1405,7 @@
     _gc_before_heap_dump = gc_before_heap_dump;
     _is_segmented_dump = false;
     _dump_start = (jlong)-1;
-    _klass_map = new (ResourceObj::C_HEAP) GrowableArray<Klass*>(INITIAL_CLASS_COUNT, true);
+    _klass_map = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<Klass*>(INITIAL_CLASS_COUNT, true);
     _stack_traces = NULL;
     _num_threads = 0;
     if (oome) {
@@ -1426,7 +1426,7 @@
       for (int i=0; i < _num_threads; i++) {
         delete _stack_traces[i];
       }
-      FREE_C_HEAP_ARRAY(ThreadStackTrace*, _stack_traces);
+      FREE_C_HEAP_ARRAY(ThreadStackTrace*, _stack_traces, mtInternal);
     }
     delete _klass_map;
   }
@@ -1650,9 +1650,6 @@
         if (fr->is_entry_frame()) {
           last_entry_frame = fr;
         }
-        if (fr->is_ricochet_frame()) {
-          fr->oops_ricochet_do(&blk, vf->register_map());
-        }
       }
       vf = vf->sender();
     }
@@ -1806,7 +1803,7 @@
   writer()->write_u4(0);                    // thread number
   writer()->write_u4(0);                    // frame count
 
-  _stack_traces = NEW_C_HEAP_ARRAY(ThreadStackTrace*, Threads::number_of_threads());
+  _stack_traces = NEW_C_HEAP_ARRAY(ThreadStackTrace*, Threads::number_of_threads(), mtInternal);
   int frame_serial_num = 0;
   for (JavaThread* thread = Threads::first(); thread != NULL ; thread = thread->next()) {
     oop threadObj = thread->threadObj();
@@ -2005,7 +2002,7 @@
                    dump_file_name, os::current_process_id(), dump_file_ext);
     }
     const size_t len = strlen(base_path) + 1;
-    my_path = (char*)os::malloc(len);
+    my_path = (char*)os::malloc(len, mtInternal);
     if (my_path == NULL) {
       warning("Cannot create heap dump file.  Out of system memory.");
       return;
@@ -2014,7 +2011,7 @@
   } else {
     // Append a sequence number id for dumps following the first
     const size_t len = strlen(base_path) + max_digit_chars + 2; // for '.' and \0
-    my_path = (char*)os::malloc(len);
+    my_path = (char*)os::malloc(len, mtInternal);
     if (my_path == NULL) {
       warning("Cannot create heap dump file.  Out of system memory.");
       return;
--- a/src/share/vm/services/lowMemoryDetector.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/lowMemoryDetector.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -63,7 +63,7 @@
 class OopClosure;
 class MemoryPool;
 
-class ThresholdSupport : public CHeapObj {
+class ThresholdSupport : public CHeapObj<mtInternal> {
  private:
   bool            _support_high_threshold;
   bool            _support_low_threshold;
@@ -112,7 +112,7 @@
   }
 };
 
-class SensorInfo : public CHeapObj {
+class SensorInfo : public CHeapObj<mtInternal> {
 private:
   instanceOop     _sensor_obj;
   bool            _sensor_on;
--- a/src/share/vm/services/management.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/management.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -47,6 +47,7 @@
 #include "services/jmm.h"
 #include "services/lowMemoryDetector.hpp"
 #include "services/gcNotifier.hpp"
+#include "services/nmtDCmd.hpp"
 #include "services/management.hpp"
 #include "services/memoryManager.hpp"
 #include "services/memoryPool.hpp"
@@ -121,6 +122,7 @@
   // Registration of the diagnostic commands
   DCmdRegistrant::register_dcmds();
   DCmdRegistrant::register_dcmds_ext();
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<NMTDCmd>(true, false));
 }
 
 void Management::initialize(TRAPS) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memBaseline.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#include "precompiled.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "memory/allocation.hpp"
+#include "services/memBaseline.hpp"
+#include "services/memTracker.hpp"
+
+MemType2Name MemBaseline::MemType2NameMap[NUMBER_OF_MEMORY_TYPE] = {
+  {mtJavaHeap,   "Java Heap"},
+  {mtClass,      "Class"},
+  {mtThreadStack,"Thread Stack"},
+  {mtThread,     "Thread"},
+  {mtCode,       "Code"},
+  {mtGC,         "GC"},
+  {mtCompiler,   "Compiler"},
+  {mtInternal,   "Internal"},
+  {mtOther,      "Other"},
+  {mtSymbol,     "Symbol"},
+  {mtNMT,        "Memory Tracking"},
+  {mtChunk,      "Pooled Free Chunks"},
+  {mtNone,       "Unknown"}  // This can happen when type tagging records lag
+                             // behind
+};
+
+MemBaseline::MemBaseline() {
+  _baselined = false;
+
+  for (int index = 0; index < NUMBER_OF_MEMORY_TYPE; index ++) {
+    _malloc_data[index].set_type(MemType2NameMap[index]._flag);
+    _vm_data[index].set_type(MemType2NameMap[index]._flag);
+    _arena_data[index].set_type(MemType2NameMap[index]._flag);
+  }
+
+  _malloc_cs = NULL;
+  _vm_cs = NULL;
+
+  _number_of_classes = 0;
+  _number_of_threads = 0;
+}
+
+
+void MemBaseline::clear() {
+  if (_malloc_cs != NULL) {
+    delete _malloc_cs;
+    _malloc_cs = NULL;
+  }
+
+  if (_vm_cs != NULL) {
+    delete _vm_cs;
+    _vm_cs = NULL;
+  }
+
+  reset();
+}
+
+
+void MemBaseline::reset() {
+  _baselined = false;
+  _total_vm_reserved = 0;
+  _total_vm_committed = 0;
+  _total_malloced = 0;
+  _number_of_classes = 0;
+  _number_of_threads = 0;
+
+  if (_malloc_cs != NULL) _malloc_cs->clear();
+  if (_vm_cs != NULL) _vm_cs->clear();
+
+  for (int index = 0; index < NUMBER_OF_MEMORY_TYPE; index ++) {
+    _malloc_data[index].clear();
+    _vm_data[index].clear();
+    _arena_data[index].clear();
+  }
+}
+
+MemBaseline::~MemBaseline() {
+  if (_malloc_cs != NULL) {
+    delete _malloc_cs;
+  }
+
+  if (_vm_cs != NULL) {
+    delete _vm_cs;
+  }
+}
+
+// baseline malloc'd memory records: generate an overall summary and
+// per-memory-type summaries
+bool MemBaseline::baseline_malloc_summary(const MemPointerArray* malloc_records) {
+  MemPointerArrayIteratorImpl mItr((MemPointerArray*)malloc_records);
+  MemPointerRecord* mptr = (MemPointerRecord*)mItr.current();
+  size_t used_arena_size = 0;
+  int index;
+  while (mptr != NULL) {
+    index = flag2index(FLAGS_TO_MEMORY_TYPE(mptr->flags()));
+    size_t size = mptr->size();
+    _total_malloced += size;
+    _malloc_data[index].inc(size);
+    if (MemPointerRecord::is_arena_record(mptr->flags())) {
+      // see if arena size record present
+      MemPointerRecord* next_p = (MemPointerRecordEx*)mItr.peek_next();
+      if (next_p != NULL && MemPointerRecord::is_arena_size_record(next_p->flags())) {
+        assert(next_p->is_size_record_of_arena(mptr), "arena records do not match");
+        size = next_p->size();
+        _arena_data[index].inc(size);
+        used_arena_size += size;
+        mItr.next();
+      }
+    }
+    mptr = (MemPointerRecordEx*)mItr.next();
+  }
+
+  // subtract the used arena size to get the size of arena chunks in the free list
+  index = flag2index(mtChunk);
+  _malloc_data[index].reduce(used_arena_size);
+  // we really don't know how many chunks are in the free list, so just set the
+  // counter to 0
+  _malloc_data[index].overwrite_counter(0);
+
+  return true;
+}
+
+// baseline mmap'd memory records: generate an overall summary and
+// per-memory-type summaries
+bool MemBaseline::baseline_vm_summary(const MemPointerArray* vm_records) {
+  MemPointerArrayIteratorImpl vItr((MemPointerArray*)vm_records);
+  VMMemRegion* vptr = (VMMemRegion*)vItr.current();
+  int index;
+  while (vptr != NULL) {
+    index = flag2index(FLAGS_TO_MEMORY_TYPE(vptr->flags()));
+
+    // we use the number of thread stacks to count threads
+    if (IS_MEMORY_TYPE(vptr->flags(), mtThreadStack)) {
+      _number_of_threads ++;
+    }
+    _total_vm_reserved += vptr->reserved_size();
+    _total_vm_committed += vptr->committed_size();
+    _vm_data[index].inc(vptr->reserved_size(), vptr->committed_size());
+    vptr = (VMMemRegion*)vItr.next();
+  }
+  return true;
+}
+
+// baseline malloc'd memory by callsite; only callsites with more than 1KB of
+// allocated memory are stored
+bool MemBaseline::baseline_malloc_details(const MemPointerArray* malloc_records) {
+  assert(MemTracker::track_callsite(), "detail tracking is off");
+
+  MemPointerArrayIteratorImpl mItr((MemPointerArray*)malloc_records);
+  MemPointerRecordEx* mptr = (MemPointerRecordEx*)mItr.current();
+  MallocCallsitePointer mp;
+
+  if (_malloc_cs == NULL) {
+    _malloc_cs = new (std::nothrow) MemPointerArrayImpl<MallocCallsitePointer>(64);
+    // out of native memory
+    if (_malloc_cs == NULL) {
+      return false;
+    }
+  } else {
+    _malloc_cs->clear();
+  }
+
+  // only aggregate callsites whose total malloc'd memory exceeds 1 KB
+  while (mptr != NULL) {
+    if (!MemPointerRecord::is_arena_size_record(mptr->flags())) {
+      // skip thread stacks
+      if (!IS_MEMORY_TYPE(mptr->flags(), mtThreadStack)) {
+        if (mp.addr() != mptr->pc()) {
+          if ((mp.amount()/K) > 0) {
+            if (!_malloc_cs->append(&mp)) {
+              return false;
+            }
+          }
+          mp = MallocCallsitePointer(mptr->pc());
+        }
+        mp.inc(mptr->size());
+      }
+    }
+    mptr = (MemPointerRecordEx*)mItr.next();
+  }
+
+  if (mp.addr() != 0 && (mp.amount()/K) > 0) {
+    if (!_malloc_cs->append(&mp)) {
+      return false;
+    }
+  }
+  return true;
+}
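
This aggregation only works because baseline() (below) sorts the records by callsite pc first, so all records from one callsite are adjacent and a single running MallocCallsitePointer can absorb each group. The same sort-then-scan idiom in a self-contained form, with made-up pcs and sizes:

// Self-contained illustration of the sort-then-aggregate idiom used above.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Record  { unsigned long pc; size_t size; };
struct Summary { unsigned long pc; size_t count, amount; };

int main() {
  std::vector<Record> recs = {{0x20, 16}, {0x10, 8}, {0x10, 32}};
  std::sort(recs.begin(), recs.end(),
            [](const Record& a, const Record& b) { return a.pc < b.pc; });
  std::vector<Summary> out;
  for (size_t i = 0; i < recs.size(); i++) {
    if (out.empty() || out.back().pc != recs[i].pc) {
      Summary s = { recs[i].pc, 0, 0 };   // start a new callsite group
      out.push_back(s);
    }
    out.back().count  += 1;
    out.back().amount += recs[i].size;
  }
  for (size_t i = 0; i < out.size(); i++) {
    std::printf("pc=0x%lx count=%zu amount=%zu\n",
                out[i].pc, out[i].count, out[i].amount);
  }
  return 0;
}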
+
+// baseline mmap'd memory by callsites
+bool MemBaseline::baseline_vm_details(const MemPointerArray* vm_records) {
+  assert(MemTracker::track_callsite(), "detail tracking is off");
+
+  VMCallsitePointer vp;
+  MemPointerArrayIteratorImpl vItr((MemPointerArray*)vm_records);
+  VMMemRegionEx* vptr = (VMMemRegionEx*)vItr.current();
+
+  if (_vm_cs == NULL) {
+    _vm_cs = new (std::nothrow) MemPointerArrayImpl<VMCallsitePointer>(64);
+    if (_vm_cs == NULL) {
+      return false;
+    }
+  } else {
+    _vm_cs->clear();
+  }
+
+  while (vptr != NULL) {
+    if (vp.addr() != vptr->pc()) {
+      if (!_vm_cs->append(&vp)) {
+        return false;
+      }
+      vp = VMCallsitePointer(vptr->pc());
+    }
+    vp.inc(vptr->size(), vptr->committed_size());
+    vptr = (VMMemRegionEx*)vItr.next();
+  }
+  if (vp.addr() != 0) {
+    if (!_vm_cs->append(&vp)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// baseline a snapshot. If summary_only = false, memory usage aggregated by
+// callsite is also baselined.
+bool MemBaseline::baseline(MemSnapshot& snapshot, bool summary_only) {
+  MutexLockerEx snapshot_locker(snapshot._lock, true);
+  reset();
+  _baselined = baseline_malloc_summary(snapshot._alloc_ptrs) &&
+               baseline_vm_summary(snapshot._vm_ptrs);
+  _number_of_classes = SystemDictionary::number_of_classes();
+
+  if (!summary_only && MemTracker::track_callsite() && _baselined) {
+    ((MemPointerArray*)snapshot._alloc_ptrs)->sort((FN_SORT)malloc_sort_by_pc);
+    ((MemPointerArray*)snapshot._vm_ptrs)->sort((FN_SORT)vm_sort_by_pc);
+    _baselined =  baseline_malloc_details(snapshot._alloc_ptrs) &&
+      baseline_vm_details(snapshot._vm_ptrs);
+    ((MemPointerArray*)snapshot._alloc_ptrs)->sort((FN_SORT)malloc_sort_by_addr);
+    ((MemPointerArray*)snapshot._vm_ptrs)->sort((FN_SORT)vm_sort_by_addr);
+  }
+  return _baselined;
+}
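
A hedged usage sketch of the new class, assuming the caller can obtain the global MemSnapshot from MemTracker (as the NMT diagnostic command registered in management.cpp does); the accessor name is an assumption:

// Sketch: take a summary-level baseline and print the overall totals.
MemSnapshot* snapshot = MemTracker::get_snapshot();  // assumed accessor name
if (snapshot != NULL) {
  MemBaseline baseline;
  if (baseline.baseline(*snapshot, true /* summary only */)) {
    tty->print_cr("malloc'd:  " SIZE_FORMAT " bytes", baseline.total_malloc_amount());
    tty->print_cr("reserved:  " SIZE_FORMAT " bytes", baseline.total_reserved_amount());
    tty->print_cr("committed: " SIZE_FORMAT " bytes", baseline.total_committed_amount());
  }
}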
+
+
+int MemBaseline::flag2index(MEMFLAGS flag) const {
+  for (int index = 0; index < NUMBER_OF_MEMORY_TYPE; index ++) {
+    if (MemType2NameMap[index]._flag == flag) {
+      return index;
+    }
+  }
+  assert(false, "no type");
+  return -1;
+}
+
+const char* MemBaseline::type2name(MEMFLAGS type) {
+  for (int index = 0; index < NUMBER_OF_MEMORY_TYPE; index ++) {
+    if (MemType2NameMap[index]._flag == type) {
+      return MemType2NameMap[index]._name;
+    }
+  }
+  assert(false, "no type");
+  return NULL;
+}
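
For example, under the mapping table at the top of this file:

//   MemBaseline::type2name(mtGC)    -> "GC"
//   MemBaseline::type2name(mtChunk) -> "Pooled Free Chunks"
//   flag2index() performs the same walk, returning the row index for a flag.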
+
+
+MemBaseline& MemBaseline::operator=(const MemBaseline& other) {
+  _total_malloced = other._total_malloced;
+  _total_vm_reserved = other._total_vm_reserved;
+  _total_vm_committed = other._total_vm_committed;
+
+  _baselined = other._baselined;
+  _number_of_classes = other._number_of_classes;
+
+  for (int index = 0; index < NUMBER_OF_MEMORY_TYPE; index ++) {
+    _malloc_data[index] = other._malloc_data[index];
+    _vm_data[index] = other._vm_data[index];
+    _arena_data[index] = other._arena_data[index];
+  }
+
+  if (MemTracker::track_callsite()) {
+    assert(_malloc_cs != NULL && _vm_cs != NULL, "out of memory");
+    assert(other._malloc_cs != NULL && other._vm_cs != NULL,
+           "not properly baselined");
+    _malloc_cs->clear();
+    _vm_cs->clear();
+    int index;
+    for (index = 0; index < other._malloc_cs->length(); index ++) {
+      _malloc_cs->append(other._malloc_cs->at(index));
+    }
+
+    for (index = 0; index < other._vm_cs->length(); index ++) {
+      _vm_cs->append(other._vm_cs->at(index));
+    }
+  }
+  return *this;
+}
+
+/* compare functions for sorting */
+
+// sort snapshot malloc'd records in callsite pc order
+int MemBaseline::malloc_sort_by_pc(const void* p1, const void* p2) {
+  assert(MemTracker::track_callsite(),"Just check");
+  const MemPointerRecordEx* mp1 = (const MemPointerRecordEx*)p1;
+  const MemPointerRecordEx* mp2 = (const MemPointerRecordEx*)p2;
+  return UNSIGNED_COMPARE(mp1->pc(), mp2->pc());
+}
+
+// sort baselined malloc'd records in size order
+int MemBaseline::bl_malloc_sort_by_size(const void* p1, const void* p2) {
+  assert(MemTracker::is_on(), "Just check");
+  const MallocCallsitePointer* mp1 = (const MallocCallsitePointer*)p1;
+  const MallocCallsitePointer* mp2 = (const MallocCallsitePointer*)p2;
+  return UNSIGNED_COMPARE(mp2->amount(), mp1->amount());
+}
+
+// sort baselined malloc'd records in callsite pc order
+int MemBaseline::bl_malloc_sort_by_pc(const void* p1, const void* p2) {
+  assert(MemTracker::is_on(), "Just check");
+  const MallocCallsitePointer* mp1 = (const MallocCallsitePointer*)p1;
+  const MallocCallsitePointer* mp2 = (const MallocCallsitePointer*)p2;
+  return UNSIGNED_COMPARE(mp1->addr(), mp2->addr());
+}
+
+// sort snapshot mmap'd records in callsite pc order
+int MemBaseline::vm_sort_by_pc(const void* p1, const void* p2) {
+  assert(MemTracker::track_callsite(),"Just check");
+  const VMMemRegionEx* mp1 = (const VMMemRegionEx*)p1;
+  const VMMemRegionEx* mp2 = (const VMMemRegionEx*)p2;
+  return UNSIGNED_COMPARE(mp1->pc(), mp2->pc());
+}
+
+// sort baselined mmap'd records in size (reserved size) order
+int MemBaseline::bl_vm_sort_by_size(const void* p1, const void* p2) {
+  assert(MemTracker::is_on(), "Just check");
+  const VMCallsitePointer* mp1 = (const VMCallsitePointer*)p1;
+  const VMCallsitePointer* mp2 = (const VMCallsitePointer*)p2;
+  return UNSIGNED_COMPARE(mp2->reserved_amount(), mp1->reserved_amount());
+}
+
+// sort baselined mmap'd records in callsite pc order
+int MemBaseline::bl_vm_sort_by_pc(const void* p1, const void* p2) {
+  assert(MemTracker::is_on(), "Just check");
+  const VMCallsitePointer* mp1 = (const VMCallsitePointer*)p1;
+  const VMCallsitePointer* mp2 = (const VMCallsitePointer*)p2;
+  return UNSIGNED_COMPARE(mp1->addr(), mp2->addr());
+}
+
+
+// sort snapshot malloc'd records in memory block address order
+int MemBaseline::malloc_sort_by_addr(const void* p1, const void* p2) {
+  assert(MemTracker::is_on(), "Just check");
+  const MemPointerRecord* mp1 = (const MemPointerRecord*)p1;
+  const MemPointerRecord* mp2 = (const MemPointerRecord*)p2;
+  int delta = UNSIGNED_COMPARE(mp1->addr(), mp2->addr());
+  assert(delta != 0, "dup pointer");
+  return delta;
+}
+
+// sort snapshot mmap'd records in memory block address order
+int MemBaseline::vm_sort_by_addr(const void* p1, const void* p2) {
+  assert(MemTracker::is_on(), "Just check");
+  const VMMemRegion* mp1 = (const VMMemRegion*)p1;
+  const VMMemRegion* mp2 = (const VMMemRegion*)p2;
+  int delta = UNSIGNED_COMPARE(mp1->addr(), mp2->addr());
+  assert(delta != 0, "dup pointer");
+  return delta;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memBaseline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,447 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_SERVICES_MEM_BASELINE_HPP
+#define SHARE_VM_SERVICES_MEM_BASELINE_HPP
+
+#include "memory/allocation.hpp"
+#include "runtime/mutex.hpp"
+#include "services/memPtr.hpp"
+#include "services/memSnapshot.hpp"
+
+// three-way compare of unsigned numbers
+#define UNSIGNED_COMPARE(a, b)  ((a > b) ? 1 : ((a == b) ? 0 : -1))
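
The three-way form matters for the comparators that use this macro: the common `a - b` comparator trick is wrong for unsigned operands because the subtraction wraps instead of going negative. Note also that the macro arguments are substituted verbatim, so callers pass simple expressions. For instance:

//   size_t a = 1, b = 2;
//   a - b                  wraps to SIZE_MAX (looks "greater than zero")
//   UNSIGNED_COMPARE(a, b) yields -1, as a qsort-style comparator requires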
+
+/*
+ * MallocCallsitePointer and VMCallsitePointer are used
+ * to baseline memory blocks with their callsite information.
+ * They are only available when detail tracking is turned
+ * on.
+ */
+
+/* baselined malloc record aggregated by callsite */
+class MallocCallsitePointer : public MemPointer {
+ private:
+  size_t    _count;   // number of malloc invocations from this callsite
+  size_t    _amount;  // total amount of memory malloc-ed from this callsite
+
+ public:
+  MallocCallsitePointer() {
+    _count = 0;
+    _amount = 0;
+  }
+
+  MallocCallsitePointer(address pc) : MemPointer(pc) {
+    _count = 0;
+    _amount = 0;
+  }
+
+  MallocCallsitePointer& operator=(const MallocCallsitePointer& p) {
+    MemPointer::operator=(p);
+    _count = p.count();
+    _amount = p.amount();
+    return *this;
+  }
+
+  inline void inc(size_t size) {
+    _count ++;
+    _amount += size;
+  };
+
+  inline size_t count() const {
+    return _count;
+  }
+
+  inline size_t amount() const {
+    return _amount;
+  }
+};
+
+// baselined virtual memory record aggregated by callsite
+class VMCallsitePointer : public MemPointer {
+ private:
+  size_t     _count;              // number of invocations from this callsite
+  size_t     _reserved_amount;    // total reserved amount
+  size_t     _committed_amount;   // total committed amount
+
+ public:
+  VMCallsitePointer() {
+    _count = 0;
+    _reserved_amount = 0;
+    _committed_amount = 0;
+  }
+
+  VMCallsitePointer(address pc) : MemPointer(pc) {
+    _count = 0;
+    _reserved_amount = 0;
+    _committed_amount = 0;
+  }
+
+  VMCallsitePointer& operator=(const VMCallsitePointer& p) {
+    MemPointer::operator=(p);
+    _count = p.count();
+    _reserved_amount = p.reserved_amount();
+    _committed_amount = p.committed_amount();
+    return *this;
+  }
+
+  inline void inc(size_t reserved, size_t committed) {
+    _count ++;
+    _reserved_amount += reserved;
+    _committed_amount += committed;
+  }
+
+  inline size_t count() const {
+    return _count;
+  }
+
+  inline size_t reserved_amount() const {
+    return _reserved_amount;
+  }
+
+  inline size_t committed_amount() const {
+    return _committed_amount;
+  }
+};
+
+// maps a memory type flag to a readable name
+typedef struct _memType2Name {
+  MEMFLAGS     _flag;
+  const char*  _name;
+} MemType2Name;
+
+
+// This class aggregates malloc'd records by memory type
+class MallocMem : public _ValueObj {
+ private:
+  MEMFLAGS       _type;
+
+  size_t         _count;
+  size_t         _amount;
+
+ public:
+  MallocMem() {
+    _type = mtNone;
+    _count = 0;
+    _amount = 0;
+  }
+
+  MallocMem(MEMFLAGS flags) {
+    assert(HAS_VALID_MEMORY_TYPE(flags), "no type");
+    _type = FLAGS_TO_MEMORY_TYPE(flags);
+    _count = 0;
+    _amount = 0;
+  }
+
+  inline void set_type(MEMFLAGS flag) {
+    _type = flag;
+  }
+
+  inline void clear() {
+    _count = 0;
+    _amount = 0;
+    _type = mtNone;
+  }
+
+  MallocMem& operator=(const MallocMem& m) {
+    assert(_type == m.type(), "different type");
+    _count = m.count();
+    _amount = m.amount();
+    return *this;
+  }
+
+  inline void inc(size_t amt) {
+    _amount += amt;
+    _count ++;
+  }
+
+  inline void reduce(size_t amt) {
+    assert(_amount >= amt, "Just check");
+    _amount -= amt;
+  }
+
+  inline void overwrite_counter(size_t count) {
+    _count = count;
+  }
+
+  inline MEMFLAGS type() const {
+    return _type;
+  }
+
+  inline bool is_type(MEMFLAGS flags) const {
+    return FLAGS_TO_MEMORY_TYPE(flags) == _type;
+  }
+
+  inline size_t count() const {
+    return _count;
+  }
+
+  inline size_t amount() const {
+    return _amount;
+  }
+};
+
+// This class records live arenas' memory usage
+class ArenaMem : public MallocMem {
+ public:
+  ArenaMem(MEMFLAGS typeflag): MallocMem(typeflag) {
+  }
+  ArenaMem() { }
+};
+
+// This class aggregates virtual memory by its memory type
+class VMMem : public _ValueObj {
+ private:
+  MEMFLAGS       _type;
+
+  size_t         _count;
+  size_t         _reserved_amount;
+  size_t         _committed_amount;
+
+ public:
+  VMMem() {
+    _type = mtNone;
+    _count = 0;
+    _reserved_amount = 0;
+    _committed_amount = 0;
+  }
+
+  VMMem(MEMFLAGS flags) {
+    assert(HAS_VALID_MEMORY_TYPE(flags), "no type");
+    _type = FLAGS_TO_MEMORY_TYPE(flags);
+    _count = 0;
+    _reserved_amount = 0;
+    _committed_amount = 0;
+  }
+
+  inline void clear() {
+    _type = mtNone;
+    _count = 0;
+    _reserved_amount = 0;
+    _committed_amount = 0;
+  }
+
+  inline void set_type(MEMFLAGS flags) {
+    _type = FLAGS_TO_MEMORY_TYPE(flags);
+  }
+
+  VMMem& operator=(const VMMem& m) {
+    assert(_type == m.type(), "different type");
+
+    _count = m.count();
+    _reserved_amount = m.reserved_amount();
+    _committed_amount = m.committed_amount();
+    return *this;
+  }
+
+
+  inline MEMFLAGS type() const {
+    return _type;
+  }
+
+  inline bool is_type(MEMFLAGS flags) const {
+    return FLAGS_TO_MEMORY_TYPE(flags) == _type;
+  }
+
+  inline void inc(size_t reserved_amt, size_t committed_amt) {
+    _reserved_amount += reserved_amt;
+    _committed_amount += committed_amt;
+    _count ++;
+  }
+
+  inline size_t count() const {
+    return _count;
+  }
+
+  inline size_t reserved_amount() const {
+    return _reserved_amount;
+  }
+
+  inline size_t committed_amount() const {
+    return _committed_amount;
+  }
+};
+
+
+
+#define NUMBER_OF_MEMORY_TYPE    (mt_number_of_types + 1)
+
+class BaselineReporter;
+class BaselineComparisonReporter;
+
+/*
+ * This class baselines current memory snapshot.
+ * A memory baseline summarizes memory usage by memory type,
+ * aggregates memory usage by callsites when detail tracking
+ * is on.
+ */
+class MemBaseline : public _ValueObj {
+  friend class BaselineReporter;
+  friend class BaselineComparisonReporter;
+
+ private:
+  // overall summaries
+  size_t        _total_malloced;
+  size_t        _total_vm_reserved;
+  size_t        _total_vm_committed;
+  size_t        _number_of_classes;
+  size_t        _number_of_threads;
+
+  // whether the snapshot has been properly baselined
+  bool          _baselined;
+
+  // within each memory type, we categorize memory into three categories
+  MallocMem     _malloc_data[NUMBER_OF_MEMORY_TYPE];
+  VMMem         _vm_data[NUMBER_OF_MEMORY_TYPE];
+  ArenaMem      _arena_data[NUMBER_OF_MEMORY_TYPE];
+
+  // memory records that aggregate memory usage by callsites.
+  // only available when detail tracking is on.
+  MemPointerArray*  _malloc_cs;
+  MemPointerArray*  _vm_cs;
+
+ private:
+  static MemType2Name  MemType2NameMap[NUMBER_OF_MEMORY_TYPE];
+
+ private:
+  // should not use copy constructor
+  MemBaseline(MemBaseline& copy) { ShouldNotReachHere(); }
+
+ public:
+  // create a memory baseline
+  MemBaseline();
+
+  virtual ~MemBaseline();
+
+  inline bool baselined() const {
+    return _baselined;
+  }
+
+  MemBaseline& operator=(const MemBaseline& other);
+
+  // reset the baseline for reuse
+  void clear();
+
+  // baseline the snapshot
+  bool baseline(MemSnapshot& snapshot, bool summary_only = true);
+
+  bool baseline(const MemPointerArray* malloc_records,
+                const MemPointerArray* vm_records,
+                bool summary_only = true);
+
+  // total malloc'd memory of specified memory type
+  inline size_t malloc_amount(MEMFLAGS flag) const {
+    return _malloc_data[flag2index(flag)].amount();
+  }
+  // number of malloc'd memory blocks of specified memory type
+  inline size_t malloc_count(MEMFLAGS flag) const {
+    return _malloc_data[flag2index(flag)].count();
+  }
+  // total memory used by arenas of specified memory type
+  inline size_t arena_amount(MEMFLAGS flag) const {
+    return _arena_data[flag2index(flag)].amount();
+  }
+  // number of arenas of specified memory type
+  inline size_t arena_count(MEMFLAGS flag) const {
+    return _arena_data[flag2index(flag)].count();
+  }
+  // total reserved memory of specified memory type
+  inline size_t reserved_amount(MEMFLAGS flag) const {
+    return _vm_data[flag2index(flag)].reserved_amount();
+  }
+  // total committed memory of specified memory type
+  inline size_t committed_amount(MEMFLAGS flag) const {
+    return _vm_data[flag2index(flag)].committed_amount();
+  }
+  // total memory (malloc'd + mmap'd + arena) of specified
+  // memory type
+  inline size_t total_amount(MEMFLAGS flag) const {
+    int index = flag2index(flag);
+    return _malloc_data[index].amount() +
+           _vm_data[index].reserved_amount() +
+           _arena_data[index].amount();
+  }
+
+  /* overall summaries */
+
+  // total malloc'd memory in snapshot
+  inline size_t total_malloc_amount() const {
+    return _total_malloced;
+  }
+  // total mmap'd memory in snapshot
+  inline size_t total_reserved_amount() const {
+    return _total_vm_reserved;
+  }
+  // total committed memory in snapshot
+  inline size_t total_committed_amount() const {
+    return _total_vm_committed;
+  }
+  // number of loaded classes
+  inline size_t number_of_classes() const {
+    return _number_of_classes;
+  }
+  // number of running threads
+  inline size_t number_of_threads() const {
+    return _number_of_threads;
+  }
+  // lookup human readable name of a memory type
+  static const char* type2name(MEMFLAGS type);
+
+ private:
+  // convert a memory flag to its index in the mapping table
+  int         flag2index(MEMFLAGS flag) const;
+
+  // reset baseline values
+  void reset();
+
+  // summarize the records in global snapshot
+  bool baseline_malloc_summary(const MemPointerArray* malloc_records);
+  bool baseline_vm_summary(const MemPointerArray* vm_records);
+  bool baseline_malloc_details(const MemPointerArray* malloc_records);
+  bool baseline_vm_details(const MemPointerArray* vm_records);
+
+  // print a line of malloc'd memory aggregated by callsite
+  void print_malloc_callsite(outputStream* st, address pc, size_t size,
+    size_t count, int diff_amt, int diff_count) const;
+  // print a line of mmap'd memory aggregated by callsite
+  void print_vm_callsite(outputStream* st, address pc, size_t rsz,
+    size_t csz, int diff_rsz, int diff_csz) const;
+
+  // sorting functions for raw records
+  static int malloc_sort_by_pc(const void* p1, const void* p2);
+  static int malloc_sort_by_addr(const void* p1, const void* p2);
+
+  static int vm_sort_by_pc(const void* p1, const void* p2);
+  static int vm_sort_by_addr(const void* p1, const void* p2);
+
+ private:
+  // sorting functions for baselined records
+  static int bl_malloc_sort_by_size(const void* p1, const void* p2);
+  static int bl_vm_sort_by_size(const void* p1, const void* p2);
+  static int bl_malloc_sort_by_pc(const void* p1, const void* p2);
+  static int bl_vm_sort_by_pc(const void* p1, const void* p2);
+};
+
+
+#endif // SHARE_VM_SERVICES_MEM_BASELINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memPtr.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "services/memPtr.hpp"
+#include "services/memTracker.hpp"
+
+volatile jint SequenceGenerator::_seq_number = 1;
+NOT_PRODUCT(jint SequenceGenerator::_max_seq_number = 1;)
+DEBUG_ONLY(volatile unsigned long SequenceGenerator::_generation = 0;)
+
+jint SequenceGenerator::next() {
+  jint seq = Atomic::add(1, &_seq_number);
+  if (seq < 0) {
+    MemTracker::shutdown(MemTracker::NMT_sequence_overflow);
+  }
+  assert(seq > 0, "counter overflow");
+  NOT_PRODUCT(_max_seq_number = (seq > _max_seq_number) ? seq : _max_seq_number;)
+  return seq;
+}
+
+
+
+bool VMMemRegion::contains(const VMMemRegion* mr) const {
+  assert(base() != 0, "no base address");
+  assert(size() != 0 || committed_size() != 0,
+    "no range");
+  address base_addr = base();
+  address end_addr = base_addr +
+    (is_reserve_record() ? reserved_size() : committed_size());
+  if (mr->is_reserve_record()) {
+    if (mr->base() == base_addr && mr->size() == size()) {
+      // the same range
+      return true;
+    }
+    return false;
+  } else if (mr->is_commit_record() || mr->is_uncommit_record()) {
+    assert(mr->base() != 0 && mr->committed_size() > 0,
+      "bad record");
+    return (mr->base() >= base_addr &&
+      (mr->base() + mr->committed_size()) <= end_addr);
+  } else if (mr->is_type_tagging_record()) {
+    assert(mr->base() != 0, "no base");
+    return mr->base() == base_addr;
+  } else if (mr->is_release_record()) {
+    assert(mr->base() != 0 && mr->size() > 0,
+      "bad record");
+    return (mr->base() == base_addr && mr->size() == size());
+  } else {
+    assert(false, "unknown virtual memory record type");
+    return false;
+  }
+}
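+
+// A minimal sketch of the containment rules above (addresses and sizes are
+// hypothetical):
+//
+//   VMMemRegion rgn;  // from a reserve record: base = 0x1000, size = 64K
+//   VMMemRegion cmt;  // from a commit record:  base = 0x2000, committed = 4K
+//   // the committed range lies inside the reserved range:
+//   assert(rgn.contains(&cmt), "commit record within reserved region");
+//   // a release record is contained only if it matches the range exactly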
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memPtr.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,509 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_SERVICES_MEM_PTR_HPP
+#define SHARE_VM_SERVICES_MEM_PTR_HPP
+
+#include "memory/allocation.hpp"
+#include "runtime/atomic.hpp"
+#include "runtime/os.hpp"
+#include "runtime/safepoint.hpp"
+
+/*
+ * Global sequence generator that produces the sequence numbers used to
+ * serialize memory records.
+ */
+class SequenceGenerator : AllStatic {
+ public:
+  static jint next();
+
+  // peek last sequence number
+  static jint peek() {
+    return _seq_number;
+  }
+
+  // reset sequence number
+  static void reset() {
+    assert(SafepointSynchronize::is_at_safepoint(), "Safepoint required");
+    _seq_number = 1;
+    DEBUG_ONLY(_generation ++;)
+  }
+
+  DEBUG_ONLY(static unsigned long current_generation() { return (unsigned long)_generation; })
+  NOT_PRODUCT(static jint max_seq_num() { return _max_seq_number; })
+
+ private:
+  static volatile jint _seq_number;
+  NOT_PRODUCT(static jint _max_seq_number; )
+  DEBUG_ONLY(static volatile unsigned long _generation; )
+};
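+
+// A short usage sketch: next() is monotonic within a generation (the counter
+// starts at 1, so the first call returns 2), and reset() starts a new
+// generation at a safepoint:
+//
+//   jint s1 = SequenceGenerator::next();
+//   jint s2 = SequenceGenerator::next();   // s2 > s1
+//   // later, at a safepoint: SequenceGenerator::reset();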
+
+/*
+ * The following classes hold memory activity records at different stages.
+ *   MemPointer
+ *     |--------MemPointerRecord
+ *                     |
+ *                     |----MemPointerRecordEx
+ *                     |           |
+ *                     |           |-------SeqMemPointerRecordEx
+ *                     |
+ *                     |----SeqMemPointerRecord
+ *                     |
+ *                     |----VMMemRegion
+ *                               |
+ *                               |-----VMMemRegionEx
+ *
+ *
+ *  prefix 'Seq' - sequenced: the record carries a sequence number
+ *  suffix 'Ex'  - extended:  the record carries the caller's pc
+ *
+ *  per-thread recorder : SeqMemPointerRecord(Ex)
+ *  snapshot staging    : SeqMemPointerRecord(Ex)
+ *  snapshot            : MemPointerRecord(Ex) and VMMemRegion(Ex)
+ *
+ */
+
+/*
+ * Class that wraps the address of a memory block;
+ * the pointer refers either to a malloc'd memory
+ * block or to a mmap'd memory block.
+ */
+class MemPointer : public _ValueObj {
+ public:
+  MemPointer(): _addr(0) { }
+  MemPointer(address addr): _addr(addr) { }
+
+  MemPointer(const MemPointer& copy_from) {
+    _addr = copy_from.addr();
+  }
+
+  inline address addr() const {
+    return _addr;
+  }
+
+  inline operator address() const {
+    return addr();
+  }
+
+  inline bool operator == (const MemPointer& other) const {
+    return addr() == other.addr();
+  }
+
+  inline MemPointer& operator = (const MemPointer& other) {
+    _addr = other.addr();
+    return *this;
+  }
+
+ protected:
+  inline void set_addr(address addr) { _addr = addr; }
+
+ protected:
+  // memory address
+  address    _addr;
+};
+
+/* MemPointerRecord records an activity and its associated
+ * attributes on a memory block.
+ */
+class MemPointerRecord : public MemPointer {
+ private:
+  MEMFLAGS       _flags;
+  size_t         _size;
+
+public:
+  /* extension of MemoryType enum
+   * see share/vm/memory/allocation.hpp for details.
+   *
+   * The tag values determine sorting order, so be
+   * careful if changes are needed:
+   * allocation records must sort ahead of tagging
+   * records, which in turn sort ahead of deallocation records.
+   */
+  enum MemPointerTags {
+    tag_alloc            = 0x0001, // malloc or reserve record
+    tag_commit           = 0x0002, // commit record
+    tag_type             = 0x0003, // tag virtual memory to a memory type
+    tag_uncommit         = 0x0004, // uncommit record
+    tag_release          = 0x0005, // free or release record
+    tag_size             = 0x0006, // arena size
+    tag_masks            = 0x0007, // all tag bits
+    vmBit                = 0x0008
+  };
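+
+  /* Bit-layout sketch: the low three bits hold the tag (tag_masks == 0x0007)
+   * and bit 3 marks virtual memory records (vmBit == 0x0008); the memory
+   * type bits defined in allocation.hpp occupy higher bits. For example
+   * (memory type chosen arbitrarily):
+   *
+   *   MEMFLAGS f = mtThread | tag_commit | vmBit;
+   *   // is_virtual_memory_commit_record(f) holds
+   */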
+
+  /* helper functions to interpret the tagging flags */
+
+  inline static bool is_allocation_record(MEMFLAGS flags) {
+    return (flags & tag_masks) == tag_alloc;
+  }
+
+  inline static bool is_deallocation_record(MEMFLAGS flags) {
+    return (flags & tag_masks) == tag_release;
+  }
+
+  inline static bool is_arena_record(MEMFLAGS flags) {
+    return (flags & (otArena | tag_size)) == otArena;
+  }
+
+  inline static bool is_arena_size_record(MEMFLAGS flags) {
+    return (flags & (otArena | tag_size)) == (otArena | tag_size);
+  }
+
+  inline static bool is_virtual_memory_record(MEMFLAGS flags) {
+    return (flags & vmBit) != 0;
+  }
+
+  inline static bool is_virtual_memory_reserve_record(MEMFLAGS flags) {
+    return (flags & 0x0F) == (tag_alloc | vmBit);
+  }
+
+  inline static bool is_virtual_memory_commit_record(MEMFLAGS flags) {
+    return (flags & 0x0F) == (tag_commit | vmBit);
+  }
+
+  inline static bool is_virtual_memory_uncommit_record(MEMFLAGS flags) {
+    return (flags & 0x0F) == (tag_uncommit | vmBit);
+  }
+
+  inline static bool is_virtual_memory_release_record(MEMFLAGS flags) {
+    return (flags & 0x0F) == (tag_release | vmBit);
+  }
+
+  inline static bool is_virtual_memory_type_record(MEMFLAGS flags) {
+    return (flags & 0x0F) == (tag_type | vmBit);
+  }
+
+  /* tagging flags */
+  inline static MEMFLAGS malloc_tag()                 { return tag_alloc;   }
+  inline static MEMFLAGS free_tag()                   { return tag_release; }
+  inline static MEMFLAGS arena_size_tag()             { return tag_size | otArena; }
+  inline static MEMFLAGS virtual_memory_tag()         { return vmBit; }
+  inline static MEMFLAGS virtual_memory_reserve_tag() { return (tag_alloc | vmBit); }
+  inline static MEMFLAGS virtual_memory_commit_tag()  { return (tag_commit | vmBit); }
+  inline static MEMFLAGS virtual_memory_uncommit_tag(){ return (tag_uncommit | vmBit); }
+  inline static MEMFLAGS virtual_memory_release_tag() { return (tag_release | vmBit); }
+  inline static MEMFLAGS virtual_memory_type_tag()    { return (tag_type | vmBit); }
+
+ public:
+  MemPointerRecord(): _flags(mtNone), _size(0) { }
+
+  MemPointerRecord(address addr, MEMFLAGS memflags, size_t size = 0):
+      MemPointer(addr), _flags(memflags), _size(size) { }
+
+  MemPointerRecord(const MemPointerRecord& copy_from):
+    MemPointer(copy_from), _flags(copy_from.flags()),
+    _size(copy_from.size()) {
+  }
+
+  /* MemPointerRecord is not sequenced; it always returns
+   * 0 to indicate a non-sequenced record.
+   */
+  virtual jint seq() const               { return 0; }
+
+  inline size_t   size()  const          { return _size; }
+  inline void set_size(size_t size)      { _size = size; }
+
+  inline MEMFLAGS flags() const          { return _flags; }
+  inline void set_flags(MEMFLAGS flags)  { _flags = flags; }
+
+  MemPointerRecord& operator= (const MemPointerRecord& ptr) {
+    MemPointer::operator=(ptr);
+    _flags = ptr.flags();
+#ifdef ASSERT
+    if (IS_ARENA_OBJ(_flags)) {
+      assert(!is_vm_pointer(), "wrong flags");
+      assert((_flags & ot_masks) == otArena, "wrong flags");
+    }
+#endif
+    _size = ptr.size();
+    return *this;
+  }
+
+  // if the pointer represents a malloc-ed memory address
+  inline bool is_malloced_pointer() const {
+    return !is_vm_pointer();
+  }
+
+  // if the pointer represents a virtual memory address
+  inline bool is_vm_pointer() const {
+    return is_virtual_memory_record(_flags);
+  }
+
+  // if this record records a 'malloc' or virtual memory
+  // 'reserve' call
+  inline bool is_allocation_record() const {
+    return is_allocation_record(_flags);
+  }
+
+  // if this record records a size information of an arena
+  inline bool is_arena_size_record() const {
+    return is_arena_size_record(_flags);
+  }
+
+  // if this pointer represents an address to an arena object
+  inline bool is_arena_record() const {
+    return is_arena_record(_flags);
+  }
+
+  // if this record carries the size information of a specific arena
+  inline bool is_size_record_of_arena(const MemPointerRecord* arena_rc) {
+    assert(is_arena_size_record(), "not size record");
+    assert(arena_rc->is_arena_record(), "not arena record");
+    return (arena_rc->addr() + sizeof(void*)) == addr();
+  }
+
+  // if this record records a 'free' or virtual memory 'free' call
+  inline bool is_deallocation_record() const {
+    return is_deallocation_record(_flags);
+  }
+
+  // if this record records a virtual memory 'commit' call
+  inline bool is_commit_record() const {
+    return is_virtual_memory_commit_record(_flags);
+  }
+
+  // if this record records a virtual memory 'uncommit' call
+  inline bool is_uncommit_record() const {
+    return is_virtual_memory_uncommit_record(_flags);
+  }
+
+  // if this record is a tagging record of a virtual memory block
+  inline bool is_type_tagging_record() const {
+    return is_virtual_memory_type_record(_flags);
+  }
+};
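+
+// A minimal sketch of composing and querying a record (address, memory type
+// and size below are arbitrary):
+//
+//   address p = ...;  // block returned by os::malloc()
+//   MemPointerRecord rec(p, mtThread | MemPointerRecord::malloc_tag(), 128);
+//   // rec.is_malloced_pointer() and rec.is_allocation_record() hold;
+//   // the matching free would be recorded with free_tag() and size 0.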
+
+// MemPointerRecordEx also records callsite pc, from where
+// the memory block is allocated
+class MemPointerRecordEx : public MemPointerRecord {
+ private:
+  address      _pc;  // callsite pc
+
+ public:
+  MemPointerRecordEx(): _pc(0) { }
+
+  MemPointerRecordEx(address addr, MEMFLAGS memflags, size_t size = 0, address pc = 0):
+    MemPointerRecord(addr, memflags, size), _pc(pc) {}
+
+  MemPointerRecordEx(const MemPointerRecordEx& copy_from):
+    MemPointerRecord(copy_from), _pc(copy_from.pc()) {}
+
+  inline address pc() const { return _pc; }
+
+  void init(const MemPointerRecordEx* mpe) {
+    MemPointerRecord::operator=(*mpe);
+    _pc = mpe->pc();
+  }
+
+  void init(const MemPointerRecord* mp) {
+    MemPointerRecord::operator=(*mp);
+    _pc = 0;
+  }
+};
+
+// a virtual memory region
+class VMMemRegion : public MemPointerRecord {
+ private:
+  // committed size
+  size_t       _committed_size;
+
+public:
+  VMMemRegion(): _committed_size(0) { }
+
+  void init(const MemPointerRecord* mp) {
+    assert(mp->is_vm_pointer(), "not virtual memory pointer");
+    _addr = mp->addr();
+    if (mp->is_commit_record() || mp->is_uncommit_record()) {
+      _committed_size = mp->size();
+      set_size(_committed_size);
+    } else {
+      set_size(mp->size());
+      _committed_size = 0;
+    }
+    set_flags(mp->flags());
+  }
+
+  VMMemRegion& operator=(const VMMemRegion& other) {
+    MemPointerRecord::operator=(other);
+    _committed_size = other.committed_size();
+    return *this;
+  }
+
+  inline bool is_reserve_record() const {
+    return is_virtual_memory_reserve_record(flags());
+  }
+
+  inline bool is_release_record() const {
+    return is_virtual_memory_release_record(flags());
+  }
+
+  // resize reserved VM range
+  inline void set_reserved_size(size_t new_size) {
+    assert(new_size >= committed_size(), "resize");
+    set_size(new_size);
+  }
+
+  inline void commit(size_t size) {
+    _committed_size += size;
+  }
+
+  inline void uncommit(size_t size) {
+    if (_committed_size >= size) {
+      _committed_size -= size;
+    } else {
+      _committed_size = 0;
+    }
+  }
+
+  /*
+   * whether this virtual memory range covers the whole
+   * range of the other VMMemRegion
+   */
+  bool contains(const VMMemRegion* mr) const;
+
+  /* base address of this virtual memory range */
+  inline address base() const {
+    return addr();
+  }
+
+  /* tag this virtual memory range to the specified memory type */
+  inline void tag(MEMFLAGS f) {
+    set_flags(flags() | (f & mt_masks));
+  }
+
+  // release part of memory range
+  inline void partial_release(address add, size_t sz) {
+    assert(add >= addr() && add < addr() + size(), "not valid address");
+    // for now, a range can be partially released from either end,
+    // but not from the middle
+    assert(add == addr() || (add + sz) == (addr() + size()),
+      "release in the middle");
+    if (add == addr()) {
+      set_addr(add + sz);
+      set_size(size() - sz);
+    } else {
+      set_size(size() - sz);
+    }
+  }
+
+  // the committed size of the virtual memory block
+  inline size_t committed_size() const {
+    return _committed_size;
+  }
+
+  // the reserved size of the virtual memory block
+  inline size_t reserved_size() const {
+    return size();
+  }
+};
+
+class VMMemRegionEx : public VMMemRegion {
+ private:
+  jint   _seq;  // sequence number (currently unused; VM regions are not sequenced)
+
+ public:
+  VMMemRegionEx(): _seq(0), _pc(0) { }
+
+  void init(const MemPointerRecordEx* mpe) {
+    VMMemRegion::init(mpe);
+    _pc = mpe->pc();
+  }
+
+  void init(const MemPointerRecord* mpe) {
+    VMMemRegion::init(mpe);
+    _pc = 0;
+  }
+
+  VMMemRegionEx& operator=(const VMMemRegionEx& other) {
+    VMMemRegion::operator=(other);
+    _pc = other.pc();
+    return *this;
+  }
+
+  inline address pc() const { return _pc; }
+ private:
+  address   _pc;
+};
+
+/*
+ * Sequenced memory record
+ */
+class SeqMemPointerRecord : public MemPointerRecord {
+ private:
+   jint _seq;  // sequence number
+
+ public:
+  SeqMemPointerRecord(): _seq(0) { }
+
+  SeqMemPointerRecord(address addr, MEMFLAGS flags, size_t size)
+    : MemPointerRecord(addr, flags, size) {
+    _seq = SequenceGenerator::next();
+  }
+
+  SeqMemPointerRecord(const SeqMemPointerRecord& copy_from)
+    : MemPointerRecord(copy_from) {
+    _seq = copy_from.seq();
+  }
+
+  SeqMemPointerRecord& operator= (const SeqMemPointerRecord& ptr) {
+    MemPointerRecord::operator=(ptr);
+    _seq = ptr.seq();
+    return *this;
+  }
+
+  inline jint seq() const {
+    return _seq;
+  }
+};
+
+
+
+class SeqMemPointerRecordEx : public MemPointerRecordEx {
+ private:
+  jint    _seq;  // sequence number
+
+ public:
+  SeqMemPointerRecordEx(): _seq(0) { }
+
+  SeqMemPointerRecordEx(address addr, MEMFLAGS flags, size_t size,
+    address pc): MemPointerRecordEx(addr, flags, size, pc) {
+    _seq = SequenceGenerator::next();
+  }
+
+  SeqMemPointerRecordEx(const SeqMemPointerRecordEx& copy_from)
+    : MemPointerRecordEx(copy_from) {
+    _seq = copy_from.seq();
+  }
+
+  SeqMemPointerRecordEx& operator= (const SeqMemPointerRecordEx& ptr) {
+    MemPointerRecordEx::operator=(ptr);
+    _seq = ptr.seq();
+    return *this;
+  }
+
+  inline jint seq() const {
+    return _seq;
+  }
+};
+
+#endif // SHARE_VM_SERVICES_MEM_PTR_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memPtrArray.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#ifndef SHARE_VM_UTILITIES_MEM_PTR_ARRAY_HPP
+#define SHARE_VM_UTILITIES_MEM_PTR_ARRAY_HPP
+
+#include "memory/allocation.hpp"
+#include "services/memPtr.hpp"
+
+class MemPtr;
+class MemRecorder;
+class ArenaInfo;
+class MemSnapshot;
+
+extern "C" {
+  typedef int (*FN_SORT)(const void *, const void *);
+}
+
+
+// Memory pointer array interface. This array is used by NMT to hold
+// various memory block information.
+// The memory pointer arrays are usually walked with their iterators.
+
+class MemPointerArray : public CHeapObj<mtNMT> {
+ public:
+  virtual ~MemPointerArray() { }
+
+  // return true if storage for the data cannot be allocated
+  virtual bool out_of_memory() const = 0;
+  virtual bool is_empty() const = 0;
+  virtual bool is_full() = 0;
+  virtual int  length() const = 0;
+  virtual void clear() = 0;
+  virtual bool append(MemPointer* ptr) = 0;
+  virtual bool insert_at(MemPointer* ptr, int pos) = 0;
+  virtual bool remove_at(int pos) = 0;
+  virtual MemPointer* at(int index) const = 0;
+  virtual void sort(FN_SORT fn) = 0;
+  virtual size_t instance_size() const = 0;
+  virtual bool shrink() = 0;
+
+  NOT_PRODUCT(virtual int capacity() const = 0;)
+};
+
+// Iterator interface
+class MemPointerArrayIterator VALUE_OBJ_CLASS_SPEC {
+ public:
+  // return the pointer at current position
+  virtual MemPointer* current() const = 0;
+  // return the next pointer and advance current position
+  virtual MemPointer* next() = 0;
+  // return next pointer without advancing current position
+  virtual MemPointer* peek_next() const = 0;
+  // return previous pointer without changing current position
+  virtual MemPointer* peek_prev() const = 0;
+  // remove the pointer at current position
+  virtual void        remove() = 0;
+  // insert the pointer at current position
+  virtual bool        insert(MemPointer* ptr) = 0;
+  // insert specified element after current position and
+  // move current position to newly inserted position
+  virtual bool        insert_after(MemPointer* ptr) = 0;
+};
+
+// implementation class
+class MemPointerArrayIteratorImpl : public MemPointerArrayIterator {
+#ifdef ASSERT
+ protected:
+#else
+ private:
+#endif
+  MemPointerArray*  _array;
+  int               _pos;
+
+ public:
+  MemPointerArrayIteratorImpl(MemPointerArray* arr) {
+    assert(arr != NULL, "Parameter check");
+    _array = arr;
+    _pos = 0;
+  }
+
+  virtual MemPointer* current() const {
+    if (_pos < _array->length()) {
+      return _array->at(_pos);
+    }
+    return NULL;
+  }
+
+  virtual MemPointer* next() {
+    if (_pos + 1 < _array->length()) {
+      return _array->at(++_pos);
+    }
+    _pos = _array->length();
+    return NULL;
+  }
+
+  virtual MemPointer* peek_next() const {
+    if (_pos + 1 < _array->length()) {
+      return _array->at(_pos + 1);
+    }
+    return NULL;
+  }
+
+  virtual MemPointer* peek_prev() const {
+    if (_pos > 0) {
+      return _array->at(_pos - 1);
+    }
+    return NULL;
+  }
+
+  virtual void remove() {
+    if (_pos < _array->length()) {
+      _array->remove_at(_pos);
+    }
+  }
+
+  virtual bool insert(MemPointer* ptr) {
+    return _array->insert_at(ptr, _pos);
+  }
+
+  virtual bool insert_after(MemPointer* ptr) {
+    if (_array->insert_at(ptr, _pos + 1)) {
+      _pos ++;
+      return true;
+    }
+    return false;
+  }
+};
+
+
+
+// Memory pointer array implementation.
+// This implementation is an expandable array.
+#define DEFAULT_PTR_ARRAY_SIZE 1024
+
+template <class E> class MemPointerArrayImpl : public MemPointerArray {
+ private:
+  int                   _max_size;
+  int                   _size;
+  bool                  _init_elements;
+  E*                    _data;
+
+ public:
+  MemPointerArrayImpl(int initial_size = DEFAULT_PTR_ARRAY_SIZE, bool init_elements = true):
+   _max_size(initial_size), _size(0), _init_elements(init_elements) {
+    _data = (E*)raw_allocate(sizeof(E), initial_size);
+    if (_init_elements) {
+      for (int index = 0; index < _max_size; index ++) {
+        ::new ((void*)&_data[index]) E();
+      }
+    }
+  }
+
+  virtual ~MemPointerArrayImpl() {
+    if (_data != NULL) {
+      raw_free(_data);
+    }
+  }
+
+ public:
+  bool out_of_memory() const {
+    return (_data == NULL);
+  }
+
+  size_t instance_size() const {
+    return sizeof(MemPointerArrayImpl<E>) + _max_size * sizeof(E);
+  }
+
+  bool is_empty() const {
+    assert(_data != NULL, "Just check");
+    return _size == 0;
+  }
+
+  bool is_full() {
+    assert(_data != NULL, "Just check");
+    if (_size < _max_size) {
+      return false;
+    } else {
+      return !expand_array();
+    }
+  }
+
+  int length() const {
+    assert(_data != NULL, "Just check");
+    return _size;
+  }
+
+  NOT_PRODUCT(int capacity() const { return _max_size; })
+
+  void clear() {
+    assert(_data != NULL, "Just check");
+    _size = 0;
+  }
+
+  bool append(MemPointer* ptr) {
+    assert(_data != NULL, "Just check");
+    if (is_full()) {
+      return false;
+    }
+    _data[_size ++] = *(E*)ptr;
+    return true;
+  }
+
+  bool insert_at(MemPointer* ptr, int pos) {
+    assert(_data != NULL, "Just check");
+    if (is_full()) {
+      return false;
+    }
+    for (int index = _size; index > pos; index --) {
+      _data[index] = _data[index - 1];
+    }
+    _data[pos] = *(E*)ptr;
+    _size ++;
+    return true;
+  }
+
+  bool remove_at(int pos) {
+    assert(_data != NULL, "Just check");
+    if (pos < 0 || pos >= _size) {
+      return false;
+    }
+    -- _size;
+
+    for (int index = pos; index < _size; index ++) {
+      _data[index] = _data[index + 1];
+    }
+    return true;
+  }
+
+  MemPointer* at(int index) const {
+    assert(_data != NULL, "Just check");
+    assert(index >= 0 && index < _size, "illegal index");
+    return &_data[index];
+  }
+
+  bool shrink() {
+    float used = ((float)_size) / ((float)_max_size);
+    if (used < 0.40) {
+      E* old_ptr = _data;
+      int new_size = ((_max_size) / (2 * DEFAULT_PTR_ARRAY_SIZE) + 1) * DEFAULT_PTR_ARRAY_SIZE;
+      _data = (E*)raw_reallocate(_data, sizeof(E), new_size);
+      if (_data == NULL) {
+        _data = old_ptr;
+        return false;
+      } else {
+        _max_size = new_size;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  void sort(FN_SORT fn) {
+    assert(_data != NULL, "Just check");
+    qsort((void*)_data, _size, sizeof(E), fn);
+  }
+
+ private:
+  bool  expand_array() {
+    assert(_data != NULL, "Not yet allocated");
+    E* old_ptr = _data;
+    if ((_data = (E*)raw_reallocate((void*)_data, sizeof(E),
+      _max_size + DEFAULT_PTR_ARRAY_SIZE)) == NULL) {
+      _data = old_ptr;
+      return false;
+    } else {
+      _max_size += DEFAULT_PTR_ARRAY_SIZE;
+      if (_init_elements) {
+        for (int index = _size; index < _max_size; index ++) {
+          ::new ((void*)&_data[index]) E();
+        }
+      }
+      return true;
+    }
+  }
+
+  void* raw_allocate(size_t elementSize, int items) {
+    return os::malloc(elementSize * items, mtNMT);
+  }
+
+  void* raw_reallocate(void* ptr, size_t elementSize, int items) {
+    return os::realloc(ptr, elementSize * items, mtNMT);
+  }
+
+  void  raw_free(void* ptr) {
+    os::free(ptr, mtNMT);
+  }
+};
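+
+// A usage sketch (element type and values are illustrative); the array grows
+// on demand through is_full()/expand_array() and can later shrink():
+//
+//   MemPointerArray* arr = new (std::nothrow)
+//       MemPointerArrayImpl<MemPointerRecord>();
+//   if (arr != NULL && !arr->out_of_memory()) {
+//     MemPointerRecord rec((address)0x1000, mtNMT, 64);  // hypothetical
+//     arr->append(&rec);
+//   }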
+
+#endif // SHARE_VM_UTILITIES_MEM_PTR_ARRAY_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memRecorder.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "runtime/atomic.hpp"
+#include "services/memBaseline.hpp"
+#include "services/memRecorder.hpp"
+#include "services/memPtr.hpp"
+#include "services/memTracker.hpp"
+
+MemPointer* SequencedRecordIterator::next_record() {
+  MemPointer* itr_cur = _itr.current();
+  if (itr_cur == NULL) return NULL;
+  MemPointer* itr_next = _itr.next();
+
+  while (itr_next != NULL &&
+    same_kind((MemPointerRecord*)itr_cur, (MemPointerRecord*)itr_next)) {
+    itr_cur = itr_next;
+    itr_next = _itr.next();
+  }
+
+  return itr_cur;
+}
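+
+// Collapsing sketch (hypothetical records, pre-sorted as required): given
+//   (addr A, malloc_tag, seq 2), (addr A, malloc_tag, seq 5), (addr B, ...)
+// next_record() skips the seq 2 entry and returns the seq 5 record, the
+// latest operation of that kind on address A, before moving on to address B.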
+
+
+volatile jint MemRecorder::_instance_count = 0;
+
+MemRecorder::MemRecorder() {
+  assert(MemTracker::is_on(), "Native memory tracking is off");
+  Atomic::inc(&_instance_count);
+  debug_only(set_generation();)
+
+  if (MemTracker::track_callsite()) {
+    _pointer_records = new (std::nothrow) FixedSizeMemPointerArray<SeqMemPointerRecordEx,
+        DEFAULT_RECORDER_PTR_ARRAY_SIZE>();
+  } else {
+    _pointer_records = new (std::nothrow) FixedSizeMemPointerArray<SeqMemPointerRecord,
+        DEFAULT_RECORDER_PTR_ARRAY_SIZE>();
+  }
+  _next = NULL;
+
+
+  if (_pointer_records != NULL) {
+    // record the recorder itself
+    record((address)this, (MemPointerRecord::malloc_tag()|mtNMT|otNMTRecorder),
+        sizeof(MemRecorder), CALLER_PC);
+    record((address)_pointer_records, (MemPointerRecord::malloc_tag()|mtNMT|otNMTRecorder),
+        _pointer_records->instance_size(), CURRENT_PC);
+  }
+}
+
+MemRecorder::~MemRecorder() {
+  if (_pointer_records != NULL) {
+    if (MemTracker::is_on()) {
+      MemTracker::record_free((address)_pointer_records, mtNMT);
+      MemTracker::record_free((address)this, mtNMT);
+    }
+    delete _pointer_records;
+  }
+  if (_next != NULL) {
+    delete _next;
+  }
+
+  Atomic::dec(&_instance_count);
+}
+
+// Sorting order:
+//   1. memory block address
+//   2. mem pointer record tags
+//   3. sequence number
+int MemRecorder::sort_record_fn(const void* e1, const void* e2) {
+  const MemPointerRecord* p1 = (const MemPointerRecord*)e1;
+  const MemPointerRecord* p2 = (const MemPointerRecord*)e2;
+  int delta = UNSIGNED_COMPARE(p1->addr(), p2->addr());
+  if (delta == 0) {
+    int df = UNSIGNED_COMPARE((p1->flags() & MemPointerRecord::tag_masks),
+                              (p2->flags() & MemPointerRecord::tag_masks));
+    if (df == 0) {
+      assert(p1->seq() != p2->seq(), "dup seq");
+      return p1->seq() - p2->seq();
+    } else {
+      return df;
+    }
+  } else {
+    return delta;
+  }
+}
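+
+// Ordering sketch (hypothetical records): since tag_alloc < tag_release,
+// records sort as
+//   (addr 0x100, tag_alloc, seq 7) < (addr 0x100, tag_release, seq 9)
+//                                  < (addr 0x200, tag_alloc,   seq 3)
+// so all operations on one address stay adjacent, ordered by tag, then by
+// the order in which they happened.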
+
+bool MemRecorder::record(address p, MEMFLAGS flags, size_t size, address pc) {
+#ifdef ASSERT
+  if (MemPointerRecord::is_virtual_memory_record(flags)) {
+    assert((flags & MemPointerRecord::tag_masks) != 0, "bad virtual memory record");
+  } else {
+    assert((flags & MemPointerRecord::tag_masks) == MemPointerRecord::malloc_tag() ||
+           (flags & MemPointerRecord::tag_masks) == MemPointerRecord::free_tag() ||
+           IS_ARENA_OBJ(flags),
+           "bad malloc record");
+  }
+  // a recorder should only hold records within the same generation
+  unsigned long cur_generation = SequenceGenerator::current_generation();
+  assert(cur_generation == _generation,
+         "this thread did not enter sync point");
+#endif
+
+  if (MemTracker::track_callsite()) {
+    SeqMemPointerRecordEx ap(p, flags, size, pc);
+    debug_only(check_dup_seq(ap.seq());)
+    return _pointer_records->append(&ap);
+  } else {
+    SeqMemPointerRecord ap(p, flags, size);
+    debug_only(check_dup_seq(ap.seq());)
+    return _pointer_records->append(&ap);
+  }
+}
+
+// iterator for alloc pointer records
+SequencedRecordIterator MemRecorder::pointer_itr() {
+  assert(_pointer_records != NULL, "just check");
+  _pointer_records->sort((FN_SORT)sort_record_fn);
+  return SequencedRecordIterator(_pointer_records);
+}
+
+
+#ifdef ASSERT
+void MemRecorder::set_generation() {
+  _generation = SequenceGenerator::current_generation();
+}
+
+void MemRecorder::check_dup_seq(jint seq) const {
+  MemPointerArrayIteratorImpl itr(_pointer_records);
+  MemPointerRecord* rc = (MemPointerRecord*)itr.current();
+  while (rc != NULL) {
+    assert(rc->seq() != seq, "dup seq");
+    rc = (MemPointerRecord*)itr.next();
+  }
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memRecorder.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_SERVICES_MEM_RECORDER_HPP
+#define SHARE_VM_SERVICES_MEM_RECORDER_HPP
+
+#include "memory/allocation.hpp"
+#include "runtime/os.hpp"
+#include "services/memPtrArray.hpp"
+
+class MemSnapshot;
+class MemTracker;
+class MemTrackWorker;
+
+// Fixed size memory pointer array implementation
+template <class E, int SIZE> class FixedSizeMemPointerArray :
+  public MemPointerArray {
+  // This implementation is for memory recorder only
+  friend class MemRecorder;
+
+ private:
+  E      _data[SIZE];
+  int    _size;
+
+ protected:
+  FixedSizeMemPointerArray(bool init_elements = false):
+    _size(0) {
+    if (init_elements) {
+      for (int index = 0; index < SIZE; index ++) {
+        ::new ((void*)&_data[index]) E();
+      }
+    }
+  }
+
+  void* operator new(size_t size, const std::nothrow_t& nothrow_constant) {
+    // the instance is part of memRecorder, needs to be tagged with 'otNMTRecorder'
+    // to avoid recursion
+    return os::malloc(size, (mtNMT | otNMTRecorder));
+  }
+
+  void* operator new(size_t size) {
+    assert(false, "use nothrow version");
+    return NULL;
+  }
+
+  void operator delete(void* p) {
+    os::free(p, (mtNMT | otNMTRecorder));
+  }
+
+  // instance size
+  inline size_t instance_size() const {
+    return sizeof(FixedSizeMemPointerArray<E, SIZE>);
+  }
+
+  NOT_PRODUCT(int capacity() const { return SIZE; })
+
+ public:
+  // implementation of public interface
+  bool out_of_memory() const { return false; }
+  bool is_empty()      const { return _size == 0; }
+  bool is_full()             { return length() >= SIZE; }
+  int  length()        const { return _size; }
+
+  void clear() {
+    _size = 0;
+  }
+
+  bool append(MemPointer* ptr) {
+    if (is_full()) return false;
+    _data[_size ++] = *(E*)ptr;
+    return true;
+  }
+
+  virtual bool insert_at(MemPointer* p, int pos) {
+    assert(false, "append only");
+    return false;
+  }
+
+  virtual bool remove_at(int pos) {
+    assert(false, "not supported");
+    return false;
+  }
+
+  MemPointer* at(int index) const {
+    assert(index >= 0 && index < length(),
+      "parameter check");
+    return ((E*)&_data[index]);
+  }
+
+  void sort(FN_SORT fn) {
+    qsort((void*)_data, _size, sizeof(E), fn);
+  }
+
+  bool shrink() {
+    return false;
+  }
+};
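+
+// A creation sketch (array size illustrative): instances must come from the
+// nothrow operator new defined above, so an allocation failure surfaces as
+// NULL instead of recursing into the tracker:
+//
+//   MemPointerArray* arr = new (std::nothrow)
+//       FixedSizeMemPointerArray<SeqMemPointerRecord, 512>();
+//   if (arr == NULL) { /* treat as out of memory */ }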
+
+
+// This iterator requires a pre-sorted MemPointerArray, sorted by:
+//  1. address
+//  2. allocation type
+//  3. sequence number
+// During the array walking, iterator collapses pointers with the same
+// address and allocation type, and only returns the one with highest
+// sequence number.
+//
+// This is a read-only iterator; update methods assert.
+class SequencedRecordIterator : public MemPointerArrayIterator {
+ private:
+   MemPointerArrayIteratorImpl _itr;
+   MemPointer*                 _cur;
+
+ public:
+  SequencedRecordIterator(const MemPointerArray* arr):
+    _itr(const_cast<MemPointerArray*>(arr)) {
+    _cur = next_record();
+  }
+
+  SequencedRecordIterator(const SequencedRecordIterator& itr):
+    _itr(itr._itr) {
+    _cur = next_record();
+  }
+
+  // return the pointer at current position
+  virtual MemPointer* current() const {
+    return _cur;
+  }
+
+  // return the next pointer and advance current position
+  virtual MemPointer* next() {
+    _cur = next_record();
+    return _cur;
+  }
+
+  // return the next pointer without advancing current position
+  virtual MemPointer* peek_next() const {
+    assert(false, "not implemented");
+    return NULL;
+  }
+  // return the previous pointer without changing current position
+  virtual MemPointer* peek_prev() const {
+    assert(false, "not implemented");
+    return NULL;
+  }
+
+  // remove the pointer at current position
+  virtual void remove() {
+    assert(false, "read-only iterator");
+  }
+  // insert the pointer at current position
+  virtual bool insert(MemPointer* ptr) {
+    assert(false, "read-only iterator");
+    return false;
+  }
+
+  virtual bool insert_after(MemPointer* ptr) {
+    assert(false, "read-only iterator");
+    return false;
+  }
+ private:
+  // collapse the 'same kind' of records, and return this 'kind' of
+  // record with highest sequence number
+  MemPointer* next_record();
+
+  // Test if the two records are the same kind: the same memory block and allocation
+  // type.
+  inline bool same_kind(const MemPointerRecord* p1, const MemPointerRecord* p2) const {
+    return (p1->addr() == p2->addr() &&
+      (p1->flags() & MemPointerRecord::tag_masks) ==
+      (p2->flags() & MemPointerRecord::tag_masks));
+  }
+};
+
+
+
+#define DEFAULT_RECORDER_PTR_ARRAY_SIZE 512
+
+class MemRecorder : public CHeapObj<mtNMT|otNMTRecorder> {
+  friend class MemSnapshot;
+  friend class MemTracker;
+  friend class MemTrackWorker;
+
+ protected:
+  // the array that holds memory records
+  MemPointerArray*         _pointer_records;
+
+ private:
+  // used for linked list
+  MemRecorder*             _next;
+  // an active recorder only records data for a single generation
+  debug_only(unsigned long _generation;)
+
+ protected:
+  _NOINLINE_ MemRecorder();
+  ~MemRecorder();
+
+  // record a memory operation
+  bool record(address addr, MEMFLAGS flags, size_t size, address caller_pc = 0);
+
+  // linked list support
+  inline void set_next(MemRecorder* rec) {
+    _next = rec;
+  }
+
+  inline MemRecorder* next() const {
+    return _next;
+  }
+
+  // if the recorder is full
+  inline bool is_full() const {
+    assert(_pointer_records != NULL, "just check");
+    return _pointer_records->is_full();
+  }
+
+  // if the recorder ran out of memory when initializing its
+  // internal data
+  inline bool out_of_memory() const {
+    return (_pointer_records == NULL ||
+      _pointer_records->out_of_memory());
+  }
+
+  inline void clear() {
+    assert(_pointer_records != NULL, "Just check");
+    _pointer_records->clear();
+  }
+
+  SequencedRecordIterator pointer_itr();
+
+ protected:
+  // number of MemRecorder instances
+  static volatile jint _instance_count;
+
+ private:
+  // sorting function; sorts records in the following order:
+  // 1. memory address
+  // 2. allocation type
+  // 3. sequence number
+  static int sort_record_fn(const void* e1, const void* e2);
+
+  debug_only(void check_dup_seq(jint seq) const;)
+  debug_only(void set_generation();)
+};
+
+#endif // SHARE_VM_SERVICES_MEM_RECORDER_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memReporter.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,560 @@
+/*
+ * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#include "precompiled.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "runtime/os.hpp"
+#include "services/memReporter.hpp"
+#include "services/memPtrArray.hpp"
+#include "services/memTracker.hpp"
+
+const char* BaselineOutputer::memory_unit(size_t scale) {
+  switch(scale) {
+    case K: return "KB";
+    case M: return "MB";
+    case G: return "GB";
+  }
+  ShouldNotReachHere();
+  return NULL;
+}
+
+
+void BaselineReporter::report_baseline(const MemBaseline& baseline, bool summary_only) {
+  assert(MemTracker::is_on(), "Native memory tracking is off");
+  _outputer.start(scale());
+  _outputer.total_usage(
+    amount_in_current_scale(baseline.total_malloc_amount() + baseline.total_reserved_amount()),
+    amount_in_current_scale(baseline.total_malloc_amount() + baseline.total_committed_amount()));
+
+  _outputer.num_of_classes(baseline.number_of_classes());
+  _outputer.num_of_threads(baseline.number_of_threads());
+
+  report_summaries(baseline);
+  if (!summary_only && MemTracker::track_callsite()) {
+    report_callsites(baseline);
+  }
+  _outputer.done();
+}
+
+void BaselineReporter::report_summaries(const MemBaseline& baseline) {
+  _outputer.start_category_summary();
+  MEMFLAGS type;
+
+  for (int index = 0; index < NUMBER_OF_MEMORY_TYPE; index ++) {
+    type = MemBaseline::MemType2NameMap[index]._flag;
+    _outputer.category_summary(type,
+      amount_in_current_scale(baseline.reserved_amount(type)),
+      amount_in_current_scale(baseline.committed_amount(type)),
+      amount_in_current_scale(baseline.malloc_amount(type)),
+      baseline.malloc_count(type),
+      amount_in_current_scale(baseline.arena_amount(type)),
+      baseline.arena_count(type));
+  }
+
+  _outputer.done_category_summary();
+}
+
+void BaselineReporter::report_callsites(const MemBaseline& baseline) {
+  _outputer.start_callsite();
+  MemBaseline* pBL = const_cast<MemBaseline*>(&baseline);
+
+  pBL->_malloc_cs->sort((FN_SORT)MemBaseline::bl_malloc_sort_by_size);
+  pBL->_vm_cs->sort((FN_SORT)MemBaseline::bl_vm_sort_by_size);
+
+  // walk malloc callsites
+  MemPointerArrayIteratorImpl malloc_itr(pBL->_malloc_cs);
+  MallocCallsitePointer*      malloc_callsite =
+                  (MallocCallsitePointer*)malloc_itr.current();
+  while (malloc_callsite != NULL) {
+    _outputer.malloc_callsite(malloc_callsite->addr(),
+        amount_in_current_scale(malloc_callsite->amount()), malloc_callsite->count());
+    malloc_callsite = (MallocCallsitePointer*)malloc_itr.next();
+  }
+
+  // walk virtual memory callsites
+  MemPointerArrayIteratorImpl vm_itr(pBL->_vm_cs);
+  VMCallsitePointer*          vm_callsite = (VMCallsitePointer*)vm_itr.current();
+  while (vm_callsite != NULL) {
+    _outputer.virtual_memory_callsite(vm_callsite->addr(),
+      amount_in_current_scale(vm_callsite->reserved_amount()),
+      amount_in_current_scale(vm_callsite->committed_amount()));
+    vm_callsite = (VMCallsitePointer*)vm_itr.next();
+  }
+  pBL->_malloc_cs->sort((FN_SORT)MemBaseline::bl_malloc_sort_by_pc);
+  pBL->_vm_cs->sort((FN_SORT)MemBaseline::bl_vm_sort_by_pc);
+  _outputer.done_callsite();
+}
+
+void BaselineReporter::diff_baselines(const MemBaseline& cur, const MemBaseline& prev,
+  bool summary_only) {
+  assert(MemTracker::is_on(), "Native memory tracking is off");
+  _outputer.start(scale());
+  size_t total_reserved = cur.total_malloc_amount() + cur.total_reserved_amount();
+  size_t total_committed = cur.total_malloc_amount() + cur.total_committed_amount();
+
+  _outputer.diff_total_usage(
+    amount_in_current_scale(total_reserved), amount_in_current_scale(total_committed),
+    diff_in_current_scale(total_reserved,  (prev.total_malloc_amount() + prev.total_reserved_amount())),
+    diff_in_current_scale(total_committed, (prev.total_committed_amount() + prev.total_malloc_amount())));
+
+  _outputer.diff_num_of_classes(cur.number_of_classes(),
+       diff(cur.number_of_classes(), prev.number_of_classes()));
+  _outputer.diff_num_of_threads(cur.number_of_threads(),
+       diff(cur.number_of_threads(), prev.number_of_threads()));
+
+  diff_summaries(cur, prev);
+  if (!summary_only && MemTracker::track_callsite()) {
+    diff_callsites(cur, prev);
+  }
+  _outputer.done();
+}
+
+void BaselineReporter::diff_summaries(const MemBaseline& cur, const MemBaseline& prev) {
+  _outputer.start_category_summary();
+  MEMFLAGS type;
+
+  for (int index = 0; index < NUMBER_OF_MEMORY_TYPE; index ++) {
+    type = MemBaseline::MemType2NameMap[index]._flag;
+    _outputer.diff_category_summary(type,
+      amount_in_current_scale(cur.reserved_amount(type)),
+      amount_in_current_scale(cur.committed_amount(type)),
+      amount_in_current_scale(cur.malloc_amount(type)),
+      cur.malloc_count(type),
+      amount_in_current_scale(cur.arena_amount(type)),
+      cur.arena_count(type),
+      diff_in_current_scale(cur.reserved_amount(type), prev.reserved_amount(type)),
+      diff_in_current_scale(cur.committed_amount(type), prev.committed_amount(type)),
+      diff_in_current_scale(cur.malloc_amount(type), prev.malloc_amount(type)),
+      diff(cur.malloc_count(type), prev.malloc_count(type)),
+      diff_in_current_scale(cur.arena_amount(type), prev.arena_amount(type)),
+      diff(cur.arena_count(type), prev.arena_count(type)));
+  }
+
+  _outputer.done_category_summary();
+}
+
+void BaselineReporter::diff_callsites(const MemBaseline& cur, const MemBaseline& prev) {
+  _outputer.start_callsite();
+  MemBaseline* pBL_cur = const_cast<MemBaseline*>(&cur);
+  MemBaseline* pBL_prev = const_cast<MemBaseline*>(&prev);
+
+  // walk malloc callsites
+  MemPointerArrayIteratorImpl cur_malloc_itr(pBL_cur->_malloc_cs);
+  MemPointerArrayIteratorImpl prev_malloc_itr(pBL_prev->_malloc_cs);
+
+  MallocCallsitePointer*      cur_malloc_callsite =
+                  (MallocCallsitePointer*)cur_malloc_itr.current();
+  MallocCallsitePointer*      prev_malloc_callsite =
+                  (MallocCallsitePointer*)prev_malloc_itr.current();
+
+  while (cur_malloc_callsite != NULL || prev_malloc_callsite != NULL) {
+    if (prev_malloc_callsite == NULL ||
+        (cur_malloc_callsite != NULL &&
+         cur_malloc_callsite->addr() < prev_malloc_callsite->addr())) {
+      // callsite exists only in the current baseline
+      _outputer.diff_malloc_callsite(cur_malloc_callsite->addr(),
+        amount_in_current_scale(cur_malloc_callsite->amount()),
+        cur_malloc_callsite->count(),
+        diff_in_current_scale(cur_malloc_callsite->amount(), 0),
+        diff(cur_malloc_callsite->count(), 0));
+      cur_malloc_callsite = (MallocCallsitePointer*)cur_malloc_itr.next();
+    } else if (cur_malloc_callsite == NULL ||
+               cur_malloc_callsite->addr() > prev_malloc_callsite->addr()) {
+      // callsite exists only in the previous baseline
+      _outputer.diff_malloc_callsite(prev_malloc_callsite->addr(),
+        amount_in_current_scale(prev_malloc_callsite->amount()),
+        prev_malloc_callsite->count(),
+        diff_in_current_scale(0, prev_malloc_callsite->amount()),
+        diff(0, prev_malloc_callsite->count()));
+      prev_malloc_callsite = (MallocCallsitePointer*)prev_malloc_itr.next();
+    } else {  // the same callsite
+      _outputer.diff_malloc_callsite(cur_malloc_callsite->addr(),
+        amount_in_current_scale(cur_malloc_callsite->amount()),
+        cur_malloc_callsite->count(),
+        diff_in_current_scale(cur_malloc_callsite->amount(), prev_malloc_callsite->amount()),
+        diff(cur_malloc_callsite->count(), prev_malloc_callsite->count()));
+      cur_malloc_callsite = (MallocCallsitePointer*)cur_malloc_itr.next();
+      prev_malloc_callsite = (MallocCallsitePointer*)prev_malloc_itr.next();
+    }
+  }
+
+  // walk virtual memory callsites
+  MemPointerArrayIteratorImpl cur_vm_itr(pBL_cur->_vm_cs);
+  MemPointerArrayIteratorImpl prev_vm_itr(pBL_prev->_vm_cs);
+  VMCallsitePointer*          cur_vm_callsite = (VMCallsitePointer*)cur_vm_itr.current();
+  VMCallsitePointer*          prev_vm_callsite = (VMCallsitePointer*)prev_vm_itr.current();
+  while (cur_vm_callsite != NULL || prev_vm_callsite != NULL) {
+    if (prev_vm_callsite == NULL ||
+        (cur_vm_callsite != NULL && cur_vm_callsite->addr() < prev_vm_callsite->addr())) {
+      _outputer.diff_virtual_memory_callsite(cur_vm_callsite->addr(),
+        amount_in_current_scale(cur_vm_callsite->reserved_amount()),
+        amount_in_current_scale(cur_vm_callsite->committed_amount()),
+        diff_in_current_scale(cur_vm_callsite->reserved_amount(), 0),
+        diff_in_current_scale(cur_vm_callsite->committed_amount(), 0));
+      cur_vm_callsite = (VMCallsitePointer*)cur_vm_itr.next();
+    } else if (cur_vm_callsite == NULL || cur_vm_callsite->addr() > prev_vm_callsite->addr()) {
+      _outputer.diff_virtual_memory_callsite(prev_vm_callsite->addr(),
+        amount_in_current_scale(prev_vm_callsite->reserved_amount()),
+        amount_in_current_scale(prev_vm_callsite->committed_amount()),
+        diff_in_current_scale(0, prev_vm_callsite->reserved_amount()),
+        diff_in_current_scale(0, prev_vm_callsite->committed_amount()));
+      prev_vm_callsite = (VMCallsitePointer*)prev_vm_itr.next();
+    } else { // the same callsite
+      _outputer.diff_virtual_memory_callsite(cur_vm_callsite->addr(),
+        amount_in_current_scale(cur_vm_callsite->reserved_amount()),
+        amount_in_current_scale(cur_vm_callsite->committed_amount()),
+        diff_in_current_scale(cur_vm_callsite->reserved_amount(), prev_vm_callsite->reserved_amount()),
+        diff_in_current_scale(cur_vm_callsite->committed_amount(), prev_vm_callsite->committed_amount()));
+      cur_vm_callsite  = (VMCallsitePointer*)cur_vm_itr.next();
+      prev_vm_callsite = (VMCallsitePointer*)prev_vm_itr.next();
+    }
+  }
+
+  _outputer.done_callsite();
+}
+
+size_t BaselineReporter::amount_in_current_scale(size_t amt) const {
+  return (size_t)(((float)amt/(float)_scale) + 0.5);
+}
+
+int BaselineReporter::diff_in_current_scale(size_t value1, size_t value2) const {
+  float diff = ((float)value1 - (float)value2) / ((float)_scale);
+  // round half away from zero so negative diffs are not biased upward
+  return (int)(diff >= 0 ? (diff + 0.5) : (diff - 0.5));
+}
+
+int BaselineReporter::diff(size_t value1, size_t value2) const {
+  return ((int)value1 - (int)value2);
+}
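+
+// Scaling sketch (hypothetical values): with _scale == K (1024),
+// amount_in_current_scale(1536) rounds 1.5KB up to 2, and
+// diff_in_current_scale(1024, 3072) yields -2 for a 2KB drop.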
+
+void BaselineTTYOutputer::start(size_t scale, bool report_diff) {
+  _scale = scale;
+  _output->print_cr(" ");
+  _output->print_cr("Native Memory Tracking:");
+  _output->print_cr(" ");
+}
+
+void BaselineTTYOutputer::done() {
+
+}
+
+void BaselineTTYOutputer::total_usage(size_t total_reserved, size_t total_committed) {
+  const char* unit = memory_unit(_scale);
+  _output->print_cr("Total:  reserved=%d%s,  committed=%d%s",
+    total_reserved, unit, total_committed, unit);
+}
+
+void BaselineTTYOutputer::start_category_summary() {
+  _output->print_cr(" ");
+}
+
+/**
+ * report a summary for one memory type
+ */
+void BaselineTTYOutputer::category_summary(MEMFLAGS type,
+  size_t reserved_amt, size_t committed_amt, size_t malloc_amt,
+  size_t malloc_count, size_t arena_amt, size_t arena_count) {
+
+  // we report mtThreadStack under mtThread category
+  if (type == mtThreadStack) {
+    assert(malloc_amt == 0 && malloc_count == 0 && arena_amt == 0,
+      "Just check");
+    _thread_stack_reserved = reserved_amt;
+    _thread_stack_committed = committed_amt;
+  } else {
+    const char* unit = memory_unit(_scale);
+    size_t total_reserved = (reserved_amt + malloc_amt + arena_amt);
+    size_t total_committed = (committed_amt + malloc_amt + arena_amt);
+    if (type == mtThread) {
+      total_reserved += _thread_stack_reserved;
+      total_committed += _thread_stack_committed;
+    }
+
+    if (total_reserved > 0) {
+      _output->print_cr("-%26s (reserved=%d%s, committed=%d%s)",
+        MemBaseline::type2name(type), total_reserved, unit,
+        total_committed, unit);
+
+      if (type == mtClass) {
+        _output->print_cr("%27s (classes #%d)", " ", _num_of_classes);
+      } else if (type == mtThread) {
+        _output->print_cr("%27s (thread #%d)", " ", _num_of_threads);
+        _output->print_cr("%27s (stack: reserved=%d%s, committed=%d%s)", " ",
+          _thread_stack_reserved, unit, _thread_stack_committed, unit);
+      }
+
+      if (malloc_amt > 0) {
+        if (type != mtChunk) {
+          _output->print_cr("%27s (malloc=%d%s, #%d)", " ", malloc_amt, unit,
+            malloc_count);
+        } else {
+          _output->print_cr("%27s (malloc=%d%s)", " ", malloc_amt, unit);
+        }
+      }
+
+      if (reserved_amt > 0) {
+        _output->print_cr("%27s (mmap: reserved=%d%s, committed=%d%s)",
+          " ", reserved_amt, unit, committed_amt, unit);
+      }
+
+      if (arena_amt > 0) {
+        _output->print_cr("%27s (arena=%d%s, #%d)", " ", arena_amt, unit, arena_count);
+      }
+
+      _output->print_cr(" ");
+    }
+  }
+}
+
+void BaselineTTYOutputer::done_category_summary() {
+  _output->print_cr(" ");
+}
+
+void BaselineTTYOutputer::start_callsite() {
+  _output->print_cr("Details:");
+  _output->print_cr(" ");
+}
+
+void BaselineTTYOutputer::done_callsite() {
+  _output->print_cr(" ");
+}
+
+void BaselineTTYOutputer::malloc_callsite(address pc, size_t malloc_amt,
+  size_t malloc_count) {
+  if (malloc_amt > 0) {
+    const char* unit = memory_unit(_scale);
+    char buf[64];
+    int  offset;
+    if (pc == 0) {
+      _output->print("[BOOTSTRAP]%18s", " ");
+    } else if (os::dll_address_to_function_name(pc, buf, sizeof(buf), &offset)) {
+      _output->print_cr("[" PTR_FORMAT "] %s+0x%x", pc, buf, offset);
+      _output->print("%28s", " ");
+    } else {
+      _output->print("[" PTR_FORMAT "]%18s", pc, " ");
+    }
+
+    _output->print_cr("(malloc=%d%s #%d)", malloc_amt, unit, malloc_count);
+    _output->print_cr(" ");
+  }
+}
+
+void BaselineTTYOutputer::virtual_memory_callsite(address pc, size_t reserved_amt,
+  size_t committed_amt) {
+  if (reserved_amt > 0) {
+    const char* unit = memory_unit(_scale);
+    char buf[64];
+    int  offset;
+    if (pc == 0) {
+      _output->print("[BOOTSTRAP]%18s", " ");
+    } else if (os::dll_address_to_function_name(pc, buf, sizeof(buf), &offset)) {
+      _output->print_cr("[" PTR_FORMAT "] %s+0x%x", pc, buf, offset);
+      _output->print("%28s", " ");
+    } else {
+      _output->print("[" PTR_FORMAT "]%18s", " ");
+    }
+
+    _output->print_cr("(mmap: reserved=%d%s, committed=%d%s)",
+      reserved_amt, unit, committed_amt, unit);
+    _output->print_cr(" ");
+  }
+}
+
+void BaselineTTYOutputer::diff_total_usage(size_t total_reserved,
+  size_t total_committed, int reserved_diff, int committed_diff) {
+  const char* unit = memory_unit(_scale);
+  _output->print_cr("Total:  reserved=%d%s  %+d%s, committed=%d%s %+d%s",
+    total_reserved, unit, reserved_diff, unit, total_committed, unit,
+    committed_diff, unit);
+}
+
+void BaselineTTYOutputer::diff_category_summary(MEMFLAGS type,
+  size_t cur_reserved_amt, size_t cur_committed_amt,
+  size_t cur_malloc_amt, size_t cur_malloc_count,
+  size_t cur_arena_amt, size_t cur_arena_count,
+  int reserved_diff, int committed_diff, int malloc_diff,
+  int malloc_count_diff, int arena_diff, int arena_count_diff) {
+
+  if (type == mtThreadStack) {
+    assert(cur_malloc_amt == 0 && cur_malloc_count == 0 &&
+      cur_arena_amt == 0, "Just check");
+    _thread_stack_reserved = cur_reserved_amt;
+    _thread_stack_committed = cur_committed_amt;
+    _thread_stack_reserved_diff = reserved_diff;
+    _thread_stack_committed_diff = committed_diff;
+  } else {
+    const char* unit = memory_unit(_scale);
+    size_t total_reserved = (cur_reserved_amt + cur_malloc_amt + cur_arena_amt);
+    // nothing to report in this category
+    if (total_reserved == 0) {
+      return;
+    }
+    int    diff_reserved = (reserved_diff + malloc_diff + arena_diff);
+
+    // category summary
+    _output->print("-%26s (reserved=%d%s", MemBaseline::type2name(type),
+      total_reserved, unit);
+
+    if (diff_reserved != 0) {
+      _output->print(" %+d%s", diff_reserved, unit);
+    }
+
+    size_t total_committed = cur_committed_amt + cur_malloc_amt + cur_arena_amt;
+    _output->print(", committed=%d%s", total_committed, unit);
+
+    int total_committed_diff = committed_diff + malloc_diff + arena_diff;
+    if (total_committed_diff != 0) {
+      _output->print(" %+d%s", total_committed_diff, unit);
+    }
+
+    _output->print_cr(")");
+
+    // special cases
+    if (type == mtClass) {
+      _output->print("%27s (classes #%d", " ", _num_of_classes);
+      if (_num_of_classes_diff != 0) {
+        _output->print(" %+d", _num_of_classes_diff);
+      }
+      _output->print_cr(")");
+    } else if (type == mtThread) {
+      // thread count
+      _output->print("%27s (thread #%d", " ", _num_of_threads);
+      if (_num_of_threads_diff != 0) {
+        _output->print_cr(" %+d)", _num_of_threads_diff);
+      } else {
+        _output->print_cr(")");
+      }
+      _output->print("%27s (stack: reserved=%d%s", " ", _thread_stack_reserved, unit);
+      if (_thread_stack_reserved_diff != 0) {
+        _output->print(" %+d%s", _thread_stack_reserved_diff, unit);
+      }
+
+      _output->print(", committed=%d%s", _thread_stack_committed, unit);
+      if (_thread_stack_committed_diff != 0) {
+        _output->print(" %+d%s",_thread_stack_committed_diff, unit);
+      }
+
+      _output->print_cr(")");
+    }
+
+    // malloc'd memory
+    if (cur_malloc_amt > 0) {
+      _output->print("%27s (malloc=%d%s", " ", cur_malloc_amt, unit);
+      if (malloc_diff != 0) {
+        _output->print(" %+d%s", malloc_diff, unit);
+      }
+      if (type != mtChunk) {
+        _output->print(", #%d", cur_malloc_count);
+        if (malloc_count_diff != 0) {
+          _output->print(" %+d", malloc_count_diff);
+        }
+      }
+      _output->print_cr(")");
+    }
+
+    // mmap'd memory
+    if (cur_reserved_amt > 0) {
+      _output->print("%27s (mmap: reserved=%d%s", " ", cur_reserved_amt, unit);
+      if (reserved_diff != 0) {
+        _output->print(" %+d%s", reserved_diff, unit);
+      }
+
+      _output->print(", committed=%d%s", cur_committed_amt, unit);
+      if (committed_diff != 0) {
+        _output->print(" %+d%s", committed_diff, unit);
+      }
+      _output->print_cr(")");
+    }
+
+    // arena memory
+    if (cur_arena_amt > 0) {
+      _output->print("%27s (arena=%d%s", " ", cur_arena_amt, unit);
+      if (arena_diff != 0) {
+        _output->print(" %+d%s", arena_diff, unit);
+      }
+      _output->print(", #%d", cur_arena_count);
+      if (arena_count_diff != 0) {
+        _output->print(" %+d", arena_count_diff);
+      }
+      _output->print_cr(")");
+    }
+
+    _output->print_cr(" ");
+  }
+}
+
+void BaselineTTYOutputer::diff_malloc_callsite(address pc,
+    size_t cur_malloc_amt, size_t cur_malloc_count,
+    int malloc_diff, int malloc_count_diff) {
+  if (malloc_diff != 0) {
+    const char* unit = memory_unit(_scale);
+    char buf[64];
+    int  offset;
+    if (pc == 0) {
+      _output->print_cr("[BOOTSTRAP]%18s", " ");
+    } else {
+      if (os::dll_address_to_function_name(pc, buf, sizeof(buf), &offset)) {
+        _output->print_cr("[" PTR_FORMAT "] %s+0x%x", pc, buf, offset);
+        _output->print("%28s", " ");
+      } else {
+        _output->print("[" PTR_FORMAT "]%18s", pc, " ");
+      }
+    }
+
+    _output->print("(malloc=%d%s", cur_malloc_amt, unit);
+    if (malloc_diff != 0) {
+      _output->print(" %+d%s", malloc_diff, unit);
+    }
+    _output->print(", #%d", cur_malloc_count);
+    if (malloc_count_diff != 0) {
+      _output->print(" %+d", malloc_count_diff);
+    }
+    _output->print_cr(")");
+    _output->print_cr(" ");
+  }
+}
+
+void BaselineTTYOutputer::diff_virtual_memory_callsite(address pc,
+    size_t cur_reserved_amt, size_t cur_committed_amt,
+    int reserved_diff, int committed_diff) {
+  if (reserved_diff != 0 || committed_diff != 0) {
+    const char* unit = memory_unit(_scale);
+    char buf[64];
+    int  offset;
+    if (pc == 0) {
+      _output->print_cr("[BOOSTRAP]%18s", " ");
+    } else {
+      if (os::dll_address_to_function_name(pc, buf, sizeof(buf), &offset)) {
+        _output->print_cr("[" PTR_FORMAT "] %s+0x%x", pc, buf, offset);
+        _output->print("%28s", " ");
+      } else {
+        _output->print("[" PTR_FORMAT "]%18s", " ");
+      }
+    }
+
+    _output->print("(mmap: reserved=%d%s", cur_reserved_amt, unit);
+    if (reserved_diff != 0) {
+      _output->print(" %+d%s", reserved_diff, unit);
+    }
+    _output->print(", committed=%d%s", cur_committed_amt, unit);
+    if (committed_diff != 0) {
+      _output->print(" %+d%s", committed_diff, unit);
+    }
+    _output->print_cr(")");
+    _output->print_cr(" ");
+  }
+}
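
For orientation, a hypothetical rendering of the summary-diff output produced by the printers above (the category name and all values are invented; the column alignment follows the %26s/%27s format strings):

    -                    Thread (reserved=10240KB +512KB, committed=10240KB +512KB)
                                (thread #25 +1)
                                (stack: reserved=10200KB +512KB, committed=10200KB +512KB)
                                (malloc=40KB +8KB, #50 +2)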
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memReporter.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_SERVICES_MEM_REPORTER_HPP
+#define SHARE_VM_SERVICES_MEM_REPORTER_HPP
+
+#include "runtime/mutexLocker.hpp"
+#include "services/memBaseline.hpp"
+#include "services/memTracker.hpp"
+#include "utilities/ostream.hpp"
+
+/*
+ * BaselineReporter reports data to this outputer class;
+ * BaselineOutputer is responsible for formatting, storing and
+ * redirecting the data to its final destination.
+ */
+class BaselineOutputer : public StackObj {
+ public:
+  // start reporting memory usage in the specified scale.
+  // if report_diff = true, the reporter reports baseline comparison
+  // information.
+
+  virtual void start(size_t scale, bool report_diff = false) = 0;
+  // Done reporting
+  virtual void done() = 0;
+
+  /* report baseline summary information */
+  virtual void total_usage(size_t total_reserved,
+                           size_t total_committed) = 0;
+  virtual void num_of_classes(size_t classes) = 0;
+  virtual void num_of_threads(size_t threads) = 0;
+
+  virtual void thread_info(size_t stack_reserved_amt, size_t stack_committed_amt) = 0;
+
+  /* report baseline summary comparison */
+  virtual void diff_total_usage(size_t total_reserved,
+                                size_t total_committed,
+                                int reserved_diff,
+                                int committed_diff) = 0;
+  virtual void diff_num_of_classes(size_t classes, int diff) = 0;
+  virtual void diff_num_of_threads(size_t threads, int diff) = 0;
+
+  virtual void diff_thread_info(size_t stack_reserved, size_t stack_committed,
+        int stack_reserved_diff, int stack_committed_diff) = 0;
+
+
+  /*
+ * memory summary by memory type.
+ * for each memory type, the following summaries are reported:
+   *  - reserved amount, committed amount
+   *  - malloc'd amount, malloc count
+   *  - arena amount, arena count
+   */
+
+  // start reporting memory summary by memory type
+  virtual void start_category_summary() = 0;
+
+  virtual void category_summary(MEMFLAGS type, size_t reserved_amt,
+                                size_t committed_amt,
+                                size_t malloc_amt, size_t malloc_count,
+                                size_t arena_amt, size_t arena_count) = 0;
+
+  virtual void diff_category_summary(MEMFLAGS type, size_t cur_reserved_amt,
+                                size_t cur_committed_amt,
+                                size_t cur_malloc_amt, size_t cur_malloc_count,
+                                size_t cur_arena_amt, size_t cur_arena_count,
+                                int reserved_diff, int committed_diff, int malloc_diff,
+                                int malloc_count_diff, int arena_diff,
+                                int arena_count_diff) = 0;
+
+  virtual void done_category_summary() = 0;
+
+  /*
+   *  Report callsite information
+   */
+  virtual void start_callsite() = 0;
+  virtual void malloc_callsite(address pc, size_t malloc_amt, size_t malloc_count) = 0;
+  virtual void virtual_memory_callsite(address pc, size_t reserved_amt, size_t committed_amt) = 0;
+
+  virtual void diff_malloc_callsite(address pc, size_t cur_malloc_amt, size_t cur_malloc_count,
+              int malloc_diff, int malloc_count_diff) = 0;
+  virtual void diff_virtual_memory_callsite(address pc, size_t cur_reserved_amt, size_t cur_committed_amt,
+              int reserved_diff, int committed_diff) = 0;
+
+  virtual void done_callsite() = 0;
+
+  // return the unit string for the current scale: "KB", "MB" or "GB"
+  static const char* memory_unit(size_t scale);
+};
+
+/*
+ * This class reports processed data from a baseline or
+ * the changes between two baselines.
+ */
+class BaselineReporter : public StackObj {
+ private:
+  BaselineOutputer&  _outputer;
+  size_t             _scale;
+
+ public:
+  // construct a reporter that reports memory usage
+  // in specified scale
+  BaselineReporter(BaselineOutputer& outputer, size_t scale = K):
+    _outputer(outputer) {
+    _scale = scale;
+  }
+  virtual void report_baseline(const MemBaseline& baseline, bool summary_only = false);
+  virtual void diff_baselines(const MemBaseline& cur, const MemBaseline& prev,
+                              bool summary_only = false);
+
+  void set_scale(size_t scale);
+  size_t scale() const { return _scale; }
+
+ private:
+  void report_summaries(const MemBaseline& baseline);
+  void report_callsites(const MemBaseline& baseline);
+
+  void diff_summaries(const MemBaseline& cur, const MemBaseline& prev);
+  void diff_callsites(const MemBaseline& cur, const MemBaseline& prev);
+
+  // calculate memory size in the current memory scale
+  size_t amount_in_current_scale(size_t amt) const;
+  // diff two unsigned values in the current memory scale
+  int    diff_in_current_scale(size_t value1, size_t value2) const;
+  // diff two unsigned values
+  int    diff(size_t value1, size_t value2) const;
+};
+
+/*
+ * tty output implementation. Native memory tracking
+ * DCmd uses this outputer.
+ */
+class BaselineTTYOutputer : public BaselineOutputer {
+ private:
+  size_t         _scale;
+
+  size_t         _num_of_classes;
+  size_t         _num_of_threads;
+  size_t         _thread_stack_reserved;
+  size_t         _thread_stack_committed;
+
+  int            _num_of_classes_diff;
+  int            _num_of_threads_diff;
+  int            _thread_stack_reserved_diff;
+  int            _thread_stack_committed_diff;
+
+  outputStream*  _output;
+
+ public:
+  BaselineTTYOutputer(outputStream* st) {
+    _scale = K;
+    _num_of_classes = 0;
+    _num_of_threads = 0;
+    _thread_stack_reserved = 0;
+    _thread_stack_committed = 0;
+    _num_of_classes_diff = 0;
+    _num_of_threads_diff = 0;
+    _thread_stack_reserved_diff = 0;
+    _thread_stack_committed_diff = 0;
+    _output = st;
+  }
+
+  // begin reporting memory usage in the specified scale
+  void start(size_t scale, bool report_diff = false);
+  // done reporting
+  void done();
+
+  // total memory usage
+  void total_usage(size_t total_reserved,
+                   size_t total_committed);
+  // report total loaded classes
+  void num_of_classes(size_t classes) {
+    _num_of_classes = classes;
+  }
+
+  void num_of_threads(size_t threads) {
+    _num_of_threads = threads;
+  }
+
+  void thread_info(size_t stack_reserved_amt, size_t stack_committed_amt) {
+    _thread_stack_reserved = stack_reserved_amt;
+    _thread_stack_committed = stack_committed_amt;
+  }
+
+  void diff_total_usage(size_t total_reserved,
+                        size_t total_committed,
+                        int reserved_diff,
+                        int committed_diff);
+
+  void diff_num_of_classes(size_t classes, int diff) {
+    _num_of_classes = classes;
+    _num_of_classes_diff = diff;
+  }
+
+  void diff_num_of_threads(size_t threads, int diff) {
+    _num_of_threads = threads;
+    _num_of_threads_diff = diff;
+  }
+
+  void diff_thread_info(size_t stack_reserved_amt, size_t stack_committed_amt,
+               int stack_reserved_diff, int stack_committed_diff) {
+    _thread_stack_reserved = stack_reserved_amt;
+    _thread_stack_committed = stack_committed_amt;
+    _thread_stack_reserved_diff = stack_reserved_diff;
+    _thread_stack_committed_diff = stack_committed_diff;
+  }
+
+  /*
+   * Report memory summary categorized by memory type.
+   * For each memory type, the following summaries are reported:
+   *  - reserved amount, committed amount
+   *  - malloc'd amount, malloc count
+   *  - arena amount, arena count
+   */
+  // start reporting memory summary by memory type
+  void start_category_summary();
+  void category_summary(MEMFLAGS type, size_t reserved_amt, size_t committed_amt,
+                               size_t malloc_amt, size_t malloc_count,
+                               size_t arena_amt, size_t arena_count);
+
+  void diff_category_summary(MEMFLAGS type, size_t cur_reserved_amt,
+                          size_t cur_committed_amt,
+                          size_t cur_malloc_amt, size_t cur_malloc_count,
+                          size_t cur_arena_amt, size_t cur_arena_count,
+                          int reserved_diff, int committed_diff, int malloc_diff,
+                          int malloc_count_diff, int arena_diff,
+                          int arena_count_diff);
+
+  void done_category_summary();
+
+  /*
+   *  Report callsite information
+   */
+  void start_callsite();
+  void malloc_callsite(address pc, size_t malloc_amt, size_t malloc_count);
+  void virtual_memory_callsite(address pc, size_t reserved_amt, size_t committed_amt);
+
+  void diff_malloc_callsite(address pc, size_t cur_malloc_amt, size_t cur_malloc_count,
+              int malloc_diff, int malloc_count_diff);
+  void diff_virtual_memory_callsite(address pc, size_t cur_reserved_amt, size_t cur_committed_amt,
+              int reserved_diff, int committed_diff);
+
+  void done_callsite();
+};
+
+
+#endif // SHARE_VM_SERVICES_MEM_REPORTER_HPP
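
A minimal usage sketch of the reporter/outputer pair declared above, assuming a MemBaseline has already been taken elsewhere (the wrapper function and its name are illustrative; only the BaselineTTYOutputer/BaselineReporter calls come from this header):

    #include "services/memBaseline.hpp"
    #include "services/memReporter.hpp"
    #include "utilities/ostream.hpp"

    // Print a summary-only report of 'baseline' to the VM's tty, in KB.
    static void report_baseline_to_tty(const MemBaseline& baseline) {
      BaselineTTYOutputer outputer(tty);       // tty is HotSpot's global outputStream
      BaselineReporter reporter(outputer, K);  // scale = K, i.e. report in KB
      reporter.report_baseline(baseline, true /* summary_only */);
    }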
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memSnapshot.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,463 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "utilities/decoder.hpp"
+#include "services/memBaseline.hpp"
+#include "services/memPtr.hpp"
+#include "services/memPtrArray.hpp"
+#include "services/memSnapshot.hpp"
+#include "services/memTracker.hpp"
+
+
+// staging data groups the records of a VM memory range, so we can consolidate
+// them into one record during the walk
+bool StagingWalker::consolidate_vm_records(VMMemRegionEx* vm_rec) {
+  MemPointerRecord* cur = (MemPointerRecord*)_itr.current();
+  assert(cur != NULL && cur->is_vm_pointer(), "not a virtual memory pointer");
+
+  jint cur_seq;
+  jint next_seq;
+
+  bool trackCallsite = MemTracker::track_callsite();
+
+  if (trackCallsite) {
+    vm_rec->init((MemPointerRecordEx*)cur);
+    cur_seq = ((SeqMemPointerRecordEx*)cur)->seq();
+  } else {
+    vm_rec->init((MemPointerRecord*)cur);
+    cur_seq = ((SeqMemPointerRecord*)cur)->seq();
+  }
+
+  // we can only consolidate when we have an allocation record,
+  // which contains the virtual memory range
+  if (!cur->is_allocation_record()) {
+    _itr.next();
+    return true;
+  }
+
+  // allocation range
+  address base = cur->addr();
+  address end = base + cur->size();
+
+  MemPointerRecord* next = (MemPointerRecord*)_itr.peek_next();
+  // if the memory range is alive
+  bool live_vm_rec = true;
+  while (next != NULL && next->is_vm_pointer()) {
+    if (next->is_allocation_record()) {
+      assert(next->addr() >= base, "sorting order or overlapping");
+      break;
+    }
+
+    if (trackCallsite) {
+      next_seq = ((SeqMemPointerRecordEx*)next)->seq();
+    } else {
+      next_seq = ((SeqMemPointerRecord*)next)->seq();
+    }
+
+    if (next_seq < cur_seq) {
+      _itr.next();
+      next = (MemPointerRecord*)_itr.peek_next();
+      continue;
+    }
+
+    if (next->is_deallocation_record()) {
+      if (next->addr() == base && next->size() == cur->size()) {
+        // the virtual memory range has been released
+        _itr.next();
+        live_vm_rec = false;
+        break;
+      } else if (next->addr() < end) { // partial release
+        vm_rec->partial_release(next->addr(), next->size());
+        _itr.next();
+      } else {
+        break;
+      }
+    } else if (next->is_commit_record()) {
+      if (next->addr() >= base && next->addr() + next->size() <= end) {
+        vm_rec->commit(next->size());
+        _itr.next();
+      } else {
+        assert(next->addr() >= base, "sorting order or overlapping");
+        break;
+      }
+    } else if (next->is_uncommit_record()) {
+      if (next->addr() >= base && next->addr() + next->size() <= end) {
+        vm_rec->uncommit(next->size());
+        _itr.next();
+      } else {
+        assert(next->addr() >= end, "sorting order or overlapping");
+        break;
+      }
+    } else if (next->is_type_tagging_record()) {
+      if (next->addr() >= base && next->addr() < end ) {
+        vm_rec->tag(next->flags());
+        _itr.next();
+      } else {
+        break;
+      }
+    } else {
+      assert(false, "unknown record type");
+    }
+    next = (MemPointerRecord*)_itr.peek_next();
+  }
+  _itr.next();
+  return live_vm_rec;
+}
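+
+// A worked example of the consolidation above (addresses are made up): for the
+// staged sequence reserve [base 0x1000, size 0x4000], commit [0x1000, 0x1000],
+// commit [0x2000, 0x1000], uncommit [0x1000, 0x1000], the walker emits a single
+// region record with reserved size 0x4000 and committed size 0x1000. A release
+// record covering the whole range instead makes this method return false, and
+// StagingWalker::next() drops the dead region.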
+
+MemPointer* StagingWalker::next() {
+  MemPointerRecord* cur_p = (MemPointerRecord*)_itr.current();
+  if (cur_p == NULL) {
+    _end_of_array = true;
+    return NULL;
+  }
+
+  MemPointerRecord* next_p;
+  if (cur_p->is_vm_pointer()) {
+    _is_vm_record = true;
+    if (!consolidate_vm_records(&_vm_record)) {
+      return next();
+    }
+  } else { // malloc-ed pointer
+    _is_vm_record = false;
+    next_p = (MemPointerRecord*)_itr.peek_next();
+    if (next_p != NULL && next_p->addr() == cur_p->addr()) {
+      assert(cur_p->is_allocation_record(), "sorting order");
+      assert(!next_p->is_allocation_record(), "sorting order");
+      _itr.next();
+      if (cur_p->seq() < next_p->seq()) {
+        cur_p = next_p;
+      }
+    }
+    if (MemTracker::track_callsite()) {
+      _malloc_record.init((MemPointerRecordEx*)cur_p);
+    } else {
+      _malloc_record.init((MemPointerRecord*)cur_p);
+    }
+
+    _itr.next();
+  }
+  return current();
+}
+
+MemSnapshot::MemSnapshot() {
+  if (MemTracker::track_callsite()) {
+    _alloc_ptrs = new (std::nothrow) MemPointerArrayImpl<MemPointerRecordEx>();
+    _vm_ptrs = new (std::nothrow)MemPointerArrayImpl<VMMemRegionEx>(64, true);
+    _staging_area = new (std::nothrow)MemPointerArrayImpl<SeqMemPointerRecordEx>();
+  } else {
+    _alloc_ptrs = new (std::nothrow) MemPointerArrayImpl<MemPointerRecord>();
+    _vm_ptrs = new (std::nothrow)MemPointerArrayImpl<VMMemRegion>(64, true);
+    _staging_area = new (std::nothrow)MemPointerArrayImpl<SeqMemPointerRecord>();
+  }
+
+  _lock = new (std::nothrow) Mutex(Monitor::max_nonleaf - 1, "memSnapshotLock");
+  NOT_PRODUCT(_untracked_count = 0;)
+}
+
+MemSnapshot::~MemSnapshot() {
+  assert(MemTracker::shutdown_in_progress(), "native memory tracking still on");
+  {
+    MutexLockerEx locker(_lock);
+    if (_staging_area != NULL) {
+      delete _staging_area;
+      _staging_area = NULL;
+    }
+
+    if (_alloc_ptrs != NULL) {
+      delete _alloc_ptrs;
+      _alloc_ptrs = NULL;
+    }
+
+    if (_vm_ptrs != NULL) {
+      delete _vm_ptrs;
+      _vm_ptrs = NULL;
+    }
+  }
+
+  if (_lock != NULL) {
+    delete _lock;
+    _lock = NULL;
+  }
+}
+
+void MemSnapshot::copy_pointer(MemPointerRecord* dest, const MemPointerRecord* src) {
+  assert(dest != NULL && src != NULL, "Just check");
+  assert(dest->addr() == src->addr(), "Just check");
+
+  MEMFLAGS flags = dest->flags();
+
+  if (MemTracker::track_callsite()) {
+    *(MemPointerRecordEx*)dest = *(MemPointerRecordEx*)src;
+  } else {
+    *dest = *src;
+  }
+}
+
+
+// merge a per-thread memory recorder into the staging area
+bool MemSnapshot::merge(MemRecorder* rec) {
+  assert(rec != NULL && !rec->out_of_memory(), "Just check");
+
+  // out of memory
+  if (_staging_area == NULL || _staging_area->out_of_memory()) {
+    return false;
+  }
+
+  SequencedRecordIterator itr(rec->pointer_itr());
+
+  MutexLockerEx lock(_lock, true);
+  MemPointerIterator staging_itr(_staging_area);
+  MemPointerRecord *p1, *p2;
+  p1 = (MemPointerRecord*) itr.current();
+  while (p1 != NULL) {
+    p2 = (MemPointerRecord*)staging_itr.locate(p1->addr());
+    // we have not seen this memory block, so just add to staging area
+    if (p2 == NULL) {
+      if (!staging_itr.insert(p1)) {
+        return false;
+      }
+    } else if (p1->addr() == p2->addr()) {
+      MemPointerRecord* staging_next = (MemPointerRecord*)staging_itr.peek_next();
+      // a memory block can have many tagging records; find the right one to
+      // replace, or the right position to insert
+      while (staging_next != NULL && staging_next->addr() == p1->addr()) {
+        if ((staging_next->flags() & MemPointerRecord::tag_masks) <=
+          (p1->flags() & MemPointerRecord::tag_masks)) {
+          p2 = (MemPointerRecord*)staging_itr.next();
+          staging_next = (MemPointerRecord*)staging_itr.peek_next();
+        } else {
+          break;
+        }
+      }
+      int df = (p1->flags() & MemPointerRecord::tag_masks) -
+        (p2->flags() & MemPointerRecord::tag_masks);
+      if (df == 0) {
+        assert(p1->seq() > 0, "not sequenced");
+        assert(p2->seq() > 0, "not sequenced");
+        if (p1->seq() > p2->seq()) {
+          copy_pointer(p2, p1);
+        }
+      } else if (df < 0) {
+        if (!staging_itr.insert(p1)) {
+          return false;
+        }
+      } else {
+        if (!staging_itr.insert_after(p1)) {
+          return false;
+        }
+      }
+    } else if (p1->addr() < p2->addr()) {
+      if (!staging_itr.insert(p1)) {
+        return false;
+      }
+    } else {
+      if (!staging_itr.insert_after(p1)) {
+        return false;
+      }
+    }
+    p1 = (MemPointerRecord*)itr.next();
+  }
+  NOT_PRODUCT(check_staging_data();)
+  return true;
+}
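+
+// The loop above is essentially a sorted insert-or-replace keyed on
+// (address, tag bits), with sequence numbers breaking ties. A standalone
+// sketch of the same policy on plain values (illustrative types, not
+// HotSpot code):
+//
+//   #include <vector>
+//   #include <algorithm>
+//
+//   struct Rec { long addr; int tag; int seq; };
+//
+//   void merge_one(std::vector<Rec>& staging, const Rec& in) {
+//     auto it = std::lower_bound(staging.begin(), staging.end(), in,
+//         [](const Rec& a, const Rec& b) {
+//           return a.addr != b.addr ? a.addr < b.addr : a.tag < b.tag;
+//         });
+//     if (it != staging.end() && it->addr == in.addr && it->tag == in.tag) {
+//       if (in.seq > it->seq) *it = in;  // newer record replaces older one
+//     } else {
+//       staging.insert(it, in);          // new key: keep sorted order
+//     }
+//   }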
+
+
+
+// promote staged data to the snapshot
+void MemSnapshot::promote() {
+  assert(_alloc_ptrs != NULL && _staging_area != NULL && _vm_ptrs != NULL,
+    "Just check");
+  MutexLockerEx lock(_lock, true);
+  StagingWalker walker(_staging_area);
+  MemPointerIterator malloc_itr(_alloc_ptrs);
+  VMMemPointerIterator vm_itr(_vm_ptrs);
+  MemPointer* cur = walker.current();
+  while (cur != NULL) {
+    if (walker.is_vm_record()) {
+      VMMemRegion* cur_vm = (VMMemRegion*)cur;
+      VMMemRegion* p = (VMMemRegion*)vm_itr.locate(cur_vm->addr());
+      if (p != NULL && (p->contains(cur_vm) || p->base() == cur_vm->base())) {
+        assert(p->is_reserve_record() ||
+          p->is_commit_record(), "wrong vm record type");
+        // resize existing reserved range
+        if (cur_vm->is_reserve_record() && p->base() == cur_vm->base()) {
+          assert(cur_vm->size() >= p->committed_size(), "incorrect resizing");
+          p->set_reserved_size(cur_vm->size());
+        } else if (cur_vm->is_commit_record()) {
+          p->commit(cur_vm->committed_size());
+        } else if (cur_vm->is_uncommit_record()) {
+          p->uncommit(cur_vm->committed_size());
+          if (!p->is_reserve_record() && p->committed_size() == 0) {
+            vm_itr.remove();
+          }
+        } else if (cur_vm->is_type_tagging_record()) {
+          p->tag(cur_vm->flags());
+        } else if (cur_vm->is_release_record()) {
+          if (cur_vm->base() == p->base() && cur_vm->size() == p->size()) {
+            // release the whole range
+            vm_itr.remove();
+          } else {
+            // partial release
+            p->partial_release(cur_vm->base(), cur_vm->size());
+          }
+        } else {
+          // we do see multiple reserve records on the same vm range
+          assert((cur_vm->is_commit_record() || cur_vm->is_reserve_record()) &&
+             cur_vm->base() == p->base() && cur_vm->size() == p->size(), "bad record");
+          p->tag(cur_vm->flags());
+        }
+      } else {
+        if(cur_vm->is_reserve_record()) {
+          if (p == NULL || p->base() > cur_vm->base()) {
+            vm_itr.insert(cur_vm);
+          } else {
+            vm_itr.insert_after(cur_vm);
+          }
+        } else {
+          // In theory, we should assert unconditionally. However, for native
+          // thread stacks, NMT explicitly releases the thread stack in Thread's
+          // destructor, due to platform dependent behaviors. On some platforms
+          // we see uncommit/release records for native thread stacks, on others
+          // we don't.
+          assert(cur_vm->is_uncommit_record() || cur_vm->is_deallocation_record(),
+            err_msg("Should not reach here, pointer addr = [" INTPTR_FORMAT "], flags = [%x]",
+               cur_vm->addr(), cur_vm->flags()));
+        }
+      }
+    } else {
+      MemPointerRecord* cur_p = (MemPointerRecord*)cur;
+      MemPointerRecord* p = (MemPointerRecord*)malloc_itr.locate(cur->addr());
+      if (p != NULL && cur_p->addr() == p->addr()) {
+        assert(p->is_allocation_record() || p->is_arena_size_record(), "untracked");
+        if (cur_p->is_allocation_record() || cur_p->is_arena_size_record()) {
+          copy_pointer(p, cur_p);
+        } else {   // deallocation record
+          assert(cur_p->is_deallocation_record(), "wrong record type");
+
+          // we are removing an arena record; we also need to remove the 'size'
+          // record behind it
+          if (p->is_arena_record()) {
+            MemPointerRecord* next_p = (MemPointerRecord*)malloc_itr.peek_next();
+            if (next_p->is_arena_size_record()) {
+              assert(next_p->is_size_record_of_arena(p), "arena records don't match");
+              malloc_itr.remove();
+            }
+          }
+          malloc_itr.remove();
+        }
+      } else {
+        if (cur_p->is_arena_size_record()) {
+          MemPointerRecord* prev_p = (MemPointerRecord*)malloc_itr.peek_prev();
+          if (prev_p != NULL &&
+             (!prev_p->is_arena_record() || !cur_p->is_size_record_of_arena(prev_p))) {
+            // arena already deallocated
+            cur_p = NULL;
+          }
+        }
+        if (cur_p != NULL) {
+          if (cur_p->is_allocation_record() || cur_p->is_arena_size_record()) {
+            if (p != NULL && cur_p->addr() > p->addr()) {
+              malloc_itr.insert_after(cur);
+            } else {
+              malloc_itr.insert(cur);
+            }
+          }
+#ifndef PRODUCT
+          else if (!has_allocation_record(cur_p->addr())){
+            // NMT can not track some startup memory, which was allocated before
+            // NMT was enabled
+            _untracked_count ++;
+          }
+#endif
+        }
+      }
+    }
+
+    cur = walker.next();
+  }
+  NOT_PRODUCT(check_malloc_pointers();)
+  _staging_area->shrink();
+  _staging_area->clear();
+}
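+
+// Concretely (addresses made up): if the snapshot holds a malloc record at
+// 0x7f00 and the staging area delivers a deallocation record for 0x7f00, the
+// loop above removes the snapshot record (plus the trailing arena 'size'
+// record, if the block was an arena). If staging instead delivers a newer
+// allocation record for 0x7f00, the existing record is overwritten in place
+// via copy_pointer().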
+
+
+#ifndef PRODUCT
+void MemSnapshot::print_snapshot_stats(outputStream* st) {
+  st->print_cr("Snapshot:");
+  st->print_cr("\tMalloced: %d/%d [%5.2f%%]  %dKB", _alloc_ptrs->length(), _alloc_ptrs->capacity(),
+    (100.0 * (float)_alloc_ptrs->length()) / (float)_alloc_ptrs->capacity(), _alloc_ptrs->instance_size()/K);
+
+  st->print_cr("\tVM: %d/%d [%5.2f%%] %dKB", _vm_ptrs->length(), _vm_ptrs->capacity(),
+    (100.0 * (float)_vm_ptrs->length()) / (float)_vm_ptrs->capacity(), _vm_ptrs->instance_size()/K);
+
+  st->print_cr("\tStaging:     %d/%d [%5.2f%%] %dKB", _staging_area->length(), _staging_area->capacity(),
+    (100.0 * (float)_staging_area->length()) / (float)_staging_area->capacity(), _staging_area->instance_size()/K);
+
+  st->print_cr("\tUntracked allocation: %d", _untracked_count);
+}
+
+void MemSnapshot::check_malloc_pointers() {
+  MemPointerArrayIteratorImpl mItr(_alloc_ptrs);
+  MemPointerRecord* p = (MemPointerRecord*)mItr.current();
+  MemPointerRecord* prev = NULL;
+  while (p != NULL) {
+    if (prev != NULL) {
+      assert(p->addr() >= prev->addr(), "sorting order");
+    }
+    prev = p;
+    p = (MemPointerRecord*)mItr.next();
+  }
+}
+
+bool MemSnapshot::has_allocation_record(address addr) {
+  MemPointerArrayIteratorImpl itr(_staging_area);
+  MemPointerRecord* cur = (MemPointerRecord*)itr.current();
+  while (cur != NULL) {
+    if (cur->addr() == addr && cur->is_allocation_record()) {
+      return true;
+    }
+    cur = (MemPointerRecord*)itr.next();
+  }
+  return false;
+}
+#endif // PRODUCT
+
+#ifdef ASSERT
+void MemSnapshot::check_staging_data() {
+  MemPointerArrayIteratorImpl itr(_staging_area);
+  MemPointerRecord* cur = (MemPointerRecord*)itr.current();
+  MemPointerRecord* next = (MemPointerRecord*)itr.next();
+  while (next != NULL) {
+    assert((next->addr() > cur->addr()) ||
+      ((next->flags() & MemPointerRecord::tag_masks) >
+       (cur->flags() & MemPointerRecord::tag_masks)),
+       "sorting order");
+    cur = next;
+    next = (MemPointerRecord*)itr.next();
+  }
+}
+#endif // ASSERT
+
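The ordering invariant checked by check_staging_data() above, in example form: a staging sequence such as (addr 0x1000, tagA), (addr 0x1000, tagB), (addr 0x2000, tagA) passes when tagA < tagB, since records at equal addresses must appear in ascending tag order; swapping the first two records would fire the assert. (The tag names here are placeholders for the MemPointerRecord::tag_masks bits.)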
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memSnapshot.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_SERVICES_MEM_SNAPSHOT_HPP
+#define SHARE_VM_SERVICES_MEM_SNAPSHOT_HPP
+
+#include "memory/allocation.hpp"
+#include "runtime/mutex.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "services/memBaseline.hpp"
+#include "services/memPtrArray.hpp"
+
+
+// Snapshot pointer array iterator
+
+// The pointer array contains malloc'd pointers
+class MemPointerIterator : public MemPointerArrayIteratorImpl {
+ public:
+  MemPointerIterator(MemPointerArray* arr):
+    MemPointerArrayIteratorImpl(arr) {
+    assert(arr != NULL, "null array");
+  }
+
+#ifdef ASSERT
+  virtual bool is_dup_pointer(const MemPointer* ptr1,
+    const MemPointer* ptr2) const {
+    MemPointerRecord* p1 = (MemPointerRecord*)ptr1;
+    MemPointerRecord* p2 = (MemPointerRecord*)ptr2;
+
+    if (p1->addr() != p2->addr()) return false;
+    if ((p1->flags() & MemPointerRecord::tag_masks) !=
+        (p2->flags() & MemPointerRecord::tag_masks)) {
+      return false;
+    }
+    // we do see multiple commit/uncommit records on the same memory; it is ok
+    return (p1->flags() & MemPointerRecord::tag_masks) == MemPointerRecord::tag_alloc ||
+           (p1->flags() & MemPointerRecord::tag_masks) == MemPointerRecord::tag_release;
+  }
+
+  virtual bool insert(MemPointer* ptr) {
+    if (_pos > 0) {
+      MemPointer* p1 = (MemPointer*)ptr;
+      MemPointer* p2 = (MemPointer*)_array->at(_pos - 1);
+      assert(!is_dup_pointer(p1, p2),
+        err_msg("duplicated pointer, flag = [%x]", (unsigned int)((MemPointerRecord*)p1)->flags()));
+    }
+    if (_pos < _array->length() - 1) {
+      MemPointer* p1 = (MemPointer*)ptr;
+      MemPointer* p2 = (MemPointer*)_array->at(_pos + 1);
+      assert(!is_dup_pointer(p1, p2),
+        err_msg("duplicated pointer, flag = [%x]", (unsigned int)((MemPointerRecord*)p1)->flags()));
+    }
+    return _array->insert_at(ptr, _pos);
+  }
+
+  virtual bool insert_after(MemPointer* ptr) {
+    if (_pos > 0) {
+      MemPointer* p1 = (MemPointer*)ptr;
+      MemPointer* p2 = (MemPointer*)_array->at(_pos - 1);
+      assert(!is_dup_pointer(p1, p2),
+        err_msg("duplicated pointer, flag = [%x]", (unsigned int)((MemPointerRecord*)p1)->flags()));
+    }
+    if (_pos < _array->length() - 1) {
+      MemPointer* p1 = (MemPointer*)ptr;
+      MemPointer* p2 = (MemPointer*)_array->at(_pos + 1);
+
+      assert(!is_dup_pointer(p1, p2),
+        err_msg("duplicated pointer, flag = [%x]", (unsigned int)((MemPointerRecord*)p1)->flags()));
+    }
+    if (_array->insert_at(ptr, _pos + 1)) {
+      _pos ++;
+      return true;
+    }
+    return false;
+  }
+#endif
+
+  virtual MemPointer* locate(address addr) {
+    MemPointer* cur = current();
+    while (cur != NULL && cur->addr() < addr) {
+      cur = next();
+    }
+    return cur;
+  }
+};
+
+class VMMemPointerIterator : public MemPointerIterator {
+ public:
+  VMMemPointerIterator(MemPointerArray* arr):
+      MemPointerIterator(arr) {
+  }
+
+  // locate an existing record that contains the specified address, or
+  // the record before which a record with the specified address should
+  // be inserted
+  virtual MemPointer* locate(address addr) {
+    VMMemRegion* cur = (VMMemRegion*)current();
+    VMMemRegion* next_p;
+
+    while (cur != NULL) {
+      if (cur->base() > addr) {
+        return cur;
+      } else {
+        // find nearest existing range that has base address <= addr
+        next_p = (VMMemRegion*)peek_next();
+        if (next_p != NULL && next_p->base() <= addr) {
+          cur = (VMMemRegion*)next();
+          continue;
+        }
+      }
+
+      if (cur->is_reserve_record() &&
+        cur->base() <= addr &&
+        (cur->base() + cur->size() > addr)) {
+          return cur;
+      } else if (cur->is_commit_record() &&
+        cur->base() <= addr &&
+        (cur->base() + cur->committed_size() > addr)) {
+          return cur;
+      }
+      cur = (VMMemRegion*)next();
+    }
+    return NULL;
+  }
+
+#ifdef ASSERT
+  virtual bool is_dup_pointer(const MemPointer* ptr1,
+    const MemPointer* ptr2) const {
+    VMMemRegion* p1 = (VMMemRegion*)ptr1;
+    VMMemRegion* p2 = (VMMemRegion*)ptr2;
+
+    if (p1->addr() != p2->addr()) return false;
+    if ((p1->flags() & MemPointerRecord::tag_masks) !=
+        (p2->flags() & MemPointerRecord::tag_masks)) {
+      return false;
+    }
+    // we do see multiple commit/uncommit records on the same memory; it is ok
+    return (p1->flags() & MemPointerRecord::tag_masks) == MemPointerRecord::tag_alloc ||
+           (p1->flags() & MemPointerRecord::tag_masks) == MemPointerRecord::tag_release;
+  }
+#endif
+};
+
+class StagingWalker : public MemPointerArrayIterator {
+ private:
+  MemPointerArrayIteratorImpl  _itr;
+  bool                         _is_vm_record;
+  bool                         _end_of_array;
+  VMMemRegionEx                _vm_record;
+  MemPointerRecordEx           _malloc_record;
+
+ public:
+  StagingWalker(MemPointerArray* arr): _itr(arr) {
+    _end_of_array = false;
+    next();
+  }
+
+  // return the pointer at current position
+  MemPointer* current() const {
+    if (_end_of_array) {
+      return NULL;
+    }
+    if (is_vm_record()) {
+      return (MemPointer*)&_vm_record;
+    } else {
+      return (MemPointer*)&_malloc_record;
+    }
+  }
+
+  // return the next pointer and advance current position
+  MemPointer* next();
+
+  // type of 'current' record
+  bool is_vm_record() const {
+    return _is_vm_record;
+  }
+
+  // return the next pointer without advancing the current position
+  MemPointer* peek_next() const {
+    assert(false, "not supported");
+    return NULL;
+  }
+
+  MemPointer* peek_prev() const {
+    assert(false, "not supported");
+    return NULL;
+  }
+  // remove the pointer at current position
+  void remove() {
+    assert(false, "not supported");
+  }
+
+  // insert the pointer at current position
+  bool insert(MemPointer* ptr) {
+    assert(false, "not supported");
+    return false;
+  }
+
+  bool insert_after(MemPointer* ptr) {
+    assert(false, "not supported");
+    return false;
+  }
+
+ private:
+  // consolidate all records referring to this vm region
+  bool consolidate_vm_records(VMMemRegionEx* vm_rec);
+};
+
+class MemBaseline;
+
+class MemSnapshot : public CHeapObj<mtNMT> {
+ private:
+  // the following two arrays contain records of all known live memory blocks
+  // live malloc-ed memory pointers
+  MemPointerArray*      _alloc_ptrs;
+  // live virtual memory pointers
+  MemPointerArray*      _vm_ptrs;
+
+  // stages a generation's data, before
+  // it can be promoted to the snapshot
+  MemPointerArray*      _staging_area;
+
+  // the lock to protect this snapshot
+  Monitor*              _lock;
+
+  NOT_PRODUCT(size_t    _untracked_count;)
+  friend class MemBaseline;
+
+ public:
+  MemSnapshot();
+  virtual ~MemSnapshot();
+
+  // if we are running out of native memory
+  bool out_of_memory() const {
+    return (_alloc_ptrs == NULL || _staging_area == NULL ||
+      _vm_ptrs == NULL || _lock == NULL ||
+      _alloc_ptrs->out_of_memory() ||
+      _staging_area->out_of_memory() ||
+      _vm_ptrs->out_of_memory());
+  }
+
+  // merge a per-thread memory recorder into the staging area
+  bool merge(MemRecorder* rec);
+  // promote staged data to snapshot
+  void promote();
+
+
+  void wait(long timeout) {
+    assert(_lock != NULL, "Just check");
+    MonitorLockerEx locker(_lock);
+    locker.wait(true, timeout);
+  }
+
+  NOT_PRODUCT(void print_snapshot_stats(outputStream* st);)
+  NOT_PRODUCT(void check_staging_data();)
+  NOT_PRODUCT(void check_malloc_pointers();)
+  NOT_PRODUCT(bool has_allocation_record(address addr);)
+
+ private:
+   // copy pointer data from src to dest
+   void copy_pointer(MemPointerRecord* dest, const MemPointerRecord* src);
+};
+
+
+#endif // SHARE_VM_SERVICES_MEM_SNAPSHOT_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memTrackWorker.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/threadCritical.hpp"
+#include "services/memTracker.hpp"
+#include "services/memTrackWorker.hpp"
+#include "utilities/decoder.hpp"
+#include "utilities/vmError.hpp"
+
+MemTrackWorker::MemTrackWorker() {
+  // thread creation uses the cgc thread type for now. We should revisit
+  // this choice, or create a new thread type.
+  _has_error = !os::create_thread(this, os::cgc_thread);
+  set_name("MemTrackWorker", 0);
+
+  // initialize the generation circular buffer
+  if (!has_error()) {
+    _head = _tail = 0;
+    for(int index = 0; index < MAX_GENERATIONS; index ++) {
+      _gen[index] = NULL;
+    }
+  }
+  NOT_PRODUCT(_sync_point_count = 0;)
+  NOT_PRODUCT(_merge_count = 0;)
+  NOT_PRODUCT(_last_gen_in_use = 0;)
+}
+
+MemTrackWorker::~MemTrackWorker() {
+  for (int index = 0; index < MAX_GENERATIONS; index ++) {
+    MemRecorder* rc = _gen[index];
+    if (rc != NULL) {
+      delete rc;
+    }
+  }
+}
+
+void* MemTrackWorker::operator new(size_t size) {
+  assert(false, "use nothrow version");
+  return NULL;
+}
+
+void* MemTrackWorker::operator new(size_t size, const std::nothrow_t& nothrow_constant) {
+  return allocate(size, false, mtNMT);
+}
+
+void MemTrackWorker::start() {
+  os::start_thread(this);
+}
+
+/*
+ * Native memory tracking worker thread loop:
+ *   1. merge one generation of memory recorders into the staging area
+ *   2. promote staging data to the memory snapshot
+ *
+ * This thread can run through safepoints.
+ */
+
+void MemTrackWorker::run() {
+  assert(MemTracker::is_on(), "native memory tracking is off");
+  this->initialize_thread_local_storage();
+  this->record_stack_base_and_size();
+  MemSnapshot* snapshot = MemTracker::get_snapshot();
+  assert(snapshot != NULL, "Worker should not be started");
+  MemRecorder* rec;
+
+  while (!MemTracker::shutdown_in_progress()) {
+    NOT_PRODUCT(_last_gen_in_use = generations_in_use();)
+    {
+      // take a recorder from earliest generation in buffer
+      ThreadCritical tc;
+      rec = _gen[_head];
+      if (rec != NULL) {
+        _gen[_head] = rec->next();
+      }
+      assert(count_recorder(_gen[_head]) <= MemRecorder::_instance_count,
+        "infinite loop after dequeue");
+    }
+    if (rec != NULL) {
+      // merge the recorder into staging area
+      if (!snapshot->merge(rec)) {
+        MemTracker::shutdown(MemTracker::NMT_out_of_memory);
+      } else {
+        NOT_PRODUCT(_merge_count ++;)
+      }
+      MemTracker::release_thread_recorder(rec);
+    } else {
+      // no more recorders to merge; promote staging area
+      // to snapshot
+      if (_head != _tail) {
+        {
+          ThreadCritical tc;
+          if (_gen[_head] != NULL || _head == _tail) {
+            continue;
+          }
+          // done with this generation, increment _head pointer
+          _head = (_head + 1) % MAX_GENERATIONS;
+        }
+        // promote this generation data to snapshot
+        snapshot->promote();
+      } else {
+        snapshot->wait(1000);
+        ThreadCritical tc;
+        // check if more data arrived
+        if (_gen[_head] == NULL) {
+          _gen[_head] = MemTracker::get_pending_recorders();
+        }
+      }
+    }
+  }
+  assert(MemTracker::shutdown_in_progress(), "just check");
+
+  // transits to final shutdown
+  MemTracker::final_shutdown();
+}
+
+// at synchronization point, where 'safepoint visible' Java threads are blocked
+// at a safepoint, and the remaining threads are blocked on the ThreadCritical lock.
+// The caller MemTracker::sync() already takes ThreadCritical before calling this
+// method.
+//
+// The following tasks are performed:
+//   1. add all recorders in the pending queue to the current generation
+//   2. advance the generation
+
+void MemTrackWorker::at_sync_point(MemRecorder* rec) {
+  NOT_PRODUCT(_sync_point_count ++;)
+  assert(count_recorder(rec) <= MemRecorder::_instance_count,
+    "pending queue has infinite loop");
+
+  bool out_of_generation_buffer = false;
+  // check shutdown state inside ThreadCritical
+  if (MemTracker::shutdown_in_progress()) return;
+  // append the recorders to the end of the generation
+  if (rec != NULL) {
+    MemRecorder* cur_head = _gen[_tail];
+    if (cur_head == NULL) {
+      _gen[_tail] = rec;
+    } else {
+      while (cur_head->next() != NULL) {
+        cur_head = cur_head->next();
+      }
+      cur_head->set_next(rec);
+    }
+  }
+  assert(count_recorder(rec) <= MemRecorder::_instance_count,
+    "after add to current generation has infinite loop");
+  // we have collected all recorders for this generation. If there is data,
+  // we need to increment _tail to start a new generation.
+  if (_gen[_tail] != NULL || _head == _tail) {
+    _tail = (_tail + 1) % MAX_GENERATIONS;
+    out_of_generation_buffer = (_tail == _head);
+  }
+
+  if (out_of_generation_buffer) {
+    MemTracker::shutdown(MemTracker::NMT_out_of_generation);
+  }
+}
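+
+// A worked example of the circular generation buffer above, with
+// MAX_GENERATIONS = 512: starting from _head == _tail == 0, each sync point
+// that queued data advances _tail by one; the worker drains _gen[_head] and
+// advances _head. If the worker never catches up, the 512th advance wraps
+// _tail back onto _head, out_of_generation_buffer becomes true, and NMT shuts
+// itself down with NMT_out_of_generation.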
+
+#ifndef PRODUCT
+int MemTrackWorker::count_recorder(const MemRecorder* head) {
+  int count = 0;
+  while(head != NULL) {
+    count ++;
+    head = head->next();
+  }
+  return count;
+}
+
+int MemTrackWorker::count_pending_recorders() const {
+  int count = 0;
+  for (int index = 0; index < MAX_GENERATIONS; index ++) {
+    MemRecorder* head = _gen[index];
+    if (head != NULL) {
+      count += count_recorder(head);
+    }
+  }
+  return count;
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memTrackWorker.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_SERVICES_MEM_TRACK_WORKER_HPP
+#define SHARE_VM_SERVICES_MEM_TRACK_WORKER_HPP
+
+#include "memory/allocation.hpp"
+#include "runtime/thread.hpp"
+#include "services/memRecorder.hpp"
+
+// At most MAX_GENERATIONS generations of data can be tracked.
+#define MAX_GENERATIONS  512
+
+
+class MemTrackWorker : public NamedThread {
+ private:
+  // circular buffer. This buffer contains recorders to be merged into the
+  // global snapshot.
+  // Each slot holds a linked list of memory recorders that contains one
+  // generation of memory data.
+  MemRecorder*  _gen[MAX_GENERATIONS];
+  int           _head, _tail; // head and tail pointers to above circular buffer
+
+  bool          _has_error;
+
+ public:
+  MemTrackWorker();
+  ~MemTrackWorker();
+  _NOINLINE_ void* operator new(size_t size);
+  _NOINLINE_ void* operator new(size_t size, const std::nothrow_t& nothrow_constant);
+
+  void start();
+  void run();
+
+  inline bool has_error() const { return _has_error; }
+
+  // task at synchronization point
+  void at_sync_point(MemRecorder* pending_recorders);
+
+  // for debugging purposes; they are not thread safe.
+  NOT_PRODUCT(static int count_recorder(const MemRecorder* head);)
+  NOT_PRODUCT(int count_pending_recorders() const;)
+
+  NOT_PRODUCT(int _sync_point_count;)
+  NOT_PRODUCT(int _merge_count;)
+  NOT_PRODUCT(int _last_gen_in_use;)
+
+  inline int generations_in_use() const {
+    return (_tail >= _head ? (_tail - _head + 1) : (MAX_GENERATIONS - (_head - _tail) + 1));
+  }
+};
+
+#endif // SHARE_VM_SERVICES_MEM_TRACK_WORKER_HPP
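
To sanity-check the generations_in_use() arithmetic above: with MAX_GENERATIONS = 512, _head = 510 and _tail = 1 give 512 - (510 - 1) + 1 = 4 generations in use (slots 510, 511, 0 and 1), while _head == _tail gives the minimum of 1.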
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memTracker.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,625 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#include "precompiled.hpp"
+
+#include "runtime/atomic.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/safepoint.hpp"
+#include "runtime/threadCritical.hpp"
+#include "services/memPtr.hpp"
+#include "services/memReporter.hpp"
+#include "services/memTracker.hpp"
+#include "utilities/decoder.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+bool NMT_track_callsite = false;
+
+// walk all 'known' threads at NMT sync point, and collect their recorders
+void SyncThreadRecorderClosure::do_thread(Thread* thread) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Safepoint required");
+  if (thread->is_Java_thread()) {
+    JavaThread* javaThread = (JavaThread*)thread;
+    MemRecorder* recorder = javaThread->get_recorder();
+    if (recorder != NULL) {
+      MemTracker::enqueue_pending_recorder(recorder);
+      javaThread->set_recorder(NULL);
+    }
+  }
+  _thread_count ++;
+}
+
+
+MemRecorder*                    MemTracker::_global_recorder = NULL;
+MemSnapshot*                    MemTracker::_snapshot = NULL;
+MemBaseline                     MemTracker::_baseline;
+Mutex*                          MemTracker::_query_lock = NULL;
+volatile MemRecorder*           MemTracker::_merge_pending_queue = NULL;
+volatile MemRecorder*           MemTracker::_pooled_recorders = NULL;
+MemTrackWorker*                 MemTracker::_worker_thread = NULL;
+int                             MemTracker::_sync_point_skip_count = 0;
+MemTracker::NMTLevel            MemTracker::_tracking_level = MemTracker::NMT_off;
+volatile MemTracker::NMTStates  MemTracker::_state = NMT_uninited;
+MemTracker::ShutdownReason      MemTracker::_reason = NMT_shutdown_none;
+int                             MemTracker::_thread_count = 255;
+volatile jint                   MemTracker::_pooled_recorder_count = 0;
+debug_only(intx                 MemTracker::_main_thread_tid = 0;)
+NOT_PRODUCT(volatile jint       MemTracker::_pending_recorder_count = 0;)
+
+void MemTracker::init_tracking_options(const char* option_line) {
+  _tracking_level = NMT_off;
+  if (strncmp(option_line, "=summary", 8) == 0) {
+    _tracking_level = NMT_summary;
+  } else if (strncmp(option_line, "=detail", 8) == 0) {
+    _tracking_level = NMT_detail;
+  }
+}
+
+// first phase of bootstrapping, when VM is still in single-threaded mode.
+void MemTracker::bootstrap_single_thread() {
+  if (_tracking_level > NMT_off) {
+    assert(_state == NMT_uninited, "wrong state");
+
+    // NMT is not supported when UseMallocOnly is on. NMT can NOT
+    // handle the amount of malloc data without significantly impacting
+    // runtime performance when this flag is on.
+    if (UseMallocOnly) {
+      shutdown(NMT_use_malloc_only);
+      return;
+    }
+
+    _query_lock = new (std::nothrow) Mutex(Monitor::max_nonleaf, "NMT_queryLock");
+    if (_query_lock == NULL) {
+      shutdown(NMT_out_of_memory);
+      return;
+    }
+
+    debug_only(_main_thread_tid = os::current_thread_id();)
+    _state = NMT_bootstrapping_single_thread;
+    NMT_track_callsite = (_tracking_level == NMT_detail && can_walk_stack());
+  }
+}
+
+// second phase of bootstrapping, when the VM is about to enter or has already entered multi-threaded mode.
+void MemTracker::bootstrap_multi_thread() {
+  if (_tracking_level > NMT_off && _state == NMT_bootstrapping_single_thread) {
+    // create nmt lock for multi-thread execution
+    assert(_main_thread_tid == os::current_thread_id(), "wrong thread");
+    _state = NMT_bootstrapping_multi_thread;
+    NMT_track_callsite = (_tracking_level == NMT_detail && can_walk_stack());
+  }
+}
+
+// fully start nmt
+void MemTracker::start() {
+  // Native memory tracking was turned off via command line option
+  if (_tracking_level == NMT_off || shutdown_in_progress()) return;
+
+  assert(_main_thread_tid == os::current_thread_id(), "wrong thread");
+  assert(_state == NMT_bootstrapping_multi_thread, "wrong state");
+
+  _snapshot = new (std::nothrow)MemSnapshot();
+  if (_snapshot != NULL && !_snapshot->out_of_memory()) {
+    if (start_worker()) {
+      _state = NMT_started;
+      NMT_track_callsite = (_tracking_level == NMT_detail && can_walk_stack());
+      return;
+    }
+  }
+
+  // failed to start native memory tracking; shut it down
+  shutdown(NMT_initialization);
+}
+
+/**
+ * Shut down native memory tracking.
+ * We can not shut down native memory tracking immediately, so we just
+ * set the shutdown pending flag; every native memory tracking component
+ * should then shut itself down in an orderly fashion.
+ *
+ * The shutdown sequence:
+ *  1. MemTracker::shutdown() sets MemTracker to shutdown pending state
+ *  2. Worker thread calls MemTracker::final_shutdown(), which transitions
+ *     MemTracker to final shutdown state.
+ *  3. At the sync point, MemTracker does final cleanup, then sets the memory
+ *     tracking level to off to complete the shutdown.
+ */
+void MemTracker::shutdown(ShutdownReason reason) {
+  if (_tracking_level == NMT_off) return;
+
+  if (_state <= NMT_bootstrapping_single_thread) {
+    // we are still in single-threaded mode; there is no contention
+    _state = NMT_shutdown_pending;
+    _reason = reason;
+  } else {
+    // we want to know who initiated the shutdown
+    if ((jint)NMT_started == Atomic::cmpxchg((jint)NMT_shutdown_pending,
+                                       (jint*)&_state, (jint)NMT_started)) {
+        _reason = reason;
+    }
+  }
+}
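+
+// The cmpxchg above makes the NMT_started -> NMT_shutdown_pending transition
+// exclusive, so only the first caller records its shutdown reason. The same
+// pattern in a standalone sketch (std::atomic is used purely for illustration;
+// HotSpot uses its own Atomic class):
+//
+//   #include <atomic>
+//
+//   enum State { STARTED, SHUTDOWN_PENDING };
+//   std::atomic<State> g_state(STARTED);
+//   int g_reason = 0;
+//
+//   void request_shutdown(int reason) {
+//     State expected = STARTED;
+//     // only the thread that wins the compare-exchange publishes its reason
+//     if (g_state.compare_exchange_strong(expected, SHUTDOWN_PENDING)) {
+//       g_reason = reason;
+//     }
+//   }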
+
+// final phase of shutdown
+void MemTracker::final_shutdown() {
+  // delete all pending recorders and pooled recorders
+  delete_all_pending_recorders();
+  delete_all_pooled_recorders();
+
+  {
+    // shared baseline and snapshot are the only objects needed to
+    // create query results
+    MutexLockerEx locker(_query_lock, true);
+    // cleanup baseline data and snapshot
+    _baseline.clear();
+    delete _snapshot;
+    _snapshot = NULL;
+  }
+
+  // shut down the shared decoder instance, since so far it is
+  // only used by native memory tracking.
+  Decoder::shutdown();
+
+  MemTrackWorker* worker = NULL;
+  {
+    ThreadCritical tc;
+    // can not delete the worker inside the ThreadCritical section
+    if (_worker_thread != NULL && Thread::current() == _worker_thread) {
+      worker = _worker_thread;
+      _worker_thread = NULL;
+    }
+  }
+  if (worker != NULL) {
+    delete worker;
+  }
+  _state = NMT_final_shutdown;
+}
+
+// delete all pooled recorders
+void MemTracker::delete_all_pooled_recorders() {
+  // free all pooled recorders
+  volatile MemRecorder* cur_head = _pooled_recorders;
+  if (cur_head != NULL) {
+    MemRecorder* null_ptr = NULL;
+    while (cur_head != NULL && (void*)cur_head != Atomic::cmpxchg_ptr((void*)null_ptr,
+      (void*)&_pooled_recorders, (void*)cur_head)) {
+      cur_head = _pooled_recorders;
+    }
+    if (cur_head != NULL) {
+      delete cur_head;
+      _pooled_recorder_count = 0;
+    }
+  }
+}
+
+// delete all recorders in pending queue
+void MemTracker::delete_all_pending_recorders() {
+  // free all pending recorders
+  MemRecorder* pending_head = get_pending_recorders();
+  if (pending_head != NULL) {
+    delete pending_head;
+  }
+}
+
+/*
+ * retrieve the per-thread recorder of the specified thread.
+ * if thread == NULL, it means the global recorder
+ */
+MemRecorder* MemTracker::get_thread_recorder(JavaThread* thread) {
+  if (shutdown_in_progress()) return NULL;
+
+  MemRecorder* rc;
+  if (thread == NULL) {
+    rc = _global_recorder;
+  } else {
+    rc = thread->get_recorder();
+  }
+
+  if (rc != NULL && rc->is_full()) {
+    enqueue_pending_recorder(rc);
+    rc = NULL;
+  }
+
+  if (rc == NULL) {
+    rc = get_new_or_pooled_instance();
+    if (thread == NULL) {
+      _global_recorder = rc;
+    } else {
+      thread->set_recorder(rc);
+    }
+  }
+  return rc;
+}
+
+/*
+ * get a per-thread recorder from the pool, or create a new one if
+ * none is available.
+ */
+MemRecorder* MemTracker::get_new_or_pooled_instance() {
+  MemRecorder* cur_head = const_cast<MemRecorder*>(_pooled_recorders);
+  if (cur_head == NULL) {
+    MemRecorder* rec = new (std::nothrow)MemRecorder();
+    if (rec == NULL || rec->out_of_memory()) {
+      shutdown(NMT_out_of_memory);
+      if (rec != NULL) {
+        delete rec;
+        rec = NULL;
+      }
+    }
+    return rec;
+  } else {
+    MemRecorder* next_head = cur_head->next();
+    if ((void*)cur_head != Atomic::cmpxchg_ptr((void*)next_head, (void*)&_pooled_recorders,
+      (void*)cur_head)) {
+      return get_new_or_pooled_instance();
+    }
+    cur_head->set_next(NULL);
+    Atomic::dec(&_pooled_recorder_count);
+    debug_only(cur_head->set_generation();)
+    return cur_head;
+  }
+}
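+
+// The recorder pool is maintained as a lock-free LIFO list. The pop above can
+// lose a race with a concurrent push or pop; when the head CAS fails, the
+// code re-reads the head and retries via the tail-recursive call. The pop
+// idiom, informally:
+//   snapshot the head; CAS the pool head from 'head' to 'head->next()';
+//   on success we own 'head', on failure retry with a fresh snapshot.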
+
+/*
+ * Retrieve all recorders in the pending queue, emptying the queue.
+ */
+MemRecorder* MemTracker::get_pending_recorders() {
+  MemRecorder* cur_head = const_cast<MemRecorder*>(_merge_pending_queue);
+  MemRecorder* null_ptr = NULL;
+  while ((void*)cur_head != Atomic::cmpxchg_ptr((void*)null_ptr, (void*)&_merge_pending_queue,
+    (void*)cur_head)) {
+    cur_head = const_cast<MemRecorder*>(_merge_pending_queue);
+  }
+  NOT_PRODUCT(Atomic::store(0, &_pending_recorder_count));
+  return cur_head;
+}
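+
+// Draining the pending queue is a single atomic detach: the head is CAS-ed to
+// NULL, taking the whole chain at once. A producer enqueueing concurrently
+// either lands on the chain we detached or on the freshly emptied queue, so
+// no recorder is lost. The NOT_PRODUCT counter reset below is therefore only
+// approximate, which is fine for a debug statistic.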
+
+/*
+ * Release a recorder back to the recorder pool.
+ */
+void MemTracker::release_thread_recorder(MemRecorder* rec) {
+  assert(rec != NULL, "null recorder");
+  // we don't want to pool too many recorders
+  rec->set_next(NULL);
+  if (shutdown_in_progress() || _pooled_recorder_count > _thread_count * 2) {
+    delete rec;
+    return;
+  }
+
+  rec->clear();
+  MemRecorder* cur_head = const_cast<MemRecorder*>(_pooled_recorders);
+  rec->set_next(cur_head);
+  while ((void*)cur_head != Atomic::cmpxchg_ptr((void*)rec, (void*)&_pooled_recorders,
+    (void*)cur_head)) {
+    cur_head = const_cast<MemRecorder*>(_pooled_recorders);
+    rec->set_next(cur_head);
+  }
+  Atomic::inc(&_pooled_recorder_count);
+}
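+
+// Push counterpart of the lock-free pool: set rec->next to the observed head
+// and CAS the head to rec, refreshing the snapshot and retrying on contention.
+// The _pooled_recorder_count check above is deliberately racy; it only keeps
+// the pool roughly bounded (about two recorders per thread), not exact.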
+
+/*
+ * This is the most important method in the whole NMT implementation.
+ *
+ * Create a memory record.
+ * 1. When NMT is in single-threaded bootstrapping mode, no lock is needed
+ *    as the VM is still in single-thread mode.
+ * 2. For all threads other than JavaThreads, ThreadCritical is needed
+ *    to write records to the global recorder.
+ * 3. JavaThreads that are no longer safepoint-visible also need to take
+ *    ThreadCritical, and their records are written to the global
+ *    recorder, since these threads are NOT walked by Threads.do_thread().
+ * 4. JavaThreads that are running in native state have to transition
+ *    to VM state before writing to per-thread recorders.
+ * 5. JavaThreads that are running in VM state do not need any lock, and
+ *    their records are written to per-thread recorders.
+ * 6. A thread that has yet to attach to a VM 'Thread' needs to take
+ *    ThreadCritical to write to the global recorder.
+ *
+ *    Important note:
+ *    NO LOCK should be taken inside the ThreadCritical lock !!!
+ */
+void MemTracker::create_memory_record(address addr, MEMFLAGS flags,
+    size_t size, address pc, Thread* thread) {
+  if (!shutdown_in_progress()) {
+    // single thread: we just write records directly to the global recorder,
+    // without any lock
+    if (_state == NMT_bootstrapping_single_thread) {
+      assert(_main_thread_tid == os::current_thread_id(), "wrong thread");
+      thread = NULL;
+    } else {
+      if (thread == NULL) {
+          // don't use Thread::current(), since it is possible that
+          // the calling thread has yet to attach to VM 'Thread',
+          // which would result in an assertion failure
+          thread = ThreadLocalStorage::thread();
+      }
+    }
+
+    if (thread != NULL) {
+      if (thread->is_Java_thread() && ((JavaThread*)thread)->is_safepoint_visible()) {
+        JavaThread*      java_thread = static_cast<JavaThread*>(thread);
+        JavaThreadState  state = java_thread->thread_state();
+        if (SafepointSynchronize::safepoint_safe(java_thread, state)) {
+          // JavaThreads that are safepoint-safe can run through a safepoint,
+          // so ThreadCritical is needed to ensure no thread at a safepoint creates
+          // new records while the records are being gathered and the sequence number is changing
+          ThreadCritical tc;
+          create_record_in_recorder(addr, flags, size, pc, java_thread);
+        } else {
+          create_record_in_recorder(addr, flags, size, pc, java_thread);
+        }
+      } else {
+        // other threads, such as worker and watcher threads, need to
+        // take ThreadCritical to write to the global recorder
+        ThreadCritical tc;
+        create_record_in_recorder(addr, flags, size, pc, NULL);
+      }
+    } else {
+      if (_state == NMT_bootstrapping_single_thread) {
+        // single thread, no lock needed
+        create_record_in_recorder(addr, flags, size, pc, NULL);
+      } else {
+        // for a thread that has yet to attach to a VM 'Thread', we cannot use
+        // a VM mutex; use the native ThreadCritical instead
+        ThreadCritical tc;
+        create_record_in_recorder(addr, flags, size, pc, NULL);
+      }
+    }
+  }
+}
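+
+// Locking recap for create_memory_record() (informal summary of the rules in
+// the block comment above):
+//   caller                                        lock            recorder
+//   single-threaded bootstrap                     none            global
+//   JavaThread, safepoint-visible, not sp-safe    none            per-thread
+//   JavaThread, safepoint-safe state              ThreadCritical  per-thread
+//   non-Java, invisible, or detached thread       ThreadCritical  global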
+
+// write a record to the proper recorder. No lock may be taken from this
+// method down.
+void MemTracker::create_record_in_recorder(address addr, MEMFLAGS flags,
+    size_t size, address pc, JavaThread* thread) {
+
+    MemRecorder* rc = get_thread_recorder(thread);
+    if (rc != NULL) {
+      rc->record(addr, flags, size, pc);
+    }
+}
+
+/**
+ * Enqueue a recorder onto the pending queue.
+ */
+void MemTracker::enqueue_pending_recorder(MemRecorder* rec) {
+  assert(rec != NULL, "null recorder");
+
+  // we are shutting down, so just delete it
+  if (shutdown_in_progress()) {
+    rec->set_next(NULL);
+    delete rec;
+    return;
+  }
+
+  MemRecorder* cur_head = const_cast<MemRecorder*>(_merge_pending_queue);
+  rec->set_next(cur_head);
+  while ((void*)cur_head != Atomic::cmpxchg_ptr((void*)rec, (void*)&_merge_pending_queue,
+    (void*)cur_head)) {
+    cur_head = const_cast<MemRecorder*>(_merge_pending_queue);
+    rec->set_next(cur_head);
+  }
+  NOT_PRODUCT(Atomic::inc(&_pending_recorder_count);)
+}
+
+/*
+ * This method is called at a global safepoint,
+ * during its synchronization process.
+ *   1. enqueue all JavaThreads' per-thread recorders
+ *   2. enqueue global recorder
+ *   3. retrieve all pending recorders
+ *   4. reset global sequence number generator
+ *   5. call worker's sync
+ */
+#define MAX_SAFEPOINTS_TO_SKIP     128
+#define SAFE_SEQUENCE_THRESHOLD    30
+#define HIGH_GENERATION_THRESHOLD  60
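+
+// Reading the thresholds above: per_seq_in_use approximates the percentage of
+// the jint sequence-number space consumed so far, and per_gen_in_use the
+// percentage of the worker's generation buffer in use. A safepoint skips the
+// sync point only while sequence usage is comfortably low (< 30%) AND the
+// worker is backed up (generation usage >= 60%), and at most 128 consecutive
+// safepoints may be skipped before we sync regardless.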
+
+void MemTracker::sync() {
+  assert(_tracking_level > NMT_off, "NMT is not enabled");
+  assert(SafepointSynchronize::is_at_safepoint(), "Safepoint required");
+
+  // Some GC tests hit a large number of safepoints in a short period of time
+  // without meaningful activity. We should avoid entering the sync point in
+  // these cases, since doing so can potentially exhaust the generation buffer.
+  // The factors that determine whether we should enter the sync point are:
+  // 1. whether we are about to overflow the sequence number
+  // 2. whether we are in danger of overflowing the generation buffer
+  // 3. how many safepoints have already skipped the sync point
+  if (_state == NMT_started) {
+    // the worker thread is not ready; no one can manage the generation
+    // buffer, so skip this safepoint
+    if (_worker_thread == NULL) return;
+
+    if (_sync_point_skip_count < MAX_SAFEPOINTS_TO_SKIP) {
+      int per_seq_in_use = SequenceGenerator::peek() * 100 / max_jint;
+      int per_gen_in_use = _worker_thread->generations_in_use() * 100 / MAX_GENERATIONS;
+      if (per_seq_in_use < SAFE_SEQUENCE_THRESHOLD && per_gen_in_use >= HIGH_GENERATION_THRESHOLD) {
+        _sync_point_skip_count ++;
+        return;
+      }
+    }
+    _sync_point_skip_count = 0;
+    {
+      // This method runs at a safepoint, holding the ThreadCritical lock,
+      // which should guarantee that NMT is fully synced.
+      ThreadCritical tc;
+
+      // walk all JavaThreads to collect recorders
+      SyncThreadRecorderClosure stc;
+      Threads::threads_do(&stc);
+
+      _thread_count = stc.get_thread_count();
+      MemRecorder* pending_recorders = get_pending_recorders();
+
+      if (_global_recorder != NULL) {
+        _global_recorder->set_next(pending_recorders);
+        pending_recorders = _global_recorder;
+        _global_recorder = NULL;
+      }
+      SequenceGenerator::reset();
+      // check _worker_thread under the lock to avoid a race condition
+      if (_worker_thread != NULL) {
+        _worker_thread->at_sync_point(pending_recorders);
+      }
+    }
+  }
+
+  // now it is time to shut the whole thing off
+  if (_state == NMT_final_shutdown) {
+    // walk all JavaThreads to delete all recorders
+    SyncThreadRecorderClosure stc;
+    Threads::threads_do(&stc);
+    // delete global recorder
+    {
+      ThreadCritical tc;
+      if (_global_recorder != NULL) {
+        delete _global_recorder;
+        _global_recorder = NULL;
+      }
+    }
+    MemRecorder* pending_recorders = get_pending_recorders();
+    if (pending_recorders != NULL) {
+      delete pending_recorders;
+    }
+    // retry at a later sync point: NMT shuts down completely only once the
+    // MemRecorder instance count has dropped to zero
+    if (MemRecorder::_instance_count == 0) {
+      _state = NMT_shutdown;
+      _tracking_level = NMT_off;
+    }
+  }
+}
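+
+// Note: sync() plays two roles. In the NMT_started state it is the regular
+// hand-off of collected recorders to the worker thread; in the
+// NMT_final_shutdown state it becomes the teardown path, and _state advances
+// to NMT_shutdown only once every outstanding MemRecorder instance has been
+// destroyed.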
+
+/*
+ * Start worker thread.
+ */
+bool MemTracker::start_worker() {
+  assert(_worker_thread == NULL, "Just Check");
+  _worker_thread = new (std::nothrow) MemTrackWorker();
+  if (_worker_thread == NULL || _worker_thread->has_error()) {
+    shutdown(NMT_initialization);
+    return false;
+  }
+  _worker_thread->start();
+  return true;
+}
+
+/*
+ * We need to collect a JavaThread's per-thread recorder
+ * before it exits.
+ */
+void MemTracker::thread_exiting(JavaThread* thread) {
+  if (is_on()) {
+    MemRecorder* rec = thread->get_recorder();
+    if (rec != NULL) {
+      enqueue_pending_recorder(rec);
+      thread->set_recorder(NULL);
+    }
+  }
+}
+
+// baseline current memory snapshot
+bool MemTracker::baseline() {
+  MutexLockerEx lock(_query_lock, true);
+  MemSnapshot* snapshot = get_snapshot();
+  if (snapshot != NULL) {
+    return _baseline.baseline(*snapshot, false);
+  }
+  return false;
+}
+
+// print memory usage from current snapshot
+bool MemTracker::print_memory_usage(BaselineOutputer& out, size_t unit, bool summary_only) {
+  MemBaseline  baseline;
+  MutexLockerEx lock(_query_lock, true);
+  MemSnapshot* snapshot = get_snapshot();
+  if (snapshot != NULL && baseline.baseline(*snapshot, summary_only)) {
+    BaselineReporter reporter(out, unit);
+    reporter.report_baseline(baseline, summary_only);
+    return true;
+  }
+  return false;
+}
+
+// compare memory usage between current snapshot and baseline
+bool MemTracker::compare_memory_usage(BaselineOutputer& out, size_t unit, bool summary_only) {
+  MutexLockerEx lock(_query_lock, true);
+  if (_baseline.baselined()) {
+    MemBaseline baseline;
+    MemSnapshot* snapshot = get_snapshot();
+    if (snapshot != NULL && baseline.baseline(*snapshot, summary_only)) {
+      BaselineReporter reporter(out, unit);
+      reporter.diff_baselines(baseline, _baseline, summary_only);
+      return true;
+    }
+  }
+  return false;
+}
+
+#ifndef PRODUCT
+void MemTracker::walk_stack(int toSkip, char* buf, int len) {
+  int cur_len = 0;
+  char tmp[1024];
+  address pc;
+
+  while (cur_len < len) {
+    pc = os::get_caller_pc(toSkip + 1);
+    if (pc != NULL && os::dll_address_to_function_name(pc, tmp, sizeof(tmp), NULL)) {
+      jio_snprintf(&buf[cur_len], (len - cur_len), "%s\n", tmp);
+      cur_len = (int)strlen(buf);
+    } else {
+      buf[cur_len] = '\0';
+      break;
+    }
+    toSkip ++;
+  }
+}
+
+void MemTracker::print_tracker_stats(outputStream* st) {
+  st->print_cr("\nMemory Tracker Stats:");
+  st->print_cr("\tMax sequence number = %d", SequenceGenerator::max_seq_num());
+  st->print_cr("\tthead count = %d", _thread_count);
+  st->print_cr("\tArena instance = %d", Arena::_instance_count);
+  st->print_cr("\tpooled recorder count = %d", _pooled_recorder_count);
+  st->print_cr("\tqueued recorder count = %d", _pending_recorder_count);
+  st->print_cr("\tmemory recorder instance count = %d", MemRecorder::_instance_count);
+  if (_worker_thread != NULL) {
+    st->print_cr("\tWorker thread:");
+    st->print_cr("\t\tSync point count = %d", _worker_thread->_sync_point_count);
+    st->print_cr("\t\tpending recorder count = %d", _worker_thread->count_pending_recorders());
+    st->print_cr("\t\tmerge count = %d", _worker_thread->_merge_count);
+  } else {
+    st->print_cr("\tWorker thread is not started");
+  }
+  st->print_cr(" ");
+
+  if (_snapshot != NULL) {
+    _snapshot->print_snapshot_stats(st);
+  } else {
+    st->print_cr("No snapshot");
+  }
+}
+#endif
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/memTracker.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,384 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_SERVICES_MEM_TRACKER_HPP
+#define SHARE_VM_SERVICES_MEM_TRACKER_HPP
+
+#include "memory/allocation.hpp"
+#include "runtime/globals.hpp"
+#include "runtime/mutex.hpp"
+#include "runtime/os.hpp"
+#include "runtime/thread.hpp"
+#include "services/memPtr.hpp"
+#include "services/memRecorder.hpp"
+#include "services/memSnapshot.hpp"
+#include "services/memTrackWorker.hpp"
+
+#ifdef SOLARIS
+#include "thread_solaris.inline.hpp"
+#endif
+
+#ifdef _DEBUG_
+  #define DEBUG_CALLER_PC  os::get_caller_pc(3)
+#else
+  #define DEBUG_CALLER_PC  0
+#endif
+
+// The thread closure walks threads to collect per-thread
+// memory recorders at the NMT sync point
+class SyncThreadRecorderClosure : public ThreadClosure {
+ private:
+  int _thread_count;
+
+ public:
+  SyncThreadRecorderClosure() {
+    _thread_count = 0;
+  }
+
+  void do_thread(Thread* thread);
+  int  get_thread_count() const {
+    return _thread_count;
+  }
+};
+
+class BaselineOutputer;
+class MemSnapshot;
+class MemTrackWorker;
+class Thread;
+/*
+ * MemTracker is the 'gate' class to the native memory tracking runtime.
+ */
+class MemTracker : AllStatic {
+  friend class MemTrackWorker;
+  friend class MemSnapshot;
+  friend class SyncThreadRecorderClosure;
+
+  // NMT state
+  enum NMTStates {
+    NMT_uninited,                        // not yet initialized
+    NMT_bootstrapping_single_thread,     // bootstrapping, VM is in single thread mode
+    NMT_bootstrapping_multi_thread,      // bootstrapping, VM is about to enter multi-thread mode
+    NMT_started,                         // NMT fully started
+    NMT_shutdown_pending,                // shutdown pending
+    NMT_final_shutdown,                  // in final phase of shutdown
+    NMT_shutdown                         // shutdown
+  };
+
+
+  // native memory tracking level
+  enum NMTLevel {
+    NMT_off,              // native memory tracking is off
+    NMT_summary,          // don't track callsite
+    NMT_detail            // track callsite also
+  };
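+
+  // These levels are expected to correspond to the
+  // -XX:NativeMemoryTracking={off|summary|detail} command-line values parsed
+  // by init_tracking_options(); the parsing itself lives in memTracker.cpp
+  // (correspondence assumed from the option names, not shown in this change).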
+
+ public:
+   enum ShutdownReason {
+     NMT_shutdown_none,     // no shutdown requested
+     NMT_shutdown_user,     // user requested shutdown
+     NMT_normal,            // normal shutdown, process exit
+     NMT_out_of_memory,     // shutdown due to out of memory
+     NMT_initialization,    // shutdown due to initialization failure
+     NMT_use_malloc_only,   // cannot combine NMT with the UseMallocOnly flag
+     NMT_error_reporting,   // shutdown by vmError::report_and_die()
+     NMT_out_of_generation, // running out of generation queue
+     NMT_sequence_overflow  // overflow the sequence number
+   };
+
+ public:
+  // initialize NMT tracking level from command line options, called
+  // from VM command line parsing code
+  static void init_tracking_options(const char* option_line);
+
+  // whether NMT is enabled to record memory activities
+  static inline bool is_on() {
+    return (_tracking_level >= NMT_summary &&
+      _state >= NMT_bootstrapping_single_thread);
+  }
+
+  // user-readable reason for shutting down NMT
+  static const char* reason() {
+    switch(_reason) {
+      case NMT_shutdown_none:
+        return "Native memory tracking is not enabled";
+      case NMT_shutdown_user:
+        return "Native memory tracking has been shutdown by user";
+      case NMT_normal:
+        return "Native memory tracking has been shutdown due to process exiting";
+      case NMT_out_of_memory:
+        return "Native memory tracking has been shutdown due to out of native memory";
+      case NMT_initialization:
+        return "Native memory tracking failed to initialize";
+      case NMT_error_reporting:
+        return "Native memory tracking has been shutdown due to error reporting";
+      case NMT_out_of_generation:
+        return "Native memory tracking has been shutdown due to running out of generation buffer";
+      case NMT_sequence_overflow:
+        return "Native memory tracking has been shutdown due to overflow the sequence number";
+      case NMT_use_malloc_only:
+        return "Native memory tracking is not supported when UseMallocOnly is on";
+      default:
+        ShouldNotReachHere();
+        return NULL;
+    }
+  }
+
+  // test whether we can walk the native stack
+  static bool can_walk_stack() {
+  // native stack is not walkable during bootstrapping on sparc
+#if defined(SPARC)
+    return (_state == NMT_started);
+#else
+    return (_state >= NMT_bootstrapping_single_thread && _state  <= NMT_started);
+#endif
+  }
+
+  // whether native memory tracking tracks callsites
+  static inline bool track_callsite() { return _tracking_level == NMT_detail; }
+
+  // shut down the native memory tracking capability. Native memory tracking
+  // can be shut down by the VM when it encounters low-memory scenarios.
+  // The memory tracker should shut itself down gracefully, and preserve the
+  // latest memory statistics for post-mortem diagnosis.
+  static void shutdown(ShutdownReason reason);
+
+  // whether a shutdown has been requested
+  static inline bool shutdown_in_progress() {
+    return (_state >= NMT_shutdown_pending);
+  }
+
+  // bootstrap native memory tracking, so it can start to collect raw data
+  // before the worker thread can start
+
+  // the first phase of bootstrapping, when the VM is still in single-threaded mode
+  static void bootstrap_single_thread();
+  // the second phase of bootstrapping, when the VM is about to enter, or is already in, multi-threaded mode
+  static void bootstrap_multi_thread();
+
+
+  // start() has to be called while the VM is still in single-thread mode, but after
+  // command line option parsing is done.
+  static void start();
+
+  // record a 'malloc' call
+  static inline void record_malloc(address addr, size_t size, MEMFLAGS flags,
+                            address pc = 0, Thread* thread = NULL) {
+    if (NMT_CAN_TRACK(flags)) {
+      create_memory_record(addr, (flags|MemPointerRecord::malloc_tag()), size, pc, thread);
+    }
+  }
+  // record a 'free' call
+  static inline void record_free(address addr, MEMFLAGS flags, Thread* thread = NULL) {
+    if (is_on() && NMT_CAN_TRACK(flags)) {
+      create_memory_record(addr, MemPointerRecord::free_tag(), 0, 0, thread);
+    }
+  }
+  // record a 'realloc' call
+  static inline void record_realloc(address old_addr, address new_addr, size_t size,
+       MEMFLAGS flags, address pc = 0, Thread* thread = NULL) {
+    if (is_on()) {
+      record_free(old_addr, flags, thread);
+      record_malloc(new_addr, size, flags, pc, thread);
+    }
+  }
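+
+  // Illustrative call sequence from a hypothetical allocation wrapper ('sz'
+  // and 'pc' stand for the requested size and the call-site address the
+  // wrapper captures):
+  //   address p = (address)::malloc(sz);
+  //   MemTracker::record_malloc(p, sz, mtInternal, pc);
+  //   ...
+  //   MemTracker::record_free(p, mtInternal);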
+
+  // record arena size
+  static inline void record_arena_size(address addr, size_t size) {
+    // we add a positive offset to the arena address, so the arena size record
+    // sorts after the arena record
+    if (is_on() && !UseMallocOnly) {
+      create_memory_record((addr + sizeof(void*)), MemPointerRecord::arena_size_tag(), size,
+        0, NULL);
+    }
+  }
+
+  // record a virtual memory 'reserve' call
+  static inline void record_virtual_memory_reserve(address addr, size_t size,
+                            address pc = 0, Thread* thread = NULL) {
+    if (is_on()) {
+      assert(size > 0, "reserve szero size");
+      create_memory_record(addr, MemPointerRecord::virtual_memory_reserve_tag(),
+                           size, pc, thread);
+    }
+  }
+
+  // record a virtual memory 'commit' call
+  static inline void record_virtual_memory_commit(address addr, size_t size,
+                            address pc = 0, Thread* thread = NULL) {
+    if (is_on()) {
+      create_memory_record(addr, MemPointerRecord::virtual_memory_commit_tag(),
+                           size, pc, thread);
+    }
+  }
+
+  // record a virtual memory 'uncommit' call
+  static inline void record_virtual_memory_uncommit(address addr, size_t size,
+                            Thread* thread = NULL) {
+    if (is_on()) {
+      create_memory_record(addr, MemPointerRecord::virtual_memory_uncommit_tag(),
+                           size, 0, thread);
+    }
+  }
+
+  // record a virtual memory 'release' call
+  static inline void record_virtual_memory_release(address addr, size_t size,
+                            Thread* thread = NULL) {
+    if (is_on()) {
+      create_memory_record(addr, MemPointerRecord::virtual_memory_release_tag(),
+                           size, 0, thread);
+    }
+  }
+
+  // record memory type on virtual memory base address
+  static inline void record_virtual_memory_type(address base, MEMFLAGS flags,
+                            Thread* thread = NULL) {
+    if (is_on()) {
+      assert(base > 0, "wrong base address");
+      assert((flags & (~mt_masks)) == 0, "memory type only");
+      create_memory_record(base, (flags | MemPointerRecord::virtual_memory_type_tag()),
+                           0, 0, thread);
+    }
+  }
+
+
+  // create memory baseline of current memory snapshot
+  static bool baseline();
+  // is there a memory baseline
+  static bool has_baseline() {
+    return _baseline.baselined();
+  }
+
+  // print memory usage from current snapshot
+  static bool print_memory_usage(BaselineOutputer& out, size_t unit,
+           bool summary_only = true);
+  // compare memory usage between current snapshot and baseline
+  static bool compare_memory_usage(BaselineOutputer& out, size_t unit,
+           bool summary_only = true);
+
+  // sync is called within a global safepoint to synchronize NMT data
+  static void sync();
+
+  // called when a thread is about to exit
+  static void thread_exiting(JavaThread* thread);
+
+  // retrieve global snapshot
+  static MemSnapshot* get_snapshot() {
+    if (shutdown_in_progress()) {
+      return NULL;
+    }
+    return _snapshot;
+  }
+
+  // print tracker stats
+  NOT_PRODUCT(static void print_tracker_stats(outputStream* st);)
+  NOT_PRODUCT(static void walk_stack(int toSkip, char* buf, int len);)
+
+ private:
+  // start native memory tracking worker thread
+  static bool start_worker();
+
+  // called by worker thread to complete shutdown process
+  static void final_shutdown();
+
+ protected:
+  // retrieve the per-thread recorder of the specified thread.
+  // if the recorder is full, it is enqueued onto the overflow
+  // queue, and a new recorder is acquired from the recorder pool or a
+  // new instance is created.
+  // thread == NULL denotes the global recorder
+  static MemRecorder* get_thread_recorder(JavaThread* thread);
+
+  // per-thread recorder pool
+  static void release_thread_recorder(MemRecorder* rec);
+  static void delete_all_pooled_recorders();
+
+  // pending recorder queue. Recorders are queued onto the pending queue
+  // when they overflow or are collected at the NMT sync point.
+  static void enqueue_pending_recorder(MemRecorder* rec);
+  static MemRecorder* get_pending_recorders();
+  static void delete_all_pending_recorders();
+
+ private:
+  // retrieve a pooled memory recorder, or create a new one if none is
+  // available
+  static MemRecorder* get_new_or_pooled_instance();
+  static void create_memory_record(address addr, MEMFLAGS type,
+                   size_t size, address pc, Thread* thread);
+  static void create_record_in_recorder(address addr, MEMFLAGS type,
+                   size_t size, address pc, JavaThread* thread);
+
+ private:
+  // global memory snapshot
+  static MemSnapshot*     _snapshot;
+
+  // a memory baseline of snapshot
+  static MemBaseline      _baseline;
+
+  // query lock
+  static Mutex*           _query_lock;
+
+  // a thread can start to allocate memory before it is attached
+  // to a VM 'Thread'; those memory activities are recorded here.
+  // ThreadCritical is required to guard this global recorder.
+  static MemRecorder*     _global_recorder;
+
+  // main thread id
+  debug_only(static intx   _main_thread_tid;)
+
+  // pending recorders to be merged
+  static volatile MemRecorder*      _merge_pending_queue;
+
+  NOT_PRODUCT(static volatile jint   _pending_recorder_count;)
+
+  // pooled memory recorders
+  static volatile MemRecorder*      _pooled_recorders;
+
+  // memory recorder pool management, which uses the following
+  // counters to determine whether a released memory recorder
+  // should be pooled
+
+  // latest thread count
+  static int               _thread_count;
+  // pooled recorder count
+  static volatile jint     _pooled_recorder_count;
+
+
+  // worker thread to merge pending recorders into snapshot
+  static MemTrackWorker*  _worker_thread;
+
+  // how many safepoints we skipped without entering sync point
+  static int              _sync_point_skip_count;
+
+  // whether the tracker is properly initialized
+  static bool             _is_tracker_ready;
+  // tracking level (off, summary and detail)
+  static enum NMTLevel    _tracking_level;
+
+  // current nmt state
+  static volatile enum NMTStates   _state;
+  // the reason for shutting down nmt
+  static enum ShutdownReason       _reason;
+};
+
+#endif // SHARE_VM_SERVICES_MEM_TRACKER_HPP
--- a/src/share/vm/services/memoryManager.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/memoryManager.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -166,15 +166,15 @@
 
 GCStatInfo::GCStatInfo(int num_pools) {
   // initialize the arrays for memory usage
-  _before_gc_usage_array = (MemoryUsage*) NEW_C_HEAP_ARRAY(MemoryUsage, num_pools);
-  _after_gc_usage_array  = (MemoryUsage*) NEW_C_HEAP_ARRAY(MemoryUsage, num_pools);
+  _before_gc_usage_array = (MemoryUsage*) NEW_C_HEAP_ARRAY(MemoryUsage, num_pools, mtInternal);
+  _after_gc_usage_array  = (MemoryUsage*) NEW_C_HEAP_ARRAY(MemoryUsage, num_pools, mtInternal);
   _usage_array_size = num_pools;
   clear();
 }
 
 GCStatInfo::~GCStatInfo() {
-  FREE_C_HEAP_ARRAY(MemoryUsage*, _before_gc_usage_array);
-  FREE_C_HEAP_ARRAY(MemoryUsage*, _after_gc_usage_array);
+  FREE_C_HEAP_ARRAY(MemoryUsage*, _before_gc_usage_array, mtInternal);
+  FREE_C_HEAP_ARRAY(MemoryUsage*, _after_gc_usage_array, mtInternal);
 }
 
 void GCStatInfo::set_gc_usage(int pool_index, MemoryUsage usage, bool before_gc) {
@@ -214,8 +214,8 @@
 
 void GCMemoryManager::initialize_gc_stat_info() {
   assert(MemoryService::num_memory_pools() > 0, "should have one or more memory pools");
-  _last_gc_stat = new(ResourceObj::C_HEAP) GCStatInfo(MemoryService::num_memory_pools());
-  _current_gc_stat = new(ResourceObj::C_HEAP) GCStatInfo(MemoryService::num_memory_pools());
+  _last_gc_stat = new(ResourceObj::C_HEAP, mtGC) GCStatInfo(MemoryService::num_memory_pools());
+  _current_gc_stat = new(ResourceObj::C_HEAP, mtGC) GCStatInfo(MemoryService::num_memory_pools());
   // tracking concurrent collections we need two objects: one to update, and one to
   // hold the publicly available "last (completed) gc" information.
 }
--- a/src/share/vm/services/memoryManager.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/memoryManager.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -40,7 +40,7 @@
 class GCMemoryManager;
 class OopClosure;
 
-class MemoryManager : public CHeapObj {
+class MemoryManager : public CHeapObj<mtInternal> {
 private:
   enum {
     max_num_pools = 10
--- a/src/share/vm/services/memoryPool.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/memoryPool.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -50,7 +50,7 @@
 class PermGen;
 class ThresholdSupport;
 
-class MemoryPool : public CHeapObj {
+class MemoryPool : public CHeapObj<mtInternal> {
   friend class MemoryManager;
  public:
   enum PoolType {
--- a/src/share/vm/services/memoryService.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/memoryService.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -58,9 +58,9 @@
 #endif
 
 GrowableArray<MemoryPool*>* MemoryService::_pools_list =
-  new (ResourceObj::C_HEAP) GrowableArray<MemoryPool*>(init_pools_list_size, true);
+  new (ResourceObj::C_HEAP, mtInternal) GrowableArray<MemoryPool*>(init_pools_list_size, true);
 GrowableArray<MemoryManager*>* MemoryService::_managers_list =
-  new (ResourceObj::C_HEAP) GrowableArray<MemoryManager*>(init_managers_list_size, true);
+  new (ResourceObj::C_HEAP, mtInternal) GrowableArray<MemoryManager*>(init_managers_list_size, true);
 
 GCMemoryManager* MemoryService::_minor_gc_manager = NULL;
 GCMemoryManager* MemoryService::_major_gc_manager = NULL;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/nmtDCmd.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#include "precompiled.hpp"
+#include "services/nmtDCmd.hpp"
+#include "services/memReporter.hpp"
+#include "services/memTracker.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+NMTDCmd::NMTDCmd(outputStream* output,
+  bool heap): DCmdWithParser(output, heap),
+  _summary("summary", "request runtime to report current memory summary, " \
+           "which includes total reserved and committed memory, along " \
+           "with memory usage summary by each subsytem.",
+           "BOOLEAN", false, "false"),
+  _detail("detail", "request runtime to report memory allocation >= "
+           "1K by each callsite.",
+           "BOOLEAN", false, "false"),
+  _baseline("baseline", "request runtime to baseline current memory usage, " \
+            "so it can be compared against in later time.",
+            "BOOLEAN", false, "false"),
+  _summary_diff("summary.diff", "request runtime to report memory summary " \
+            "comparison against previous baseline.",
+            "BOOLEAN", false, "false"),
+  _detail_diff("detail.diff", "request runtime to report memory detail " \
+            "comparison against previous baseline, which shows the memory " \
+            "allocation activities at different callsites.",
+            "BOOLEAN", false, "false"),
+  _shutdown("shutdown", "request runtime to shutdown itself and free the " \
+            "memory used by runtime.",
+            "BOOLEAN", false, "false"),
+#ifndef PRODUCT
+  _debug("debug", "print tracker statistics. Debug only, not thread safe", \
+            "BOOLEAN", false, "false"),
+#endif
+  _scale("scale", "Memory usage in which scale, KB, MB or GB",
+       "STRING", false, "KB") {
+  _dcmdparser.add_dcmd_option(&_summary);
+  _dcmdparser.add_dcmd_option(&_detail);
+  _dcmdparser.add_dcmd_option(&_baseline);
+  _dcmdparser.add_dcmd_option(&_summary_diff);
+  _dcmdparser.add_dcmd_option(&_detail_diff);
+  _dcmdparser.add_dcmd_option(&_shutdown);
+#ifndef PRODUCT
+  _dcmdparser.add_dcmd_option(&_debug);
+#endif
+  _dcmdparser.add_dcmd_option(&_scale);
+}
+
+void NMTDCmd::execute(TRAPS) {
+  const char* scale_value = _scale.value();
+  size_t scale_unit;
+  if (strcmp(scale_value, "KB") == 0 || strcmp(scale_value, "kb") == 0) {
+    scale_unit = K;
+  } else if (strcmp(scale_value, "MB") == 0 ||
+             strcmp(scale_value, "mb") == 0) {
+    scale_unit = M;
+  } else if (strcmp(scale_value, "GB") == 0 ||
+             strcmp(scale_value, "gb") == 0) {
+    scale_unit = G;
+  } else {
+    output()->print_cr("Incorrect scale value: %s", scale_value);
+    return;
+  }
+
+  int nopt = 0;
+  if (_summary.is_set()) { ++nopt; }
+  if (_detail.is_set()) { ++nopt; }
+  if (_baseline.is_set()) { ++nopt; }
+  if (_summary_diff.is_set()) { ++nopt; }
+  if (_detail_diff.is_set()) { ++nopt; }
+  if (_shutdown.is_set()) { ++nopt; }
+#ifndef PRODUCT
+  if (_debug.is_set()) { ++nopt; }
+#endif
+
+  if(nopt > 1) {
+      output()->print_cr("At most one of the following option can be specified: " \
+        "summary, detail, baseline, summary.diff, detail.diff, shutdown"
+#ifndef PRODUCT
+        " ,debug"
+#endif
+      );
+      return;
+  }
+
+  if (nopt == 0) {
+      _summary.set_value(true);
+  }
+
+#ifndef PRODUCT
+  if (_debug.value()) {
+    output()->print_cr("debug command is NOT thread-safe, may cause crash");
+    MemTracker::print_tracker_stats(output());
+    return;
+  }
+#endif
+
+  // native memory tracking has to be on
+  if (!MemTracker::is_on() || MemTracker::shutdown_in_progress()) {
+    // if it is not on, what's the reason?
+    output()->print_cr(MemTracker::reason());
+    return;
+  }
+
+  if (_summary.value()) {
+    BaselineTTYOutputer outputer(output());
+    MemTracker::print_memory_usage(outputer, scale_unit, true);
+  } else if (_detail.value()) {
+    BaselineTTYOutputer outputer(output());
+    MemTracker::print_memory_usage(outputer, scale_unit, false);
+  } else if (_baseline.value()) {
+    if (MemTracker::baseline()) {
+      output()->print_cr("Successfully baselined.");
+    } else {
+      output()->print_cr("Baseline failed.");
+    }
+  } else if (_summary_diff.value()) {
+    if (MemTracker::has_baseline()) {
+      BaselineTTYOutputer outputer(output());
+      MemTracker::compare_memory_usage(outputer, scale_unit, true);
+    } else {
+      output()->print_cr("No baseline to compare, run 'baseline' command first");
+    }
+  } else if (_detail_diff.value()) {
+    if (MemTracker::has_baseline()) {
+      BaselineTTYOutputer outputer(output());
+      MemTracker::compare_memory_usage(outputer, scale_unit, false);
+    } else {
+      output()->print_cr("No baseline to compare to, run 'baseline' command first");
+    }
+  } else if (_shutdown.value()) {
+    MemTracker::shutdown(MemTracker::NMT_shutdown_user);
+    output()->print_cr("Shutdown is in progress, it will take a few moments to " \
+      "completely shutdown");
+  } else {
+    ShouldNotReachHere();
+    output()->print_cr("Unknown command");
+  }
+}
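+
+// The command registers with the diagnostic command framework under the name
+// VM.native_memory, so it can be driven externally, e.g. (illustrative):
+//   jcmd <pid> VM.native_memory summary scale=MB
+//   jcmd <pid> VM.native_memory baseline
+//   jcmd <pid> VM.native_memory summary.diff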
+
+int NMTDCmd::num_arguments() {
+  ResourceMark rm;
+  NMTDCmd* dcmd = new NMTDCmd(NULL, false);
+  if (dcmd != NULL) {
+    DCmdMark mark(dcmd);
+    return dcmd->_dcmdparser.num_arguments();
+  } else {
+    return 0;
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/nmtDCmd.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_SERVICES_NMT_DCMD_HPP
+#define SHARE_VM_SERVICES_NMT_DCMD_HPP
+
+#include "services/diagnosticArgument.hpp"
+#include "services/diagnosticFramework.hpp"
+
+/**
+ * Native memory tracking DCmd implementation
+ */
+class NMTDCmd: public DCmdWithParser {
+ protected:
+  DCmdArgument<bool>  _summary;
+  DCmdArgument<bool>  _detail;
+  DCmdArgument<bool>  _baseline;
+  DCmdArgument<bool>  _summary_diff;
+  DCmdArgument<bool>  _detail_diff;
+  DCmdArgument<bool>  _shutdown;
+#ifndef PRODUCT
+  DCmdArgument<bool>  _debug;
+#endif
+  DCmdArgument<char*> _scale;
+
+ public:
+  NMTDCmd(outputStream* output, bool heap);
+  static const char* name() { return "VM.native_memory"; }
+  static const char* description() {
+    return "Print native memory usage";
+  }
+  static const char* impact() {
+    return "Medium:";
+  }
+  static int num_arguments();
+  virtual void execute(TRAPS);
+};
+
+#endif // SHARE_VM_SERVICES_NMT_DCMD_HPP
--- a/src/share/vm/services/threadService.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/threadService.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -437,7 +437,7 @@
     GrowableArray<MonitorInfo*>* list = jvf->locked_monitors();
     int length = list->length();
     if (length > 0) {
-      _locked_monitors = new (ResourceObj::C_HEAP) GrowableArray<oop>(length, true);
+      _locked_monitors = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<oop>(length, true);
       for (int i = 0; i < length; i++) {
         MonitorInfo* monitor = list->at(i);
         assert(monitor->owner(), "This monitor must have an owning object");
@@ -491,11 +491,11 @@
 
 ThreadStackTrace::ThreadStackTrace(JavaThread* t, bool with_locked_monitors) {
   _thread = t;
-  _frames = new (ResourceObj::C_HEAP) GrowableArray<StackFrameInfo*>(INITIAL_ARRAY_SIZE, true);
+  _frames = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<StackFrameInfo*>(INITIAL_ARRAY_SIZE, true);
   _depth = 0;
   _with_locked_monitors = with_locked_monitors;
   if (_with_locked_monitors) {
-    _jni_locked_monitors = new (ResourceObj::C_HEAP) GrowableArray<oop>(INITIAL_ARRAY_SIZE, true);
+    _jni_locked_monitors = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<oop>(INITIAL_ARRAY_SIZE, true);
   } else {
     _jni_locked_monitors = NULL;
   }
@@ -689,7 +689,7 @@
 
 ThreadConcurrentLocks::ThreadConcurrentLocks(JavaThread* thread) {
   _thread = thread;
-  _owned_locks = new (ResourceObj::C_HEAP) GrowableArray<instanceOop>(INITIAL_ARRAY_SIZE, true);
+  _owned_locks = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<instanceOop>(INITIAL_ARRAY_SIZE, true);
   _next = NULL;
 }
 
@@ -803,7 +803,7 @@
 
 DeadlockCycle::DeadlockCycle() {
   _is_deadlock = false;
-  _threads = new (ResourceObj::C_HEAP) GrowableArray<JavaThread*>(INITIAL_ARRAY_SIZE, true);
+  _threads = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<JavaThread*>(INITIAL_ARRAY_SIZE, true);
   _next = NULL;
 }
 
--- a/src/share/vm/services/threadService.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/services/threadService.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -116,7 +116,7 @@
 };
 
 // Per-thread Statistics for synchronization
-class ThreadStatistics : public CHeapObj {
+class ThreadStatistics : public CHeapObj<mtInternal> {
 private:
   // The following contention statistics are only updated by
   // the thread owning these statistics when contention occurs.
@@ -186,7 +186,7 @@
 };
 
 // Thread snapshot to represent the thread state and statistics
-class ThreadSnapshot : public CHeapObj {
+class ThreadSnapshot : public CHeapObj<mtInternal> {
 private:
   JavaThread* _thread;
   oop         _threadObj;
@@ -244,7 +244,7 @@
   void        oops_do(OopClosure* f);
 };
 
-class ThreadStackTrace : public CHeapObj {
+class ThreadStackTrace : public CHeapObj<mtInternal> {
  private:
   JavaThread*                     _thread;
   int                             _depth;  // number of stack frames added
@@ -275,7 +275,7 @@
 // StackFrameInfo for keeping methodOop and bci during
 // stack walking for later construction of StackTraceElement[]
 // Java instances
-class StackFrameInfo : public CHeapObj {
+class StackFrameInfo : public CHeapObj<mtInternal> {
  private:
   methodOop           _method;
   int                 _bci;
@@ -299,7 +299,7 @@
   void      print_on(outputStream* st) const;
 };
 
-class ThreadConcurrentLocks : public CHeapObj {
+class ThreadConcurrentLocks : public CHeapObj<mtInternal> {
 private:
   GrowableArray<instanceOop>* _owned_locks;
   ThreadConcurrentLocks*      _next;
@@ -356,7 +356,7 @@
   void                 oops_do(OopClosure* f);
 };
 
-class DeadlockCycle : public CHeapObj {
+class DeadlockCycle : public CHeapObj<mtInternal> {
  private:
   bool _is_deadlock;
   GrowableArray<JavaThread*>* _threads;
--- a/src/share/vm/trace/traceMacros.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/trace/traceMacros.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -43,5 +43,9 @@
 #define TRACE_SET_KLASS_TRACE_ID(x1, x2) do { } while (0)
 #define TRACE_DEFINE_KLASS_METHODS typedef int ___IGNORED_hs_trace_type1
 #define TRACE_DEFINE_KLASS_TRACE_ID typedef int ___IGNORED_hs_trace_type2
+#define TRACE_DEFINE_OFFSET typedef int ___IGNORED_hs_trace_type3
+#define TRACE_ID_OFFSET in_ByteSize(0); ShouldNotReachHere()
+#define TRACE_TEMPLATES(template)
+#define TRACE_INTRINSICS(do_intrinsic, do_class, do_name, do_signature, do_alias)
 
 #endif
--- a/src/share/vm/utilities/accessFlags.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/accessFlags.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -55,9 +55,6 @@
   JVM_ACC_IS_OBSOLETE             = 0x00020000,     // RedefineClasses() has made method obsolete
   JVM_ACC_IS_PREFIXED_NATIVE      = 0x00040000,     // JVMTI has prefixed this native method
 
-  JVM_MH_INVOKE_BITS           // = 0x10001100      // MethodHandle.invoke quasi-native
-                                  = (JVM_ACC_NATIVE | JVM_ACC_SYNTHETIC | JVM_ACC_MONITOR_MATCH),
-
   // klassOop flags
   JVM_ACC_HAS_MIRANDA_METHODS     = 0x10000000,     // True if this class has miranda methods in it's vtable
   JVM_ACC_HAS_VANILLA_CONSTRUCTOR = 0x20000000,     // True if klass has a vanilla default constructor
@@ -80,10 +77,12 @@
   JVM_ACC_FIELD_ACCESS_WATCHED       = 0x00002000,  // field access is watched by JVMTI
   JVM_ACC_FIELD_MODIFICATION_WATCHED = 0x00008000,  // field modification is watched by JVMTI
   JVM_ACC_FIELD_INTERNAL             = 0x00000400,  // internal field, same as JVM_ACC_ABSTRACT
+  JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE = 0x00000800, // field has generic signature
 
   JVM_ACC_FIELD_INTERNAL_FLAGS       = JVM_ACC_FIELD_ACCESS_WATCHED |
                                        JVM_ACC_FIELD_MODIFICATION_WATCHED |
-                                       JVM_ACC_FIELD_INTERNAL,
+                                       JVM_ACC_FIELD_INTERNAL |
+                                       JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE,
 
                                                     // flags accepted by set_field_flags()
   JVM_ACC_FIELD_FLAGS                = JVM_RECOGNIZED_FIELD_MODIFIERS | JVM_ACC_FIELD_INTERNAL_FLAGS
@@ -131,15 +130,6 @@
   bool is_obsolete             () const { return (_flags & JVM_ACC_IS_OBSOLETE            ) != 0; }
   bool is_prefixed_native      () const { return (_flags & JVM_ACC_IS_PREFIXED_NATIVE     ) != 0; }
 
-  // JSR 292:  A method of the form MethodHandle.invoke(A...)R method is
-  // neither bytecoded nor a JNI native, but rather a fast call through
-  // a lightweight method handle object.  Because it is not bytecoded,
-  // it has the native bit set, but the monitor-match bit is also set
-  // to distinguish it from a JNI native (which never has the match bit set).
-  // The synthetic bit is also present, because such a method is never
-  // explicitly defined in Java code.
-  bool is_method_handle_invoke () const { return (_flags & JVM_MH_INVOKE_BITS) == JVM_MH_INVOKE_BITS; }
-
   // klassOop flags
   bool has_miranda_methods     () const { return (_flags & JVM_ACC_HAS_MIRANDA_METHODS    ) != 0; }
   bool has_vanilla_constructor () const { return (_flags & JVM_ACC_HAS_VANILLA_CONSTRUCTOR) != 0; }
@@ -156,6 +146,8 @@
   bool is_field_modification_watched() const
                                         { return (_flags & JVM_ACC_FIELD_MODIFICATION_WATCHED) != 0; }
   bool is_internal() const              { return (_flags & JVM_ACC_FIELD_INTERNAL) != 0; }
+  bool field_has_generic_signature() const
+                                        { return (_flags & JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE) != 0; }
 
   // get .class file flags
   jint get_flags               () const { return (_flags & JVM_ACC_WRITTEN_FLAGS); }
@@ -225,6 +217,10 @@
                                            atomic_clear_bits(JVM_ACC_FIELD_MODIFICATION_WATCHED);
                                          }
                                        }
+  void set_field_has_generic_signature()
+                                       {
+                                         atomic_set_bits(JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE);
+                                       }
 
   // Conversion
   jshort as_short() const              { return (jshort)_flags; }
--- a/src/share/vm/utilities/array.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/array.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -49,7 +49,7 @@
 void ResourceArray::sort(size_t esize, ftype f) {
   if (!is_empty()) qsort(_data, length(), esize, f);
 }
-void CHeapArray::sort(size_t esize, ftype f) {
+template <MEMFLAGS F> void CHeapArray<F>::sort(size_t esize, ftype f) {
   if (!is_empty()) qsort(_data, length(), esize, f);
 }
 
@@ -70,14 +70,14 @@
 }
 
 
-void CHeapArray::expand(size_t esize, int i, int& size) {
+template <MEMFLAGS F> void CHeapArray<F>::expand(size_t esize, int i, int& size) {
   // determine new size
   if (size == 0) size = 4; // prevent endless loop
   while (i >= size) size *= 2;
   // allocate and initialize new data section
-  void* data = NEW_C_HEAP_ARRAY(char*, esize * size);
+  void* data = NEW_C_HEAP_ARRAY(char*, esize * size, F);
   memcpy(data, _data, esize * length());
-  FREE_C_HEAP_ARRAY(char*, _data);
+  FREE_C_HEAP_ARRAY(char*, _data, F);
   _data = data;
 }
 
@@ -91,7 +91,7 @@
   memmove(dst, src, cnt);
 }
 
-void CHeapArray::remove_at(size_t esize, int i) {
+template <MEMFLAGS F> void CHeapArray<F>::remove_at(size_t esize, int i) {
   assert(0 <= i && i < length(), "index out of bounds");
   _length--;
   void* dst = (char*)_data + i*esize;
--- a/src/share/vm/utilities/array.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/array.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -79,7 +79,7 @@
 };
 
 
-class CHeapArray: public CHeapObj {
+template <MEMFLAGS F>class CHeapArray: public CHeapObj<F> {
  protected:
   int   _length;                                 // the number of array elements
   void* _data;                                   // the array memory
@@ -94,7 +94,7 @@
   CHeapArray(size_t esize, int length) {
     assert(length >= 0, "illegal length");
     _length  = length;
-    _data    = (void*) NEW_C_HEAP_ARRAY(char *, esize * length);
+    _data    = (void*) NEW_C_HEAP_ARRAY(char *, esize * length, F);
   }
 
 #ifdef ASSERT
--- a/src/share/vm/utilities/bitMap.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/bitMap.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -65,8 +65,8 @@
   if (in_resource_area) {
     _map = NEW_RESOURCE_ARRAY(bm_word_t, new_size_in_words);
   } else {
-    if (old_map != NULL) FREE_C_HEAP_ARRAY(bm_word_t, _map);
-    _map = NEW_C_HEAP_ARRAY(bm_word_t, new_size_in_words);
+    if (old_map != NULL) FREE_C_HEAP_ARRAY(bm_word_t, _map, mtInternal);
+    _map = NEW_C_HEAP_ARRAY(bm_word_t, new_size_in_words, mtInternal);
   }
   Copy::disjoint_words((HeapWord*)old_map, (HeapWord*) _map,
                        MIN2(old_size_in_words, new_size_in_words));
@@ -179,64 +179,6 @@
   clear_range_within_word(bit_index(end_full_word), end);
 }
 
-void BitMap::mostly_disjoint_range_union(BitMap* from_bitmap,
-                                         idx_t   from_start_index,
-                                         idx_t   to_start_index,
-                                         size_t  word_num) {
-  // Ensure that the parameters are correct.
-  // These shouldn't be that expensive to check, hence I left them as
-  // guarantees.
-  guarantee(from_bitmap->bit_in_word(from_start_index) == 0,
-            "it should be aligned on a word boundary");
-  guarantee(bit_in_word(to_start_index) == 0,
-            "it should be aligned on a word boundary");
-  guarantee(word_num >= 2, "word_num should be at least 2");
-
-  intptr_t* from = (intptr_t*) from_bitmap->word_addr(from_start_index);
-  intptr_t* to   = (intptr_t*) word_addr(to_start_index);
-
-  if (*from != 0) {
-    // if it's 0, then there's no point in doing the CAS
-    while (true) {
-      intptr_t old_value = *to;
-      intptr_t new_value = old_value | *from;
-      intptr_t res       = Atomic::cmpxchg_ptr(new_value, to, old_value);
-      if (res == old_value) break;
-    }
-  }
-  ++from;
-  ++to;
-
-  for (size_t i = 0; i < word_num - 2; ++i) {
-    if (*from != 0) {
-      // if it's 0, then there's no point in doing the CAS
-      assert(*to == 0, "nobody else should be writing here");
-      intptr_t new_value = *from;
-      *to = new_value;
-    }
-
-    ++from;
-    ++to;
-  }
-
-  if (*from != 0) {
-    // if it's 0, then there's no point in doing the CAS
-    while (true) {
-      intptr_t old_value = *to;
-      intptr_t new_value = old_value | *from;
-      intptr_t res       = Atomic::cmpxchg_ptr(new_value, to, old_value);
-      if (res == old_value) break;
-    }
-  }
-
-  // the -1 is because we didn't advance them after the final CAS
-  assert(from ==
-           (intptr_t*) from_bitmap->word_addr(from_start_index) + word_num - 1,
-            "invariant");
-  assert(to == (intptr_t*) word_addr(to_start_index) + word_num - 1,
-            "invariant");
-}
-
 void BitMap::at_put(idx_t offset, bool value) {
   if (value) {
     set_bit(offset);
@@ -527,7 +469,7 @@
 
 void BitMap::init_pop_count_table() {
   if (_pop_count_table == NULL) {
-    BitMap::idx_t *table = NEW_C_HEAP_ARRAY(idx_t, 256);
+    BitMap::idx_t *table = NEW_C_HEAP_ARRAY(idx_t, 256, mtInternal);
     for (uint i = 0; i < 256; i++) {
       table[i] = num_set_bits(i);
     }
@@ -537,7 +479,7 @@
                                        (intptr_t)  NULL_WORD);
     if (res != NULL_WORD) {
       guarantee( _pop_count_table == (void*) res, "invariant" );
-      FREE_C_HEAP_ARRAY(bm_word_t, table);
+      FREE_C_HEAP_ARRAY(bm_word_t, table, mtInternal);
     }
   }
 }
--- a/src/share/vm/utilities/bitMap.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/bitMap.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -192,31 +192,6 @@
   void par_set_range(idx_t beg, idx_t end, RangeSizeHint hint);
   void par_clear_range  (idx_t beg, idx_t end, RangeSizeHint hint);
 
-  // It performs the union operation between subsets of equal length
-  // of two bitmaps (the target bitmap of the method and the
-  // from_bitmap) and stores the result to the target bitmap.  The
-  // from_start_index represents the first bit index of the subrange
-  // of the from_bitmap.  The to_start_index is the equivalent of the
-  // target bitmap. Both indexes should be word-aligned, i.e. they
-  // should correspond to the first bit on a bitmap word (it's up to
-  // the caller to ensure this; the method does check it).  The length
-  // of the subset is specified with word_num and it is in number of
-  // bitmap words. The caller should ensure that this is at least 2
-  // (smaller ranges are not support to save extra checks).  Again,
-  // this is checked in the method.
-  //
-  // Atomicity concerns: it is assumed that any contention on the
-  // target bitmap with other threads will happen on the first and
-  // last words; the ones in between will be "owned" exclusively by
-  // the calling thread and, in fact, they will already be 0. So, the
-  // method performs a CAS on the first word, copies the next
-  // word_num-2 words, and finally performs a CAS on the last word.
-  void mostly_disjoint_range_union(BitMap* from_bitmap,
-                                   idx_t   from_start_index,
-                                   idx_t   to_start_index,
-                                   size_t  word_num);
-
-
   // Clearing
   void clear_large();
   inline void clear();
--- a/src/share/vm/utilities/debug.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/debug.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -91,6 +91,13 @@
 #  endif
 #endif // PRODUCT
 
+FormatBufferResource::FormatBufferResource(const char * format, ...)
+  : FormatBufferBase((char*)resource_allocate_bytes(RES_BUFSZ)) {
+  va_list argp;
+  va_start(argp, format);
+  jio_vsnprintf(_buf, RES_BUFSZ, format, argp);
+  va_end(argp);
+}
 
 void warning(const char* format, ...) {
   if (PrintWarnings) {
--- a/src/share/vm/utilities/debug.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/debug.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -31,29 +31,43 @@
 #include <stdarg.h>
 
 // Simple class to format the ctor arguments into a fixed-sized buffer.
+class FormatBufferBase {
+ protected:
+  char* _buf;
+  inline FormatBufferBase(char* buf) : _buf(buf) {}
+ public:
+  operator const char *() const { return _buf; }
+};
+
+// Use resource area for buffer
+#define RES_BUFSZ 256
+class FormatBufferResource : public FormatBufferBase {
+ public:
+  FormatBufferResource(const char * format, ...);
+};
+
+// Use stack for buffer
 template <size_t bufsz = 256>
-class FormatBuffer {
+class FormatBuffer : public FormatBufferBase {
  public:
   inline FormatBuffer(const char * format, ...);
   inline void append(const char* format, ...);
   inline void print(const char* format, ...);
   inline void printv(const char* format, va_list ap);
-  operator const char *() const { return _buf; }
 
   char* buffer() { return _buf; }
   int size() { return bufsz; }
 
  private:
   FormatBuffer(const FormatBuffer &); // prevent copies
+  char _buffer[bufsz];
 
  protected:
-  char _buf[bufsz];
-
   inline FormatBuffer();
 };
 
 template <size_t bufsz>
-FormatBuffer<bufsz>::FormatBuffer(const char * format, ...) {
+FormatBuffer<bufsz>::FormatBuffer(const char * format, ...) : FormatBufferBase(_buffer) {
   va_list argp;
   va_start(argp, format);
   jio_vsnprintf(_buf, bufsz, format, argp);
@@ -61,7 +75,7 @@
 }
 
 template <size_t bufsz>
-FormatBuffer<bufsz>::FormatBuffer() {
+FormatBuffer<bufsz>::FormatBuffer() : FormatBufferBase(_buffer) {
   _buf[0] = '\0';
 }
 
@@ -93,6 +107,7 @@
 
 // Used to format messages for assert(), guarantee(), fatal(), etc.
 typedef FormatBuffer<> err_msg;
+typedef FormatBufferResource err_msg_res;
 
 // assertions
 #ifdef ASSERT
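
The refactoring above moves the buffer pointer into a common base class so that stack-backed (err_msg) and resource-area-backed (err_msg_res) format buffers share one implicit const char* conversion. A standalone sketch of the same shape (StackFormatBuffer is an illustrative name, not from the source):

    #include <cstdarg>
    #include <cstdio>

    // Base holds only the pointer; storage is supplied by the subclass.
    class FormatBufferBase {
     protected:
      char* _buf;
      explicit FormatBufferBase(char* buf) : _buf(buf) {}
     public:
      operator const char*() const { return _buf; }
    };

    // Stack-backed variant, mirroring the patched FormatBuffer<bufsz>.
    // Passing _buffer's address to the base ctor before the member is
    // initialized is fine: only the address is used, not the contents.
    template <size_t bufsz = 256>
    class StackFormatBuffer : public FormatBufferBase {
      char _buffer[bufsz];
     public:
      StackFormatBuffer(const char* format, ...) : FormatBufferBase(_buffer) {
        va_list ap;
        va_start(ap, format);
        std::vsnprintf(_buf, bufsz, format, ap);
        va_end(ap);
      }
    };

    int main() {
      StackFormatBuffer<> msg("index %d out of bounds for length %d", 7, 4);
      std::puts(msg);  // implicit conversion to const char*
    }
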
--- a/src/share/vm/utilities/decoder.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/decoder.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -91,6 +91,18 @@
   return decoder->decode(addr, buf, buflen, offset, modulepath);
 }
 
+bool Decoder::decode(address addr, char* buf, int buflen, int* offset, const void* base) {
+  assert(_shared_decoder_lock != NULL, "Just check");
+  bool error_handling_thread = os::current_thread_id() == VMError::first_error_tid;
+  MutexLockerEx locker(error_handling_thread ? NULL : _shared_decoder_lock, true);
+  AbstractDecoder* decoder = error_handling_thread ?
+    get_error_handler_instance(): get_shared_instance();
+  assert(decoder != NULL, "null decoder");
+
+  return decoder->decode(addr, buf, buflen, offset, base);
+}
+
+
 bool Decoder::demangle(const char* symbol, char* buf, int buflen) {
   assert(_shared_decoder_lock != NULL, "Just check");
   bool error_handling_thread = os::current_thread_id() == VMError::first_error_tid;
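
The new decode overload above repeats a deliberate pattern used by the existing decode and demangle: the thread already reporting a fatal error (first_error_tid) must not block on the shared decoder lock it might hold, so it bypasses the mutex and uses a dedicated error-handler decoder instance. A standalone sketch of that crash-path lock bypass, with std::mutex standing in for MutexLockerEx (names are illustrative):

    #include <mutex>
    #include <thread>

    static std::mutex g_decoder_lock;
    static std::thread::id g_first_error_tid;  // recorded by the crash reporter

    void decode_symbol(bool use_private_instance) { /* decoder work elided */ }

    void decode_locked() {
      if (std::this_thread::get_id() == g_first_error_tid) {
        // Crash path: taking the lock could self-deadlock, so skip it and
        // use a private error-handler decoder instance instead.
        decode_symbol(true);
      } else {
        std::lock_guard<std::mutex> guard(g_decoder_lock);
        decode_symbol(false);  // normal path, serialized on the shared instance
      }
    }
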
--- a/src/share/vm/utilities/decoder.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/decoder.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -29,7 +29,7 @@
 #include "memory/allocation.hpp"
 #include "runtime/mutex.hpp"
 
-class AbstractDecoder : public CHeapObj {
+class AbstractDecoder : public CHeapObj<mtInternal> {
 public:
   // status code for decoding native C frame
   enum decoder_status {
@@ -47,6 +47,8 @@
   // the function
   virtual bool decode(address pc, char* buf, int buflen, int* offset,
     const char* modulepath = NULL) = 0;
+  virtual bool decode(address pc, char* buf, int buflen, int* offset, const void* base) = 0;
+
   // demangle a C++ symbol
   virtual bool demangle(const char* symbol, char* buf, int buflen) = 0;
   // if the decoder can decode symbols in vm
@@ -82,6 +84,10 @@
     return false;
   }
 
+  virtual bool decode(address pc, char* buf, int buflen, int* offset, const void* base) {
+    return false;
+  }
+
   virtual bool demangle(const char* symbol, char* buf, int buflen) {
     return false;
   }
@@ -95,6 +101,7 @@
 class Decoder : AllStatic {
 public:
   static bool decode(address pc, char* buf, int buflen, int* offset, const char* modulepath = NULL);
+  static bool decode(address pc, char* buf, int buflen, int* offset, const void* base);
   static bool demangle(const char* symbol, char* buf, int buflen);
   static bool can_decode_C_frame_in_vm();
 
--- a/src/share/vm/utilities/decoder_elf.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/decoder_elf.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -43,6 +43,10 @@
 
   bool demangle(const char* symbol, char *buf, int buflen);
   bool decode(address addr, char *buf, int buflen, int* offset, const char* filepath = NULL);
+  bool decode(address addr, char *buf, int buflen, int* offset, const void *base) {
+    ShouldNotReachHere();
+    return false;
+  }
 
 private:
   ElfFile*         get_elf_file(const char* filepath);
--- a/src/share/vm/utilities/elfFile.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/elfFile.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -47,7 +47,7 @@
   m_status = NullDecoder::no_error;
 
   int len = strlen(filepath) + 1;
-  m_filepath = (const char*)os::malloc(len * sizeof(char));
+  m_filepath = (const char*)os::malloc(len * sizeof(char), mtInternal);
   if (m_filepath != NULL) {
     strcpy((char*)m_filepath, filepath);
     m_file = fopen(filepath, "r");
--- a/src/share/vm/utilities/elfFile.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/elfFile.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -82,7 +82,7 @@
 // in "error" state, so there are scenarios where lookup will fail. We want
 // this part of the code to be very defensive, and bail out if anything goes
 // wrong.
 
-class ElfFile: public CHeapObj {
+class ElfFile: public CHeapObj<mtInternal> {
   friend class ElfDecoder;
  public:
   ElfFile(const char* filepath);
--- a/src/share/vm/utilities/elfStringTable.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/elfStringTable.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -42,7 +42,7 @@
 
   // try to load the string table
   long cur_offset = ftell(file);
-  m_table = (char*)os::malloc(sizeof(char) * shdr.sh_size);
+  m_table = (char*)os::malloc(sizeof(char) * shdr.sh_size, mtInternal);
   if (m_table != NULL) {
     // if there is an error, mark the error
     if (fseek(file, shdr.sh_offset, SEEK_SET) ||
--- a/src/share/vm/utilities/elfStringTable.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/elfStringTable.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -35,7 +35,7 @@
 // The string table represents a string table section in an elf file.
 // Whenever there is enough memory, it will load the whole string table as
 // one blob. Otherwise, it will load strings from the file when requested.
-class ElfStringTable: CHeapObj {
+class ElfStringTable: CHeapObj<mtInternal> {
   friend class ElfFile;
  public:
   ElfStringTable(FILE* file, Elf_Shdr shdr, int index);
--- a/src/share/vm/utilities/elfSymbolTable.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/elfSymbolTable.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -40,7 +40,7 @@
   long cur_offset = ftell(file);
   if (cur_offset != -1) {
     // call malloc so we can back up if memory allocation fails.
-    m_symbols = (Elf_Sym*)os::malloc(shdr.sh_size);
+    m_symbols = (Elf_Sym*)os::malloc(shdr.sh_size, mtInternal);
     if (m_symbols) {
       if (fseek(file, shdr.sh_offset, SEEK_SET) ||
         fread((void*)m_symbols, shdr.sh_size, 1, file) != 1 ||
--- a/src/share/vm/utilities/elfSymbolTable.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/elfSymbolTable.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -38,7 +38,7 @@
  * of the elf file into memory. Otherwise, it will walk the section in the
  * file to look up the symbol nearest to the given address.
  */
-class ElfSymbolTable: public CHeapObj {
+class ElfSymbolTable: public CHeapObj<mtInternal> {
   friend class ElfFile;
  public:
   ElfSymbolTable(FILE* file, Elf_Shdr shdr);
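
The CHeapObj<mtInternal> and os::malloc(..., mtInternal) changes across these ELF-decoder files all serve one purpose: every C-heap allocation now carries a memory-type tag so Native Memory Tracking can attribute usage per subsystem. A standalone sketch of the tagged-base-class idea (grossly simplified accounting; all names are illustrative, not HotSpot's):

    #include <cstddef>
    #include <cstdlib>

    enum MemTag { tagInternal, tagSymbol, tagClass, tagCount };  // stand-ins for mtInternal etc.
    static unsigned long g_usage[tagCount];                      // per-tag byte counters

    // Every subclass of TaggedHeapObj<T> charges its allocations to tag T,
    // mirroring how ElfFile et al. now inherit from CHeapObj<mtInternal>.
    template <MemTag T>
    class TaggedHeapObj {
     public:
      void* operator new(std::size_t size) {
        g_usage[T] += size;
        return std::malloc(size);
      }
      void operator delete(void* p, std::size_t size) {
        g_usage[T] -= size;
        std::free(p);
      }
    };

    class ElfLikeFile : public TaggedHeapObj<tagInternal> {
      int fd;  // example payload
    };
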
--- a/src/share/vm/utilities/events.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/events.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -46,7 +46,7 @@
 // crash time.  This is a very generic interface that is mainly here
 // for completeness.  Normally the templated EventLogBase would be
 // subclassed to provide different log types.
-class EventLog : public CHeapObj {
+class EventLog : public CHeapObj<mtInternal> {
   friend class Events;
 
  private:
--- a/src/share/vm/utilities/exceptions.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/exceptions.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -164,52 +164,58 @@
 }
 
 
-void Exceptions::_throw_msg(Thread* thread, const char* file, int line, Symbol* h_name, const char* message, Handle h_loader, Handle h_protection_domain) {
+void Exceptions::_throw_msg(Thread* thread, const char* file, int line, Symbol* name, const char* message,
+                            Handle h_loader, Handle h_protection_domain) {
   // Check for special boot-strapping/vm-thread handling
-  if (special_exception(thread, file, line, h_name, message)) return;
+  if (special_exception(thread, file, line, name, message)) return;
   // Create and throw exception
   Handle h_cause(thread, NULL);
-  Handle h_exception = new_exception(thread, h_name, message, h_cause, h_loader, h_protection_domain);
+  Handle h_exception = new_exception(thread, name, message, h_cause, h_loader, h_protection_domain);
   _throw(thread, file, line, h_exception, message);
 }
 
-// Throw an exception with a message and a cause
-void Exceptions::_throw_msg_cause(Thread* thread, const char* file, int line, Symbol* h_name, const char* message, Handle h_cause, Handle h_loader, Handle h_protection_domain) {
+void Exceptions::_throw_msg_cause(Thread* thread, const char* file, int line, Symbol* name, const char* message, Handle h_cause,
+                                  Handle h_loader, Handle h_protection_domain) {
   // Check for special boot-strapping/vm-thread handling
-  if (special_exception(thread, file, line, h_name, message)) return;
+  if (special_exception(thread, file, line, name, message)) return;
   // Create and throw exception and init cause
-  Handle h_exception = new_exception(thread, h_name, message, h_cause, h_loader, h_protection_domain);
+  Handle h_exception = new_exception(thread, name, message, h_cause, h_loader, h_protection_domain);
   _throw(thread, file, line, h_exception, message);
 }
 
-// This version already has a handle for name
-void Exceptions::_throw_msg(Thread* thread, const char* file, int line,
-                            Symbol* name, const char* message) {
-  Handle       h_loader(thread, NULL);
-  Handle       h_protection_domain(thread, NULL);
-  Exceptions::_throw_msg(thread, file, line, name, message, h_loader, h_protection_domain);
+void Exceptions::_throw_cause(Thread* thread, const char* file, int line, Symbol* name, Handle h_cause,
+                              Handle h_loader, Handle h_protection_domain) {
+  // Check for special boot-strapping/vm-thread handling
+  if (special_exception(thread, file, line, h_cause)) return;
+  // Create and throw exception
+  Handle h_exception = new_exception(thread, name, h_cause, h_loader, h_protection_domain);
+  _throw(thread, file, line, h_exception, NULL);
 }
 
-// This version already has a handle for name
-void Exceptions::_throw_msg_cause(Thread* thread, const char* file, int line,
-                            Symbol* name, const char* message, Handle cause) {
-  Handle       h_loader(thread, NULL);
-  Handle       h_protection_domain(thread, NULL);
-  Exceptions::_throw_msg_cause(thread, file, line, name, message, cause, h_loader, h_protection_domain);
-}
-
-void Exceptions::_throw_args(Thread* thread, const char* file, int line, Symbol* h_name, Symbol* h_signature, JavaCallArguments *args) {
+void Exceptions::_throw_args(Thread* thread, const char* file, int line, Symbol* name, Symbol* signature, JavaCallArguments *args) {
   // Check for special boot-strapping/vm-thread handling
-  if (special_exception(thread, file, line, h_name, NULL)) return;
+  if (special_exception(thread, file, line, name, NULL)) return;
   // Create and throw exception
   Handle h_loader(thread, NULL);
   Handle h_prot(thread, NULL);
-  Handle h_cause(thread, NULL);
-  Handle exception = new_exception(thread, h_name, h_signature, args, h_cause, h_loader, h_prot);
+  Handle exception = new_exception(thread, name, signature, args, h_loader, h_prot);
   _throw(thread, file, line, exception);
 }
 
 
+// Methods for default parameters.
+// NOTE: These must be here (and not in the header file) because of include circularities.
+void Exceptions::_throw_msg_cause(Thread* thread, const char* file, int line, Symbol* name, const char* message, Handle h_cause) {
+  _throw_msg_cause(thread, file, line, name, message, h_cause, Handle(thread, NULL), Handle(thread, NULL));
+}
+void Exceptions::_throw_msg(Thread* thread, const char* file, int line, Symbol* name, const char* message) {
+  _throw_msg(thread, file, line, name, message, Handle(thread, NULL), Handle(thread, NULL));
+}
+void Exceptions::_throw_cause(Thread* thread, const char* file, int line, Symbol* name, Handle h_cause) {
+  _throw_cause(thread, file, line, name, h_cause, Handle(thread, NULL), Handle(thread, NULL));
+}
+
+
 void Exceptions::throw_stack_overflow_exception(Thread* THREAD, const char* file, int line, methodHandle method) {
   Handle exception;
   if (!THREAD->has_pending_exception()) {
@@ -240,12 +246,9 @@
 
 // Creates an exception oop, calls the <init> method with the given signature,
 // and returns a Handle.
-// Initializes the cause if cause non-null
-Handle Exceptions::new_exception(Thread *thread, Symbol* h_name,
-                                 Symbol* signature,
-                                 JavaCallArguments *args,
-                                 Handle h_cause, Handle h_loader,
-                                 Handle h_protection_domain) {
+Handle Exceptions::new_exception(Thread *thread, Symbol* name,
+                                 Symbol* signature, JavaCallArguments *args,
+                                 Handle h_loader, Handle h_protection_domain) {
   assert(Universe::is_fully_initialized(),
     "cannot be called during initialization");
   assert(thread->is_Java_thread(), "can only be called by a Java thread");
@@ -254,8 +257,8 @@
   Handle h_exception;
 
   // Resolve exception klass
-  klassOop ik = SystemDictionary::resolve_or_fail(h_name, h_loader, h_protection_domain, true, thread);
-  instanceKlassHandle klass (thread, ik);
+  klassOop ik = SystemDictionary::resolve_or_fail(name, h_loader, h_protection_domain, true, thread);
+  instanceKlassHandle klass(thread, ik);
 
   if (!thread->has_pending_exception()) {
     assert(klass.not_null(), "klass must exist");
@@ -273,24 +276,8 @@
                                          signature,
                                          args,
                                          thread);
-
       }
     }
-
-    // Future: object initializer should take a cause argument
-    if (h_cause() != NULL) {
-      assert(h_cause->is_a(SystemDictionary::Throwable_klass()),
-          "exception cause is not a subclass of java/lang/Throwable");
-      JavaValue result1(T_OBJECT);
-      JavaCallArguments args1;
-      args1.set_receiver(h_exception);
-      args1.push_oop(h_cause);
-      JavaCalls::call_virtual(&result1, klass,
-                                     vmSymbols::initCause_name(),
-                                     vmSymbols::throwable_throwable_signature(),
-                                     &args1,
-                                     thread);
-    }
   }
 
   // Check if another exception was thrown in the process, if so rethrow that one
@@ -301,12 +288,60 @@
   return h_exception;
 }
 
+// Creates an exception oop, calls the <init> method with the given signature,
+// and returns a Handle.
+// Initializes the cause if the cause is non-null.
+Handle Exceptions::new_exception(Thread *thread, Symbol* name,
+                                 Symbol* signature, JavaCallArguments *args,
+                                 Handle h_cause,
+                                 Handle h_loader, Handle h_protection_domain) {
+  Handle h_exception = new_exception(thread, name, signature, args, h_loader, h_protection_domain);
+
+  // Future: object initializer should take a cause argument
+  if (h_cause.not_null()) {
+    assert(h_cause->is_a(SystemDictionary::Throwable_klass()),
+        "exception cause is not a subclass of java/lang/Throwable");
+    JavaValue result1(T_OBJECT);
+    JavaCallArguments args1;
+    args1.set_receiver(h_exception);
+    args1.push_oop(h_cause);
+    JavaCalls::call_virtual(&result1, h_exception->klass(),
+                                      vmSymbols::initCause_name(),
+                                      vmSymbols::throwable_throwable_signature(),
+                                      &args1,
+                                      thread);
+  }
+
+  // Check if another exception was thrown in the process, if so rethrow that one
+  if (thread->has_pending_exception()) {
+    h_exception = Handle(thread, thread->pending_exception());
+    thread->clear_pending_exception();
+  }
+  return h_exception;
+}
+
+// Convenience method. Calls either the <init>() or <init>(Throwable) method when
+// creating a new exception
+Handle Exceptions::new_exception(Thread* thread, Symbol* name,
+                                 Handle h_cause,
+                                 Handle h_loader, Handle h_protection_domain,
+                                 ExceptionMsgToUtf8Mode to_utf8_safe) {
+  JavaCallArguments args;
+  Symbol* signature = NULL;
+  if (h_cause.is_null()) {
+    signature = vmSymbols::void_method_signature();
+  } else {
+    signature = vmSymbols::throwable_void_signature();
+    args.push_oop(h_cause);
+  }
+  return new_exception(thread, name, signature, &args, h_loader, h_protection_domain);
+}
+
 // Convenience method. Calls either the <init>() or <init>(String) method when
 // creating a new exception
-Handle Exceptions::new_exception(Thread* thread, Symbol* h_name,
+Handle Exceptions::new_exception(Thread* thread, Symbol* name,
                                  const char* message, Handle h_cause,
-                                 Handle h_loader,
-                                 Handle h_protection_domain,
+                                 Handle h_loader, Handle h_protection_domain,
                                  ExceptionMsgToUtf8Mode to_utf8_safe) {
   JavaCallArguments args;
   Symbol* signature = NULL;
@@ -320,7 +355,7 @@
     // the exception we are trying to build, or the pending exception.
     // This is sort of like what PRESERVE_EXCEPTION_MARK does, except
     // for the preferencing and the early returns.
-    Handle incoming_exception (thread, NULL);
+    Handle incoming_exception(thread, NULL);
     if (thread->has_pending_exception()) {
       incoming_exception = Handle(thread, thread->pending_exception());
       thread->clear_pending_exception();
@@ -344,7 +379,7 @@
     args.push_oop(msg);
     signature = vmSymbols::string_void_signature();
   }
-  return new_exception(thread, h_name, signature, &args, h_cause, h_loader, h_protection_domain);
+  return new_exception(thread, name, signature, &args, h_cause, h_loader, h_protection_domain);
 }
 
 // Another convenience method that creates handles for null class loaders and
@@ -355,8 +390,7 @@
 // encoding scheme of the string into account. One thing we should do at some
 // point is to push this flag down to class java_lang_String since other
 // classes may need similar functionalities.
-Handle Exceptions::new_exception(Thread* thread,
-                                 Symbol* name,
+Handle Exceptions::new_exception(Thread* thread, Symbol* name,
                                  const char* message,
                                  ExceptionMsgToUtf8Mode to_utf8_safe) {
 
--- a/src/share/vm/utilities/exceptions.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/exceptions.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -57,7 +57,7 @@
 // field of the Thread class w/o having access to the Thread's interface (for
 // include hierarchy reasons).
 
-class ThreadShadow: public CHeapObj {
+class ThreadShadow: public CHeapObj<mtThread> {
   friend class VMStructs;
 
  protected:
@@ -112,19 +112,22 @@
   // Throw exceptions: w/o message, w/ message & with formatted message.
   static void _throw_oop(Thread* thread, const char* file, int line, oop exception);
   static void _throw(Thread* thread, const char* file, int line, Handle exception, const char* msg = NULL);
-  static void _throw_msg(Thread* thread, const char* file, int line,
-                         Symbol* name, const char* message, Handle loader,
-                         Handle protection_domain);
-  static void _throw_msg(Thread* thread, const char* file, int line,
-                         Symbol* name, const char* message);
+
+  static void _throw_msg(Thread* thread, const char* file, int line, Symbol* name, const char* message);
+  static void _throw_msg(Thread* thread, const char* file, int line, Symbol* name, const char* message,
+                         Handle loader, Handle protection_domain);
+
+  static void _throw_msg_cause(Thread* thread, const char* file, int line, Symbol* name, const char* message, Handle h_cause);
+  static void _throw_msg_cause(Thread* thread, const char* file, int line, Symbol* name, const char* message, Handle h_cause,
+                               Handle h_loader, Handle h_protection_domain);
+
+  static void _throw_cause(Thread* thread, const char* file, int line, Symbol* name, Handle h_cause);
+  static void _throw_cause(Thread* thread, const char* file, int line, Symbol* name, Handle h_cause,
+                           Handle h_loader, Handle h_protection_domain);
+
   static void _throw_args(Thread* thread, const char* file, int line,
                           Symbol* name, Symbol* signature,
                           JavaCallArguments* args);
-  static void _throw_msg_cause(Thread* thread, const char* file,
-                         int line, Symbol* h_name, const char* message,
-                         Handle h_cause, Handle h_loader, Handle h_protection_domain);
-  static void _throw_msg_cause(Thread* thread, const char* file, int line,
-                            Symbol* name, const char* message, Handle cause);
 
   // There is no THROW... macro for this method. Caller should remember
   // to do a return after calling it.
@@ -134,17 +137,26 @@
   // Create and initialize a new exception
   static Handle new_exception(Thread* thread, Symbol* name,
                               Symbol* signature, JavaCallArguments* args,
-                              Handle cause, Handle loader,
-                              Handle protection_domain);
+                              Handle loader, Handle protection_domain);
+
+  static Handle new_exception(Thread* thread, Symbol* name,
+                              Symbol* signature, JavaCallArguments* args,
+                              Handle cause,
+                              Handle loader, Handle protection_domain);
 
   static Handle new_exception(Thread* thread, Symbol* name,
-                              const char* message, Handle cause, Handle loader,
-                              Handle protection_domain,
+                              Handle cause,
+                              Handle loader, Handle protection_domain,
                               ExceptionMsgToUtf8Mode to_utf8_safe = safe_to_utf8);
 
- static Handle new_exception(Thread* thread, Symbol* name,
-                             const char* message,
-                             ExceptionMsgToUtf8Mode to_utf8_safe = safe_to_utf8);
+  static Handle new_exception(Thread* thread, Symbol* name,
+                              const char* message, Handle cause,
+                              Handle loader, Handle protection_domain,
+                              ExceptionMsgToUtf8Mode to_utf8_safe = safe_to_utf8);
+
+  static Handle new_exception(Thread* thread, Symbol* name,
+                              const char* message,
+                              ExceptionMsgToUtf8Mode to_utf8_safe = safe_to_utf8);
 
   static void throw_stack_overflow_exception(Thread* thread, const char* file, int line, methodHandle method);
 
@@ -214,6 +226,9 @@
 #define THROW_MSG(name, message)                    \
   { Exceptions::_throw_msg(THREAD_AND_LOCATION, name, message); return;  }
 
+#define THROW_CAUSE(name, cause)   \
+  { Exceptions::_throw_cause(THREAD_AND_LOCATION, name, cause); return; }
+
 #define THROW_MSG_LOADER(name, message, loader, protection_domain) \
   { Exceptions::_throw_msg(THREAD_AND_LOCATION, name, message, loader, protection_domain); return;  }
 
@@ -238,6 +253,9 @@
 #define THROW_ARG_(name, signature, args, result) \
   { Exceptions::_throw_args(THREAD_AND_LOCATION, name, signature, args); return result; }
 
+#define THROW_MSG_CAUSE(name, message, cause)   \
+  { Exceptions::_throw_msg_cause(THREAD_AND_LOCATION, name, message, cause); return; }
+
 #define THROW_MSG_CAUSE_(name, message, cause, result)   \
   { Exceptions::_throw_msg_cause(THREAD_AND_LOCATION, name, message, cause); return result; }
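
The reworked overload set above adds a cause-only path (_throw_cause and the THROW_CAUSE macro) beside the message variants, and moves the default-argument forwarding out of line. A minimal stand-in for the convention the THROW-style macros rely on, namely recording the pending exception and returning from the caller immediately (names here are illustrative, not HotSpot's):

    #include <cstdio>

    static const char* g_pending = 0;  // stand-in for the thread's pending exception

    // Like THROW_CAUSE: set the pending exception and return, so no code
    // after the "throw" in the calling function is ever executed.
    #define THROW_CAUSE_SKETCH(name, cause) \
      { g_pending = (name); std::printf("%s caused by %s\n", (name), (cause)); return; }

    void load_class(bool io_failed) {
      if (io_failed) THROW_CAUSE_SKETCH("java/lang/ClassFormatError", "java/io/IOException");
      std::printf("loaded\n");  // reached only when nothing was thrown
    }
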
 
--- a/src/share/vm/utilities/globalDefinitions.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/globalDefinitions.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -161,10 +161,6 @@
 const size_t G                  = M*K;
 const size_t HWperKB            = K / sizeof(HeapWord);
 
-const size_t LOG_K              = 10;
-const size_t LOG_M              = 2 * LOG_K;
-const size_t LOG_G              = 2 * LOG_M;
-
 const jint min_jint = (jint)1 << (sizeof(jint)*BitsPerByte-1); // 0x80000000 == smallest jint
 const jint max_jint = (juint)min_jint - 1;                     // 0x7FFFFFFF == largest jint
 
@@ -179,6 +175,11 @@
 const jint  NANOSECS_PER_MILLISEC = 1000000;
 
 inline const char* proper_unit_for_byte_size(size_t s) {
+#ifdef _LP64
+  if (s >= 10*G) {
+    return "G";
+  }
+#endif
   if (s >= 10*M) {
     return "M";
   } else if (s >= 10*K) {
@@ -188,17 +189,22 @@
   }
 }
 
-inline size_t byte_size_in_proper_unit(size_t s) {
+template <class T>
+inline T byte_size_in_proper_unit(T s) {
+#ifdef _LP64
+  if (s >= 10*G) {
+    return (T)(s/G);
+  }
+#endif
   if (s >= 10*M) {
-    return s/M;
+    return (T)(s/M);
   } else if (s >= 10*K) {
-    return s/K;
+    return (T)(s/K);
   } else {
     return s;
   }
 }
 
-
 //----------------------------------------------------------------------------------------------------
 // VM type definitions
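
With the new _LP64-only branch above, both helpers switch units once a value reaches ten of the next unit, so sizes never print as "0G" or "9G". A standalone check of the selection rule, with the same thresholds as the patch (not HotSpot code; assumes a 64-bit size_t, which is exactly why the patch guards the G branch with _LP64):

    #include <cstddef>
    #include <cstdio>

    const std::size_t K = 1024, M = K * K, G = M * K;

    const char* unit_for(std::size_t s) {
      if (s >= 10 * G) return "G";
      if (s >= 10 * M) return "M";
      if (s >= 10 * K) return "K";
      return "B";
    }

    std::size_t scaled(std::size_t s) {
      if (s >= 10 * G) return s / G;
      if (s >= 10 * M) return s / M;
      if (s >= 10 * K) return s / K;
      return s;
    }

    int main() {
      const std::size_t sizes[] = { 8 * K, 3 * G, 20 * G };
      for (int i = 0; i < 3; i++) {
        // prints 8192B, 3072M, 20G: 3G stays in megabytes because it is
        // below the 10G threshold.
        std::printf("%zu%s\n", scaled(sizes[i]), unit_for(sizes[i]));
      }
    }
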
 
--- a/src/share/vm/utilities/growableArray.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/growableArray.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -61,7 +61,7 @@
   if (on_stack()) {
     return (void*)resource_allocate_bytes(byte_size);
   } else if (on_C_heap()) {
-    return (void*)AllocateHeap(byte_size, "GrET in " __FILE__);
+    return (void*)AllocateHeap(byte_size, _memflags);
   } else {
     return _arena->Amalloc(byte_size);
   }
--- a/src/share/vm/utilities/growableArray.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/growableArray.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -86,6 +86,9 @@
                         //   0 means default ResourceArea
                         //   1 means on C heap
                         //   otherwise, allocate in _arena
+
+  MEMFLAGS   _memflags;   // memory type if allocation in C heap
+
 #ifdef ASSERT
   int    _nesting;      // resource area nesting at creation
   void   set_nesting();
@@ -102,9 +105,14 @@
 
   // This GA will use the resource stack for storage if c_heap==false,
   // else it will use the C heap.  Use clear_and_deallocate to avoid leaks.
-  GenericGrowableArray(int initial_size, int initial_len, bool c_heap) {
+  GenericGrowableArray(int initial_size, int initial_len, bool c_heap, MEMFLAGS flags = mtNone) {
     _len = initial_len;
     _max = initial_size;
+    _memflags = flags;
+
+    // memory type has to be specified for C heap allocation
+    assert(!(c_heap && flags == mtNone), "memory type not specified for C heap object");
+
     assert(_len >= 0 && _len <= _max, "initial_len too big");
     _arena = (c_heap ? (Arena*)1 : NULL);
     set_nesting();
@@ -121,6 +129,8 @@
     _max = initial_size;
     assert(_len >= 0 && _len <= _max, "initial_len too big");
     _arena = arena;
+    _memflags = mtNone;
+
     assert(on_arena(), "arena has taken on reserved value 0 or 1");
     // Relax next assert to allow object allocation on resource area,
     // on stack or embedded into an other object.
@@ -152,12 +162,14 @@
     for (int i = 0; i < _max; i++) ::new ((void*)&_data[i]) E();
   }
 
-  GrowableArray(int initial_size, bool C_heap = false) : GenericGrowableArray(initial_size, 0, C_heap) {
+  GrowableArray(int initial_size, bool C_heap = false, MEMFLAGS F = mtInternal)
+    : GenericGrowableArray(initial_size, 0, C_heap, F) {
     _data = (E*)raw_allocate(sizeof(E));
     for (int i = 0; i < _max; i++) ::new ((void*)&_data[i]) E();
   }
 
-  GrowableArray(int initial_size, int initial_len, const E& filler, bool C_heap = false) : GenericGrowableArray(initial_size, initial_len, C_heap) {
+  GrowableArray(int initial_size, int initial_len, const E& filler, bool C_heap = false, MEMFLAGS memflags = mtInternal)
+    : GenericGrowableArray(initial_size, initial_len, C_heap, memflags) {
     _data = (E*)raw_allocate(sizeof(E));
     int i = 0;
     for (; i < _len; i++) ::new ((void*)&_data[i]) E(filler);
@@ -198,8 +210,11 @@
     return idx;
   }
 
-  void append_if_missing(const E& elem) {
-    if (!contains(elem)) append(elem);
+  bool append_if_missing(const E& elem) {
+    // Returns TRUE if elem is added.
+    bool missed = !contains(elem);
+    if (missed) append(elem);
+    return missed;
   }
 
   E at(int i) const {
@@ -292,12 +307,22 @@
     ShouldNotReachHere();
   }
 
+  // The order is preserved.
   void remove_at(int index) {
     assert(0 <= index && index < _len, "illegal index");
     for (int j = index + 1; j < _len; j++) _data[j-1] = _data[j];
     _len--;
   }
 
+  // The order is changed.
+  void delete_at(int index) {
+    assert(0 <= index && index < _len, "illegal index");
+    if (index < --_len) {
+      // Replace removed element with last one.
+      _data[index] = _data[_len];
+    }
+  }
+
   // inserts the given element before the element at index i
   void insert_before(const int idx, const E& elem) {
     check_nesting();
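
Two behavioral additions above deserve a note: append_if_missing now reports whether it actually inserted (much like std::set::insert), and the new delete_at removes in O(1) by swapping in the last element, at the cost of ordering, while remove_at stays O(n) and order-preserving. A standalone sketch of the swap-remove idiom, with std::vector standing in for GrowableArray:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // O(1) unordered removal, as in the new GrowableArray::delete_at:
    // overwrite the removed slot with the last element, then shrink.
    template <class E>
    void delete_at(std::vector<E>& v, std::size_t index) {
      assert(index < v.size());
      if (index + 1 < v.size()) {
        v[index] = v.back();  // order is not preserved
      }
      v.pop_back();
    }

Use remove_at when iteration order matters; delete_at suits set-like collections where it does not.
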
--- a/src/share/vm/utilities/hashtable.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/hashtable.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -35,11 +35,6 @@
 #include "utilities/hashtable.inline.hpp"
 
 
-#ifndef USDT2
-HS_DTRACE_PROBE_DECL4(hs_private, hashtable__new_entry,
-  void*, unsigned int, void*, void*);
-#endif /* !USDT2 */
-
 // This is a generic hashtable, designed to be used for the symbol
 // and string tables.
 //
@@ -48,8 +43,8 @@
 // %note:
 //  - HashtableEntrys are allocated in blocks to reduce the space overhead.
 
-BasicHashtableEntry* BasicHashtable::new_entry(unsigned int hashValue) {
-  BasicHashtableEntry* entry;
+template <MEMFLAGS F> BasicHashtableEntry<F>* BasicHashtable<F>::new_entry(unsigned int hashValue) {
+  BasicHashtableEntry<F>* entry;
 
   if (_free_list) {
     entry = _free_list;
@@ -60,10 +55,10 @@
       int len = _entry_size * block_size;
       len = 1 << log2_intptr(len); // round down to power of 2
       assert(len >= _entry_size, "");
-      _first_free_entry = NEW_C_HEAP_ARRAY(char, len);
+      _first_free_entry = NEW_C_HEAP_ARRAY2(char, len, F, CURRENT_PC);
       _end_block = _first_free_entry + len;
     }
-    entry = (BasicHashtableEntry*)_first_free_entry;
+    entry = (BasicHashtableEntry<F>*)_first_free_entry;
     _first_free_entry += _entry_size;
   }
 
@@ -73,29 +68,21 @@
 }
 
 
-template <class T> HashtableEntry<T>* Hashtable<T>::new_entry(unsigned int hashValue, T obj) {
-  HashtableEntry<T>* entry;
+template <class T, MEMFLAGS F> HashtableEntry<T, F>* Hashtable<T, F>::new_entry(unsigned int hashValue, T obj) {
+  HashtableEntry<T, F>* entry;
 
-  entry = (HashtableEntry<T>*)BasicHashtable::new_entry(hashValue);
+  entry = (HashtableEntry<T, F>*)BasicHashtable<F>::new_entry(hashValue);
   entry->set_literal(obj);
-#ifndef USDT2
-  HS_DTRACE_PROBE4(hs_private, hashtable__new_entry,
-    this, hashValue, obj, entry);
-#else /* USDT2 */
-  HS_PRIVATE_HASHTABLE_NEW_ENTRY(
-    this, hashValue, (uintptr_t) obj, entry);
-#endif /* USDT2 */
   return entry;
 }
 
-
 // Check to see if the hashtable is unbalanced.  The caller sets a flag to
 // rehash at the next safepoint.  If this bucket is 60 times longer than the
 // expected average bucket length, the hashtable is unbalanced.
 // This is a somewhat arbitrary heuristic, but if one bucket gets to
 // rehash_count, which is currently 100, there's probably something wrong.
 
-bool BasicHashtable::check_rehash_table(int count) {
+template <MEMFLAGS F> bool BasicHashtable<F>::check_rehash_table(int count) {
   assert(table_size() != 0, "underflow");
   if (count > (((double)number_of_entries()/(double)table_size())*rehash_multiple)) {
     // Set a flag for the next safepoint, which should be at some guaranteed
@@ -105,15 +92,15 @@
   return false;
 }
 
-template <class T> jint Hashtable<T>::_seed = 0;
+template <class T, MEMFLAGS F> jint Hashtable<T, F>::_seed = 0;
 
-template <class T> unsigned int Hashtable<T>::new_hash(Symbol* sym) {
+template <class T, MEMFLAGS F> unsigned int Hashtable<T, F>::new_hash(Symbol* sym) {
   ResourceMark rm;
   // Use alternate hashing algorithm on this symbol.
   return AltHashing::murmur3_32(seed(), (const jbyte*)sym->as_C_string(), sym->utf8_length());
 }
 
-template <class T> unsigned int Hashtable<T>::new_hash(oop string) {
+template <class T, MEMFLAGS F> unsigned int Hashtable<T, F>::new_hash(oop string) {
   ResourceMark rm;
   int length;
   jchar* chars = java_lang_String::as_unicode_string(string, length);
@@ -125,7 +112,7 @@
 // with the existing elements.   This can be used to change the hash code
 // and could in the future change the size of the table.
 
-template <class T> void Hashtable<T>::move_to(Hashtable<T>* new_table) {
+template <class T, MEMFLAGS F> void Hashtable<T, F>::move_to(Hashtable<T, F>* new_table) {
 
   // Initialize the global seed for hashing.
   _seed = AltHashing::compute_seed();
@@ -135,8 +122,8 @@
 
   // Iterate through the table and create a new entry for the new table
   for (int i = 0; i < new_table->table_size(); ++i) {
-    for (HashtableEntry<T>* p = bucket(i); p != NULL; ) {
-      HashtableEntry<T>* next = p->next();
+    for (HashtableEntry<T, F>* p = bucket(i); p != NULL; ) {
+      HashtableEntry<T, F>* next = p->next();
       T string = p->literal();
       // Use alternate hashing algorithm on the symbol in the first table
       unsigned int hashValue = new_hash(string);
@@ -148,7 +135,7 @@
       // walking the hashtable past these entries requires
       // BasicHashtableEntry::make_ptr() call.
       bool keep_shared = p->is_shared();
-      unlink_entry(p);
+      this->unlink_entry(p);
       new_table->add_entry(index, p);
       if (keep_shared) {
         p->set_shared();
@@ -164,16 +151,16 @@
   // for the elements has been used in a new table and is not
   // destroyed.  The memory reuse will benefit resizing the SystemDictionary
   // to avoid a memory allocation spike at safepoint.
-  free_buckets();
+  BasicHashtable<F>::free_buckets();
 }
 
-void BasicHashtable::free_buckets() {
+template <MEMFLAGS F> void BasicHashtable<F>::free_buckets() {
   if (NULL != _buckets) {
     // Don't delete the buckets in the shared space.  They aren't
     // allocated by os::malloc
     if (!UseSharedSpaces ||
         !FileMapInfo::current_info()->is_in_shared_space(_buckets)) {
-       FREE_C_HEAP_ARRAY(HashtableBucket, _buckets);
+       FREE_C_HEAP_ARRAY(HashtableBucket, _buckets, F);
     }
     _buckets = NULL;
   }
@@ -182,13 +169,13 @@
 
 // Reverse the order of elements in the hash buckets.
 
-void BasicHashtable::reverse() {
+template <MEMFLAGS F> void BasicHashtable<F>::reverse() {
 
   for (int i = 0; i < _table_size; ++i) {
-    BasicHashtableEntry* new_list = NULL;
-    BasicHashtableEntry* p = bucket(i);
+    BasicHashtableEntry<F>* new_list = NULL;
+    BasicHashtableEntry<F>* p = bucket(i);
     while (p != NULL) {
-      BasicHashtableEntry* next = p->next();
+      BasicHashtableEntry<F>* next = p->next();
       p->set_next(new_list);
       new_list = p;
       p = next;
@@ -200,7 +187,7 @@
 
 // Copy the table to the shared space.
 
-void BasicHashtable::copy_table(char** top, char* end) {
+template <MEMFLAGS F> void BasicHashtable<F>::copy_table(char** top, char* end) {
 
   // Dump the hash table entries.
 
@@ -209,13 +196,13 @@
 
   int i;
   for (i = 0; i < _table_size; ++i) {
-    for (BasicHashtableEntry** p = _buckets[i].entry_addr();
+    for (BasicHashtableEntry<F>** p = _buckets[i].entry_addr();
                               *p != NULL;
                                p = (*p)->next_addr()) {
       if (*top + entry_size() > end) {
         report_out_of_shared_space(SharedMiscData);
       }
-      *p = (BasicHashtableEntry*)memcpy(*top, *p, entry_size());
+      *p = (BasicHashtableEntry<F>*)memcpy(*top, *p, entry_size());
       *top += entry_size();
     }
   }
@@ -224,7 +211,7 @@
   // Set the shared bit.
 
   for (i = 0; i < _table_size; ++i) {
-    for (BasicHashtableEntry* p = bucket(i); p != NULL; p = p->next()) {
+    for (BasicHashtableEntry<F>* p = bucket(i); p != NULL; p = p->next()) {
       p->set_shared();
     }
   }
@@ -234,15 +221,15 @@
 
 // Reverse the order of elements in the hash buckets.
 
-template <class T> void Hashtable<T>::reverse(void* boundary) {
+template <class T, MEMFLAGS F> void Hashtable<T, F>::reverse(void* boundary) {
 
-  for (int i = 0; i < table_size(); ++i) {
-    HashtableEntry<T>* high_list = NULL;
-    HashtableEntry<T>* low_list = NULL;
-    HashtableEntry<T>* last_low_entry = NULL;
-    HashtableEntry<T>* p = bucket(i);
+  for (int i = 0; i < this->table_size(); ++i) {
+    HashtableEntry<T, F>* high_list = NULL;
+    HashtableEntry<T, F>* low_list = NULL;
+    HashtableEntry<T, F>* last_low_entry = NULL;
+    HashtableEntry<T, F>* p = bucket(i);
     while (p != NULL) {
-      HashtableEntry<T>* next = p->next();
+      HashtableEntry<T, F>* next = p->next();
       if ((void*)p->literal() >= boundary) {
         p->set_next(high_list);
         high_list = p;
@@ -267,8 +254,8 @@
 
 // Dump the hash table buckets.
 
-void BasicHashtable::copy_buckets(char** top, char* end) {
-  intptr_t len = _table_size * sizeof(HashtableBucket);
+template <MEMFLAGS F> void BasicHashtable<F>::copy_buckets(char** top, char* end) {
+  intptr_t len = _table_size * sizeof(HashtableBucket<F>);
   *(intptr_t*)(*top) = len;
   *top += sizeof(intptr_t);
 
@@ -278,18 +265,18 @@
   if (*top + len > end) {
     report_out_of_shared_space(SharedMiscData);
   }
-  _buckets = (HashtableBucket*)memcpy(*top, _buckets, len);
+  _buckets = (HashtableBucket<F>*)memcpy(*top, _buckets, len);
   *top += len;
 }
 
 
 #ifndef PRODUCT
 
-template <class T> void Hashtable<T>::print() {
+template <class T, MEMFLAGS F> void Hashtable<T, F>::print() {
   ResourceMark rm;
 
-  for (int i = 0; i < table_size(); i++) {
-    HashtableEntry<T>* entry = bucket(i);
+  for (int i = 0; i < BasicHashtable<F>::table_size(); i++) {
+    HashtableEntry<T, F>* entry = bucket(i);
     while(entry != NULL) {
       tty->print("%d : ", i);
       entry->literal()->print();
@@ -300,10 +287,10 @@
 }
 
 
-void BasicHashtable::verify() {
+template <MEMFLAGS F> void BasicHashtable<F>::verify() {
   int count = 0;
   for (int i = 0; i < table_size(); i++) {
-    for (BasicHashtableEntry* p = bucket(i); p != NULL; p = p->next()) {
+    for (BasicHashtableEntry<F>* p = bucket(i); p != NULL; p = p->next()) {
       ++count;
     }
   }
@@ -316,7 +303,7 @@
 
 #ifdef ASSERT
 
-void BasicHashtable::verify_lookup_length(double load) {
+template <MEMFLAGS F> void BasicHashtable<F>::verify_lookup_length(double load) {
   if ((double)_lookup_length / (double)_lookup_count > load * 2.0) {
     warning("Performance bug: SystemDictionary lookup_count=%d "
             "lookup_length=%d average=%lf load=%f",
@@ -326,10 +313,22 @@
 }
 
 #endif
-
 // Explicitly instantiate these types
-template class Hashtable<constantPoolOop>;
-template class Hashtable<Symbol*>;
-template class Hashtable<klassOop>;
-template class Hashtable<oop>;
-
+template class Hashtable<constantPoolOop, mtClass>;
+template class Hashtable<Symbol*, mtSymbol>;
+template class Hashtable<klassOop, mtClass>;
+template class Hashtable<oop, mtClass>;
+#ifdef SOLARIS
+template class Hashtable<oop, mtSymbol>;
+#endif
+template class Hashtable<oopDesc*, mtSymbol>;
+template class Hashtable<Symbol*, mtClass>;
+template class HashtableEntry<Symbol*, mtSymbol>;
+template class HashtableEntry<Symbol*, mtClass>;
+template class HashtableEntry<oop, mtSymbol>;
+template class BasicHashtableEntry<mtSymbol>;
+template class BasicHashtableEntry<mtCode>;
+template class BasicHashtable<mtClass>;
+template class BasicHashtable<mtSymbol>;
+template class BasicHashtable<mtCode>;
+template class BasicHashtable<mtInternal>;
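
The instantiation list above exists because the Hashtable and BasicHashtable member definitions stay in hashtable.cpp rather than the header: every <T, MEMFLAGS> combination used anywhere in the VM must be instantiated explicitly here, or the build fails at link time. A minimal standalone illustration of that arrangement (illustrative names):

    // table.hpp: declaration only; the member definition is not visible
    // to other translation units.
    template <class T, int F>
    struct Table {
      void add(T value);
    };

    // table.cpp: definition plus explicit instantiations.  Any use of a
    // combination not listed here (say Table<char, 3>) still compiles in
    // the including file but fails to link with an undefined symbol.
    template <class T, int F>
    void Table<T, F>::add(T value) { /* ... */ }

    template struct Table<int, 1>;
    template struct Table<double, 2>;
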
--- a/src/share/vm/utilities/hashtable.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/hashtable.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -40,7 +40,7 @@
 
 
 
-class BasicHashtableEntry : public CHeapObj {
+template <MEMFLAGS F> class BasicHashtableEntry : public CHeapObj<F> {
   friend class VMStructs;
 private:
   unsigned int         _hash;           // 32-bit hash for item
@@ -52,7 +52,7 @@
   // shared entries will not change.  New entries will always be
   // unshared and since pointers are align, bit 0 will always remain 0
   // with no extra effort.
-  BasicHashtableEntry* _next;
+  BasicHashtableEntry<F>* _next;
 
   // Windows IA64 compiler requires subclasses to be able to access these
 protected:
@@ -69,19 +69,19 @@
   void set_hash(unsigned int hash)      { _hash = hash; }
   unsigned int* hash_addr()             { return &_hash; }
 
-  static BasicHashtableEntry* make_ptr(BasicHashtableEntry* p) {
+  static BasicHashtableEntry<F>* make_ptr(BasicHashtableEntry<F>* p) {
     return (BasicHashtableEntry*)((intptr_t)p & -2);
   }
 
-  BasicHashtableEntry* next() const {
+  BasicHashtableEntry<F>* next() const {
     return make_ptr(_next);
   }
 
-  void set_next(BasicHashtableEntry* next) {
+  void set_next(BasicHashtableEntry<F>* next) {
     _next = next;
   }
 
-  BasicHashtableEntry** next_addr() {
+  BasicHashtableEntry<F>** next_addr() {
     return &_next;
   }
 
@@ -90,13 +90,13 @@
   }
 
   void set_shared() {
-    _next = (BasicHashtableEntry*)((intptr_t)_next | 1);
+    _next = (BasicHashtableEntry<F>*)((intptr_t)_next | 1);
   }
 };
 
 
 
-template <class T> class HashtableEntry : public BasicHashtableEntry {
+template <class T, MEMFLAGS F> class HashtableEntry : public BasicHashtableEntry<F> {
   friend class VMStructs;
 private:
   T               _literal;          // ref to item in table.
@@ -108,20 +108,20 @@
   void set_literal(T s)               { _literal = s; }
 
   HashtableEntry* next() const {
-    return (HashtableEntry*)BasicHashtableEntry::next();
+    return (HashtableEntry*)BasicHashtableEntry<F>::next();
   }
   HashtableEntry** next_addr() {
-    return (HashtableEntry**)BasicHashtableEntry::next_addr();
+    return (HashtableEntry**)BasicHashtableEntry<F>::next_addr();
   }
 };
 
 
 
-class HashtableBucket : public CHeapObj {
+template <MEMFLAGS F> class HashtableBucket : public CHeapObj<F> {
   friend class VMStructs;
 private:
   // Instance variable
-  BasicHashtableEntry*       _entry;
+  BasicHashtableEntry<F>*       _entry;
 
 public:
   // Accessing
@@ -129,21 +129,21 @@
 
   // The following methods use order access methods to avoid race
   // conditions in multiprocessor systems.
-  BasicHashtableEntry* get_entry() const;
-  void set_entry(BasicHashtableEntry* l);
+  BasicHashtableEntry<F>* get_entry() const;
+  void set_entry(BasicHashtableEntry<F>* l);
 
   // The following method is not MT-safe and must be done under lock.
-  BasicHashtableEntry** entry_addr()  { return &_entry; }
+  BasicHashtableEntry<F>** entry_addr()  { return &_entry; }
 };
 
 
-class BasicHashtable : public CHeapObj {
+template <MEMFLAGS F> class BasicHashtable : public CHeapObj<F> {
   friend class VMStructs;
 
 public:
   BasicHashtable(int table_size, int entry_size);
   BasicHashtable(int table_size, int entry_size,
-                 HashtableBucket* buckets, int number_of_entries);
+                 HashtableBucket<F>* buckets, int number_of_entries);
 
   // Sharing support.
   void copy_buckets(char** top, char* end);
@@ -162,8 +162,8 @@
 private:
   // Instance variables
   int               _table_size;
-  HashtableBucket*  _buckets;
-  BasicHashtableEntry* _free_list;
+  HashtableBucket<F>*     _buckets;
+  BasicHashtableEntry<F>* _free_list;
   char*             _first_free_entry;
   char*             _end_block;
   int               _entry_size;
@@ -188,20 +188,20 @@
   int entry_size() const { return _entry_size; }
 
   // The following method is MT-safe and may be used with caution.
-  BasicHashtableEntry* bucket(int i);
+  BasicHashtableEntry<F>* bucket(int i);
 
   // The following method is not MT-safe and must be done under lock.
-  BasicHashtableEntry** bucket_addr(int i) { return _buckets[i].entry_addr(); }
+  BasicHashtableEntry<F>** bucket_addr(int i) { return _buckets[i].entry_addr(); }
 
   // Table entry management
-  BasicHashtableEntry* new_entry(unsigned int hashValue);
+  BasicHashtableEntry<F>* new_entry(unsigned int hashValue);
 
   // Check that the table is unbalanced
   bool check_rehash_table(int count);
 
   // Used when moving the entry to another table
   // Clean up links, but do not add to free_list
-  void unlink_entry(BasicHashtableEntry* entry) {
+  void unlink_entry(BasicHashtableEntry<F>* entry) {
     entry->set_next(NULL);
     --_number_of_entries;
   }
@@ -221,11 +221,11 @@
 
 public:
   int table_size() { return _table_size; }
-  void set_entry(int index, BasicHashtableEntry* entry);
+  void set_entry(int index, BasicHashtableEntry<F>* entry);
 
-  void add_entry(int index, BasicHashtableEntry* entry);
+  void add_entry(int index, BasicHashtableEntry<F>* entry);
 
-  void free_entry(BasicHashtableEntry* entry);
+  void free_entry(BasicHashtableEntry<F>* entry);
 
   int number_of_entries() { return _number_of_entries; }
 
@@ -233,16 +233,16 @@
 };
 
 
-template <class T> class Hashtable : public BasicHashtable {
+template <class T, MEMFLAGS F> class Hashtable : public BasicHashtable<F> {
   friend class VMStructs;
 
 public:
   Hashtable(int table_size, int entry_size)
-    : BasicHashtable(table_size, entry_size) { }
+    : BasicHashtable<F>(table_size, entry_size) { }
 
   Hashtable(int table_size, int entry_size,
-                   HashtableBucket* buckets, int number_of_entries)
-    : BasicHashtable(table_size, entry_size, buckets, number_of_entries) { }
+                   HashtableBucket<F>* buckets, int number_of_entries)
+    : BasicHashtable<F>(table_size, entry_size, buckets, number_of_entries) { }
 
   // Debugging
   void print()               PRODUCT_RETURN;
@@ -260,24 +260,24 @@
   }
 
   int index_for(Symbol* name) {
-    return hash_to_index(compute_hash(name));
+    return this->hash_to_index(compute_hash(name));
   }
 
   // Table entry management
-  HashtableEntry<T>* new_entry(unsigned int hashValue, T obj);
+  HashtableEntry<T, F>* new_entry(unsigned int hashValue, T obj);
 
   // The following method is MT-safe and may be used with caution.
-  HashtableEntry<T>* bucket(int i) {
-    return (HashtableEntry<T>*)BasicHashtable::bucket(i);
+  HashtableEntry<T, F>* bucket(int i) {
+    return (HashtableEntry<T, F>*)BasicHashtable<F>::bucket(i);
   }
 
   // The following method is not MT-safe and must be done under lock.
-  HashtableEntry<T>** bucket_addr(int i) {
-    return (HashtableEntry<T>**)BasicHashtable::bucket_addr(i);
+  HashtableEntry<T, F>** bucket_addr(int i) {
+    return (HashtableEntry<T, F>**)BasicHashtable<F>::bucket_addr(i);
   }
 
   // Function to move these elements into the new table.
-  void move_to(Hashtable<T>* new_table);
+  void move_to(Hashtable<T, F>* new_table);
   static bool use_alternate_hashcode()  { return _seed != 0; }
   static jint seed()                    { return _seed; }
 
@@ -291,15 +291,15 @@
 
 //  Versions of hashtable where two handles are used to compute the index.
 
-template <class T> class TwoOopHashtable : public Hashtable<T> {
+template <class T, MEMFLAGS F> class TwoOopHashtable : public Hashtable<T, F> {
   friend class VMStructs;
 protected:
   TwoOopHashtable(int table_size, int entry_size)
-    : Hashtable<T>(table_size, entry_size) {}
+    : Hashtable<T, F>(table_size, entry_size) {}
 
-  TwoOopHashtable(int table_size, int entry_size, HashtableBucket* t,
+  TwoOopHashtable(int table_size, int entry_size, HashtableBucket<F>* t,
                   int number_of_entries)
-    : Hashtable<T>(table_size, entry_size, t, number_of_entries) {}
+    : Hashtable<T, F>(table_size, entry_size, t, number_of_entries) {}
 
 public:
   unsigned int compute_hash(Symbol* name, Handle loader) {
--- a/src/share/vm/utilities/hashtable.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/hashtable.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -27,6 +27,7 @@
 
 #include "memory/allocation.inline.hpp"
 #include "utilities/hashtable.hpp"
+#include "utilities/dtrace.hpp"
 
 // Inline function definitions for hashtable.hpp.
 
@@ -34,18 +35,18 @@
 
 // Initialize a table.
 
-inline BasicHashtable::BasicHashtable(int table_size, int entry_size) {
+template <MEMFLAGS F> inline BasicHashtable<F>::BasicHashtable(int table_size, int entry_size) {
   // Called on startup, no locking needed
   initialize(table_size, entry_size, 0);
-  _buckets = NEW_C_HEAP_ARRAY(HashtableBucket, table_size);
+  _buckets = NEW_C_HEAP_ARRAY2(HashtableBucket<F>, table_size, F, CURRENT_PC);
   for (int index = 0; index < _table_size; index++) {
     _buckets[index].clear();
   }
 }
 
 
-inline BasicHashtable::BasicHashtable(int table_size, int entry_size,
-                                      HashtableBucket* buckets,
+template <MEMFLAGS F> inline BasicHashtable<F>::BasicHashtable(int table_size, int entry_size,
+                                      HashtableBucket<F>* buckets,
                                       int number_of_entries) {
   // Called on startup, no locking needed
   initialize(table_size, entry_size, number_of_entries);
@@ -53,7 +54,7 @@
 }
 
 
-inline void BasicHashtable::initialize(int table_size, int entry_size,
+template <MEMFLAGS F> inline void BasicHashtable<F>::initialize(int table_size, int entry_size,
                                        int number_of_entries) {
   // Called on startup, no locking needed
   _table_size = table_size;
@@ -70,12 +71,12 @@
 
 
 // The following method is MT-safe and may be used with caution.
-inline BasicHashtableEntry* BasicHashtable::bucket(int i) {
+template <MEMFLAGS F> inline BasicHashtableEntry<F>* BasicHashtable<F>::bucket(int i) {
   return _buckets[i].get_entry();
 }
 
 
-inline void HashtableBucket::set_entry(BasicHashtableEntry* l) {
+template <MEMFLAGS F> inline void HashtableBucket<F>::set_entry(BasicHashtableEntry<F>* l) {
   // Warning: Preserve store ordering.  The SystemDictionary is read
   //          without locks.  The new SystemDictionaryEntry must be
   //          complete before other threads can be allowed to see it
@@ -84,27 +85,27 @@
 }
 
 
-inline BasicHashtableEntry* HashtableBucket::get_entry() const {
+template <MEMFLAGS F> inline BasicHashtableEntry<F>* HashtableBucket<F>::get_entry() const {
   // Warning: Preserve load ordering.  The SystemDictionary is read
   //          without locks.  The new SystemDictionaryEntry must be
   //          complete before other threads can be allowed to see it
   //          via a store to _buckets[index].
-  return (BasicHashtableEntry*) OrderAccess::load_ptr_acquire(&_entry);
+  return (BasicHashtableEntry<F>*) OrderAccess::load_ptr_acquire(&_entry);
 }
 
 
-inline void BasicHashtable::set_entry(int index, BasicHashtableEntry* entry) {
+template <MEMFLAGS F> inline void BasicHashtable<F>::set_entry(int index, BasicHashtableEntry<F>* entry) {
   _buckets[index].set_entry(entry);
 }
 
 
-inline void BasicHashtable::add_entry(int index, BasicHashtableEntry* entry) {
+template <MEMFLAGS F> inline void BasicHashtable<F>::add_entry(int index, BasicHashtableEntry<F>* entry) {
   entry->set_next(bucket(index));
   _buckets[index].set_entry(entry);
   ++_number_of_entries;
 }
 
-inline void BasicHashtable::free_entry(BasicHashtableEntry* entry) {
+template <MEMFLAGS F> inline void BasicHashtable<F>::free_entry(BasicHashtableEntry<F>* entry) {
   entry->set_next(_free_list);
   _free_list = entry;
   --_number_of_entries;
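
Many hunks in this file and in hashtable.cpp add this-> (or an explicit BasicHashtable<F>:: qualifier) to calls that compiled fine before the classes were templated on MEMFLAGS. That is forced by two-phase name lookup: members of a base class that depends on a template parameter are not found by unqualified lookup. A standalone reproduction:

    template <int F>
    struct Base {
      int table_size() { return 64; }
    };

    template <class T, int F>
    struct Derived : Base<F> {
      int twice() {
        // return 2 * table_size();    // error: not looked up in Base<F>
        return 2 * this->table_size(); // OK: lookup deferred to instantiation
      }
    };

    int main() {
      Derived<int, 1> d;
      return d.twice() == 128 ? 0 : 1;
    }
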
--- a/src/share/vm/utilities/histogram.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/histogram.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -69,7 +69,7 @@
 
 Histogram::Histogram(const char* title,int estimatedCount) {
   _title = title;
-  _elements = new (ResourceObj::C_HEAP) GrowableArray<HistogramElement*>(estimatedCount,true);
+  _elements = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<HistogramElement*>(estimatedCount,true);
 }
 
 void Histogram::add_element(HistogramElement* element) {
--- a/src/share/vm/utilities/histogram.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/histogram.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -77,7 +77,7 @@
 
 #ifdef ASSERT
 
-class HistogramElement : public CHeapObj {
+class HistogramElement : public CHeapObj<mtInternal> {
  protected:
   jint _count;
   const char* _name;
@@ -91,7 +91,7 @@
   virtual int compare(HistogramElement* e1,HistogramElement* e2);
 };
 
-class Histogram : public CHeapObj {
+class Histogram : public CHeapObj<mtInternal> {
  protected:
   GrowableArray<HistogramElement*>* _elements;
   GrowableArray<HistogramElement*>* elements() { return _elements; }
--- a/src/share/vm/utilities/intHisto.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/intHisto.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -27,7 +27,7 @@
 
 IntHistogram::IntHistogram(int est, int max) : _max(max), _tot(0) {
   assert(0 <= est && est <= max, "Preconditions");
-  _elements = new (ResourceObj::C_HEAP) GrowableArray<int>(est, true);
+  _elements = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(est, true);
   guarantee(_elements != NULL, "alloc failure");
 }
 
--- a/src/share/vm/utilities/intHisto.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/intHisto.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -47,7 +47,7 @@
 // relation) to a count.
 
 
-class IntHistogram : public CHeapObj {
+class IntHistogram : public CHeapObj<mtInternal> {
  protected:
   int _max;
   int _tot;
--- a/src/share/vm/utilities/numberSeq.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/numberSeq.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -115,24 +115,6 @@
   return true;
 }
 
-NumberSeq::NumberSeq(NumberSeq *total, int n, NumberSeq **parts) {
-  guarantee(check_nums(total, n, parts), "all seq lengths should match");
-  double sum = total->sum();
-  for (int i = 0; i < n; ++i) {
-    if (parts[i] != NULL)
-      sum -= parts[i]->sum();
-  }
-
-  _num = total->num();
-  _sum = sum;
-
-  // we do not calculate these...
-  _sum_of_squares = -1.0;
-  _maximum = -1.0;
-  _davg = -1.0;
-  _dvariance = -1.0;
-}
-
 void NumberSeq::add(double val) {
   AbsSeq::add(val);
 
@@ -151,13 +133,13 @@
 
 TruncatedSeq::TruncatedSeq(int length, double alpha):
   AbsSeq(alpha), _length(length), _next(0) {
-  _sequence = NEW_C_HEAP_ARRAY(double, _length);
+  _sequence = NEW_C_HEAP_ARRAY(double, _length, mtInternal);
   for (int i = 0; i < _length; ++i)
     _sequence[i] = 0.0;
 }
 
 TruncatedSeq::~TruncatedSeq() {
-  FREE_C_HEAP_ARRAY(double, _sequence);
+  FREE_C_HEAP_ARRAY(double, _sequence, mtInternal);
 }
 
 void TruncatedSeq::add(double val) {
--- a/src/share/vm/utilities/numberSeq.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/numberSeq.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -25,6 +25,8 @@
 #ifndef SHARE_VM_UTILITIES_NUMBERSEQ_HPP
 #define SHARE_VM_UTILITIES_NUMBERSEQ_HPP
 
+#include "memory/allocation.hpp"
+
 /**
 **  This file contains a few classes that represent a number sequence,
  **  x1, x2, x3, ..., xN, and can calculate their avg, max, and sd.
@@ -40,7 +42,7 @@
 
 #define DEFAULT_ALPHA_VALUE 0.7
 
-class AbsSeq {
+class AbsSeq: public CHeapObj<mtInternal> {
 private:
   void init(double alpha);
 
@@ -93,7 +95,6 @@
 
 public:
   NumberSeq(double alpha = DEFAULT_ALPHA_VALUE);
-  NumberSeq(NumberSeq* total, int n_parts, NumberSeq** parts);
 
   virtual void add(double val);
   virtual double maximum() const { return _maximum; }
--- a/src/share/vm/utilities/ostream.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/ostream.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -237,8 +237,9 @@
   return;
 }
 
-void outputStream::indent() {
+outputStream& outputStream::indent() {
   while (_position < _indentation) sp();
+  return *this;
 }
 
 void outputStream::print_jlong(jlong value) {
@@ -251,6 +252,47 @@
   print(os::julong_format_specifier(), value);
 }
 
+/**
+ * This prints out hex data in a 'windbg' or 'xxd' form, where each line is:
+ *   <hex-address>: 8 * <hex-halfword> <ascii translation (optional)>
+ * example:
+ * 0000000: 7f44 4f46 0102 0102 0000 0000 0000 0000  .DOF............
+ * 0000010: 0000 0000 0000 0040 0000 0020 0000 0005  .......@... ....
+ * 0000020: 0000 0000 0000 0040 0000 0000 0000 015d  .......@.......]
+ * ...
+ *
+ * indent is applied to each line.  Ends with a CR.
+ */
+void outputStream::print_data(void* data, size_t len, bool with_ascii) {
+  size_t limit = (len + 15) / 16 * 16;  // round len up to a multiple of 16
+  for (size_t i = 0; i < limit; ++i) {
+    if (i % 16 == 0) {
+      indent().print("%07x:", (unsigned int)i);  // cast: %x expects unsigned int, i is size_t
+    }
+    if (i % 2 == 0) {
+      print(" ");
+    }
+    if (i < len) {
+      print("%02x", ((unsigned char*)data)[i]);
+    } else {
+      print("  ");
+    }
+    if ((i + 1) % 16 == 0) {
+      if (with_ascii) {
+        print("  ");
+        for (size_t j = 0; j < 16; ++j) {
+          size_t idx = i + j - 15;
+          if (idx < len) {
+            char c = ((char*)data)[idx];
+            print("%c", c >= 32 && c <= 126 ? c : '.');
+          }
+        }
+      }
+      print_cr("");
+    }
+  }
+}
+
 stringStream::stringStream(size_t initial_size) : outputStream() {
   buffer_length = initial_size;
   buffer        = NEW_RESOURCE_ARRAY(char, buffer_length);
@@ -384,7 +426,7 @@
   if (_file != NULL) {
     if (_need_close) fclose(_file);
     _file      = NULL;
-    FREE_C_HEAP_ARRAY(char, _file_name);
+    FREE_C_HEAP_ARRAY(char, _file_name, mtInternal);
     _file_name = NULL;
   }
 }
@@ -392,7 +434,7 @@
 rotatingFileStream::rotatingFileStream(const char* file_name) {
   _cur_file_num = 0;
   _bytes_writen = 0L;
-  _file_name = NEW_C_HEAP_ARRAY(char, strlen(file_name)+10);
+  _file_name = NEW_C_HEAP_ARRAY(char, strlen(file_name)+10, mtInternal);
   jio_snprintf(_file_name, strlen(file_name)+10, "%s.%d", file_name, _cur_file_num);
   _file = fopen(_file_name, "w");
   _need_close = true;
@@ -401,7 +443,7 @@
 rotatingFileStream::rotatingFileStream(const char* file_name, const char* opentype) {
   _cur_file_num = 0;
   _bytes_writen = 0L;
-  _file_name = NEW_C_HEAP_ARRAY(char, strlen(file_name)+10);
+  _file_name = NEW_C_HEAP_ARRAY(char, strlen(file_name)+10, mtInternal);
   jio_snprintf(_file_name, strlen(file_name)+10, "%s.%d", file_name, _cur_file_num);
   _file = fopen(_file_name, opentype);
   _need_close = true;
@@ -524,7 +566,7 @@
   }
 
   // Create big enough buffer.
-  char *buf = NEW_C_HEAP_ARRAY(char, buffer_length);
+  char *buf = NEW_C_HEAP_ARRAY(char, buffer_length, mtInternal);
 
   strcpy(buf, "");
   if (force_directory != NULL) {
@@ -549,7 +591,7 @@
   // %%% Need a MutexLocker?
   const char* log_name = LogFile != NULL ? LogFile : "hotspot.log";
   const char* try_name = make_log_name(log_name, NULL);
-  fileStream* file = new(ResourceObj::C_HEAP) fileStream(try_name);
+  fileStream* file = new(ResourceObj::C_HEAP, mtInternal) fileStream(try_name);
   if (!file->is_open()) {
     // Try again to open the file.
     char warnbuf[O_BUFLEN*2];
@@ -557,18 +599,18 @@
                  "Warning:  Cannot open log file: %s\n", try_name);
     // Note:  This feature is for maintainer use only.  No need for L10N.
     jio_print(warnbuf);
-    FREE_C_HEAP_ARRAY(char, try_name);
+    FREE_C_HEAP_ARRAY(char, try_name, mtInternal);
     try_name = make_log_name("hs_pid%p.log", os::get_temp_directory());
     jio_snprintf(warnbuf, sizeof(warnbuf),
                  "Warning:  Forcing option -XX:LogFile=%s\n", try_name);
     jio_print(warnbuf);
     delete file;
-    file = new(ResourceObj::C_HEAP) fileStream(try_name);
-    FREE_C_HEAP_ARRAY(char, try_name);
+    file = new(ResourceObj::C_HEAP, mtInternal) fileStream(try_name);
+    FREE_C_HEAP_ARRAY(char, try_name, mtInternal);
   }
   if (file->is_open()) {
     _log_file = file;
-    xmlStream* xs = new(ResourceObj::C_HEAP) xmlStream(file);
+    xmlStream* xs = new(ResourceObj::C_HEAP, mtInternal) xmlStream(file);
     _outer_xmlStream = xs;
     if (this == tty)  xtty = xs;
     // Write XML header.
@@ -815,7 +857,7 @@
 
 void ostream_init() {
   if (defaultStream::instance == NULL) {
-    defaultStream::instance = new(ResourceObj::C_HEAP) defaultStream();
+    defaultStream::instance = new(ResourceObj::C_HEAP, mtInternal) defaultStream();
     tty = defaultStream::instance;
 
     // We want to ensure that time stamps in GC logs consider time 0
@@ -833,9 +875,9 @@
   gclog_or_tty = tty; // default to tty
   if (Arguments::gc_log_filename() != NULL) {
     fileStream * gclog  = UseGCLogFileRotation ?
-                          new(ResourceObj::C_HEAP)
+                          new(ResourceObj::C_HEAP, mtInternal)
                              rotatingFileStream(Arguments::gc_log_filename()) :
-                          new(ResourceObj::C_HEAP)
+                          new(ResourceObj::C_HEAP, mtInternal)
                              fileStream(Arguments::gc_log_filename());
     if (gclog->is_open()) {
       // now we update the time stamp of the GC log to be synced up
@@ -940,7 +982,7 @@
 
 bufferedStream::bufferedStream(size_t initial_size, size_t bufmax) : outputStream() {
   buffer_length = initial_size;
-  buffer        = NEW_C_HEAP_ARRAY(char, buffer_length);
+  buffer        = NEW_C_HEAP_ARRAY(char, buffer_length, mtInternal);
   buffer_pos    = 0;
   buffer_fixed  = false;
   buffer_max    = bufmax;
@@ -971,7 +1013,7 @@
       if (end < buffer_length * 2) {
         end = buffer_length * 2;
       }
-      buffer = REALLOC_C_HEAP_ARRAY(char, buffer, end);
+      buffer = REALLOC_C_HEAP_ARRAY(char, buffer, end, mtInternal);
       buffer_length = end;
     }
   }
@@ -989,7 +1031,7 @@
 
 bufferedStream::~bufferedStream() {
   if (!buffer_fixed) {
-    FREE_C_HEAP_ARRAY(char, buffer);
+    FREE_C_HEAP_ARRAY(char, buffer, mtInternal);
   }
 }
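
// A minimal usage sketch for the print_data() added above; st, dof_data and
// dof_len are assumed to exist.  With with_ascii == true each 16-byte line
// gets the trailing ASCII column shown in the function's header comment:
//
//   st->print_data(dof_data, dof_len, true);
//   // 0000000: 7f44 4f46 0102 0102 0000 0000 0000 0000  .DOF............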
 
--- a/src/share/vm/utilities/ostream.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/ostream.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -59,9 +59,11 @@
    outputStream(int width, bool has_time_stamps);
 
    // indentation
-   void indent();
+   outputStream& indent();
    void inc() { _indentation++; };
    void dec() { _indentation--; };
+   void inc(int n) { _indentation += n; };
+   void dec(int n) { _indentation -= n; };
    int  indentation() const    { return _indentation; }
    void set_indentation(int i) { _indentation = i;    }
    void fill_to(int col);
@@ -84,6 +86,7 @@
    void print_raw(const char* str, int len)   { write(str,         len); }
    void print_raw_cr(const char* str)         { write(str, strlen(str)); cr(); }
    void print_raw_cr(const char* str, int len){ write(str,         len); cr(); }
+   void print_data(void* data, size_t len, bool with_ascii);
    void put(char ch);
    void sp(int count = 1);
    void cr();
@@ -122,6 +125,19 @@
 extern outputStream* tty;           // tty output
 extern outputStream* gclog_or_tty;  // stream for gc log if -Xloggc:<f>, or tty
 
+class streamIndentor : public StackObj {
+ private:
+  outputStream* _str;
+  int _amount;
+
+ public:
+  streamIndentor(outputStream* str, int amt = 2) : _str(str), _amount(amt) {
+    _str->inc(_amount);
+  }
+  ~streamIndentor() { _str->dec(_amount); }
+};
+
+
 // advisory locking for the shared tty stream:
 class ttyLocker: StackObj {
   friend class ttyUnlocker;
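
// A minimal sketch of the indentation helpers added above; print_details is
// hypothetical.  streamIndentor is a StackObj, so the extra indentation is
// removed automatically at end of scope, and indent() now returns the stream
// so the padding and the print can be chained on one line:
//
//   void print_details(outputStream* st) {
//     streamIndentor si(st, 2);           // _indentation += 2 for this scope
//     st->indent().print_cr("detail");    // pad to _indentation, then print
//   }                                     // destructor: _indentation -= 2
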
--- a/src/share/vm/utilities/stack.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/stack.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_UTILITIES_STACK_HPP
 #define SHARE_VM_UTILITIES_STACK_HPP
 
+#include "memory/allocation.hpp"
 #include "memory/allocation.inline.hpp"
 
 // Class Stack (below) grows and shrinks by linking together "segments" which
@@ -51,11 +52,11 @@
 // implementation in class Stack assumes that alloc() will terminate the process
 // if the allocation fails.
 
-template <class E> class StackIterator;
+template <class E, MEMFLAGS F> class StackIterator;
 
 // StackBase holds common data/methods that don't depend on the element type,
 // factored out to reduce template code duplication.
-class StackBase
+template <MEMFLAGS F> class StackBase
 {
 public:
   size_t segment_size()   const { return _seg_size; } // Elements per segment.
@@ -89,11 +90,11 @@
 #define inline
 #endif // __GNUC__
 
-template <class E>
-class Stack:  public StackBase
+template <class E, MEMFLAGS F>
+class Stack:  public StackBase<F>
 {
 public:
-  friend class StackIterator<E>;
+  friend class StackIterator<E, F>;
 
   // segment_size:    number of items per segment
  // max_cache_size:  maximum number of *segments* to cache
@@ -103,15 +104,15 @@
                size_t max_cache_size = 4, size_t max_size = 0);
   inline ~Stack() { clear(true); }
 
-  inline bool is_empty() const { return _cur_seg == NULL; }
-  inline bool is_full()  const { return _full_seg_size >= max_size(); }
+  inline bool is_empty() const { return this->_cur_seg == NULL; }
+  inline bool is_full()  const { return this->_full_seg_size >= this->max_size(); }
 
   // Performance sensitive code should use is_empty() instead of size() == 0 and
   // is_full() instead of size() == max_size().  Using a conditional here allows
   // just one var to be updated when pushing/popping elements instead of two;
   // _full_seg_size is updated only when pushing/popping segments.
   inline size_t size() const {
-    return is_empty() ? 0 : _full_seg_size + _cur_seg_size;
+    return is_empty() ? 0 : this->_full_seg_size + this->_cur_seg_size;
   }
 
   inline void push(E elem);
@@ -161,18 +162,18 @@
   E* _cache;      // Segment cache to avoid ping-ponging.
 };
 
-template <class E> class ResourceStack:  public Stack<E>, public ResourceObj
+template <class E, MEMFLAGS F> class ResourceStack:  public Stack<E, F>, public ResourceObj
 {
 public:
   // If this class becomes widely used, it may make sense to save the Thread
   // and use it when allocating segments.
-  ResourceStack(size_t segment_size = Stack<E>::default_segment_size()):
-    Stack<E>(segment_size, max_uintx)
+//  ResourceStack(size_t segment_size = Stack<E, F>::default_segment_size()):
+  ResourceStack(size_t segment_size): Stack<E, F>(segment_size, max_uintx)
     { }
 
   // Set the segment pointers to NULL so the parent dtor does not free them;
   // that must be done by the ResourceMark code.
-  ~ResourceStack() { Stack<E>::reset(true); }
+  ~ResourceStack() { Stack<E, F>::reset(true); }
 
 protected:
   virtual E*   alloc(size_t bytes);
@@ -182,13 +183,13 @@
   void clear(bool clear_cache = false);
 };
 
-template <class E>
+template <class E, MEMFLAGS F>
 class StackIterator: public StackObj
 {
 public:
-  StackIterator(Stack<E>& stack): _stack(stack) { sync(); }
+  StackIterator(Stack<E, F>& stack): _stack(stack) { sync(); }
 
-  Stack<E>& stack() const { return _stack; }
+  Stack<E, F>& stack() const { return _stack; }
 
   bool is_empty() const { return _cur_seg == NULL; }
 
@@ -198,7 +199,7 @@
   void sync(); // Sync the iterator's state to the stack's current state.
 
 private:
-  Stack<E>& _stack;
+  Stack<E, F>& _stack;
   size_t    _cur_seg_size;
   E*        _cur_seg;
   size_t    _full_seg_size;
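
// A minimal sketch of the retemplated Stack above; marking_stack and obj are
// hypothetical.  Each stack now carries a MEMFLAGS tag that is forwarded to
// NEW_C_HEAP_ARRAY / FREE_C_HEAP_ARRAY when segments are allocated and freed
// (see stack.inline.hpp below):
//
//   Stack<oop, mtGC> marking_stack;   // segment storage is charged to mtGC
//   marking_stack.push(obj);
//   oop o = marking_stack.pop();
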
--- a/src/share/vm/utilities/stack.inline.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/stack.inline.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -27,7 +27,7 @@
 
 #include "utilities/stack.hpp"
 
-StackBase::StackBase(size_t segment_size, size_t max_cache_size,
+template <MEMFLAGS F> StackBase<F>::StackBase(size_t segment_size, size_t max_cache_size,
                      size_t max_size):
   _seg_size(segment_size),
   _max_cache_size(max_cache_size),
@@ -36,7 +36,7 @@
   assert(_max_size % _seg_size == 0, "not a multiple");
 }
 
-size_t StackBase::adjust_max_size(size_t max_size, size_t seg_size)
+template <MEMFLAGS F> size_t StackBase<F>::adjust_max_size(size_t max_size, size_t seg_size)
 {
   assert(seg_size > 0, "cannot be 0");
   assert(max_size >= seg_size || max_size == 0, "max_size too small");
@@ -47,54 +47,54 @@
   return (max_size + seg_size - 1) / seg_size * seg_size;
 }
 
-template <class E>
-Stack<E>::Stack(size_t segment_size, size_t max_cache_size, size_t max_size):
-  StackBase(adjust_segment_size(segment_size), max_cache_size, max_size)
+template <class E, MEMFLAGS F>
+Stack<E, F>::Stack(size_t segment_size, size_t max_cache_size, size_t max_size):
+  StackBase<F>(adjust_segment_size(segment_size), max_cache_size, max_size)
 {
   reset(true);
 }
 
-template <class E>
-void Stack<E>::push(E item)
+template <class E, MEMFLAGS F>
+void Stack<E, F>::push(E item)
 {
   assert(!is_full(), "pushing onto a full stack");
-  if (_cur_seg_size == _seg_size) {
+  if (this->_cur_seg_size == this->_seg_size) {
     push_segment();
   }
-  _cur_seg[_cur_seg_size] = item;
-  ++_cur_seg_size;
+  this->_cur_seg[this->_cur_seg_size] = item;
+  ++this->_cur_seg_size;
 }
 
-template <class E>
-E Stack<E>::pop()
+template <class E, MEMFLAGS F>
+E Stack<E, F>::pop()
 {
   assert(!is_empty(), "popping from an empty stack");
-  if (_cur_seg_size == 1) {
-    E tmp = _cur_seg[--_cur_seg_size];
+  if (this->_cur_seg_size == 1) {
+    E tmp = _cur_seg[--this->_cur_seg_size];
     pop_segment();
     return tmp;
   }
-  return _cur_seg[--_cur_seg_size];
+  return this->_cur_seg[--this->_cur_seg_size];
 }
 
-template <class E>
-void Stack<E>::clear(bool clear_cache)
+template <class E, MEMFLAGS F>
+void Stack<E, F>::clear(bool clear_cache)
 {
   free_segments(_cur_seg);
   if (clear_cache) free_segments(_cache);
   reset(clear_cache);
 }
 
-template <class E>
-size_t Stack<E>::default_segment_size()
+template <class E, MEMFLAGS F>
+size_t Stack<E, F>::default_segment_size()
 {
   // Number of elements that fit in 4K bytes minus the size of two pointers
   // (link field and malloc header).
   return (4096 - 2 * sizeof(E*)) / sizeof(E);
 }
 
-template <class E>
-size_t Stack<E>::adjust_segment_size(size_t seg_size)
+template <class E, MEMFLAGS F>
+size_t Stack<E, F>::adjust_segment_size(size_t seg_size)
 {
   const size_t elem_sz = sizeof(E);
   const size_t ptr_sz = sizeof(E*);
@@ -105,93 +105,93 @@
   return seg_size;
 }
 
-template <class E>
-size_t Stack<E>::link_offset() const
+template <class E, MEMFLAGS F>
+size_t Stack<E, F>::link_offset() const
 {
-  return align_size_up(_seg_size * sizeof(E), sizeof(E*));
+  return align_size_up(this->_seg_size * sizeof(E), sizeof(E*));
 }
 
-template <class E>
-size_t Stack<E>::segment_bytes() const
+template <class E, MEMFLAGS F>
+size_t Stack<E, F>::segment_bytes() const
 {
   return link_offset() + sizeof(E*);
 }
 
-template <class E>
-E** Stack<E>::link_addr(E* seg) const
+template <class E, MEMFLAGS F>
+E** Stack<E, F>::link_addr(E* seg) const
 {
   return (E**) ((char*)seg + link_offset());
 }
 
-template <class E>
-E* Stack<E>::get_link(E* seg) const
+template <class E, MEMFLAGS F>
+E* Stack<E, F>::get_link(E* seg) const
 {
   return *link_addr(seg);
 }
 
-template <class E>
-E* Stack<E>::set_link(E* new_seg, E* old_seg)
+template <class E, MEMFLAGS F>
+E* Stack<E, F>::set_link(E* new_seg, E* old_seg)
 {
   *link_addr(new_seg) = old_seg;
   return new_seg;
 }
 
-template <class E>
-E* Stack<E>::alloc(size_t bytes)
+template <class E, MEMFLAGS F>
+E* Stack<E, F>::alloc(size_t bytes)
 {
-  return (E*) NEW_C_HEAP_ARRAY(char, bytes);
+  return (E*) NEW_C_HEAP_ARRAY(char, bytes, F);
 }
 
-template <class E>
-void Stack<E>::free(E* addr, size_t bytes)
+template <class E, MEMFLAGS F>
+void Stack<E, F>::free(E* addr, size_t bytes)
 {
-  FREE_C_HEAP_ARRAY(char, (char*) addr);
+  FREE_C_HEAP_ARRAY(char, (char*) addr, F);
 }
 
-template <class E>
-void Stack<E>::push_segment()
+template <class E, MEMFLAGS F>
+void Stack<E, F>::push_segment()
 {
-  assert(_cur_seg_size == _seg_size, "current segment is not full");
+  assert(this->_cur_seg_size == this->_seg_size, "current segment is not full");
   E* next;
-  if (_cache_size > 0) {
+  if (this->_cache_size > 0) {
     // Use a cached segment.
     next = _cache;
     _cache = get_link(_cache);
-    --_cache_size;
+    --this->_cache_size;
   } else {
     next = alloc(segment_bytes());
     DEBUG_ONLY(zap_segment(next, true);)
   }
   const bool at_empty_transition = is_empty();
-  _cur_seg = set_link(next, _cur_seg);
-  _cur_seg_size = 0;
-  _full_seg_size += at_empty_transition ? 0 : _seg_size;
+  this->_cur_seg = set_link(next, _cur_seg);
+  this->_cur_seg_size = 0;
+  this->_full_seg_size += at_empty_transition ? 0 : this->_seg_size;
   DEBUG_ONLY(verify(at_empty_transition);)
 }
 
-template <class E>
-void Stack<E>::pop_segment()
+template <class E, MEMFLAGS F>
+void Stack<E, F>::pop_segment()
 {
-  assert(_cur_seg_size == 0, "current segment is not empty");
+  assert(this->_cur_seg_size == 0, "current segment is not empty");
   E* const prev = get_link(_cur_seg);
-  if (_cache_size < _max_cache_size) {
+  if (this->_cache_size < this->_max_cache_size) {
     // Add the current segment to the cache.
     DEBUG_ONLY(zap_segment(_cur_seg, false);)
     _cache = set_link(_cur_seg, _cache);
-    ++_cache_size;
+    ++this->_cache_size;
   } else {
     DEBUG_ONLY(zap_segment(_cur_seg, true);)
     free(_cur_seg, segment_bytes());
   }
   const bool at_empty_transition = prev == NULL;
-  _cur_seg = prev;
-  _cur_seg_size = _seg_size;
-  _full_seg_size -= at_empty_transition ? 0 : _seg_size;
+  this->_cur_seg = prev;
+  this->_cur_seg_size = this->_seg_size;
+  this->_full_seg_size -= at_empty_transition ? 0 : this->_seg_size;
   DEBUG_ONLY(verify(at_empty_transition);)
 }
 
-template <class E>
-void Stack<E>::free_segments(E* seg)
+template <class E, MEMFLAGS F>
+void Stack<E, F>::free_segments(E* seg)
 {
   const size_t bytes = segment_bytes();
   while (seg != NULL) {
@@ -201,37 +201,37 @@
   }
 }
 
-template <class E>
-void Stack<E>::reset(bool reset_cache)
+template <class E, MEMFLAGS F>
+void Stack<E, F>::reset(bool reset_cache)
 {
-  _cur_seg_size = _seg_size; // So push() will alloc a new segment.
-  _full_seg_size = 0;
+  this->_cur_seg_size = this->_seg_size; // So push() will alloc a new segment.
+  this->_full_seg_size = 0;
   _cur_seg = NULL;
   if (reset_cache) {
-    _cache_size = 0;
+    this->_cache_size = 0;
     _cache = NULL;
   }
 }
 
 #ifdef ASSERT
-template <class E>
-void Stack<E>::verify(bool at_empty_transition) const
+template <class E, MEMFLAGS F>
+void Stack<E, F>::verify(bool at_empty_transition) const
 {
-  assert(size() <= max_size(), "stack exceeded bounds");
-  assert(cache_size() <= max_cache_size(), "cache exceeded bounds");
-  assert(_cur_seg_size <= segment_size(), "segment index exceeded bounds");
+  assert(size() <= this->max_size(), "stack exceeded bounds");
+  assert(this->cache_size() <= this->max_cache_size(), "cache exceeded bounds");
+  assert(this->_cur_seg_size <= this->segment_size(), "segment index exceeded bounds");
 
-  assert(_full_seg_size % _seg_size == 0, "not a multiple");
+  assert(this->_full_seg_size % this->_seg_size == 0, "not a multiple");
   assert(at_empty_transition || is_empty() == (size() == 0), "mismatch");
-  assert((_cache == NULL) == (cache_size() == 0), "mismatch");
+  assert((_cache == NULL) == (this->cache_size() == 0), "mismatch");
 
   if (is_empty()) {
-    assert(_cur_seg_size == segment_size(), "sanity");
+    assert(this->_cur_seg_size == this->segment_size(), "sanity");
   }
 }
 
-template <class E>
-void Stack<E>::zap_segment(E* seg, bool zap_link_field) const
+template <class E, MEMFLAGS F>
+void Stack<E, F>::zap_segment(E* seg, bool zap_link_field) const
 {
   if (!ZapStackSegments) return;
   const size_t zap_bytes = segment_bytes() - (zap_link_field ? 0 : sizeof(E*));
@@ -243,28 +243,28 @@
 }
 #endif
 
-template <class E>
-E* ResourceStack<E>::alloc(size_t bytes)
+template <class E, MEMFLAGS F>
+E* ResourceStack<E, F>::alloc(size_t bytes)
 {
   return (E*) resource_allocate_bytes(bytes);
 }
 
-template <class E>
-void ResourceStack<E>::free(E* addr, size_t bytes)
+template <class E, MEMFLAGS F>
+void ResourceStack<E, F>::free(E* addr, size_t bytes)
 {
   resource_free_bytes((char*) addr, bytes);
 }
 
-template <class E>
-void StackIterator<E>::sync()
+template <class E, MEMFLAGS F>
+void StackIterator<E, F>::sync()
 {
   _full_seg_size = _stack._full_seg_size;
   _cur_seg_size = _stack._cur_seg_size;
   _cur_seg = _stack._cur_seg;
 }
 
-template <class E>
-E* StackIterator<E>::next_addr()
+template <class E, MEMFLAGS F>
+E* StackIterator<E, F>::next_addr()
 {
   assert(!is_empty(), "no items left");
   if (_cur_seg_size == 1) {
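
// Why this file now qualifies members with this->: once StackBase became a
// template, _cur_seg_size, _seg_size, etc. are names in a dependent base
// class, and C++ two-phase name lookup does not search dependent bases for
// unqualified names.  A minimal illustration with hypothetical types:
//
//   template <int N> struct Base { int _x; };
//   template <int N> struct Derived : Base<N> {
//     int get() { return this->_x; }   // a bare "_x" would fail to compile
//   };
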
--- a/src/share/vm/utilities/taskqueue.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/taskqueue.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -132,8 +132,8 @@
 }
 #endif // TASKQUEUE_STATS
 
-template <unsigned int N>
-class TaskQueueSuper: public CHeapObj {
+template <unsigned int N, MEMFLAGS F>
+class TaskQueueSuper: public CHeapObj<F> {
 protected:
   // Internal type for indexing the queue; also used for the tag.
   typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t;
@@ -249,22 +249,27 @@
   TASKQUEUE_STATS_ONLY(TaskQueueStats stats;)
 };
 
-template<class E, unsigned int N = TASKQUEUE_SIZE>
-class GenericTaskQueue: public TaskQueueSuper<N> {
-protected:
-  typedef typename TaskQueueSuper<N>::Age Age;
-  typedef typename TaskQueueSuper<N>::idx_t idx_t;
+
 
-  using TaskQueueSuper<N>::_bottom;
-  using TaskQueueSuper<N>::_age;
-  using TaskQueueSuper<N>::increment_index;
-  using TaskQueueSuper<N>::decrement_index;
-  using TaskQueueSuper<N>::dirty_size;
+template <class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE>
+class GenericTaskQueue: public TaskQueueSuper<N, F> {
+protected:
+  typedef typename TaskQueueSuper<N, F>::Age Age;
+  typedef typename TaskQueueSuper<N, F>::idx_t idx_t;
+
+  using TaskQueueSuper<N, F>::_bottom;
+  using TaskQueueSuper<N, F>::_age;
+  using TaskQueueSuper<N, F>::increment_index;
+  using TaskQueueSuper<N, F>::decrement_index;
+  using TaskQueueSuper<N, F>::dirty_size;
 
 public:
-  using TaskQueueSuper<N>::max_elems;
-  using TaskQueueSuper<N>::size;
-  TASKQUEUE_STATS_ONLY(using TaskQueueSuper<N>::stats;)
+  using TaskQueueSuper<N, F>::max_elems;
+  using TaskQueueSuper<N, F>::size;
+
+#if TASKQUEUE_STATS
+  using TaskQueueSuper<N, F>::stats;
+#endif
 
 private:
   // Slow paths for push, pop_local.  (pop_global has no fast path.)
@@ -302,18 +307,18 @@
   volatile E* _elems;
 };
 
-template<class E, unsigned int N>
-GenericTaskQueue<E, N>::GenericTaskQueue() {
+template<class E, MEMFLAGS F, unsigned int N>
+GenericTaskQueue<E, F, N>::GenericTaskQueue() {
   assert(sizeof(Age) == sizeof(size_t), "Depends on this.");
 }
 
-template<class E, unsigned int N>
-void GenericTaskQueue<E, N>::initialize() {
-  _elems = NEW_C_HEAP_ARRAY(E, N);
+template<class E, MEMFLAGS F, unsigned int N>
+void GenericTaskQueue<E, F, N>::initialize() {
+  _elems = NEW_C_HEAP_ARRAY(E, N, F);
 }
 
-template<class E, unsigned int N>
-void GenericTaskQueue<E, N>::oops_do(OopClosure* f) {
+template<class E, MEMFLAGS F, unsigned int N>
+void GenericTaskQueue<E, F, N>::oops_do(OopClosure* f) {
   // tty->print_cr("START OopTaskQueue::oops_do");
   uint iters = size();
   uint index = _bottom;
@@ -329,8 +334,8 @@
   // tty->print_cr("END OopTaskQueue::oops_do");
 }
 
-template<class E, unsigned int N>
-bool GenericTaskQueue<E, N>::push_slow(E t, uint dirty_n_elems) {
+template<class E, MEMFLAGS F, unsigned int N>
+bool GenericTaskQueue<E, F, N>::push_slow(E t, uint dirty_n_elems) {
   if (dirty_n_elems == N - 1) {
     // Actually means 0, so do the push.
     uint localBot = _bottom;
@@ -349,8 +354,8 @@
 // whenever the queue goes empty which it will do here if this thread
 // gets the last task or in pop_global() if the queue wraps (top == 0
 // and pop_global() succeeds, see pop_global()).
-template<class E, unsigned int N>
-bool GenericTaskQueue<E, N>::pop_local_slow(uint localBot, Age oldAge) {
+template<class E, MEMFLAGS F, unsigned int N>
+bool GenericTaskQueue<E, F, N>::pop_local_slow(uint localBot, Age oldAge) {
   // This queue was observed to contain exactly one element; either this
   // thread will claim it, or a competing "pop_global".  In either case,
   // the queue will be logically empty afterwards.  Create a new Age value
@@ -382,8 +387,8 @@
   return false;
 }
 
-template<class E, unsigned int N>
-bool GenericTaskQueue<E, N>::pop_global(E& t) {
+template<class E, MEMFLAGS F, unsigned int N>
+bool GenericTaskQueue<E, F, N>::pop_global(E& t) {
   Age oldAge = _age.get();
   uint localBot = _bottom;
   uint n_elems = size(localBot, oldAge.top());
@@ -402,9 +407,9 @@
   return resAge == oldAge;
 }
 
-template<class E, unsigned int N>
-GenericTaskQueue<E, N>::~GenericTaskQueue() {
-  FREE_C_HEAP_ARRAY(E, _elems);
+template<class E, MEMFLAGS F, unsigned int N>
+GenericTaskQueue<E, F, N>::~GenericTaskQueue() {
+  FREE_C_HEAP_ARRAY(E, _elems, F);
 }
 
 // OverflowTaskQueue is a TaskQueue that also includes an overflow stack for
@@ -418,12 +423,12 @@
 // Note that size() is not hidden--it returns the number of elements in the
 // TaskQueue, and does not include the size of the overflow stack.  This
 // simplifies replacement of GenericTaskQueues with OverflowTaskQueues.
-template<class E, unsigned int N = TASKQUEUE_SIZE>
-class OverflowTaskQueue: public GenericTaskQueue<E, N>
+template<class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE>
+class OverflowTaskQueue: public GenericTaskQueue<E, F, N>
 {
 public:
-  typedef Stack<E>               overflow_t;
-  typedef GenericTaskQueue<E, N> taskqueue_t;
+  typedef Stack<E, F>               overflow_t;
+  typedef GenericTaskQueue<E, F, N> taskqueue_t;
 
   TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;)
 
@@ -445,8 +450,8 @@
   overflow_t _overflow_stack;
 };
 
-template <class E, unsigned int N>
-bool OverflowTaskQueue<E, N>::push(E t)
+template <class E, MEMFLAGS F, unsigned int N>
+bool OverflowTaskQueue<E, F, N>::push(E t)
 {
   if (!taskqueue_t::push(t)) {
     overflow_stack()->push(t);
@@ -455,15 +460,15 @@
   return true;
 }
 
-template <class E, unsigned int N>
-bool OverflowTaskQueue<E, N>::pop_overflow(E& t)
+template <class E, MEMFLAGS F, unsigned int N>
+bool OverflowTaskQueue<E, F, N>::pop_overflow(E& t)
 {
   if (overflow_empty()) return false;
   t = overflow_stack()->pop();
   return true;
 }
 
-class TaskQueueSetSuper: public CHeapObj {
+class TaskQueueSetSuper {
 protected:
   static int randomParkAndMiller(int* seed0);
 public:
@@ -471,8 +476,11 @@
   virtual bool peek() = 0;
 };
 
-template<class T>
-class GenericTaskQueueSet: public TaskQueueSetSuper {
+template <MEMFLAGS F> class TaskQueueSetSuperImpl: public CHeapObj<F>, public TaskQueueSetSuper {
+};
+
+template<class T, MEMFLAGS F>
+class GenericTaskQueueSet: public TaskQueueSetSuperImpl<F> {
 private:
   uint _n;
   T** _queues;
@@ -482,7 +490,7 @@
 
   GenericTaskQueueSet(int n) : _n(n) {
     typedef T* GenericTaskQueuePtr;
-    _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n);
+    _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n, F);
     for (int i = 0; i < n; i++) {
       _queues[i] = NULL;
     }
@@ -506,19 +514,19 @@
   bool peek();
 };
 
-template<class T> void
-GenericTaskQueueSet<T>::register_queue(uint i, T* q) {
+template<class T, MEMFLAGS F> void
+GenericTaskQueueSet<T, F>::register_queue(uint i, T* q) {
   assert(i < _n, "index out of range.");
   _queues[i] = q;
 }
 
-template<class T> T*
-GenericTaskQueueSet<T>::queue(uint i) {
+template<class T, MEMFLAGS F> T*
+GenericTaskQueueSet<T, F>::queue(uint i) {
   return _queues[i];
 }
 
-template<class T> bool
-GenericTaskQueueSet<T>::steal(uint queue_num, int* seed, E& t) {
+template<class T, MEMFLAGS F> bool
+GenericTaskQueueSet<T, F>::steal(uint queue_num, int* seed, E& t) {
   for (uint i = 0; i < 2 * _n; i++) {
     if (steal_best_of_2(queue_num, seed, t)) {
       TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(true));
@@ -529,8 +537,8 @@
   return false;
 }
 
-template<class T> bool
-GenericTaskQueueSet<T>::steal_best_of_all(uint queue_num, int* seed, E& t) {
+template<class T, MEMFLAGS F> bool
+GenericTaskQueueSet<T, F>::steal_best_of_all(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
     int best_k;
     uint best_sz = 0;
@@ -553,11 +561,11 @@
   }
 }
 
-template<class T> bool
-GenericTaskQueueSet<T>::steal_1_random(uint queue_num, int* seed, E& t) {
+template<class T, MEMFLAGS F> bool
+GenericTaskQueueSet<T, F>::steal_1_random(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
     uint k = queue_num;
-    while (k == queue_num) k = randomParkAndMiller(seed) % _n;
+    while (k == queue_num) k = TaskQueueSetSuper::randomParkAndMiller(seed) % _n;
     return _queues[k]->pop_global(t);   // steal from the randomly chosen queue k, not a fixed index
   } else if (_n == 2) {
     // Just try the other one.
@@ -569,13 +577,13 @@
   }
 }
 
-template<class T> bool
-GenericTaskQueueSet<T>::steal_best_of_2(uint queue_num, int* seed, E& t) {
+template<class T, MEMFLAGS F> bool
+GenericTaskQueueSet<T, F>::steal_best_of_2(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
     uint k1 = queue_num;
-    while (k1 == queue_num) k1 = randomParkAndMiller(seed) % _n;
+    while (k1 == queue_num) k1 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n;
     uint k2 = queue_num;
-    while (k2 == queue_num || k2 == k1) k2 = randomParkAndMiller(seed) % _n;
+    while (k2 == queue_num || k2 == k1) k2 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n;
     // Sample both and try the larger.
     uint sz1 = _queues[k1]->size();
     uint sz2 = _queues[k2]->size();
@@ -591,8 +599,8 @@
   }
 }
 
-template<class T>
-bool GenericTaskQueueSet<T>::peek() {
+template<class T, MEMFLAGS F>
+bool GenericTaskQueueSet<T, F>::peek() {
   // Try all the queues.
   for (uint j = 0; j < _n; j++) {
     if (_queues[j]->peek())
@@ -602,7 +610,7 @@
 }
 
 // When to terminate from the termination protocol.
-class TerminatorTerminator: public CHeapObj {
+class TerminatorTerminator: public CHeapObj<mtInternal> {
 public:
   virtual bool should_exit_termination() = 0;
 };
@@ -665,8 +673,8 @@
 #endif
 };
 
-template<class E, unsigned int N> inline bool
-GenericTaskQueue<E, N>::push(E t) {
+template<class E, MEMFLAGS F, unsigned int N> inline bool
+GenericTaskQueue<E, F, N>::push(E t) {
   uint localBot = _bottom;
   assert((localBot >= 0) && (localBot < N), "_bottom out of range.");
   idx_t top = _age.top();
@@ -683,8 +691,8 @@
   }
 }
 
-template<class E, unsigned int N> inline bool
-GenericTaskQueue<E, N>::pop_local(E& t) {
+template<class E, MEMFLAGS F, unsigned int N> inline bool
+GenericTaskQueue<E, F, N>::pop_local(E& t) {
   uint localBot = _bottom;
   // This value cannot be N-1.  That can only occur as a result of
   // the assignment to bottom in this method.  If it does, this method
@@ -715,8 +723,8 @@
   }
 }
 
-typedef GenericTaskQueue<oop>             OopTaskQueue;
-typedef GenericTaskQueueSet<OopTaskQueue> OopTaskQueueSet;
+typedef GenericTaskQueue<oop, mtGC>             OopTaskQueue;
+typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet;
 
 #ifdef _MSC_VER
 #pragma warning(push)
@@ -796,11 +804,11 @@
 #pragma warning(pop)
 #endif
 
-typedef OverflowTaskQueue<StarTask>           OopStarTaskQueue;
-typedef GenericTaskQueueSet<OopStarTaskQueue> OopStarTaskQueueSet;
+typedef OverflowTaskQueue<StarTask, mtClass>           OopStarTaskQueue;
+typedef GenericTaskQueueSet<OopStarTaskQueue, mtClass> OopStarTaskQueueSet;
 
-typedef OverflowTaskQueue<size_t>             RegionTaskQueue;
-typedef GenericTaskQueueSet<RegionTaskQueue>  RegionTaskQueueSet;
+typedef OverflowTaskQueue<size_t, mtInternal>             RegionTaskQueue;
+typedef GenericTaskQueueSet<RegionTaskQueue, mtInternal>  RegionTaskQueueSet;  // keep the set's tag consistent with its queues
 
 
 #endif // SHARE_VM_UTILITIES_TASKQUEUE_HPP
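
// A minimal usage sketch of the retagged task-queue types above; the worker
// count 4 is an arbitrary example.  The MEMFLAGS argument tags both the
// queue objects (via CHeapObj<F>) and their element arrays:
//
//   OopTaskQueueSet* qset = new OopTaskQueueSet(4);
//   OopTaskQueue*    q    = new OopTaskQueue();   // GenericTaskQueue<oop, mtGC>
//   q->initialize();                  // NEW_C_HEAP_ARRAY(oop, N, mtGC)
//   qset->register_queue(0, q);
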
--- a/src/share/vm/utilities/vmError.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/vmError.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "compiler/compileBroker.hpp"
 #include "gc_interface/collectedHeap.hpp"
+#include "prims/whitebox.hpp"
 #include "runtime/arguments.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/init.hpp"
@@ -32,6 +33,7 @@
 #include "runtime/thread.hpp"
 #include "runtime/vmThread.hpp"
 #include "runtime/vm_operations.hpp"
+#include "services/memTracker.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/decoder.hpp"
 #include "utilities/defaultStream.hpp"
@@ -449,7 +451,9 @@
      // VM version
      st->print_cr("#");
      JDK_Version::current().to_string(buf, sizeof(buf));
-     st->print_cr("# JRE version: %s", buf);
+     const char* runtime_name = JDK_Version::runtime_name() != NULL ?
+                                  JDK_Version::runtime_name() : "";
+     st->print_cr("# JRE version: %s (%s)", runtime_name, buf);
      st->print_cr("# Java VM: %s (%s %s %s %s)",
                    Abstract_VM_Version::vm_name(),
                    Abstract_VM_Version::vm_release(),
@@ -723,6 +727,13 @@
        st->cr();
      }
 
+  STEP(215, "(printing warning if internal testing API used)" )
+
+     if (WhiteBox::used()) {
+       st->print_cr("Unsupported internal testing APIs have been used.");
+       st->cr();
+     }
+
   STEP(220, "(printing environment variables)" )
 
      if (_verbose) {
@@ -810,6 +821,9 @@
   static bool transmit_report_done = false; // done error reporting
   static fdStream log;                  // error log
 
+  // disable NMT so error reporting itself cannot trigger further exceptions
+  MemTracker::shutdown(MemTracker::NMT_error_reporting);
+
   if (SuppressFatalErrorMessage) {
       os::abort();
   }
--- a/src/share/vm/utilities/workgroup.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/workgroup.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -77,7 +77,7 @@
                   name(),
                   total_workers());
   }
-  _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, total_workers());
+  _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, total_workers(), mtInternal);
   if (gang_workers() == NULL) {
     vm_exit_out_of_memory(0, "Cannot create GangWorker array.");
     return false;
@@ -241,6 +241,7 @@
 
 void GangWorker::initialize() {
   this->initialize_thread_local_storage();
+  this->record_stack_base_and_size();
   assert(_gang != NULL, "No gang to run in");
   os::set_priority(this, NearMaxPriority);
   if (TraceWorkGang) {
@@ -421,7 +422,7 @@
 
 SubTasksDone::SubTasksDone(uint n) :
   _n_tasks(n), _n_threads(1), _tasks(NULL) {
-  _tasks = NEW_C_HEAP_ARRAY(uint, n);
+  _tasks = NEW_C_HEAP_ARRAY(uint, n, mtInternal);
   guarantee(_tasks != NULL, "alloc failure");
   clear();
 }
@@ -476,7 +477,7 @@
 
 
 SubTasksDone::~SubTasksDone() {
-  if (_tasks != NULL) FREE_C_HEAP_ARRAY(jint, _tasks);
+  if (_tasks != NULL) FREE_C_HEAP_ARRAY(uint, _tasks, mtInternal);  // _tasks is allocated as uint above
 }
 
 // *** SequentialSubTasksDone
--- a/src/share/vm/utilities/workgroup.hpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/workgroup.hpp	Sat Oct 20 00:08:35 2012 -0700
@@ -123,7 +123,7 @@
 // Class AbstractWorkGang:
 // An abstract class representing a gang of workers.
 // You subclass this to supply an implementation of run_task().
-class AbstractWorkGang: public CHeapObj {
+class AbstractWorkGang: public CHeapObj<mtInternal> {
   // Here's the public interface to this class.
 public:
   // Constructor and destructor.
@@ -402,7 +402,7 @@
 // subtasks will be identified by integer indices, usually elements of an
 // enumeration type.
 
-class SubTasksDone : public CHeapObj {
+class SubTasksDone: public CHeapObj<mtInternal> {
   uint* _tasks;
   uint _n_tasks;
   // _n_threads is used to determine when a sub task is done.
--- a/src/share/vm/utilities/xmlstream.cpp	Wed Sep 26 21:45:41 2012 -0700
+++ b/src/share/vm/utilities/xmlstream.cpp	Sat Oct 20 00:08:35 2012 -0700
@@ -43,7 +43,7 @@
 #ifdef ASSERT
   _element_depth = 0;
   int   init_len = 100;
-  char* init_buf = NEW_C_HEAP_ARRAY(char, init_len);
+  char* init_buf = NEW_C_HEAP_ARRAY(char, init_len, mtInternal);
   _element_close_stack_low  = init_buf;
   _element_close_stack_high = init_buf + init_len;
   _element_close_stack_ptr  = init_buf + init_len - 1;
@@ -58,7 +58,7 @@
 
 #ifdef ASSERT
 xmlStream::~xmlStream() {
-  FREE_C_HEAP_ARRAY(char, _element_close_stack_low);
+  FREE_C_HEAP_ARRAY(char, _element_close_stack_low, mtInternal);
 }
 #endif
 
@@ -155,14 +155,14 @@
     int old_len = _element_close_stack_high - old_ptr;
     int new_len = old_len * 2;
     if (new_len < 100)  new_len = 100;
-    char* new_low  = NEW_C_HEAP_ARRAY(char, new_len);
+    char* new_low  = NEW_C_HEAP_ARRAY(char, new_len, mtInternal);
     char* new_high = new_low + new_len;
     char* new_ptr  = new_high - old_len;
     memcpy(new_ptr, old_ptr, old_len);
     _element_close_stack_high = new_high;
     _element_close_stack_low  = new_low;
     _element_close_stack_ptr  = new_ptr;
-    FREE_C_HEAP_ARRAY(char, old_low);
+    FREE_C_HEAP_ARRAY(char, old_low, mtInternal);
     push_ptr = new_ptr - (tag_len+1);
   }
   assert(push_ptr >= _element_close_stack_low, "in range");
--- a/test/Makefile	Wed Sep 26 21:45:41 2012 -0700
+++ b/test/Makefile	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1995, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1995, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,8 @@
 # Makefile to run various jdk tests
 #
 
+GETMIXEDPATH=echo
+
 # Get OS/ARCH specifics
 OSNAME = $(shell uname -s)
 ifeq ($(OSNAME), SunOS)
@@ -60,7 +62,14 @@
     ARCH = i586
   endif
 endif
-ifeq ($(OSNAME), Windows_NT)
+ifeq ($(PLATFORM),)
+  # detect whether we're running in MKS or Cygwin
+  ifeq ($(OSNAME), Windows_NT) # MKS
+    GETMIXEDPATH=dosname -s
+  endif
+  ifeq ($(findstring CYGWIN,$(OSNAME)), CYGWIN)
+    GETMIXEDPATH=cygpath -m -s
+  endif
   PLATFORM = windows
   SLASH_JAVA = J:
   ifeq ($(word 1, $(PROCESSOR_IDENTIFIER)),ia64)
@@ -228,6 +237,24 @@
 
 ################################################################
 
+# wbapitest (make sure the whitebox testing API classes work)
+
+wbapitest: prep $(JT_HOME) $(PRODUCT_HOME) $(JTREG)
+	$(JTREG) -a -v:fail,error               \
+          $(JTREG_KEY_OPTION)                   \
+          $(EXTRA_JTREG_OPTIONS)                \
+          -r:$(shell $(GETMIXEDPATH) "$(ABS_TEST_OUTPUT_DIR)")/JTreport    \
+          -w:$(shell $(GETMIXEDPATH) "$(ABS_TEST_OUTPUT_DIR)")/JTwork      \
+          -jdk:$(shell $(GETMIXEDPATH) "$(PRODUCT_HOME)")                  \
+          $(JAVA_OPTIONS:%=-vmoption:%)         \
+          $(shell $(GETMIXEDPATH) "$(TEST_ROOT)")/sanity                   \
+	  || $(BUNDLE_UP_FAILED)
+	$(BUNDLE_UP)
+
+PHONY_LIST += wbapitest
+
+################################################################
+
 # packtest
 
 # Expect JPRT to set JPRT_PACKTEST_HOME.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6340864/TestByteVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,1274 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6340864
+ * @summary Implement vectorization optimizations in hotspot-server
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestByteVect
+ */
+
+public class TestByteVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int ADD_INIT = 0;
+  private static final int BIT_MASK = 0xB7;
+  private static final int VALUE = 3;
+  private static final int SHIFT = 8;
+
+  public static void main(String args[]) {
+    System.out.println("Testing Byte vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    byte[] a0 = new byte[ARRLEN];
+    byte[] a1 = new byte[ARRLEN];
+    byte[] a2 = new byte[ARRLEN];
+    byte[] a3 = new byte[ARRLEN];
+    byte[] a4 = new byte[ARRLEN];
+    short[] p2 = new short[ARRLEN/2];
+      int[] p4 = new   int[ARRLEN/4];
+     long[] p8 = new  long[ARRLEN/8];
+    // Initialize
+    int gold_sum = 0;
+    for (int i=0; i<ARRLEN; i++) {
+      byte val = (byte)(ADD_INIT+i);
+      gold_sum += val;
+      a1[i] = val;
+      a2[i] = (byte)VALUE;
+      a3[i] = (byte)-VALUE;
+      a4[i] = (byte)BIT_MASK;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+      test_addc(a0, a1);
+      test_addv(a0, a1, (byte)VALUE);
+      test_adda(a0, a1, a2);
+      test_subc(a0, a1);
+      test_subv(a0, a1, (byte)VALUE);
+      test_suba(a0, a1, a2);
+      test_mulc(a0, a1);
+      test_mulv(a0, a1, (byte)VALUE);
+      test_mula(a0, a1, a2);
+      test_divc(a0, a1);
+      test_divv(a0, a1, (byte)VALUE);
+      test_diva(a0, a1, a2);
+      test_mulc_n(a0, a1);
+      test_mulv(a0, a1, (byte)-VALUE);
+      test_mula(a0, a1, a3);
+      test_divc_n(a0, a1);
+      test_divv(a0, a1, (byte)-VALUE);
+      test_diva(a0, a1, a3);
+      test_andc(a0, a1);
+      test_andv(a0, a1, (byte)BIT_MASK);
+      test_anda(a0, a1, a4);
+      test_orc(a0, a1);
+      test_orv(a0, a1, (byte)BIT_MASK);
+      test_ora(a0, a1, a4);
+      test_xorc(a0, a1);
+      test_xorv(a0, a1, (byte)BIT_MASK);
+      test_xora(a0, a1, a4);
+      test_sllc(a0, a1);
+      test_sllv(a0, a1, VALUE);
+      test_srlc(a0, a1);
+      test_srlv(a0, a1, VALUE);
+      test_srac(a0, a1);
+      test_srav(a0, a1, VALUE);
+      test_sllc_n(a0, a1);
+      test_sllv(a0, a1, -VALUE);
+      test_srlc_n(a0, a1);
+      test_srlv(a0, a1, -VALUE);
+      test_srac_n(a0, a1);
+      test_srav(a0, a1, -VALUE);
+      test_sllc_o(a0, a1);
+      test_sllv(a0, a1, SHIFT);
+      test_srlc_o(a0, a1);
+      test_srlv(a0, a1, SHIFT);
+      test_srac_o(a0, a1);
+      test_srav(a0, a1, SHIFT);
+      test_sllc_on(a0, a1);
+      test_sllv(a0, a1, -SHIFT);
+      test_srlc_on(a0, a1);
+      test_srlv(a0, a1, -SHIFT);
+      test_srac_on(a0, a1);
+      test_srav(a0, a1, -SHIFT);
+      test_pack2(p2, a1);
+      test_unpack2(a0, p2);
+      test_pack2_swap(p2, a1);
+      test_unpack2_swap(a0, p2);
+      test_pack4(p4, a1);
+      test_unpack4(a0, p4);
+      test_pack4_swap(p4, a1);
+      test_unpack4_swap(a0, p4);
+      test_pack8(p8, a1);
+      test_unpack8(a0, p8);
+      test_pack8_swap(p8, a1);
+      test_unpack8_swap(a0, p8);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      int sum = test_sum(a1);
+      if (sum != gold_sum) {
+        System.err.println("test_sum:  " + sum + " != " + gold_sum);
+        errn++;
+      }
+
+      test_addc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
+      }
+      test_addv(a0, a1, (byte)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
+      }
+      test_adda(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_adda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
+      }
+
+      test_subc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
+      }
+      test_subv(a0, a1, (byte)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
+      }
+      test_suba(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_suba: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
+      }
+
+      test_mulc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
+      }
+      test_mulv(a0, a1, (byte)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
+      }
+      test_mula(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
+      }
+
+      test_divc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
+      }
+      test_divv(a0, a1, (byte)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
+      }
+      test_diva(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
+      }
+
+      test_mulc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mulv(a0, a1, (byte)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mula(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
+      }
+
+      test_divc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_divv(a0, a1, (byte)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_diva(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
+      }
+
+      test_andc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_andv(a0, a1, (byte)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_anda(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_anda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
+      }
+
+      test_orc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_orv(a0, a1, (byte)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_ora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
+      }
+
+      test_xorc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xorv(a0, a1, (byte)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
+      }
+
+      test_sllc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
+      }
+      test_sllv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
+      }
+
+      test_srlc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
+      }
+      test_srlv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
+      }
+
+      test_srac(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
+      }
+      test_srav(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
+      }
+
+      test_sllc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
+      }
+      test_sllv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
+      }
+
+      test_srlc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
+      }
+      test_srlv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
+      }
+
+      test_srac_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
+      }
+      test_srav(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
+      }
+
+      test_sllc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
+      }
+      test_sllv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
+      }
+
+      test_srlc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
+      }
+      test_srlv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
+      }
+
+      test_srac_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
+      }
+      test_srav(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
+      }
+
+      test_sllc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
+      }
+      test_sllv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
+      }
+
+      test_srlc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+      test_srlv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+
+      test_srac_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
+      }
+      test_srav(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
+      }
+
+      test_pack2(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack2(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2: ", i, a0[i], (byte)(ADD_INIT+i));
+      }
+
+      test_pack2_swap(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2_swap: ", i, p2[i], (short)(((short)(ADD_INIT+2*i+1) & 0xFF) | ((short)(ADD_INIT+2*i) << 8)));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack2_swap(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2_swap: ", i, a0[i], (byte)(ADD_INIT+i));
+      }
+
+      test_pack4(p4, a1);
+      for (int i=0; i<ARRLEN/4; i++) {
+        errn += verify("test_pack4: ", i, p4[i],  ((int)(ADD_INIT+4*i+0) & 0xFF) |
+                                                 (((int)(ADD_INIT+4*i+1) & 0xFF) <<  8)  |
+                                                 (((int)(ADD_INIT+4*i+2) & 0xFF) << 16)  |
+                                                 (((int)(ADD_INIT+4*i+3) & 0xFF) << 24));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack4(a0, p4);
+      for (int i=0; i<(ARRLEN&(-4)); i++) {
+        errn += verify("test_unpack4: ", i, a0[i], (byte)(ADD_INIT+i));
+      }
+
+      test_pack4_swap(p4, a1);
+      for (int i=0; i<ARRLEN/4; i++) {
+        errn += verify("test_pack4_swap: ", i, p4[i],  ((int)(ADD_INIT+4*i+3) & 0xFF) |
+                                                      (((int)(ADD_INIT+4*i+2) & 0xFF) <<  8)  |
+                                                      (((int)(ADD_INIT+4*i+1) & 0xFF) << 16)  |
+                                                      (((int)(ADD_INIT+4*i+0) & 0xFF) << 24));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack4_swap(a0, p4);
+      for (int i=0; i<(ARRLEN&(-4)); i++) {
+        errn += verify("test_unpack4_swap: ", i, a0[i], (byte)(ADD_INIT+i));
+      }
+
+      test_pack8(p8, a1);
+      for (int i=0; i<ARRLEN/8; i++) {
+        errn += verify("test_pack8: ", i, p8[i],  ((long)(ADD_INIT+8*i+0) & 0xFFl) |
+                                                 (((long)(ADD_INIT+8*i+1) & 0xFFl) <<  8)  |
+                                                 (((long)(ADD_INIT+8*i+2) & 0xFFl) << 16)  |
+                                                 (((long)(ADD_INIT+8*i+3) & 0xFFl) << 24)  |
+                                                 (((long)(ADD_INIT+8*i+4) & 0xFFl) << 32)  |
+                                                 (((long)(ADD_INIT+8*i+5) & 0xFFl) << 40)  |
+                                                 (((long)(ADD_INIT+8*i+6) & 0xFFl) << 48)  |
+                                                 (((long)(ADD_INIT+8*i+7) & 0xFFl) << 56));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack8(a0, p8);
+      for (int i=0; i<(ARRLEN&(-8)); i++) {
+        errn += verify("test_unpack8: ", i, a0[i], (byte)(ADD_INIT+i));
+      }
+
+      test_pack8_swap(p8, a1);
+      for (int i=0; i<ARRLEN/8; i++) {
+        errn += verify("test_pack8_swap: ", i, p8[i],  ((long)(ADD_INIT+8*i+7) & 0xFFl) |
+                                                      (((long)(ADD_INIT+8*i+6) & 0xFFl) <<  8)  |
+                                                      (((long)(ADD_INIT+8*i+5) & 0xFFl) << 16)  |
+                                                      (((long)(ADD_INIT+8*i+4) & 0xFFl) << 24)  |
+                                                      (((long)(ADD_INIT+8*i+3) & 0xFFl) << 32)  |
+                                                      (((long)(ADD_INIT+8*i+2) & 0xFFl) << 40)  |
+                                                      (((long)(ADD_INIT+8*i+1) & 0xFFl) << 48)  |
+                                                      (((long)(ADD_INIT+8*i+0) & 0xFFl) << 56));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack8_swap(a0, p8);
+      for (int i=0; i<(ARRLEN&(-8)); i++) {
+        errn += verify("test_unpack8_swap: ", i, a0[i], (byte)(ADD_INIT+i));
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
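+    // Timing phase: run each kernel ITERS times and report the elapsed
+    // wall-clock milliseconds per kernel.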
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sum: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addv(a0, a1, (byte)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_adda(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_adda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subv(a0, a1, (byte)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_suba(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_suba: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (byte)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (byte)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (byte)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (byte)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andv(a0, a1, (byte)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_anda(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_anda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orv(a0, a1, (byte)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorv(a0, a1, (byte)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2_swap(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2_swap(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2_swap: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack4(p4, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack4: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack4(a0, p4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack4: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack4_swap(p4, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack4_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack4_swap(a0, p4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack4_swap: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack8(p8, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack8: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack8(a0, p8);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack8: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack8_swap(p8, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack8_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack8_swap(a0, p8);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack8_swap: " + (end - start));
+
+    return errn;
+  }
+
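+  // The kernels below are written as simple counted loops so the server
+  // compiler can auto-vectorize them; each operation comes in constant (c),
+  // variable (v), and array-operand (a) variants.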
+  static int test_sum(byte[] a1) {
+    int sum = 0;
+    for (int i = 0; i < a1.length; i+=1) {
+      sum += a1[i];
+    }
+    return sum;
+  }
+
+  static void test_addc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]+VALUE);
+    }
+  }
+  static void test_addv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]+b);
+    }
+  }
+  static void test_adda(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]+a2[i]);
+    }
+  }
+
+  static void test_subc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]-VALUE);
+    }
+  }
+  static void test_subv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]-b);
+    }
+  }
+  static void test_suba(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]-a2[i]);
+    }
+  }
+
+  static void test_mulc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]*VALUE);
+    }
+  }
+  static void test_mulc_n(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]*(-VALUE));
+    }
+  }
+  static void test_mulv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]*b);
+    }
+  }
+  static void test_mula(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]*a2[i]);
+    }
+  }
+
+  static void test_divc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]/VALUE);
+    }
+  }
+  static void test_divc_n(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]/(-VALUE));
+    }
+  }
+  static void test_divv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]/b);
+    }
+  }
+  static void test_diva(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]/a2[i]);
+    }
+  }
+
+  static void test_andc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]&BIT_MASK);
+    }
+  }
+  static void test_andv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]&b);
+    }
+  }
+  static void test_anda(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]&a2[i]);
+    }
+  }
+
+  static void test_orc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]|BIT_MASK);
+    }
+  }
+  static void test_orv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]|b);
+    }
+  }
+  static void test_ora(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]|a2[i]);
+    }
+  }
+
+  static void test_xorc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]^BIT_MASK);
+    }
+  }
+  static void test_xorv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]^b);
+    }
+  }
+  static void test_xora(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]^a2[i]);
+    }
+  }
+
+  static void test_sllc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]<<VALUE);
+    }
+  }
+  static void test_sllc_n(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]<<(-VALUE));
+    }
+  }
+  static void test_sllc_o(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]<<SHIFT);
+    }
+  }
+  static void test_sllc_on(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]<<(-SHIFT));
+    }
+  }
+  static void test_sllv(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]<<b);
+    }
+  }
+
+  static void test_srlc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>>VALUE);
+    }
+  }
+  static void test_srlc_n(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>>(-VALUE));
+    }
+  }
+  static void test_srlc_o(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>>SHIFT);
+    }
+  }
+  static void test_srlc_on(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>>(-SHIFT));
+    }
+  }
+  static void test_srlv(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>>b);
+    }
+  }
+
+  static void test_srac(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>VALUE);
+    }
+  }
+  static void test_srac_n(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>(-VALUE));
+    }
+  }
+  static void test_srac_o(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>SHIFT);
+    }
+  }
+  static void test_srac_on(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>(-SHIFT));
+    }
+  }
+  static void test_srav(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>b);
+    }
+  }
+
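+  // Pack adjacent byte pairs into shorts with the low byte first
+  // (little-endian); the _swap variants store the high byte first.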
+  static void test_pack2(short[] p2, byte[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      short l0 = (short)a1[i*2+0];
+      short l1 = (short)a1[i*2+1];
+      p2[i] = (short)((l1 << 8) | (l0 & 0xFF));
+    }
+  }
+  static void test_unpack2(byte[] a0, short[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      short l = p2[i];
+      a0[i*2+0] = (byte)(l & 0xFF);
+      a0[i*2+1] = (byte)(l >> 8);
+    }
+  }
+  static void test_pack2_swap(short[] p2, byte[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      short l0 = (short)a1[i*2+0];
+      short l1 = (short)a1[i*2+1];
+      p2[i] = (short)((l0 << 8) | (l1 & 0xFF));
+    }
+  }
+  static void test_unpack2_swap(byte[] a0, short[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      short l = p2[i];
+      a0[i*2+0] = (byte)(l >> 8);
+      a0[i*2+1] = (byte)(l & 0xFF);
+    }
+  }
+
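+  // The same packing widened to ints: four bytes per element, little-endian,
+  // with _swap reversing the byte order.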
+  static void test_pack4(int[] p4, byte[] a1) {
+    if (p4.length*4 > a1.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      int l0 = (int)a1[i*4+0];
+      int l1 = (int)a1[i*4+1];
+      int l2 = (int)a1[i*4+2];
+      int l3 = (int)a1[i*4+3];
+      p4[i] = (l0 & 0xFF) |
+             ((l1 & 0xFF) <<  8) |
+             ((l2 & 0xFF) << 16) |
+             ((l3 & 0xFF) << 24);
+    }
+  }
+  static void test_unpack4(byte[] a0, int[] p4) {
+    if (p4.length*4 > a0.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      int l = p4[i];
+      a0[i*4+0] = (byte)(l & 0xFF);
+      a0[i*4+1] = (byte)(l >>  8);
+      a0[i*4+2] = (byte)(l >> 16);
+      a0[i*4+3] = (byte)(l >> 24);
+    }
+  }
+  static void test_pack4_swap(int[] p4, byte[] a1) {
+    if (p4.length*4 > a1.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      int l0 = (int)a1[i*4+0];
+      int l1 = (int)a1[i*4+1];
+      int l2 = (int)a1[i*4+2];
+      int l3 = (int)a1[i*4+3];
+      p4[i] = (l3 & 0xFF) |
+             ((l2 & 0xFF) <<  8) |
+             ((l1 & 0xFF) << 16) |
+             ((l0 & 0xFF) << 24);
+    }
+  }
+  static void test_unpack4_swap(byte[] a0, int[] p4) {
+    if (p4.length*4 > a0.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      int l = p4[i];
+      a0[i*4+0] = (byte)(l >> 24);
+      a0[i*4+1] = (byte)(l >> 16);
+      a0[i*4+2] = (byte)(l >>  8);
+      a0[i*4+3] = (byte)(l & 0xFF);
+    }
+  }
+
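+  // And widened to longs: eight bytes per element, little-endian, with _swap
+  // reversing the byte order.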
+  static void test_pack8(long[] p8, byte[] a1) {
+    if (p8.length*8 > a1.length) return;
+    for (int i = 0; i < p8.length; i+=1) {
+      long l0 = (long)a1[i*8+0];
+      long l1 = (long)a1[i*8+1];
+      long l2 = (long)a1[i*8+2];
+      long l3 = (long)a1[i*8+3];
+      long l4 = (long)a1[i*8+4];
+      long l5 = (long)a1[i*8+5];
+      long l6 = (long)a1[i*8+6];
+      long l7 = (long)a1[i*8+7];
+      p8[i] = (l0 & 0xFFL) |
+             ((l1 & 0xFFL) <<  8) |
+             ((l2 & 0xFFL) << 16) |
+             ((l3 & 0xFFL) << 24) |
+             ((l4 & 0xFFL) << 32) |
+             ((l5 & 0xFFL) << 40) |
+             ((l6 & 0xFFL) << 48) |
+             ((l7 & 0xFFL) << 56);
+    }
+  }
+  static void test_unpack8(byte[] a0, long[] p8) {
+    if (p8.length*8 > a0.length) return;
+    for (int i = 0; i < p8.length; i+=1) {
+      long l = p8[i];
+      a0[i*8+0] = (byte)(l & 0xFFL);
+      a0[i*8+1] = (byte)(l >>  8);
+      a0[i*8+2] = (byte)(l >> 16);
+      a0[i*8+3] = (byte)(l >> 24);
+      a0[i*8+4] = (byte)(l >> 32);
+      a0[i*8+5] = (byte)(l >> 40);
+      a0[i*8+6] = (byte)(l >> 48);
+      a0[i*8+7] = (byte)(l >> 56);
+    }
+  }
+  static void test_pack8_swap(long[] p8, byte[] a1) {
+    if (p8.length*8 > a1.length) return;
+    for (int i = 0; i < p8.length; i+=1) {
+      long l0 = (long)a1[i*8+0];
+      long l1 = (long)a1[i*8+1];
+      long l2 = (long)a1[i*8+2];
+      long l3 = (long)a1[i*8+3];
+      long l4 = (long)a1[i*8+4];
+      long l5 = (long)a1[i*8+5];
+      long l6 = (long)a1[i*8+6];
+      long l7 = (long)a1[i*8+7];
+      p8[i] = (l7 & 0xFFL) |
+             ((l6 & 0xFFL) <<  8) |
+             ((l5 & 0xFFL) << 16) |
+             ((l4 & 0xFFL) << 24) |
+             ((l3 & 0xFFL) << 32) |
+             ((l2 & 0xFFL) << 40) |
+             ((l1 & 0xFFL) << 48) |
+             ((l0 & 0xFFL) << 56);
+    }
+  }
+  static void test_unpack8_swap(byte[] a0, long[] p8) {
+    if (p8.length*8 > a0.length) return;
+    for (int i = 0; i < p8.length; i+=1) {
+      long l = p8[i];
+      a0[i*8+0] = (byte)(l >> 56);
+      a0[i*8+1] = (byte)(l >> 48);
+      a0[i*8+2] = (byte)(l >> 40);
+      a0[i*8+3] = (byte)(l >> 32);
+      a0[i*8+4] = (byte)(l >> 24);
+      a0[i*8+5] = (byte)(l >> 16);
+      a0[i*8+6] = (byte)(l >>  8);
+      a0[i*8+7] = (byte)(l & 0xFFL);
+    }
+  }
+
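+  // The verify overloads print the mismatching element and return 1 so
+  // callers can accumulate an error count.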
+  static int verify(String text, int i, byte elem, byte val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, short elem, short val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val));
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6340864/TestDoubleVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,560 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6340864
+ * @summary Implement vectorization optimizations in hotspot-server
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestDoubleVect
+ */
+
+public class TestDoubleVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final double ADD_INIT = -7500.;
+  private static final double VALUE = 15.;
+
+  public static void main(String[] args) {
+    System.out.println("Testing Double vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    double[] a0 = new double[ARRLEN];
+    double[] a1 = new double[ARRLEN];
+    double[] a2 = new double[ARRLEN];
+    double[] a3 = new double[ARRLEN];
+    // Initialize
+    double gold_sum = 0;
+    for (int i=0; i<ARRLEN; i++) {
+      double val = ADD_INIT+(double)i;
+      gold_sum += val;
+      a1[i] = val;
+      a2[i] = VALUE;
+      a3[i] = -VALUE;
+    }
+
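+    // Warmup: exercise every kernel so it gets JIT-compiled before the
+    // verification and timing phases (-Xbatch keeps compilation deterministic).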
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+      test_addc(a0, a1);
+      test_addv(a0, a1, VALUE);
+      test_adda(a0, a1, a2);
+      test_subc(a0, a1);
+      test_subv(a0, a1, VALUE);
+      test_suba(a0, a1, a2);
+      test_mulc(a0, a1);
+      test_mulv(a0, a1, VALUE);
+      test_mula(a0, a1, a2);
+      test_divc(a0, a1);
+      test_divv(a0, a1, VALUE);
+      test_diva(a0, a1, a2);
+      test_mulc_n(a0, a1);
+      test_mulv(a0, a1, -VALUE);
+      test_mula(a0, a1, a3);
+      test_divc_n(a0, a1);
+      test_divv(a0, a1, -VALUE);
+      test_diva(a0, a1, a3);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      double sum = test_sum(a1);
+      if (sum != gold_sum) {
+        System.err.println("test_sum:  " + sum + " != " + gold_sum);
+        errn++;
+      }
+      // Overwrite the leading elements with special values: NaN, the
+      // infinities, and the extreme finite magnitudes.
+      a1[0] = Double.NaN;
+      a1[1] = Double.POSITIVE_INFINITY;
+      a1[2] = Double.NEGATIVE_INFINITY;
+      a1[3] = Double.MAX_VALUE;
+      a1[4] = Double.MIN_VALUE;
+      a1[5] = Double.MIN_NORMAL;
+
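+      // Mirror the special values into the array operands: a2 holds copies,
+      // a3 their negations.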
+      a2[6] = a1[0];
+      a2[7] = a1[1];
+      a2[8] = a1[2];
+      a2[9] = a1[3];
+      a2[10] = a1[4];
+      a2[11] = a1[5];
+
+      a3[6] = -a2[6];
+      a3[7] = -a2[7];
+      a3[8] = -a2[8];
+      a3[9] = -a2[9];
+      a3[10] = -a2[10];
+      a3[11] = -a2[11];
+
+      test_addc(a0, a1);
+      errn += verify("test_addc: ", 0, a0[0], (Double.NaN+VALUE));
+      errn += verify("test_addc: ", 1, a0[1], (Double.POSITIVE_INFINITY+VALUE));
+      errn += verify("test_addc: ", 2, a0[2], (Double.NEGATIVE_INFINITY+VALUE));
+      errn += verify("test_addc: ", 3, a0[3], (Double.MAX_VALUE+VALUE));
+      errn += verify("test_addc: ", 4, a0[4], (Double.MIN_VALUE+VALUE));
+      errn += verify("test_addc: ", 5, a0[5], (Double.MIN_NORMAL+VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_addc: ", i, a0[i], ((ADD_INIT+i)+VALUE));
+      }
+      test_addv(a0, a1, VALUE);
+      errn += verify("test_addv: ", 0, a0[0], (Double.NaN+VALUE));
+      errn += verify("test_addv: ", 1, a0[1], (Double.POSITIVE_INFINITY+VALUE));
+      errn += verify("test_addv: ", 2, a0[2], (Double.NEGATIVE_INFINITY+VALUE));
+      errn += verify("test_addv: ", 3, a0[3], (Double.MAX_VALUE+VALUE));
+      errn += verify("test_addv: ", 4, a0[4], (Double.MIN_VALUE+VALUE));
+      errn += verify("test_addv: ", 5, a0[5], (Double.MIN_NORMAL+VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_addv: ", i, a0[i], ((ADD_INIT+i)+VALUE));
+      }
+      test_adda(a0, a1, a2);
+      errn += verify("test_adda: ", 0, a0[0], (Double.NaN+VALUE));
+      errn += verify("test_adda: ", 1, a0[1], (Double.POSITIVE_INFINITY+VALUE));
+      errn += verify("test_adda: ", 2, a0[2], (Double.NEGATIVE_INFINITY+VALUE));
+      errn += verify("test_adda: ", 3, a0[3], (Double.MAX_VALUE+VALUE));
+      errn += verify("test_adda: ", 4, a0[4], (Double.MIN_VALUE+VALUE));
+      errn += verify("test_adda: ", 5, a0[5], (Double.MIN_NORMAL+VALUE));
+      errn += verify("test_adda: ", 6, a0[6], ((ADD_INIT+6)+Double.NaN));
+      errn += verify("test_adda: ", 7, a0[7], ((ADD_INIT+7)+Double.POSITIVE_INFINITY));
+      errn += verify("test_adda: ", 8, a0[8], ((ADD_INIT+8)+Double.NEGATIVE_INFINITY));
+      errn += verify("test_adda: ", 9, a0[9], ((ADD_INIT+9)+Double.MAX_VALUE));
+      errn += verify("test_adda: ", 10, a0[10], ((ADD_INIT+10)+Double.MIN_VALUE));
+      errn += verify("test_adda: ", 11, a0[11], ((ADD_INIT+11)+Double.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_adda: ", i, a0[i], ((ADD_INIT+i)+VALUE));
+      }
+
+      test_subc(a0, a1);
+      errn += verify("test_subc: ", 0, a0[0], (Double.NaN-VALUE));
+      errn += verify("test_subc: ", 1, a0[1], (Double.POSITIVE_INFINITY-VALUE));
+      errn += verify("test_subc: ", 2, a0[2], (Double.NEGATIVE_INFINITY-VALUE));
+      errn += verify("test_subc: ", 3, a0[3], (Double.MAX_VALUE-VALUE));
+      errn += verify("test_subc: ", 4, a0[4], (Double.MIN_VALUE-VALUE));
+      errn += verify("test_subc: ", 5, a0[5], (Double.MIN_NORMAL-VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_subc: ", i, a0[i], ((ADD_INIT+i)-VALUE));
+      }
+      test_subv(a0, a1, VALUE);
+      errn += verify("test_subv: ", 0, a0[0], (Double.NaN-VALUE));
+      errn += verify("test_subv: ", 1, a0[1], (Double.POSITIVE_INFINITY-VALUE));
+      errn += verify("test_subv: ", 2, a0[2], (Double.NEGATIVE_INFINITY-VALUE));
+      errn += verify("test_subv: ", 3, a0[3], (Double.MAX_VALUE-VALUE));
+      errn += verify("test_subv: ", 4, a0[4], (Double.MIN_VALUE-VALUE));
+      errn += verify("test_subv: ", 5, a0[5], (Double.MIN_NORMAL-VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_subv: ", i, a0[i], ((ADD_INIT+i)-VALUE));
+      }
+      test_suba(a0, a1, a2);
+      errn += verify("test_suba: ", 0, a0[0], (Double.NaN-VALUE));
+      errn += verify("test_suba: ", 1, a0[1], (Double.POSITIVE_INFINITY-VALUE));
+      errn += verify("test_suba: ", 2, a0[2], (Double.NEGATIVE_INFINITY-VALUE));
+      errn += verify("test_suba: ", 3, a0[3], (Double.MAX_VALUE-VALUE));
+      errn += verify("test_suba: ", 4, a0[4], (Double.MIN_VALUE-VALUE));
+      errn += verify("test_suba: ", 5, a0[5], (Double.MIN_NORMAL-VALUE));
+      errn += verify("test_suba: ", 6, a0[6], ((ADD_INIT+6)-Double.NaN));
+      errn += verify("test_suba: ", 7, a0[7], ((ADD_INIT+7)-Double.POSITIVE_INFINITY));
+      errn += verify("test_suba: ", 8, a0[8], ((ADD_INIT+8)-Double.NEGATIVE_INFINITY));
+      errn += verify("test_suba: ", 9, a0[9], ((ADD_INIT+9)-Double.MAX_VALUE));
+      errn += verify("test_suba: ", 10, a0[10], ((ADD_INIT+10)-Double.MIN_VALUE));
+      errn += verify("test_suba: ", 11, a0[11], ((ADD_INIT+11)-Double.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_suba: ", i, a0[i], ((ADD_INIT+i)-VALUE));
+      }
+
+      test_mulc(a0, a1);
+      errn += verify("test_mulc: ", 0, a0[0], (Double.NaN*VALUE));
+      errn += verify("test_mulc: ", 1, a0[1], (Double.POSITIVE_INFINITY*VALUE));
+      errn += verify("test_mulc: ", 2, a0[2], (Double.NEGATIVE_INFINITY*VALUE));
+      errn += verify("test_mulc: ", 3, a0[3], (Double.MAX_VALUE*VALUE));
+      errn += verify("test_mulc: ", 4, a0[4], (Double.MIN_VALUE*VALUE));
+      errn += verify("test_mulc: ", 5, a0[5], (Double.MIN_NORMAL*VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulc: ", i, a0[i], ((ADD_INIT+i)*VALUE));
+      }
+      test_mulv(a0, a1, VALUE);
+      errn += verify("test_mulv: ", 0, a0[0], (Double.NaN*VALUE));
+      errn += verify("test_mulv: ", 1, a0[1], (Double.POSITIVE_INFINITY*VALUE));
+      errn += verify("test_mulv: ", 2, a0[2], (Double.NEGATIVE_INFINITY*VALUE));
+      errn += verify("test_mulv: ", 3, a0[3], (Double.MAX_VALUE*VALUE));
+      errn += verify("test_mulv: ", 4, a0[4], (Double.MIN_VALUE*VALUE));
+      errn += verify("test_mulv: ", 5, a0[5], (Double.MIN_NORMAL*VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulv: ", i, a0[i], ((ADD_INIT+i)*VALUE));
+      }
+      test_mula(a0, a1, a2);
+      errn += verify("test_mula: ", 0, a0[0], (Double.NaN*VALUE));
+      errn += verify("test_mula: ", 1, a0[1], (Double.POSITIVE_INFINITY*VALUE));
+      errn += verify("test_mula: ", 2, a0[2], (Double.NEGATIVE_INFINITY*VALUE));
+      errn += verify("test_mula: ", 3, a0[3], (Double.MAX_VALUE*VALUE));
+      errn += verify("test_mula: ", 4, a0[4], (Double.MIN_VALUE*VALUE));
+      errn += verify("test_mula: ", 5, a0[5], (Double.MIN_NORMAL*VALUE));
+      errn += verify("test_mula: ", 6, a0[6], ((ADD_INIT+6)*Double.NaN));
+      errn += verify("test_mula: ", 7, a0[7], ((ADD_INIT+7)*Double.POSITIVE_INFINITY));
+      errn += verify("test_mula: ", 8, a0[8], ((ADD_INIT+8)*Double.NEGATIVE_INFINITY));
+      errn += verify("test_mula: ", 9, a0[9], ((ADD_INIT+9)*Double.MAX_VALUE));
+      errn += verify("test_mula: ", 10, a0[10], ((ADD_INIT+10)*Double.MIN_VALUE));
+      errn += verify("test_mula: ", 11, a0[11], ((ADD_INIT+11)*Double.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_mula: ", i, a0[i], ((ADD_INIT+i)*VALUE));
+      }
+
+      test_divc(a0, a1);
+      errn += verify("test_divc: ", 0, a0[0], (Double.NaN/VALUE));
+      errn += verify("test_divc: ", 1, a0[1], (Double.POSITIVE_INFINITY/VALUE));
+      errn += verify("test_divc: ", 2, a0[2], (Double.NEGATIVE_INFINITY/VALUE));
+      errn += verify("test_divc: ", 3, a0[3], (Double.MAX_VALUE/VALUE));
+      errn += verify("test_divc: ", 4, a0[4], (Double.MIN_VALUE/VALUE));
+      errn += verify("test_divc: ", 5, a0[5], (Double.MIN_NORMAL/VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divc: ", i, a0[i], ((ADD_INIT+i)/VALUE));
+      }
+      test_divv(a0, a1, VALUE);
+      errn += verify("test_divv: ", 0, a0[0], (Double.NaN/VALUE));
+      errn += verify("test_divv: ", 1, a0[1], (Double.POSITIVE_INFINITY/VALUE));
+      errn += verify("test_divv: ", 2, a0[2], (Double.NEGATIVE_INFINITY/VALUE));
+      errn += verify("test_divv: ", 3, a0[3], (Double.MAX_VALUE/VALUE));
+      errn += verify("test_divv: ", 4, a0[4], (Double.MIN_VALUE/VALUE));
+      errn += verify("test_divv: ", 5, a0[5], (Double.MIN_NORMAL/VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divv: ", i, a0[i], ((ADD_INIT+i)/VALUE));
+      }
+      test_diva(a0, a1, a2);
+      errn += verify("test_diva: ", 0, a0[0], (Double.NaN/VALUE));
+      errn += verify("test_diva: ", 1, a0[1], (Double.POSITIVE_INFINITY/VALUE));
+      errn += verify("test_diva: ", 2, a0[2], (Double.NEGATIVE_INFINITY/VALUE));
+      errn += verify("test_diva: ", 3, a0[3], (Double.MAX_VALUE/VALUE));
+      errn += verify("test_diva: ", 4, a0[4], (Double.MIN_VALUE/VALUE));
+      errn += verify("test_diva: ", 5, a0[5], (Double.MIN_NORMAL/VALUE));
+      errn += verify("test_diva: ", 6, a0[6], ((ADD_INIT+6)/Double.NaN));
+      errn += verify("test_diva: ", 7, a0[7], ((ADD_INIT+7)/Double.POSITIVE_INFINITY));
+      errn += verify("test_diva: ", 8, a0[8], ((ADD_INIT+8)/Double.NEGATIVE_INFINITY));
+      errn += verify("test_diva: ", 9, a0[9], ((ADD_INIT+9)/Double.MAX_VALUE));
+      errn += verify("test_diva: ", 10, a0[10], ((ADD_INIT+10)/Double.MIN_VALUE));
+      errn += verify("test_diva: ", 11, a0[11], ((ADD_INIT+11)/Double.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_diva: ", i, a0[i], ((ADD_INIT+i)/VALUE));
+      }
+
+      test_mulc_n(a0, a1);
+      errn += verify("test_mulc_n: ", 0, a0[0], (Double.NaN*(-VALUE)));
+      errn += verify("test_mulc_n: ", 1, a0[1], (Double.POSITIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulc_n: ", 2, a0[2], (Double.NEGATIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulc_n: ", 3, a0[3], (Double.MAX_VALUE*(-VALUE)));
+      errn += verify("test_mulc_n: ", 4, a0[4], (Double.MIN_VALUE*(-VALUE)));
+      errn += verify("test_mulc_n: ", 5, a0[5], (Double.MIN_NORMAL*(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulc_n: ", i, a0[i], ((ADD_INIT+i)*(-VALUE)));
+      }
+      test_mulv(a0, a1, -VALUE);
+      errn += verify("test_mulv_n: ", 0, a0[0], (Double.NaN*(-VALUE)));
+      errn += verify("test_mulv_n: ", 1, a0[1], (Double.POSITIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulv_n: ", 2, a0[2], (Double.NEGATIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulv_n: ", 3, a0[3], (Double.MAX_VALUE*(-VALUE)));
+      errn += verify("test_mulv_n: ", 4, a0[4], (Double.MIN_VALUE*(-VALUE)));
+      errn += verify("test_mulv_n: ", 5, a0[5], (Double.MIN_NORMAL*(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulv_n: ", i, a0[i], ((ADD_INIT+i)*(-VALUE)));
+      }
+      test_mula(a0, a1, a3);
+      errn += verify("test_mula_n: ", 0, a0[0], (Double.NaN*(-VALUE)));
+      errn += verify("test_mula_n: ", 1, a0[1], (Double.POSITIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mula_n: ", 2, a0[2], (Double.NEGATIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mula_n: ", 3, a0[3], (Double.MAX_VALUE*(-VALUE)));
+      errn += verify("test_mula_n: ", 4, a0[4], (Double.MIN_VALUE*(-VALUE)));
+      errn += verify("test_mula_n: ", 5, a0[5], (Double.MIN_NORMAL*(-VALUE)));
+      errn += verify("test_mula_n: ", 6, a0[6], ((ADD_INIT+6)*(-Double.NaN)));
+      errn += verify("test_mula_n: ", 7, a0[7], ((ADD_INIT+7)*(-Double.POSITIVE_INFINITY)));
+      errn += verify("test_mula_n: ", 8, a0[8], ((ADD_INIT+8)*(-Double.NEGATIVE_INFINITY)));
+      errn += verify("test_mula_n: ", 9, a0[9], ((ADD_INIT+9)*(-Double.MAX_VALUE)));
+      errn += verify("test_mula_n: ", 10, a0[10], ((ADD_INIT+10)*(-Double.MIN_VALUE)));
+      errn += verify("test_mula_n: ", 11, a0[11], ((ADD_INIT+11)*(-Double.MIN_NORMAL)));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_mula_n: ", i, a0[i], ((ADD_INIT+i)*(-VALUE)));
+      }
+
+      test_divc_n(a0, a1);
+      errn += verify("test_divc_n: ", 0, a0[0], (Double.NaN/(-VALUE)));
+      errn += verify("test_divc_n: ", 1, a0[1], (Double.POSITIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divc_n: ", 2, a0[2], (Double.NEGATIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divc_n: ", 3, a0[3], (Double.MAX_VALUE/(-VALUE)));
+      errn += verify("test_divc_n: ", 4, a0[4], (Double.MIN_VALUE/(-VALUE)));
+      errn += verify("test_divc_n: ", 5, a0[5], (Double.MIN_NORMAL/(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divc_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
+      }
+      test_divv(a0, a1, -VALUE);
+      errn += verify("test_divv_n: ", 0, a0[0], (Double.NaN/(-VALUE)));
+      errn += verify("test_divv_n: ", 1, a0[1], (Double.POSITIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divv_n: ", 2, a0[2], (Double.NEGATIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divv_n: ", 3, a0[3], (Double.MAX_VALUE/(-VALUE)));
+      errn += verify("test_divv_n: ", 4, a0[4], (Double.MIN_VALUE/(-VALUE)));
+      errn += verify("test_divv_n: ", 5, a0[5], (Double.MIN_NORMAL/(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divv_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
+      }
+      test_diva(a0, a1, a3);
+      errn += verify("test_diva_n: ", 0, a0[0], (Double.NaN/(-VALUE)));
+      errn += verify("test_diva_n: ", 1, a0[1], (Double.POSITIVE_INFINITY/(-VALUE)));
+      errn += verify("test_diva_n: ", 2, a0[2], (Double.NEGATIVE_INFINITY/(-VALUE)));
+      errn += verify("test_diva_n: ", 3, a0[3], (Double.MAX_VALUE/(-VALUE)));
+      errn += verify("test_diva_n: ", 4, a0[4], (Double.MIN_VALUE/(-VALUE)));
+      errn += verify("test_diva_n: ", 5, a0[5], (Double.MIN_NORMAL/(-VALUE)));
+      errn += verify("test_diva_n: ", 6, a0[6], ((ADD_INIT+6)/(-Double.NaN)));
+      errn += verify("test_diva_n: ", 7, a0[7], ((ADD_INIT+7)/(-Double.POSITIVE_INFINITY)));
+      errn += verify("test_diva_n: ", 8, a0[8], ((ADD_INIT+8)/(-Double.NEGATIVE_INFINITY)));
+      errn += verify("test_diva_n: ", 9, a0[9], ((ADD_INIT+9)/(-Double.MAX_VALUE)));
+      errn += verify("test_diva_n: ", 10, a0[10], ((ADD_INIT+10)/(-Double.MIN_VALUE)));
+      errn += verify("test_diva_n: ", 11, a0[11], ((ADD_INIT+11)/(-Double.MIN_NORMAL)));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_diva_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
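+    // Timing phase: run each kernel ITERS times and report the elapsed
+    // wall-clock milliseconds per kernel.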
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sum: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_adda(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_adda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_suba(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_suba: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva_n: " + (end - start));
+
+    return errn;
+  }
+
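+  // The double kernels mirror the integer tests; expected values are computed
+  // with the same scalar expressions, so vectorized code must preserve
+  // IEEE-754 semantics (including NaN and the infinities) element-wise.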
+  static double test_sum(double[] a1) {
+    double sum = 0;
+    for (int i = 0; i < a1.length; i+=1) {
+      sum += a1[i];
+    }
+    return sum;
+  }
+
+  static void test_addc(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]+VALUE);
+    }
+  }
+  static void test_addv(double[] a0, double[] a1, double b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]+b);
+    }
+  }
+  static void test_adda(double[] a0, double[] a1, double[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]+a2[i]);
+    }
+  }
+
+  static void test_subc(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]-VALUE);
+    }
+  }
+  static void test_subv(double[] a0, double[] a1, double b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]-b);
+    }
+  }
+  static void test_suba(double[] a0, double[] a1, double[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]-a2[i]);
+    }
+  }
+
+  static void test_mulc(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*VALUE);
+    }
+  }
+  static void test_mulc_n(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*(-VALUE));
+    }
+  }
+  static void test_mulv(double[] a0, double[] a1, double b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*b);
+    }
+  }
+  static void test_mula(double[] a0, double[] a1, double[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*a2[i]);
+    }
+  }
+
+  static void test_divc(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/VALUE);
+    }
+  }
+  static void test_divc_n(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/(-VALUE));
+    }
+  }
+  static void test_divv(double[] a0, double[] a1, double b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/b);
+    }
+  }
+  static void test_diva(double[] a0, double[] a1, double[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/a2[i]);
+    }
+  }
+
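+  // NaN compares unequal to itself under ==, so two NaN results are
+  // accepted as a match here.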
+  static int verify(String text, int i, double elem, double val) {
+    if (elem != val && !(Double.isNaN(elem) && Double.isNaN(val))) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6340864/TestFloatVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,560 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6340864
+ * @summary Implement vectorization optimizations in hotspot-server
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestFloatVect
+ */
+
+public class TestFloatVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final float ADD_INIT = -7500.f;
+  private static final float VALUE = 15.f;
+
+  public static void main(String[] args) {
+    System.out.println("Testing Float vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    float[] a0 = new float[ARRLEN];
+    float[] a1 = new float[ARRLEN];
+    float[] a2 = new float[ARRLEN];
+    float[] a3 = new float[ARRLEN];
+    // Initialize
+    float gold_sum = 0;
+    for (int i=0; i<ARRLEN; i++) {
+      float val = ADD_INIT+(float)i;
+      gold_sum += val;
+      a1[i] = val;
+      a2[i] = VALUE;
+      a3[i] = -VALUE;
+    }
+
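+    // Warmup: exercise every kernel so it gets JIT-compiled before the
+    // verification and timing phases (-Xbatch keeps compilation deterministic).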
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+      test_addc(a0, a1);
+      test_addv(a0, a1, VALUE);
+      test_adda(a0, a1, a2);
+      test_subc(a0, a1);
+      test_subv(a0, a1, VALUE);
+      test_suba(a0, a1, a2);
+      test_mulc(a0, a1);
+      test_mulv(a0, a1, VALUE);
+      test_mula(a0, a1, a2);
+      test_divc(a0, a1);
+      test_divv(a0, a1, VALUE);
+      test_diva(a0, a1, a2);
+      test_mulc_n(a0, a1);
+      test_mulv(a0, a1, -VALUE);
+      test_mula(a0, a1, a3);
+      test_divc_n(a0, a1);
+      test_divv(a0, a1, -VALUE);
+      test_diva(a0, a1, a3);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      float sum = test_sum(a1);
+      if (sum != gold_sum) {
+        System.err.println("test_sum:  " + sum + " != " + gold_sum);
+        errn++;
+      }
+      // Overwrite the leading elements with special values: NaN, the
+      // infinities, and the extreme finite magnitudes.
+      a1[0] = Float.NaN;
+      a1[1] = Float.POSITIVE_INFINITY;
+      a1[2] = Float.NEGATIVE_INFINITY;
+      a1[3] = Float.MAX_VALUE;
+      a1[4] = Float.MIN_VALUE;
+      a1[5] = Float.MIN_NORMAL;
+
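+      // Mirror the special values into the array operands: a2 holds copies,
+      // a3 their negations.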
+      a2[6] = a1[0];
+      a2[7] = a1[1];
+      a2[8] = a1[2];
+      a2[9] = a1[3];
+      a2[10] = a1[4];
+      a2[11] = a1[5];
+
+      a3[6] = -a2[6];
+      a3[7] = -a2[7];
+      a3[8] = -a2[8];
+      a3[9] = -a2[9];
+      a3[10] = -a2[10];
+      a3[11] = -a2[11];
+
+      test_addc(a0, a1);
+      errn += verify("test_addc: ", 0, a0[0], (Float.NaN+VALUE));
+      errn += verify("test_addc: ", 1, a0[1], (Float.POSITIVE_INFINITY+VALUE));
+      errn += verify("test_addc: ", 2, a0[2], (Float.NEGATIVE_INFINITY+VALUE));
+      errn += verify("test_addc: ", 3, a0[3], (Float.MAX_VALUE+VALUE));
+      errn += verify("test_addc: ", 4, a0[4], (Float.MIN_VALUE+VALUE));
+      errn += verify("test_addc: ", 5, a0[5], (Float.MIN_NORMAL+VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_addc: ", i, a0[i], ((ADD_INIT+i)+VALUE));
+      }
+      test_addv(a0, a1, VALUE);
+      errn += verify("test_addv: ", 0, a0[0], (Float.NaN+VALUE));
+      errn += verify("test_addv: ", 1, a0[1], (Float.POSITIVE_INFINITY+VALUE));
+      errn += verify("test_addv: ", 2, a0[2], (Float.NEGATIVE_INFINITY+VALUE));
+      errn += verify("test_addv: ", 3, a0[3], (Float.MAX_VALUE+VALUE));
+      errn += verify("test_addv: ", 4, a0[4], (Float.MIN_VALUE+VALUE));
+      errn += verify("test_addv: ", 5, a0[5], (Float.MIN_NORMAL+VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_addv: ", i, a0[i], ((ADD_INIT+i)+VALUE));
+      }
+      test_adda(a0, a1, a2);
+      errn += verify("test_adda: ", 0, a0[0], (Float.NaN+VALUE));
+      errn += verify("test_adda: ", 1, a0[1], (Float.POSITIVE_INFINITY+VALUE));
+      errn += verify("test_adda: ", 2, a0[2], (Float.NEGATIVE_INFINITY+VALUE));
+      errn += verify("test_adda: ", 3, a0[3], (Float.MAX_VALUE+VALUE));
+      errn += verify("test_adda: ", 4, a0[4], (Float.MIN_VALUE+VALUE));
+      errn += verify("test_adda: ", 5, a0[5], (Float.MIN_NORMAL+VALUE));
+      errn += verify("test_adda: ", 6, a0[6], ((ADD_INIT+6)+Float.NaN));
+      errn += verify("test_adda: ", 7, a0[7], ((ADD_INIT+7)+Float.POSITIVE_INFINITY));
+      errn += verify("test_adda: ", 8, a0[8], ((ADD_INIT+8)+Float.NEGATIVE_INFINITY));
+      errn += verify("test_adda: ", 9, a0[9], ((ADD_INIT+9)+Float.MAX_VALUE));
+      errn += verify("test_adda: ", 10, a0[10], ((ADD_INIT+10)+Float.MIN_VALUE));
+      errn += verify("test_adda: ", 11, a0[11], ((ADD_INIT+11)+Float.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_adda: ", i, a0[i], ((ADD_INIT+i)+VALUE));
+      }
+
+      test_subc(a0, a1);
+      errn += verify("test_subc: ", 0, a0[0], (Float.NaN-VALUE));
+      errn += verify("test_subc: ", 1, a0[1], (Float.POSITIVE_INFINITY-VALUE));
+      errn += verify("test_subc: ", 2, a0[2], (Float.NEGATIVE_INFINITY-VALUE));
+      errn += verify("test_subc: ", 3, a0[3], (Float.MAX_VALUE-VALUE));
+      errn += verify("test_subc: ", 4, a0[4], (Float.MIN_VALUE-VALUE));
+      errn += verify("test_subc: ", 5, a0[5], (Float.MIN_NORMAL-VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_subc: ", i, a0[i], ((ADD_INIT+i)-VALUE));
+      }
+      test_subv(a0, a1, VALUE);
+      errn += verify("test_subv: ", 0, a0[0], (Float.NaN-VALUE));
+      errn += verify("test_subv: ", 1, a0[1], (Float.POSITIVE_INFINITY-VALUE));
+      errn += verify("test_subv: ", 2, a0[2], (Float.NEGATIVE_INFINITY-VALUE));
+      errn += verify("test_subv: ", 3, a0[3], (Float.MAX_VALUE-VALUE));
+      errn += verify("test_subv: ", 4, a0[4], (Float.MIN_VALUE-VALUE));
+      errn += verify("test_subv: ", 5, a0[5], (Float.MIN_NORMAL-VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_subv: ", i, a0[i], ((ADD_INIT+i)-VALUE));
+      }
+      test_suba(a0, a1, a2);
+      errn += verify("test_suba: ", 0, a0[0], (Float.NaN-VALUE));
+      errn += verify("test_suba: ", 1, a0[1], (Float.POSITIVE_INFINITY-VALUE));
+      errn += verify("test_suba: ", 2, a0[2], (Float.NEGATIVE_INFINITY-VALUE));
+      errn += verify("test_suba: ", 3, a0[3], (Float.MAX_VALUE-VALUE));
+      errn += verify("test_suba: ", 4, a0[4], (Float.MIN_VALUE-VALUE));
+      errn += verify("test_suba: ", 5, a0[5], (Float.MIN_NORMAL-VALUE));
+      errn += verify("test_suba: ", 6, a0[6], ((ADD_INIT+6)-Float.NaN));
+      errn += verify("test_suba: ", 7, a0[7], ((ADD_INIT+7)-Float.POSITIVE_INFINITY));
+      errn += verify("test_suba: ", 8, a0[8], ((ADD_INIT+8)-Float.NEGATIVE_INFINITY));
+      errn += verify("test_suba: ", 9, a0[9], ((ADD_INIT+9)-Float.MAX_VALUE));
+      errn += verify("test_suba: ", 10, a0[10], ((ADD_INIT+10)-Float.MIN_VALUE));
+      errn += verify("test_suba: ", 11, a0[11], ((ADD_INIT+11)-Float.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_suba: ", i, a0[i], ((ADD_INIT+i)-VALUE));
+      }
+
+      test_mulc(a0, a1);
+      errn += verify("test_mulc: ", 0, a0[0], (Float.NaN*VALUE));
+      errn += verify("test_mulc: ", 1, a0[1], (Float.POSITIVE_INFINITY*VALUE));
+      errn += verify("test_mulc: ", 2, a0[2], (Float.NEGATIVE_INFINITY*VALUE));
+      errn += verify("test_mulc: ", 3, a0[3], (Float.MAX_VALUE*VALUE));
+      errn += verify("test_mulc: ", 4, a0[4], (Float.MIN_VALUE*VALUE));
+      errn += verify("test_mulc: ", 5, a0[5], (Float.MIN_NORMAL*VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulc: ", i, a0[i], ((ADD_INIT+i)*VALUE));
+      }
+      test_mulv(a0, a1, VALUE);
+      errn += verify("test_mulv: ", 0, a0[0], (Float.NaN*VALUE));
+      errn += verify("test_mulv: ", 1, a0[1], (Float.POSITIVE_INFINITY*VALUE));
+      errn += verify("test_mulv: ", 2, a0[2], (Float.NEGATIVE_INFINITY*VALUE));
+      errn += verify("test_mulv: ", 3, a0[3], (Float.MAX_VALUE*VALUE));
+      errn += verify("test_mulv: ", 4, a0[4], (Float.MIN_VALUE*VALUE));
+      errn += verify("test_mulv: ", 5, a0[5], (Float.MIN_NORMAL*VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulv: ", i, a0[i], ((ADD_INIT+i)*VALUE));
+      }
+      test_mula(a0, a1, a2);
+      errn += verify("test_mula: ", 0, a0[0], (Float.NaN*VALUE));
+      errn += verify("test_mula: ", 1, a0[1], (Float.POSITIVE_INFINITY*VALUE));
+      errn += verify("test_mula: ", 2, a0[2], (Float.NEGATIVE_INFINITY*VALUE));
+      errn += verify("test_mula: ", 3, a0[3], (Float.MAX_VALUE*VALUE));
+      errn += verify("test_mula: ", 4, a0[4], (Float.MIN_VALUE*VALUE));
+      errn += verify("test_mula: ", 5, a0[5], (Float.MIN_NORMAL*VALUE));
+      errn += verify("test_mula: ", 6, a0[6], ((ADD_INIT+6)*Float.NaN));
+      errn += verify("test_mula: ", 7, a0[7], ((ADD_INIT+7)*Float.POSITIVE_INFINITY));
+      errn += verify("test_mula: ", 8, a0[8], ((ADD_INIT+8)*Float.NEGATIVE_INFINITY));
+      errn += verify("test_mula: ", 9, a0[9], ((ADD_INIT+9)*Float.MAX_VALUE));
+      errn += verify("test_mula: ", 10, a0[10], ((ADD_INIT+10)*Float.MIN_VALUE));
+      errn += verify("test_mula: ", 11, a0[11], ((ADD_INIT+11)*Float.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_mula: ", i, a0[i], ((ADD_INIT+i)*VALUE));
+      }
+
+      test_divc(a0, a1);
+      errn += verify("test_divc: ", 0, a0[0], (Float.NaN/VALUE));
+      errn += verify("test_divc: ", 1, a0[1], (Float.POSITIVE_INFINITY/VALUE));
+      errn += verify("test_divc: ", 2, a0[2], (Float.NEGATIVE_INFINITY/VALUE));
+      errn += verify("test_divc: ", 3, a0[3], (Float.MAX_VALUE/VALUE));
+      errn += verify("test_divc: ", 4, a0[4], (Float.MIN_VALUE/VALUE));
+      errn += verify("test_divc: ", 5, a0[5], (Float.MIN_NORMAL/VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divc: ", i, a0[i], ((ADD_INIT+i)/VALUE));
+      }
+      test_divv(a0, a1, VALUE);
+      errn += verify("test_divv: ", 0, a0[0], (Float.NaN/VALUE));
+      errn += verify("test_divv: ", 1, a0[1], (Float.POSITIVE_INFINITY/VALUE));
+      errn += verify("test_divv: ", 2, a0[2], (Float.NEGATIVE_INFINITY/VALUE));
+      errn += verify("test_divv: ", 3, a0[3], (Float.MAX_VALUE/VALUE));
+      errn += verify("test_divv: ", 4, a0[4], (Float.MIN_VALUE/VALUE));
+      errn += verify("test_divv: ", 5, a0[5], (Float.MIN_NORMAL/VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divv: ", i, a0[i], ((ADD_INIT+i)/VALUE));
+      }
+      test_diva(a0, a1, a2);
+      errn += verify("test_diva: ", 0, a0[0], (Float.NaN/VALUE));
+      errn += verify("test_diva: ", 1, a0[1], (Float.POSITIVE_INFINITY/VALUE));
+      errn += verify("test_diva: ", 2, a0[2], (Float.NEGATIVE_INFINITY/VALUE));
+      errn += verify("test_diva: ", 3, a0[3], (Float.MAX_VALUE/VALUE));
+      errn += verify("test_diva: ", 4, a0[4], (Float.MIN_VALUE/VALUE));
+      errn += verify("test_diva: ", 5, a0[5], (Float.MIN_NORMAL/VALUE));
+      errn += verify("test_diva: ", 6, a0[6], ((ADD_INIT+6)/Float.NaN));
+      errn += verify("test_diva: ", 7, a0[7], ((ADD_INIT+7)/Float.POSITIVE_INFINITY));
+      errn += verify("test_diva: ", 8, a0[8], ((ADD_INIT+8)/Float.NEGATIVE_INFINITY));
+      errn += verify("test_diva: ", 9, a0[9], ((ADD_INIT+9)/Float.MAX_VALUE));
+      errn += verify("test_diva: ", 10, a0[10], ((ADD_INIT+10)/Float.MIN_VALUE));
+      errn += verify("test_diva: ", 11, a0[11], ((ADD_INIT+11)/Float.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_diva: ", i, a0[i], ((ADD_INIT+i)/VALUE));
+      }
+
+      test_mulc_n(a0, a1);
+      errn += verify("test_mulc_n: ", 0, a0[0], (Float.NaN*(-VALUE)));
+      errn += verify("test_mulc_n: ", 1, a0[1], (Float.POSITIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulc_n: ", 2, a0[2], (Float.NEGATIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulc_n: ", 3, a0[3], (Float.MAX_VALUE*(-VALUE)));
+      errn += verify("test_mulc_n: ", 4, a0[4], (Float.MIN_VALUE*(-VALUE)));
+      errn += verify("test_mulc_n: ", 5, a0[5], (Float.MIN_NORMAL*(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulc_n: ", i, a0[i], ((ADD_INIT+i)*(-VALUE)));
+      }
+      test_mulv(a0, a1, -VALUE);
+      errn += verify("test_mulv_n: ", 0, a0[0], (Float.NaN*(-VALUE)));
+      errn += verify("test_mulv_n: ", 1, a0[1], (Float.POSITIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulv_n: ", 2, a0[2], (Float.NEGATIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulv_n: ", 3, a0[3], (Float.MAX_VALUE*(-VALUE)));
+      errn += verify("test_mulv_n: ", 4, a0[4], (Float.MIN_VALUE*(-VALUE)));
+      errn += verify("test_mulv_n: ", 5, a0[5], (Float.MIN_NORMAL*(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulv_n: ", i, a0[i], ((ADD_INIT+i)*(-VALUE)));
+      }
+      test_mula(a0, a1, a3);
+      errn += verify("test_mula_n: ", 0, a0[0], (Float.NaN*(-VALUE)));
+      errn += verify("test_mula_n: ", 1, a0[1], (Float.POSITIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mula_n: ", 2, a0[2], (Float.NEGATIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mula_n: ", 3, a0[3], (Float.MAX_VALUE*(-VALUE)));
+      errn += verify("test_mula_n: ", 4, a0[4], (Float.MIN_VALUE*(-VALUE)));
+      errn += verify("test_mula_n: ", 5, a0[5], (Float.MIN_NORMAL*(-VALUE)));
+      errn += verify("test_mula_n: ", 6, a0[6], ((ADD_INIT+6)*(-Float.NaN)));
+      errn += verify("test_mula_n: ", 7, a0[7], ((ADD_INIT+7)*(-Float.POSITIVE_INFINITY)));
+      errn += verify("test_mula_n: ", 8, a0[8], ((ADD_INIT+8)*(-Float.NEGATIVE_INFINITY)));
+      errn += verify("test_mula_n: ", 9, a0[9], ((ADD_INIT+9)*(-Float.MAX_VALUE)));
+      errn += verify("test_mula_n: ", 10, a0[10], ((ADD_INIT+10)*(-Float.MIN_VALUE)));
+      errn += verify("test_mula_n: ", 11, a0[11], ((ADD_INIT+11)*(-Float.MIN_NORMAL)));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_mula_n: ", i, a0[i], ((ADD_INIT+i)*(-VALUE)));
+      }
+
+      test_divc_n(a0, a1);
+      errn += verify("test_divc_n: ", 0, a0[0], (Float.NaN/(-VALUE)));
+      errn += verify("test_divc_n: ", 1, a0[1], (Float.POSITIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divc_n: ", 2, a0[2], (Float.NEGATIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divc_n: ", 3, a0[3], (Float.MAX_VALUE/(-VALUE)));
+      errn += verify("test_divc_n: ", 4, a0[4], (Float.MIN_VALUE/(-VALUE)));
+      errn += verify("test_divc_n: ", 5, a0[5], (Float.MIN_NORMAL/(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divc_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
+      }
+      test_divv(a0, a1, -VALUE);
+      errn += verify("test_divv_n: ", 0, a0[0], (Float.NaN/(-VALUE)));
+      errn += verify("test_divv_n: ", 1, a0[1], (Float.POSITIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divv_n: ", 2, a0[2], (Float.NEGATIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divv_n: ", 3, a0[3], (Float.MAX_VALUE/(-VALUE)));
+      errn += verify("test_divv_n: ", 4, a0[4], (Float.MIN_VALUE/(-VALUE)));
+      errn += verify("test_divv_n: ", 5, a0[5], (Float.MIN_NORMAL/(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divv_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
+      }
+      test_diva(a0, a1, a3);
+      errn += verify("test_diva_n: ", 0, a0[0], (Float.NaN/(-VALUE)));
+      errn += verify("test_diva_n: ", 1, a0[1], (Float.POSITIVE_INFINITY/(-VALUE)));
+      errn += verify("test_diva_n: ", 2, a0[2], (Float.NEGATIVE_INFINITY/(-VALUE)));
+      errn += verify("test_diva_n: ", 3, a0[3], (Float.MAX_VALUE/(-VALUE)));
+      errn += verify("test_diva_n: ", 4, a0[4], (Float.MIN_VALUE/(-VALUE)));
+      errn += verify("test_diva_n: ", 5, a0[5], (Float.MIN_NORMAL/(-VALUE)));
+      errn += verify("test_diva_n: ", 6, a0[6], ((ADD_INIT+6)/(-Float.NaN)));
+      errn += verify("test_diva_n: ", 7, a0[7], ((ADD_INIT+7)/(-Float.POSITIVE_INFINITY)));
+      errn += verify("test_diva_n: ", 8, a0[8], ((ADD_INIT+8)/(-Float.NEGATIVE_INFINITY)));
+      errn += verify("test_diva_n: ", 9, a0[9], ((ADD_INIT+9)/(-Float.MAX_VALUE)));
+      errn += verify("test_diva_n: ", 10, a0[10], ((ADD_INIT+10)/(-Float.MIN_VALUE)));
+      errn += verify("test_diva_n: ", 11, a0[11], ((ADD_INIT+11)/(-Float.MIN_NORMAL)));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_diva_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sum: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_adda(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_adda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_suba(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_suba: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva_n: " + (end - start));
+
+    return errn;
+  }
+
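+  // Scalar reduction. Float addition is not associative, so any reordering of
+  // this loop (e.g. by vectorization) that changes the result is caught by the
+  // exact gold_sum comparison in test().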
+  static float test_sum(float[] a1) {
+    float sum = 0;
+    for (int i = 0; i < a1.length; i+=1) {
+      sum += a1[i];
+    }
+    return sum;
+  }
+
+  static void test_addc(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]+VALUE);
+    }
+  }
+  static void test_addv(float[] a0, float[] a1, float b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]+b);
+    }
+  }
+  static void test_adda(float[] a0, float[] a1, float[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]+a2[i]);
+    }
+  }
+
+  static void test_subc(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]-VALUE);
+    }
+  }
+  static void test_subv(float[] a0, float[] a1, float b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]-b);
+    }
+  }
+  static void test_suba(float[] a0, float[] a1, float[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]-a2[i]);
+    }
+  }
+
+  static void test_mulc(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*VALUE);
+    }
+  }
+  static void test_mulc_n(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*(-VALUE));
+    }
+  }
+  static void test_mulv(float[] a0, float[] a1, float b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*b);
+    }
+  }
+  static void test_mula(float[] a0, float[] a1, float[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*a2[i]);
+    }
+  }
+
+  static void test_divc(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/VALUE);
+    }
+  }
+  static void test_divc_n(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/(-VALUE));
+    }
+  }
+  static void test_divv(float[] a0, float[] a1, float b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/b);
+    }
+  }
+  static void test_diva(float[] a0, float[] a1, float[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/a2[i]);
+    }
+  }
+
+  static int verify(String text, int i, float elem, float val) {
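+    // NaN != NaN under IEEE 754 comparison, so a pair of NaNs counts as a match.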
+    if (elem != val && !(Float.isNaN(elem) && Float.isNaN(val))) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6340864/TestIntVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,1012 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6340864
+ * @summary Implement vectorization optimizations in hotspot-server
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestIntVect
+ */
+
+public class TestIntVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int ADD_INIT = Integer.MAX_VALUE-500;
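+  // ADD_INIT is close enough to Integer.MAX_VALUE that a1's values wrap around
+  // partway through the array, exercising two's-complement overflow.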
+  private static final int BIT_MASK = 0xEC80F731;
+  private static final int VALUE = 15;
+  private static final int SHIFT = 32;
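+  // SHIFT equals the int bit width. Java masks int shift distances to their low
+  // five bits (JLS 15.19), so shifting by 32 or -32 acts like shifting by 0;
+  // the _o/_on test variants exercise exactly these out-of-range shift counts.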
+
+  public static void main(String[] args) {
+    System.out.println("Testing Integer vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    int[] a0 = new int[ARRLEN];
+    int[] a1 = new int[ARRLEN];
+    int[] a2 = new int[ARRLEN];
+    int[] a3 = new int[ARRLEN];
+    int[] a4 = new int[ARRLEN];
+    long[] p2 = new long[ARRLEN/2];
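+    // Pairs of ints packed into longs by the pack/unpack tests.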
+    // Initialize
+    int gold_sum = 0;
+    for (int i=0; i<ARRLEN; i++) {
+      int val = (int)(ADD_INIT+i);
+      gold_sum += val;
+      a1[i] = val;
+      a2[i] = (int)VALUE;
+      a3[i] = (int)-VALUE;
+      a4[i] = (int)BIT_MASK;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+      test_addc(a0, a1);
+      test_addv(a0, a1, (int)VALUE);
+      test_adda(a0, a1, a2);
+      test_subc(a0, a1);
+      test_subv(a0, a1, (int)VALUE);
+      test_suba(a0, a1, a2);
+      test_mulc(a0, a1);
+      test_mulv(a0, a1, (int)VALUE);
+      test_mula(a0, a1, a2);
+      test_divc(a0, a1);
+      test_divv(a0, a1, (int)VALUE);
+      test_diva(a0, a1, a2);
+      test_mulc_n(a0, a1);
+      test_mulv(a0, a1, (int)-VALUE);
+      test_mula(a0, a1, a3);
+      test_divc_n(a0, a1);
+      test_divv(a0, a1, (int)-VALUE);
+      test_diva(a0, a1, a3);
+      test_andc(a0, a1);
+      test_andv(a0, a1, (int)BIT_MASK);
+      test_anda(a0, a1, a4);
+      test_orc(a0, a1);
+      test_orv(a0, a1, (int)BIT_MASK);
+      test_ora(a0, a1, a4);
+      test_xorc(a0, a1);
+      test_xorv(a0, a1, (int)BIT_MASK);
+      test_xora(a0, a1, a4);
+      test_sllc(a0, a1);
+      test_sllv(a0, a1, VALUE);
+      test_srlc(a0, a1);
+      test_srlv(a0, a1, VALUE);
+      test_srac(a0, a1);
+      test_srav(a0, a1, VALUE);
+      test_sllc_n(a0, a1);
+      test_sllv(a0, a1, -VALUE);
+      test_srlc_n(a0, a1);
+      test_srlv(a0, a1, -VALUE);
+      test_srac_n(a0, a1);
+      test_srav(a0, a1, -VALUE);
+      test_sllc_o(a0, a1);
+      test_sllv(a0, a1, SHIFT);
+      test_srlc_o(a0, a1);
+      test_srlv(a0, a1, SHIFT);
+      test_srac_o(a0, a1);
+      test_srav(a0, a1, SHIFT);
+      test_sllc_on(a0, a1);
+      test_sllv(a0, a1, -SHIFT);
+      test_srlc_on(a0, a1);
+      test_srlv(a0, a1, -SHIFT);
+      test_srac_on(a0, a1);
+      test_srav(a0, a1, -SHIFT);
+      test_pack2(p2, a1);
+      test_unpack2(a0, p2);
+      test_pack2_swap(p2, a1);
+      test_unpack2_swap(a0, p2);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      int sum = test_sum(a1);
+      if (sum != gold_sum) {
+        System.err.println("test_sum:  " + sum + " != " + gold_sum);
+        errn++;
+      }
+
+      test_addc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addc: ", i, a0[i], (int)((int)(ADD_INIT+i)+VALUE));
+      }
+      test_addv(a0, a1, (int)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addv: ", i, a0[i], (int)((int)(ADD_INIT+i)+VALUE));
+      }
+      test_adda(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_adda: ", i, a0[i], (int)((int)(ADD_INIT+i)+VALUE));
+      }
+
+      test_subc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subc: ", i, a0[i], (int)((int)(ADD_INIT+i)-VALUE));
+      }
+      test_subv(a0, a1, (int)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subv: ", i, a0[i], (int)((int)(ADD_INIT+i)-VALUE));
+      }
+      test_suba(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_suba: ", i, a0[i], (int)((int)(ADD_INIT+i)-VALUE));
+      }
+
+      test_mulc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc: ", i, a0[i], (int)((int)(ADD_INIT+i)*VALUE));
+      }
+      test_mulv(a0, a1, (int)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv: ", i, a0[i], (int)((int)(ADD_INIT+i)*VALUE));
+      }
+      test_mula(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula: ", i, a0[i], (int)((int)(ADD_INIT+i)*VALUE));
+      }
+
+      test_divc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc: ", i, a0[i], (int)((int)(ADD_INIT+i)/VALUE));
+      }
+      test_divv(a0, a1, (int)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv: ", i, a0[i], (int)((int)(ADD_INIT+i)/VALUE));
+      }
+      test_diva(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva: ", i, a0[i], (int)((int)(ADD_INIT+i)/VALUE));
+      }
+
+      test_mulc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc_n: ", i, a0[i], (int)((int)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mulv(a0, a1, (int)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv_n: ", i, a0[i], (int)((int)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mula(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula_n: ", i, a0[i], (int)((int)(ADD_INIT+i)*(-VALUE)));
+      }
+
+      test_divc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc_n: ", i, a0[i], (int)((int)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_divv(a0, a1, (int)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv_n: ", i, a0[i], (int)((int)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_diva(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva_n: ", i, a0[i], (int)((int)(ADD_INIT+i)/(-VALUE)));
+      }
+
+      test_andc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andc: ", i, a0[i], (int)((int)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_andv(a0, a1, (int)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andv: ", i, a0[i], (int)((int)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_anda(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_anda: ", i, a0[i], (int)((int)(ADD_INIT+i)&BIT_MASK));
+      }
+
+      test_orc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orc: ", i, a0[i], (int)((int)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_orv(a0, a1, (int)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orv: ", i, a0[i], (int)((int)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_ora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ora: ", i, a0[i], (int)((int)(ADD_INIT+i)|BIT_MASK));
+      }
+
+      test_xorc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorc: ", i, a0[i], (int)((int)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xorv(a0, a1, (int)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorv: ", i, a0[i], (int)((int)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xora: ", i, a0[i], (int)((int)(ADD_INIT+i)^BIT_MASK));
+      }
+
+      test_sllc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc: ", i, a0[i], (int)((int)(ADD_INIT+i)<<VALUE));
+      }
+      test_sllv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv: ", i, a0[i], (int)((int)(ADD_INIT+i)<<VALUE));
+      }
+
+      test_srlc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>VALUE));
+      }
+      test_srlv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>VALUE));
+      }
+
+      test_srac(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac: ", i, a0[i], (int)((int)(ADD_INIT+i)>>VALUE));
+      }
+      test_srav(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav: ", i, a0[i], (int)((int)(ADD_INIT+i)>>VALUE));
+      }
+
+      test_sllc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_n: ", i, a0[i], (int)((int)(ADD_INIT+i)<<(-VALUE)));
+      }
+      test_sllv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_n: ", i, a0[i], (int)((int)(ADD_INIT+i)<<(-VALUE)));
+      }
+
+      test_srlc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_n: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>(-VALUE)));
+      }
+      test_srlv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_n: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>(-VALUE)));
+      }
+
+      test_srac_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_n: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-VALUE)));
+      }
+      test_srav(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_n: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-VALUE)));
+      }
+
+      test_sllc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_o: ", i, a0[i], (int)((int)(ADD_INIT+i)<<SHIFT));
+      }
+      test_sllv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_o: ", i, a0[i], (int)((int)(ADD_INIT+i)<<SHIFT));
+      }
+
+      test_srlc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_o: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>SHIFT));
+      }
+      test_srlv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_o: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>SHIFT));
+      }
+
+      test_srac_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_o: ", i, a0[i], (int)((int)(ADD_INIT+i)>>SHIFT));
+      }
+      test_srav(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_o: ", i, a0[i], (int)((int)(ADD_INIT+i)>>SHIFT));
+      }
+
+      test_sllc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_on: ", i, a0[i], (int)((int)(ADD_INIT+i)<<(-SHIFT)));
+      }
+      test_sllv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_on: ", i, a0[i], (int)((int)(ADD_INIT+i)<<(-SHIFT)));
+      }
+
+      test_srlc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+      test_srlv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+
+      test_srac_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-SHIFT)));
+      }
+      test_srav(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-SHIFT)));
+      }
+
+      test_pack2(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2: ", i, p2[i], ((long)(ADD_INIT+2*i) & 0xFFFFFFFFl) | ((long)(ADD_INIT+2*i+1) << 32));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack2(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2: ", i, a0[i], (ADD_INIT+i));
+      }
+
+      test_pack2_swap(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2_swap: ", i, p2[i], ((long)(ADD_INIT+2*i+1) & 0xFFFFFFFFl) | ((long)(ADD_INIT+2*i) << 32));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack2_swap(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2_swap: ", i, a0[i], (ADD_INIT+i));
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sum: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addv(a0, a1, (int)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_adda(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_adda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subv(a0, a1, (int)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_suba(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_suba: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (int)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (int)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (int)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (int)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andv(a0, a1, (int)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_anda(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_anda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orv(a0, a1, (int)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorv(a0, a1, (int)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2_swap(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2_swap(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2_swap: " + (end - start));
+
+    return errn;
+  }
+
+  static int test_sum(int[] a1) {
+    int sum = 0;
+    for (int i = 0; i < a1.length; i+=1) {
+      sum += a1[i];
+    }
+    return sum;
+  }
+
+  static void test_addc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]+VALUE);
+    }
+  }
+  static void test_addv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]+b);
+    }
+  }
+  static void test_adda(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]+a2[i]);
+    }
+  }
+
+  static void test_subc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]-VALUE);
+    }
+  }
+  static void test_subv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]-b);
+    }
+  }
+  static void test_suba(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]-a2[i]);
+    }
+  }
+
+  static void test_mulc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]*VALUE);
+    }
+  }
+  static void test_mulc_n(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]*(-VALUE));
+    }
+  }
+  static void test_mulv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]*b);
+    }
+  }
+  static void test_mula(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]*a2[i]);
+    }
+  }
+
+  static void test_divc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]/VALUE);
+    }
+  }
+  static void test_divc_n(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]/(-VALUE));
+    }
+  }
+  static void test_divv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]/b);
+    }
+  }
+  static void test_diva(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]/a2[i]);
+    }
+  }
+
+  static void test_andc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]&BIT_MASK);
+    }
+  }
+  static void test_andv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]&b);
+    }
+  }
+  static void test_anda(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]&a2[i]);
+    }
+  }
+
+  static void test_orc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]|BIT_MASK);
+    }
+  }
+  static void test_orv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]|b);
+    }
+  }
+  static void test_ora(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]|a2[i]);
+    }
+  }
+
+  static void test_xorc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]^BIT_MASK);
+    }
+  }
+  static void test_xorv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]^b);
+    }
+  }
+  static void test_xora(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]^a2[i]);
+    }
+  }
+
+  static void test_sllc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]<<VALUE);
+    }
+  }
+  static void test_sllc_n(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]<<(-VALUE));
+    }
+  }
+  static void test_sllc_o(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]<<SHIFT);
+    }
+  }
+  static void test_sllc_on(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]<<(-SHIFT));
+    }
+  }
+  static void test_sllv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]<<b);
+    }
+  }
+
+  static void test_srlc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>>VALUE);
+    }
+  }
+  static void test_srlc_n(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>>(-VALUE));
+    }
+  }
+  static void test_srlc_o(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>>SHIFT);
+    }
+  }
+  static void test_srlc_on(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>>(-SHIFT));
+    }
+  }
+  static void test_srlv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>>b);
+    }
+  }
+
+  static void test_srac(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>VALUE);
+    }
+  }
+  static void test_srac_n(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>(-VALUE));
+    }
+  }
+  static void test_srac_o(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>SHIFT);
+    }
+  }
+  static void test_srac_on(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>(-SHIFT));
+    }
+  }
+  static void test_srav(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>b);
+    }
+  }
+
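+  // Pack each pair of ints into one long: a1[2*i] in the low 32 bits,
+  // a1[2*i+1] in the high 32 bits.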
+  static void test_pack2(long[] p2, int[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      long l0 = (long)a1[i*2+0];
+      long l1 = (long)a1[i*2+1];
+      p2[i] = (l1 << 32) | (l0 & 0xFFFFFFFFL);
+    }
+  }
+  static void test_unpack2(int[] a0, long[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      long l = p2[i];
+      a0[i*2+0] = (int)(l & 0xFFFFFFFFL);
+      a0[i*2+1] = (int)(l >> 32);
+    }
+  }
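+  // The _swap variants use the opposite word order: a1[2*i] in the high 32 bits.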
+  static void test_pack2_swap(long[] p2, int[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      long l0 = (long)a1[i*2+0];
+      long l1 = (long)a1[i*2+1];
+      p2[i] = (l0 << 32) | (l1 & 0xFFFFFFFFL);
+    }
+  }
+  static void test_unpack2_swap(int[] a0, long[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      long l = p2[i];
+      a0[i*2+0] = (int)(l >> 32);
+      a0[i*2+1] = (int)(l & 0xFFFFFFFFL);
+    }
+  }
+
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6340864/TestLongVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,917 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6340864
+ * @summary Implement vectorization optimizations in hotspot-server
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestLongVect
+ */
+
+public class TestLongVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final long ADD_INIT = Long.MAX_VALUE-500;
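+  // ADD_INIT is close enough to Long.MAX_VALUE that a1's values wrap around
+  // partway through the array, exercising two's-complement overflow.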
+  private static final long BIT_MASK = 0xEC80F731EC80F731L;
+  private static final int VALUE = 31;
+  private static final int SHIFT = 64;
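+  // SHIFT equals the long bit width. Java masks long shift distances to their low
+  // six bits (JLS 15.19), so shifting by 64 or -64 acts like shifting by 0;
+  // the _o/_on test variants exercise exactly these out-of-range shift counts.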
+
+  public static void main(String[] args) {
+    System.out.println("Testing Long vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    long[] a0 = new long[ARRLEN];
+    long[] a1 = new long[ARRLEN];
+    long[] a2 = new long[ARRLEN];
+    long[] a3 = new long[ARRLEN];
+    long[] a4 = new long[ARRLEN];
+    // Initialize
+    long gold_sum = 0;
+    for (int i=0; i<ARRLEN; i++) {
+      long val = (long)(ADD_INIT+i);
+      gold_sum += val;
+      a1[i] = val;
+      a2[i] = (long)VALUE;
+      a3[i] = (long)-VALUE;
+      a4[i] = (long)BIT_MASK;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+      test_addc(a0, a1);
+      test_addv(a0, a1, (long)VALUE);
+      test_adda(a0, a1, a2);
+      test_subc(a0, a1);
+      test_subv(a0, a1, (long)VALUE);
+      test_suba(a0, a1, a2);
+      test_mulc(a0, a1);
+      test_mulv(a0, a1, (long)VALUE);
+      test_mula(a0, a1, a2);
+      test_divc(a0, a1);
+      test_divv(a0, a1, (long)VALUE);
+      test_diva(a0, a1, a2);
+      test_mulc_n(a0, a1);
+      test_mulv(a0, a1, (long)-VALUE);
+      test_mula(a0, a1, a3);
+      test_divc_n(a0, a1);
+      test_divv(a0, a1, (long)-VALUE);
+      test_diva(a0, a1, a3);
+      test_andc(a0, a1);
+      test_andv(a0, a1, (long)BIT_MASK);
+      test_anda(a0, a1, a4);
+      test_orc(a0, a1);
+      test_orv(a0, a1, (long)BIT_MASK);
+      test_ora(a0, a1, a4);
+      test_xorc(a0, a1);
+      test_xorv(a0, a1, (long)BIT_MASK);
+      test_xora(a0, a1, a4);
+      test_sllc(a0, a1);
+      test_sllv(a0, a1, VALUE);
+      test_srlc(a0, a1);
+      test_srlv(a0, a1, VALUE);
+      test_srac(a0, a1);
+      test_srav(a0, a1, VALUE);
+      test_sllc_n(a0, a1);
+      test_sllv(a0, a1, -VALUE);
+      test_srlc_n(a0, a1);
+      test_srlv(a0, a1, -VALUE);
+      test_srac_n(a0, a1);
+      test_srav(a0, a1, -VALUE);
+      test_sllc_o(a0, a1);
+      test_sllv(a0, a1, SHIFT);
+      test_srlc_o(a0, a1);
+      test_srlv(a0, a1, SHIFT);
+      test_srac_o(a0, a1);
+      test_srav(a0, a1, SHIFT);
+      test_sllc_on(a0, a1);
+      test_sllv(a0, a1, -SHIFT);
+      test_srlc_on(a0, a1);
+      test_srlv(a0, a1, -SHIFT);
+      test_srac_on(a0, a1);
+      test_srav(a0, a1, -SHIFT);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      long sum = test_sum(a1);
+      if (sum != gold_sum) {
+        System.err.println("test_sum:  " + sum + " != " + gold_sum);
+        errn++;
+      }
+
+      test_addc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addc: ", i, a0[i], (long)((long)(ADD_INIT+i)+VALUE));
+      }
+      test_addv(a0, a1, (long)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addv: ", i, a0[i], (long)((long)(ADD_INIT+i)+VALUE));
+      }
+      test_adda(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_adda: ", i, a0[i], (long)((long)(ADD_INIT+i)+VALUE));
+      }
+
+      test_subc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subc: ", i, a0[i], (long)((long)(ADD_INIT+i)-VALUE));
+      }
+      test_subv(a0, a1, (long)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subv: ", i, a0[i], (long)((long)(ADD_INIT+i)-VALUE));
+      }
+      test_suba(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_suba: ", i, a0[i], (long)((long)(ADD_INIT+i)-VALUE));
+      }
+
+      test_mulc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc: ", i, a0[i], (long)((long)(ADD_INIT+i)*VALUE));
+      }
+      test_mulv(a0, a1, (long)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv: ", i, a0[i], (long)((long)(ADD_INIT+i)*VALUE));
+      }
+      test_mula(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula: ", i, a0[i], (long)((long)(ADD_INIT+i)*VALUE));
+      }
+
+      test_divc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc: ", i, a0[i], (long)((long)(ADD_INIT+i)/VALUE));
+      }
+      test_divv(a0, a1, (long)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv: ", i, a0[i], (long)((long)(ADD_INIT+i)/VALUE));
+      }
+      test_diva(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva: ", i, a0[i], (long)((long)(ADD_INIT+i)/VALUE));
+      }
+
+      test_mulc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc_n: ", i, a0[i], (long)((long)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mulv(a0, a1, (long)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv_n: ", i, a0[i], (long)((long)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mula(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula_n: ", i, a0[i], (long)((long)(ADD_INIT+i)*(-VALUE)));
+      }
+
+      test_divc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc_n: ", i, a0[i], (long)((long)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_divv(a0, a1, (long)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv_n: ", i, a0[i], (long)((long)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_diva(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva_n: ", i, a0[i], (long)((long)(ADD_INIT+i)/(-VALUE)));
+      }
+
+      test_andc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andc: ", i, a0[i], (long)((long)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_andv(a0, a1, (long)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andv: ", i, a0[i], (long)((long)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_anda(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_anda: ", i, a0[i], (long)((long)(ADD_INIT+i)&BIT_MASK));
+      }
+
+      test_orc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orc: ", i, a0[i], (long)((long)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_orv(a0, a1, (long)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orv: ", i, a0[i], (long)((long)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_ora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ora: ", i, a0[i], (long)((long)(ADD_INIT+i)|BIT_MASK));
+      }
+
+      test_xorc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorc: ", i, a0[i], (long)((long)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xorv(a0, a1, (long)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorv: ", i, a0[i], (long)((long)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xora: ", i, a0[i], (long)((long)(ADD_INIT+i)^BIT_MASK));
+      }
+
+      test_sllc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc: ", i, a0[i], (long)((long)(ADD_INIT+i)<<VALUE));
+      }
+      test_sllv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv: ", i, a0[i], (long)((long)(ADD_INIT+i)<<VALUE));
+      }
+
+      test_srlc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>VALUE));
+      }
+      test_srlv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>VALUE));
+      }
+
+      test_srac(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac: ", i, a0[i], (long)((long)(ADD_INIT+i)>>VALUE));
+      }
+      test_srav(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav: ", i, a0[i], (long)((long)(ADD_INIT+i)>>VALUE));
+      }
+
+      test_sllc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_n: ", i, a0[i], (long)((long)(ADD_INIT+i)<<(-VALUE)));
+      }
+      test_sllv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_n: ", i, a0[i], (long)((long)(ADD_INIT+i)<<(-VALUE)));
+      }
+
+      test_srlc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_n: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>(-VALUE)));
+      }
+      test_srlv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_n: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>(-VALUE)));
+      }
+
+      test_srac_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_n: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-VALUE)));
+      }
+      test_srav(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_n: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-VALUE)));
+      }
+
+      test_sllc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_o: ", i, a0[i], (long)((long)(ADD_INIT+i)<<SHIFT));
+      }
+      test_sllv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_o: ", i, a0[i], (long)((long)(ADD_INIT+i)<<SHIFT));
+      }
+
+      test_srlc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_o: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>SHIFT));
+      }
+      test_srlv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_o: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>SHIFT));
+      }
+
+      test_srac_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_o: ", i, a0[i], (long)((long)(ADD_INIT+i)>>SHIFT));
+      }
+      test_srav(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_o: ", i, a0[i], (long)((long)(ADD_INIT+i)>>SHIFT));
+      }
+
+      test_sllc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_on: ", i, a0[i], (long)((long)(ADD_INIT+i)<<(-SHIFT)));
+      }
+      test_sllv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_on: ", i, a0[i], (long)((long)(ADD_INIT+i)<<(-SHIFT)));
+      }
+
+      test_srlc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+      test_srlv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+
+      test_srac_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-SHIFT)));
+      }
+      test_srav(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-SHIFT)));
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
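+    // Timing pass: report wall-clock time per kernel. These numbers are for
+    // eyeballing the effect of vectorization; they do not affect pass/fail.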
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sum: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addv(a0, a1, (long)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_adda(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_adda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subv(a0, a1, (long)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_suba(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_suba: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (long)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (long)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (long)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (long)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andv(a0, a1, (long)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_anda(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_anda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orv(a0, a1, (long)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorv(a0, a1, (long)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_on: " + (end - start));
+
+    return errn;
+  }
+
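+  // The helper kernels below are simple element-wise loops that the server
+  // compiler should be able to auto-vectorize (bug 6340864). Suffixes:
+  // _c = constant operand, _v = variable operand, _a = per-element array
+  // operand, _n = negated constant, _o/_on = shift by SHIFT/-SHIFT.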
+  static long test_sum(long[] a1) {
+    long sum = 0;
+    for (int i = 0; i < a1.length; i+=1) {
+      sum += a1[i];
+    }
+    return sum;
+  }
+
+  static void test_addc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]+VALUE);
+    }
+  }
+  static void test_addv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]+b);
+    }
+  }
+  static void test_adda(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]+a2[i]);
+    }
+  }
+
+  static void test_subc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]-VALUE);
+    }
+  }
+  static void test_subv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]-b);
+    }
+  }
+  static void test_suba(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]-a2[i]);
+    }
+  }
+
+  static void test_mulc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]*VALUE);
+    }
+  }
+  static void test_mulc_n(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]*(-VALUE));
+    }
+  }
+  static void test_mulv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]*b);
+    }
+  }
+  static void test_mula(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]*a2[i]);
+    }
+  }
+
+  static void test_divc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]/VALUE);
+    }
+  }
+  static void test_divc_n(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]/(-VALUE));
+    }
+  }
+  static void test_divv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]/b);
+    }
+  }
+  static void test_diva(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]/a2[i]);
+    }
+  }
+
+  static void test_andc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]&BIT_MASK);
+    }
+  }
+  static void test_andv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]&b);
+    }
+  }
+  static void test_anda(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]&a2[i]);
+    }
+  }
+
+  static void test_orc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]|BIT_MASK);
+    }
+  }
+  static void test_orv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]|b);
+    }
+  }
+  static void test_ora(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]|a2[i]);
+    }
+  }
+
+  static void test_xorc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]^BIT_MASK);
+    }
+  }
+  static void test_xorv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]^b);
+    }
+  }
+  static void test_xora(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]^a2[i]);
+    }
+  }
+
+  static void test_sllc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]<<VALUE);
+    }
+  }
+  static void test_sllc_n(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]<<(-VALUE));
+    }
+  }
+  static void test_sllc_o(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]<<SHIFT);
+    }
+  }
+  static void test_sllc_on(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]<<(-SHIFT));
+    }
+  }
+  static void test_sllv(long[] a0, long[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]<<b);
+    }
+  }
+
+  static void test_srlc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>>VALUE);
+    }
+  }
+  static void test_srlc_n(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>>(-VALUE));
+    }
+  }
+  static void test_srlc_o(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>>SHIFT);
+    }
+  }
+  static void test_srlc_on(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>>(-SHIFT));
+    }
+  }
+  static void test_srlv(long[] a0, long[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>>b);
+    }
+  }
+
+  static void test_srac(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>VALUE);
+    }
+  }
+  static void test_srac_n(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>(-VALUE));
+    }
+  }
+  static void test_srac_o(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>SHIFT);
+    }
+  }
+  static void test_srac_on(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>(-SHIFT));
+    }
+  }
+  static void test_srav(long[] a0, long[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>b);
+    }
+  }
+
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6340864/TestShortVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,1127 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6340864
+ * @summary Implement vectorization optimizations in hotspot-server
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestShortVect
+ */
+
+public class TestShortVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int ADD_INIT = Short.MAX_VALUE-500;
+  private static final int BIT_MASK = 0xB731;
+  private static final int VALUE = 7;
+  private static final int SHIFT = 16;
+
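+  // ADD_INIT is chosen near Short.MAX_VALUE so the initialized values wrap
+  // around; VALUE (7) is an in-range shift count for 16-bit shorts, while
+  // SHIFT (16) probes the element-width boundary (shift operands are
+  // promoted to int, so shift counts are taken mod 32).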
+  public static void main(String args[]) {
+    System.out.println("Testing Short vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    short[] a0 = new short[ARRLEN];
+    short[] a1 = new short[ARRLEN];
+    short[] a2 = new short[ARRLEN];
+    short[] a3 = new short[ARRLEN];
+    short[] a4 = new short[ARRLEN];
+     int[] p2 = new  int[ARRLEN/2];
+    long[] p4 = new long[ARRLEN/4];
+    // Initialize
+    int gold_sum = 0;
+    for (int i=0; i<ARRLEN; i++) {
+      short val = (short)(ADD_INIT+i);
+      gold_sum += val;
+      a1[i] = val;
+      a2[i] = (short)VALUE;
+      a3[i] = (short)-VALUE;
+      a4[i] = (short)BIT_MASK;
+    }
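+    // Warmup: run every kernel ITERS times so that, with -Xbatch, the JIT
+    // compiles (and ideally vectorizes) them before verification and timing.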
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+      test_addc(a0, a1);
+      test_addv(a0, a1, (short)VALUE);
+      test_adda(a0, a1, a2);
+      test_subc(a0, a1);
+      test_subv(a0, a1, (short)VALUE);
+      test_suba(a0, a1, a2);
+      test_mulc(a0, a1);
+      test_mulv(a0, a1, (short)VALUE);
+      test_mula(a0, a1, a2);
+      test_divc(a0, a1);
+      test_divv(a0, a1, (short)VALUE);
+      test_diva(a0, a1, a2);
+      test_mulc_n(a0, a1);
+      test_mulv(a0, a1, (short)-VALUE);
+      test_mula(a0, a1, a3);
+      test_divc_n(a0, a1);
+      test_divv(a0, a1, (short)-VALUE);
+      test_diva(a0, a1, a3);
+      test_andc(a0, a1);
+      test_andv(a0, a1, (short)BIT_MASK);
+      test_anda(a0, a1, a4);
+      test_orc(a0, a1);
+      test_orv(a0, a1, (short)BIT_MASK);
+      test_ora(a0, a1, a4);
+      test_xorc(a0, a1);
+      test_xorv(a0, a1, (short)BIT_MASK);
+      test_xora(a0, a1, a4);
+      test_sllc(a0, a1);
+      test_sllv(a0, a1, VALUE);
+      test_srlc(a0, a1);
+      test_srlv(a0, a1, VALUE);
+      test_srac(a0, a1);
+      test_srav(a0, a1, VALUE);
+      test_sllc_n(a0, a1);
+      test_sllv(a0, a1, -VALUE);
+      test_srlc_n(a0, a1);
+      test_srlv(a0, a1, -VALUE);
+      test_srac_n(a0, a1);
+      test_srav(a0, a1, -VALUE);
+      test_sllc_o(a0, a1);
+      test_sllv(a0, a1, SHIFT);
+      test_srlc_o(a0, a1);
+      test_srlv(a0, a1, SHIFT);
+      test_srac_o(a0, a1);
+      test_srav(a0, a1, SHIFT);
+      test_sllc_on(a0, a1);
+      test_sllv(a0, a1, -SHIFT);
+      test_srlc_on(a0, a1);
+      test_srlv(a0, a1, -SHIFT);
+      test_srac_on(a0, a1);
+      test_srav(a0, a1, -SHIFT);
+      test_pack2(p2, a1);
+      test_unpack2(a0, p2);
+      test_pack2_swap(p2, a1);
+      test_unpack2_swap(a0, p2);
+      test_pack4(p4, a1);
+      test_unpack4(a0, p4);
+      test_pack4_swap(p4, a1);
+      test_unpack4_swap(a0, p4);
+    }
+    // Verification pass: run each kernel once and check every element against an independently computed expected value.
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      int sum = test_sum(a1);
+      if (sum != gold_sum) {
+        System.err.println("test_sum:  " + sum + " != " + gold_sum);
+        errn++;
+      }
+
+      test_addc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addc: ", i, a0[i], (short)((short)(ADD_INIT+i)+VALUE));
+      }
+      test_addv(a0, a1, (short)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addv: ", i, a0[i], (short)((short)(ADD_INIT+i)+VALUE));
+      }
+      test_adda(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_adda: ", i, a0[i], (short)((short)(ADD_INIT+i)+VALUE));
+      }
+
+      test_subc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subc: ", i, a0[i], (short)((short)(ADD_INIT+i)-VALUE));
+      }
+      test_subv(a0, a1, (short)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subv: ", i, a0[i], (short)((short)(ADD_INIT+i)-VALUE));
+      }
+      test_suba(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_suba: ", i, a0[i], (short)((short)(ADD_INIT+i)-VALUE));
+      }
+
+      test_mulc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc: ", i, a0[i], (short)((short)(ADD_INIT+i)*VALUE));
+      }
+      test_mulv(a0, a1, (short)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv: ", i, a0[i], (short)((short)(ADD_INIT+i)*VALUE));
+      }
+      test_mula(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula: ", i, a0[i], (short)((short)(ADD_INIT+i)*VALUE));
+      }
+
+      test_divc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc: ", i, a0[i], (short)((short)(ADD_INIT+i)/VALUE));
+      }
+      test_divv(a0, a1, (short)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv: ", i, a0[i], (short)((short)(ADD_INIT+i)/VALUE));
+      }
+      test_diva(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva: ", i, a0[i], (short)((short)(ADD_INIT+i)/VALUE));
+      }
+
+      test_mulc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc_n: ", i, a0[i], (short)((short)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mulv(a0, a1, (short)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv_n: ", i, a0[i], (short)((short)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mula(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula_n: ", i, a0[i], (short)((short)(ADD_INIT+i)*(-VALUE)));
+      }
+
+      test_divc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc_n: ", i, a0[i], (short)((short)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_divv(a0, a1, (short)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv_n: ", i, a0[i], (short)((short)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_diva(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva_n: ", i, a0[i], (short)((short)(ADD_INIT+i)/(-VALUE)));
+      }
+
+      test_andc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andc: ", i, a0[i], (short)((short)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_andv(a0, a1, (short)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andv: ", i, a0[i], (short)((short)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_anda(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_anda: ", i, a0[i], (short)((short)(ADD_INIT+i)&BIT_MASK));
+      }
+
+      test_orc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orc: ", i, a0[i], (short)((short)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_orv(a0, a1, (short)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orv: ", i, a0[i], (short)((short)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_ora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ora: ", i, a0[i], (short)((short)(ADD_INIT+i)|BIT_MASK));
+      }
+
+      test_xorc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorc: ", i, a0[i], (short)((short)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xorv(a0, a1, (short)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorv: ", i, a0[i], (short)((short)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xora: ", i, a0[i], (short)((short)(ADD_INIT+i)^BIT_MASK));
+      }
+
+      test_sllc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc: ", i, a0[i], (short)((short)(ADD_INIT+i)<<VALUE));
+      }
+      test_sllv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv: ", i, a0[i], (short)((short)(ADD_INIT+i)<<VALUE));
+      }
+
+      test_srlc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>VALUE));
+      }
+      test_srlv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>VALUE));
+      }
+
+      test_srac(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac: ", i, a0[i], (short)((short)(ADD_INIT+i)>>VALUE));
+      }
+      test_srav(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav: ", i, a0[i], (short)((short)(ADD_INIT+i)>>VALUE));
+      }
+
+      test_sllc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_n: ", i, a0[i], (short)((short)(ADD_INIT+i)<<(-VALUE)));
+      }
+      test_sllv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_n: ", i, a0[i], (short)((short)(ADD_INIT+i)<<(-VALUE)));
+      }
+
+      test_srlc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_n: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>(-VALUE)));
+      }
+      test_srlv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_n: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>(-VALUE)));
+      }
+
+      test_srac_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_n: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-VALUE)));
+      }
+      test_srav(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_n: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-VALUE)));
+      }
+
+      test_sllc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_o: ", i, a0[i], (short)((short)(ADD_INIT+i)<<SHIFT));
+      }
+      test_sllv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_o: ", i, a0[i], (short)((short)(ADD_INIT+i)<<SHIFT));
+      }
+
+      test_srlc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_o: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>SHIFT));
+      }
+      test_srlv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_o: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>SHIFT));
+      }
+
+      test_srac_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_o: ", i, a0[i], (short)((short)(ADD_INIT+i)>>SHIFT));
+      }
+      test_srav(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_o: ", i, a0[i], (short)((short)(ADD_INIT+i)>>SHIFT));
+      }
+
+      test_sllc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_on: ", i, a0[i], (short)((short)(ADD_INIT+i)<<(-SHIFT)));
+      }
+      test_sllv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_on: ", i, a0[i], (short)((short)(ADD_INIT+i)<<(-SHIFT)));
+      }
+
+      test_srlc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+      test_srlv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+
+      test_srac_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-SHIFT)));
+      }
+      test_srav(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-SHIFT)));
+      }
+
+      test_pack2(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2: ", i, p2[i], ((int)(ADD_INIT+2*i) & 0xFFFF) | ((int)(ADD_INIT+2*i+1) << 16));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack2(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2: ", i, a0[i], (short)(ADD_INIT+i));
+      }
+
+      test_pack2_swap(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2_swap: ", i, p2[i], ((int)(ADD_INIT+2*i+1) & 0xFFFF) | ((int)(ADD_INIT+2*i) << 16));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack2_swap(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2_swap: ", i, a0[i], (short)(ADD_INIT+i));
+      }
+
+      test_pack4(p4, a1);
+      for (int i=0; i<ARRLEN/4; i++) {
+        errn += verify("test_pack4: ", i, p4[i],  ((long)(ADD_INIT+4*i+0) & 0xFFFFL) |
+                                                 (((long)(ADD_INIT+4*i+1) & 0xFFFFL) << 16)  |
+                                                 (((long)(ADD_INIT+4*i+2) & 0xFFFFL) << 32)  |
+                                                 (((long)(ADD_INIT+4*i+3) & 0xFFFFL) << 48));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack4(a0, p4);
+      for (int i=0; i<(ARRLEN&(-4)); i++) {
+        errn += verify("test_unpack4: ", i, a0[i], (short)(ADD_INIT+i));
+      }
+
+      test_pack4_swap(p4, a1);
+      for (int i=0; i<ARRLEN/4; i++) {
+        errn += verify("test_pack4_swap: ", i, p4[i],  ((long)(ADD_INIT+4*i+3) & 0xFFFFL) |
+                                                      (((long)(ADD_INIT+4*i+2) & 0xFFFFL) << 16)  |
+                                                      (((long)(ADD_INIT+4*i+1) & 0xFFFFL) << 32)  |
+                                                      (((long)(ADD_INIT+4*i+0) & 0xFFFFL) << 48));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack4_swap(a0, p4);
+      for (int i=0; i<(ARRLEN&(-4)); i++) {
+        errn += verify("test_unpack4_swap: ", i, a0[i], (short)(ADD_INIT+i));
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sum: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addv(a0, a1, (short)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_adda(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_adda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subv(a0, a1, (short)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_suba(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_suba: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (short)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (short)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (short)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (short)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andv(a0, a1, (short)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_anda(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_anda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orv(a0, a1, (short)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorv(a0, a1, (short)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2_swap(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2_swap(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2_swap: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack4(p4, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack4: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack4(a0, p4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack4: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack4_swap(p4, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack4_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack4_swap(a0, p4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack4_swap: " + (end - start));
+
+    return errn;
+  }
+
+  static int test_sum(short[] a1) {
+    int sum = 0;
+    for (int i = 0; i < a1.length; i+=1) {
+      sum += a1[i];
+    }
+    return sum;
+  }
+
+  static void test_addc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]+VALUE);
+    }
+  }
+  static void test_addv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]+b);
+    }
+  }
+  static void test_adda(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]+a2[i]);
+    }
+  }
+
+  static void test_subc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]-VALUE);
+    }
+  }
+  static void test_subv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]-b);
+    }
+  }
+  static void test_suba(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]-a2[i]);
+    }
+  }
+
+  static void test_mulc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]*VALUE);
+    }
+  }
+  static void test_mulc_n(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]*(-VALUE));
+    }
+  }
+  static void test_mulv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]*b);
+    }
+  }
+  static void test_mula(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]*a2[i]);
+    }
+  }
+
+  static void test_divc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]/VALUE);
+    }
+  }
+  static void test_divc_n(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]/(-VALUE));
+    }
+  }
+  static void test_divv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]/b);
+    }
+  }
+  static void test_diva(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]/a2[i]);
+    }
+  }
+
+  static void test_andc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]&BIT_MASK);
+    }
+  }
+  static void test_andv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]&b);
+    }
+  }
+  static void test_anda(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]&a2[i]);
+    }
+  }
+
+  static void test_orc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]|BIT_MASK);
+    }
+  }
+  static void test_orv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]|b);
+    }
+  }
+  static void test_ora(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]|a2[i]);
+    }
+  }
+
+  static void test_xorc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]^BIT_MASK);
+    }
+  }
+  static void test_xorv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]^b);
+    }
+  }
+  static void test_xora(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]^a2[i]);
+    }
+  }
+
+  static void test_sllc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]<<VALUE);
+    }
+  }
+  static void test_sllc_n(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]<<(-VALUE));
+    }
+  }
+  static void test_sllc_o(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]<<SHIFT);
+    }
+  }
+  static void test_sllc_on(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]<<(-SHIFT));
+    }
+  }
+  static void test_sllv(short[] a0, short[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]<<b);
+    }
+  }
+
+  static void test_srlc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>>VALUE);
+    }
+  }
+  static void test_srlc_n(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>>(-VALUE));
+    }
+  }
+  static void test_srlc_o(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>>SHIFT);
+    }
+  }
+  static void test_srlc_on(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>>(-SHIFT));
+    }
+  }
+  static void test_srlv(short[] a0, short[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>>b);
+    }
+  }
+
+  static void test_srac(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>VALUE);
+    }
+  }
+  static void test_srac_n(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>(-VALUE));
+    }
+  }
+  static void test_srac_o(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>SHIFT);
+    }
+  }
+  static void test_srac_on(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>(-SHIFT));
+    }
+  }
+  static void test_srav(short[] a0, short[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>b);
+    }
+  }
+
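+  // Pack/unpack helpers: two shorts are combined into one int with element
+  // 0 in the low half (little-endian lane order); the _swap variants
+  // reverse the lane order. The length guard skips the loop when the
+  // packed array would index past the short array.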
+  static void test_pack2(int[] p2, short[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      int l0 = (int)a1[i*2+0];
+      int l1 = (int)a1[i*2+1];
+      p2[i] = (l1 << 16) | (l0 & 0xFFFF);
+    }
+  }
+  static void test_unpack2(short[] a0, int[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      int l = p2[i];
+      a0[i*2+0] = (short)(l & 0xFFFF);
+      a0[i*2+1] = (short)(l >> 16);
+    }
+  }
+  static void test_pack2_swap(int[] p2, short[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      int l0 = (int)a1[i*2+0];
+      int l1 = (int)a1[i*2+1];
+      p2[i] = (l0 << 16) | (l1 & 0xFFFF);
+    }
+  }
+  static void test_unpack2_swap(short[] a0, int[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      int l = p2[i];
+      a0[i*2+0] = (short)(l >> 16);
+      a0[i*2+1] = (short)(l & 0xFFFF);
+    }
+  }
+
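+  // Same idea with four 16-bit lanes per long; masking with 0xFFFFL first
+  // clears the bits that sign-extending each short would otherwise smear
+  // across the upper lanes.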
+  static void test_pack4(long[] p4, short[] a1) {
+    if (p4.length*4 > a1.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      long l0 = (long)a1[i*4+0];
+      long l1 = (long)a1[i*4+1];
+      long l2 = (long)a1[i*4+2];
+      long l3 = (long)a1[i*4+3];
+      p4[i] = (l0 & 0xFFFFL) |
+             ((l1 & 0xFFFFL) << 16) |
+             ((l2 & 0xFFFFL) << 32) |
+             ((l3 & 0xFFFFL) << 48);
+    }
+  }
+  static void test_unpack4(short[] a0, long[] p4) {
+    if (p4.length*4 > a0.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      long l = p4[i];
+      a0[i*4+0] = (short)(l & 0xFFFFL);
+      a0[i*4+1] = (short)(l >> 16);
+      a0[i*4+2] = (short)(l >> 32);
+      a0[i*4+3] = (short)(l >> 48);
+    }
+  }
+  static void test_pack4_swap(long[] p4, short[] a1) {
+    if (p4.length*4 > a1.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      long l0 = (long)a1[i*4+0];
+      long l1 = (long)a1[i*4+1];
+      long l2 = (long)a1[i*4+2];
+      long l3 = (long)a1[i*4+3];
+      p4[i] = (l3 & 0xFFFFL) |
+             ((l2 & 0xFFFFL) << 16) |
+             ((l1 & 0xFFFFL) << 32) |
+             ((l0 & 0xFFFFL) << 48);
+    }
+  }
+  static void test_unpack4_swap(short[] a0, long[] p4) {
+    if (p4.length*4 > a0.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      long l = p4[i];
+      a0[i*4+0] = (short)(l >> 48);
+      a0[i*4+1] = (short)(l >> 32);
+      a0[i*4+2] = (short)(l >> 16);
+      a0[i*4+3] = (short)(l & 0xFFFFL);
+    }
+  }
+
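+  // Overloaded verify helpers print each mismatch and count it as one
+  // error; hex output for int/long values makes packed-lane mistakes
+  // easier to spot.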
+  static int verify(String text, int i, short elem, short val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val));
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
+      return 1;
+    }
+    return 0;
+  }
+}
--- a/test/compiler/6894807/Test6894807.sh	Wed Sep 26 21:45:41 2012 -0700
+++ b/test/compiler/6894807/Test6894807.sh	Sat Oct 20 00:08:35 2012 -0700
@@ -18,27 +18,24 @@
   exit 1
 fi
 
-BIT_FLAG=""
-
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     NULL=/dev/null
     PS=":"
     FS="/"
-    ## for solaris, linux it's HOME
-    FILE_LOCATION=$HOME
-    if [ -f ${FILE_LOCATION}${FS}JDK64BIT -a ${OS} = "SunOS" ]
-    then
-        BIT_FLAG=`cat ${FILE_LOCATION}${FS}JDK64BIT | grep -v '^#'`
-    fi
     ;;
   Windows_* )
     NULL=NUL
     PS=";"
     FS="\\"
     ;;
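+  # Cygwin runs a Windows JVM behind a POSIX shell, so paths use "/" while
+  # the class path separator stays ";".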
+  CYGWIN_* )
+    NULL=/dev/null
+    PS=";"
+    FS="/"
+    ;;
   * )
     echo "Unrecognized system!"
     exit 1;
@@ -50,9 +47,9 @@
 
 THIS_DIR=`pwd`
 
-${TESTJAVA}${FS}bin${FS}java ${BIT_FLAG} -version
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} -version
 
-${TESTJAVA}${FS}bin${FS}java ${BIT_FLAG} -server IsInstanceTest > test.out 2>&1
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} IsInstanceTest > test.out 2>&1
 
 cat test.out
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestBooleanVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,952 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestBooleanVect
+ */
+
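+// A note on the flags above: -Xbatch makes compilation synchronous so the
+// timed loops run compiled code, and -XX:-OptimizeFill is presumably there
+// to keep fill loops out of the intrinsic path so SuperWord itself gets
+// exercised.
+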
+public class TestBooleanVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
+
+  public static void main(String args[]) {
+    System.out.println("Testing Boolean vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    boolean[] a1 = new boolean[ARRLEN];
+    boolean[] a2 = new boolean[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+      test_vi(a2, true);
+      test_cp(a1, a2);
+      test_2ci(a1, a2);
+      test_2vi(a1, a2, true, true);
+      test_ci_neg(a1);
+      test_vi_neg(a2, true);
+      test_cp_neg(a1, a2);
+      test_2ci_neg(a1, a2);
+      test_2vi_neg(a1, a2, true, true);
+      test_ci_oppos(a1);
+      test_vi_oppos(a2, true);
+      test_cp_oppos(a1, a2);
+      test_2ci_oppos(a1, a2);
+      test_2vi_oppos(a1, a2, true, true);
+      test_ci_off(a1);
+      test_vi_off(a2, true);
+      test_cp_off(a1, a2);
+      test_2ci_off(a1, a2);
+      test_2vi_off(a1, a2, true, true);
+      test_ci_inv(a1, OFFSET);
+      test_vi_inv(a2, true, OFFSET);
+      test_cp_inv(a1, a2, OFFSET);
+      test_2ci_inv(a1, a2, OFFSET);
+      test_2vi_inv(a1, a2, true, true, OFFSET);
+      test_ci_scl(a1);
+      test_vi_scl(a2, true);
+      test_cp_scl(a1, a2);
+      test_2ci_scl(a1, a2);
+      test_2vi_scl(a1, a2, true, true);
+      test_cp_alndst(a1, a2);
+      test_cp_alnsrc(a1, a2);
+      test_2ci_aln(a1, a2);
+      test_2vi_aln(a1, a2, true, true);
+      test_cp_unalndst(a1, a2);
+      test_cp_unalnsrc(a1, a2);
+      test_2ci_unaln(a1, a2);
+      test_2vi_unaln(a1, a2, true, true);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = false;
+      a2[i] = false;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], false);
+      }
+      test_vi(a2, true);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], true);
+      }
+      test_cp(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], true);
+      }
+      test_2ci(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci: a1", i, a1[i], false);
+        errn += verify("test_2ci: a2", i, a2[i], false);
+      }
+      test_2vi(a1, a2, true, true);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi: a1", i, a1[i], true);
+        errn += verify("test_2vi: a2", i, a2[i], true);
+      }
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+        a2[i] = false;
+      }
+      test_ci_neg(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], false);
+      }
+      test_vi_neg(a2, true);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], true);
+      }
+      test_cp_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], true);
+      }
+      test_2ci_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_neg: a1", i, a1[i], false);
+        errn += verify("test_2ci_neg: a2", i, a2[i], false);
+      }
+      test_2vi_neg(a1, a2, true, true);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_neg: a1", i, a1[i], true);
+        errn += verify("test_2vi_neg: a2", i, a2[i], true);
+      }
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+        a2[i] = false;
+      }
+      test_ci_oppos(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], false);
+      }
+      test_vi_oppos(a2, true);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], true);
+      }
+      test_cp_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], true);
+      }
+      test_2ci_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_oppos: a1", i, a1[i], false);
+        errn += verify("test_2ci_oppos: a2", i, a2[i], false);
+      }
+      test_2vi_oppos(a1, a2, true, true);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_oppos: a1", i, a1[i], true);
+        errn += verify("test_2vi_oppos: a2", i, a2[i], true);
+      }
+      // Reset for indexing with offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+        a2[i] = false;
+      }
+      test_ci_off(a1);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_off: a1", i, a1[i], false);
+      }
+      test_vi_off(a2, true);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_off: a2", i, a2[i], true);
+      }
+      test_cp_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_off: a1", i, a1[i], true);
+      }
+      test_2ci_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_off: a1", i, a1[i], false);
+        errn += verify("test_2ci_off: a2", i, a2[i], false);
+      }
+      test_2vi_off(a1, a2, true, true);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], true);
+        errn += verify("test_2vi_off: a2", i, a2[i], true);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], false);
+        errn += verify("test_2vi_off: a2", i, a2[i], false);
+      }
+      // Reset for indexing with invariant offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+        a2[i] = false;
+      }
+      test_ci_inv(a1, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_inv: a1", i, a1[i], false);
+      }
+      test_vi_inv(a2, true, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_inv: a2", i, a2[i], true);
+      }
+      test_cp_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_inv: a1", i, a1[i], true);
+      }
+      test_2ci_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_inv: a1", i, a1[i], false);
+        errn += verify("test_2ci_inv: a2", i, a2[i], false);
+      }
+      test_2vi_inv(a1, a2, true, true, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], true);
+        errn += verify("test_2vi_inv: a2", i, a2[i], true);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], false);
+        errn += verify("test_2vi_inv: a2", i, a2[i], false);
+      }
+      // Reset for indexing with scale
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = true;
+        a2[i] = false;
+      }
+      test_ci_scl(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        boolean val = (i%SCALE != 0);
+        errn += verify("test_ci_scl: a1", i, a1[i], val);
+      }
+      test_vi_scl(a2, true);
+      for (int i=0; i<ARRLEN; i++) {
+        boolean val = (i%SCALE == 0);
+        errn += verify("test_vi_scl: a2", i, a2[i], val);
+      }
+      test_cp_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_scl: a1", i, a1[i], true);
+      }
+      test_2ci_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a1", i, a1[i], true);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], false);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a2", i, a2[i], false);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], false);
+        }
+      }
+      test_2vi_scl(a1, a2, false, true);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a1", i, a1[i], true);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], false);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a2", i, a2[i], false);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], true);
+        }
+      }
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+        a2[i] = false;
+      }
+      test_vi(a2, true);
+      test_cp_alndst(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], false);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], true);
+      }
+      test_vi(a2, false);
+      test_cp_alnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], false);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], true);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+        a2[i] = false;
+      }
+      test_2ci_aln(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], false);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], false);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], false);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], false);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+        a2[i] = false;
+      }
+      test_2vi_aln(a1, a2, true, true);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], true);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], false);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], false);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], true);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+        a2[i] = false;
+      }
+      test_vi(a2, true);
+      test_cp_unalndst(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], false);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], true);
+      }
+      test_vi(a2, false);
+      test_cp_unalnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], false);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], true);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+        a2[i] = false;
+      }
+      test_2ci_unaln(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], false);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], false);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], false);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], false);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+        a2[i] = false;
+      }
+      test_2vi_unaln(a1, a2, true, true);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], true);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], false);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], false);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], true);
+      }
+
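+      // The overlap cases below pass the same array as both source and
+      // destination; a correct compiler must honor the loop-carried
+      // dependence (or stay scalar), so the expected values are whatever
+      // scalar execution produces.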
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (i > 0);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = false;
+      }
+      test_cp_alndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        boolean v = (i%ALIGN_OFF > 0);
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = false;
+      }
+      test_cp_alnsrc(a1, a1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], false);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        boolean v = (i%ALIGN_OFF > 0);
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+      }
+      test_2ci_aln(a1, a1);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], false);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], false);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+      }
+      test_2vi_aln(a1, a1, true, true);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], true);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], true);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (i > 0);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = false;
+      }
+      test_cp_unalndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        boolean v = (i%UNALIGN_OFF > 0);
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = false;
+      }
+      test_cp_unalnsrc(a1, a1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], false);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        boolean v = (i%UNALIGN_OFF > 0);
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+      }
+      test_2ci_unaln(a1, a1);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], false);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], false);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = false;
+      }
+      test_2vi_unaln(a1, a1, true, true);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], true);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], true);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, true);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi(a1, a2, true, true);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a2, true);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_neg(a1, a2, true, true);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_neg: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a2, true);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_oppos(a1, a2, true, true);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_oppos: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_off(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_off(a2, true);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_off(a1, a2, true, true);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_off: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_inv(a1, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_inv(a2, true, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_inv(a1, a2, true, true, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_inv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_scl(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_scl(a2, true);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_scl(a1, a2, true, true);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_scl: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_aln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_aln(a1, a2, true, true);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_aln: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_unaln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_unaln(a1, a2, true, true);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_unaln: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_ci(boolean[] a) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = false;
+    }
+  }
+  static void test_vi(boolean[] a, boolean b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp(boolean[] a, boolean[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci(boolean[] a, boolean[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = false;
+      b[i] = false;
+    }
+  }
+  static void test_2vi(boolean[] a, boolean[] b, boolean c, boolean d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_neg(boolean[] a) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = false;
+    }
+  }
+  static void test_vi_neg(boolean[] a, boolean b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp_neg(boolean[] a, boolean[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci_neg(boolean[] a, boolean[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = false;
+      b[i] = false;
+    }
+  }
+  static void test_2vi_neg(boolean[] a, boolean[] b, boolean c, boolean d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_oppos(boolean[] a) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = false;
+    }
+  }
+  static void test_vi_oppos(boolean[] a, boolean b) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[limit-i] = b;
+    }
+  }
+  static void test_cp_oppos(boolean[] a, boolean[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+    }
+  }
+  static void test_2ci_oppos(boolean[] a, boolean[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = false;
+      b[i] = false;
+    }
+  }
+  static void test_2vi_oppos(boolean[] a, boolean[] b, boolean c, boolean d) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_ci_off(boolean[] a) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = false;
+    }
+  }
+  static void test_vi_off(boolean[] a, boolean b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b;
+    }
+  }
+  static void test_cp_off(boolean[] a, boolean[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b[i+OFFSET];
+    }
+  }
+  static void test_2ci_off(boolean[] a, boolean[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = false;
+      b[i+OFFSET] = false;
+    }
+  }
+  static void test_2vi_off(boolean[] a, boolean[] b, boolean c, boolean d) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = c;
+      b[i+OFFSET] = d;
+    }
+  }
+  static void test_ci_inv(boolean[] a, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = false;
+    }
+  }
+  static void test_vi_inv(boolean[] a, boolean b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b;
+    }
+  }
+  static void test_cp_inv(boolean[] a, boolean[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b[i+k];
+    }
+  }
+  static void test_2ci_inv(boolean[] a, boolean[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = false;
+      b[i+k] = false;
+    }
+  }
+  static void test_2vi_inv(boolean[] a, boolean[] b, boolean c, boolean d, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = c;
+      b[i+k] = d;
+    }
+  }
+  static void test_ci_scl(boolean[] a) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = false;
+    }
+  }
+  static void test_vi_scl(boolean[] a, boolean b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b;
+    }
+  }
+  static void test_cp_scl(boolean[] a, boolean[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b[i*SCALE];
+    }
+  }
+  static void test_2ci_scl(boolean[] a, boolean[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = false;
+      b[i*SCALE] = false;
+    }
+  }
+  static void test_2vi_scl(boolean[] a, boolean[] b, boolean c, boolean d) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = c;
+      b[i*SCALE] = d;
+    }
+  }
+  static void test_cp_alndst(boolean[] a, boolean[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_alnsrc(boolean[] a, boolean[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+    }
+  }
+  static void test_2ci_aln(boolean[] a, boolean[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = false;
+      b[i] = false;
+    }
+  }
+  static void test_2vi_aln(boolean[] a, boolean[] b, boolean c, boolean d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(boolean[] a, boolean[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_unalnsrc(boolean[] a, boolean[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+    }
+  }
+  static void test_2ci_unaln(boolean[] a, boolean[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = false;
+      b[i] = false;
+    }
+  }
+  static void test_2vi_unaln(boolean[] a, boolean[] b, boolean c, boolean d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+
+  static int verify(String text, int i, boolean elem, boolean val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestByteDoubleVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteDoubleVect
+ */
+
+public class TestByteDoubleVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
+
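+  // Each kernel writes a byte[] and a double[] in the same loop body,
+  // presumably forcing SuperWord to handle 1-byte and 8-byte lanes side
+  // by side.
+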
+  public static void main(String args[]) {
+    System.out.println("Testing Byte + Double vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    byte[] a1 = new byte[ARRLEN];
+    byte[] a2 = new byte[ARRLEN];
+    double[] b1 = new double[ARRLEN];
+    double[] b2 = new double[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (byte)123, 103.);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (byte)123, 103.);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (byte)123, 103.);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (byte)123, 103.);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (byte)123, 103.);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1.;
+      b2[i] = -1.;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci: b1", i, b1[i], -103.);
+      }
+      test_vi(a2, b2, (byte)123, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi: b2", i, b2[i], 103.);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp: b1", i, b1[i], 103.);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.;
+        b2[i] = -1.;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], -103.);
+      }
+      test_vi_neg(a2, b2, (byte)123, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], 103.);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], 103.);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.;
+        b2[i] = -1.;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], -103.);
+      }
+      test_vi_oppos(a2, b2, (byte)123, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], 103.);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], 103.);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.;
+        b2[i] = 123.;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -103.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -1.);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_vi_aln(a1, b1, (byte)123, 103.);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], 103.);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.;
+        b2[i] = 123.;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -103.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -1.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_vi_unaln(a1, b1, (byte)123, 103.);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], 103.);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (byte)i;
+        b1[i] = (double)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (double)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1.;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (double)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (byte)i;
+        b1[i] = (double)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (double)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1.;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (double)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (byte)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (byte)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (byte)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (byte)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (byte)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
+  static void test_ci(byte[] a, double[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi(byte[] a, double[] b, byte c, double d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(byte[] a, byte[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(byte[] a, double[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_neg(byte[] a, double[] b, byte c, double d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(byte[] a, byte[] b, double[] c, double[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(byte[] a, double[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_oppos(byte[] a, double[] b, byte c, double d) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(byte[] a, byte[] b, double[] c, double[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(byte[] a, double[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_aln(byte[] a, double[] b, byte c, double d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(byte[] a, byte[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(byte[] a, byte[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(byte[] a, double[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_unaln(byte[] a, double[] b, byte c, double d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(byte[] a, byte[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(byte[] a, byte[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
+  static int verify(String text, int i, byte elem, byte val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, double elem, double val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestByteFloatVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteFloatVect
+ */
+
+public class TestByteFloatVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
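+  // Note: OFFSET and SCALE appear unused in this test.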
+
+  public static void main(String[] args) {
+    System.out.println("Testing Byte + Float vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    byte[] a1 = new byte[ARRLEN];
+    byte[] a2 = new byte[ARRLEN];
+    float[] b1 = new float[ARRLEN];
+    float[] b2 = new float[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (byte)123, 103.f);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (byte)123, 103.f);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (byte)123, 103.f);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (byte)123, 103.f);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (byte)123, 103.f);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1.f;
+      b2[i] = -1.f;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci: b1", i, b1[i], -103.f);
+      }
+      test_vi(a2, b2, (byte)123, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi: b2", i, b2[i], 103.f);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.f;
+        b2[i] = -1.f;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], -103.f);
+      }
+      test_vi_neg(a2, b2, (byte)123, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], 103.f);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.f;
+        b2[i] = -1.f;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], -103.f);
+      }
+      test_vi_oppos(a2, b2, (byte)123, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], 103.f);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.f;
+        b2[i] = 123.f;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.f;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.f);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -103.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -1.f);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_vi_aln(a1, b1, (byte)123, 103.f);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.f;
+        b2[i] = 123.f;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.f;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -103.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -1.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_vi_unaln(a1, b1, (byte)123, 103.f);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (byte)i;
+        b1[i] = (float)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
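+      // Overlapping copy (src == dst): each element is copied forward by
+      // ALIGN_OFF, so the seed pattern 0..ALIGN_OFF-1 propagates through the
+      // whole array and a1[i] should equal i%ALIGN_OFF afterwards.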
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (float)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1.f;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (float)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (byte)i;
+        b1[i] = (float)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (float)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1.f;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (float)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
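+    // Timing pass: per-method wall-clock times are printed for manual
+    // comparison only; correctness was already verified above.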
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (byte)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (byte)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (byte)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (byte)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (byte)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
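+  // Kernels: test_ci* store compile-time constants, test_vi* store the values
+  // passed in, and test_cp* copy between arrays. The _neg, _oppos, _aln and
+  // _unaln variants use negative, opposite, ALIGN_OFF-offset and
+  // UNALIGN_OFF-offset index patterns respectively.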
+  static void test_ci(byte[] a, float[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi(byte[] a, float[] b, byte c, float d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(byte[] a, byte[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(byte[] a, float[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_neg(byte[] a, float[] b, byte c, float d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(byte[] a, byte[] b, float[] c, float[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(byte[] a, float[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_oppos(byte[] a, float[] b, byte c, float d) {
+    int limit = a.length-1;
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(byte[] a, byte[] b, float[] c, float[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(byte[] a, float[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_aln(byte[] a, float[] b, byte c, float d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(byte[] a, byte[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(byte[] a, byte[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(byte[] a, float[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_unaln(byte[] a, float[] b, byte c, float d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(byte[] a, byte[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(byte[] a, byte[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
+  static int verify(String text, int i, byte elem, byte val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, float elem, float val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestByteIntVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteIntVect
+ */
+
+public class TestByteIntVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
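+  // Same structure and kernels as TestByteFloatVect, with int as the second
+  // element type. OFFSET and SCALE appear unused here.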
+
+  public static void main(String[] args) {
+    System.out.println("Testing Byte + Integer vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    byte[] a1 = new byte[ARRLEN];
+    byte[] a2 = new byte[ARRLEN];
+    int[] b1 = new int[ARRLEN];
+    int[] b2 = new int[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (byte)123, (int)103);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (byte)123, (int)103);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (byte)123, (int)103);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (byte)123, (int)103);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (byte)123, (int)103);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1;
+      b2[i] = -1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci: b1", i, b1[i], (int)-103);
+      }
+      test_vi(a2, b2, (byte)123, (int)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi: b2", i, b2[i], (int)103);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp: b1", i, b1[i], (int)103);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1;
+        b2[i] = -1;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], (int)-103);
+      }
+      test_vi_neg(a2, b2, (byte)123, (int)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], (int)103);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], (int)103);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1;
+        b2[i] = -1;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], (int)-103);
+      }
+      test_vi_oppos(a2, b2, (byte)123, (int)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], (int)103);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], (int)103);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1;
+        b2[i] = 123;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (int)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (int)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (int)-123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (int)123);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (int)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (int)-1);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_vi_aln(a1, b1, (byte)123, (int)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (int)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (int)103);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1;
+        b2[i] = 123;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (int)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (int)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (int)-123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (int)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (int)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (int)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_vi_unaln(a1, b1, (byte)123, (int)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (int)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (int)103);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (byte)i;
+        b1[i] = (int)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (int)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (int)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (int)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (byte)i;
+        b1[i] = (int)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (int)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (int)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (int)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (byte)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (byte)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (byte)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (byte)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (byte)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
+  static void test_ci(byte[] a, int[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi(byte[] a, int[] b, byte c, int d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(byte[] a, byte[] b, int[] c, int[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(byte[] a, int[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_neg(byte[] a, int[] b, byte c, int d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(byte[] a, byte[] b, int[] c, int[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(byte[] a, int[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_oppos(byte[] a, int[] b, byte c, int d) {
+    int limit = a.length-1;
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(byte[] a, byte[] b, int[] c, int[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(byte[] a, int[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_aln(byte[] a, int[] b, byte c, int d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(byte[] a, byte[] b, int[] c, int[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(byte[] a, byte[] b, int[] c, int[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(byte[] a, int[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_unaln(byte[] a, int[] b, byte c, int d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(byte[] a, byte[] b, int[] c, int[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(byte[] a, byte[] b, int[] c, int[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
+  static int verify(String text, int i, byte elem, byte val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestByteLongVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteLongVect
+ */
+
+public class TestByteLongVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
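+  // Same structure and kernels as TestByteFloatVect, with long as the second
+  // element type. OFFSET and SCALE appear unused here.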
+
+  public static void main(String[] args) {
+    System.out.println("Testing Byte + Long vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    byte[] a1 = new byte[ARRLEN];
+    byte[] a2 = new byte[ARRLEN];
+    long[] b1 = new long[ARRLEN];
+    long[] b2 = new long[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (byte)123, (long)103);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (byte)123, (long)103);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (byte)123, (long)103);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (byte)123, (long)103);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (byte)123, (long)103);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1;
+      b2[i] = -1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci: b1", i, b1[i], (long)-103);
+      }
+      test_vi(a2, b2, (byte)123, (long)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi: b2", i, b2[i], (long)103);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1;
+        b2[i] = -1;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], (long)-103);
+      }
+      test_vi_neg(a2, b2, (byte)123, (long)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], (long)103);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1;
+        b2[i] = -1;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], (long)-103);
+      }
+      test_vi_oppos(a2, b2, (byte)123, (long)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], (long)103);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1;
+        b2[i] = 123;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (long)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (long)-123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (long)123);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (long)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (long)-1);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_vi_aln(a1, b1, (byte)123, (long)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1;
+        b2[i] = 123;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (long)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (long)-123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (long)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (long)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (long)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_vi_unaln(a1, b1, (byte)123, (long)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (byte)i;
+        b1[i] = (long)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (long)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (long)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (byte)i;
+        b1[i] = (long)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (long)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (long)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (byte)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (byte)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (byte)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (byte)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (byte)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
+  static void test_ci(byte[] a, long[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi(byte[] a, long[] b, byte c, long d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(byte[] a, byte[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(byte[] a, long[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_neg(byte[] a, long[] b, byte c, long d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(byte[] a, byte[] b, long[] c, long[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(byte[] a, long[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_oppos(byte[] a, long[] b, byte c, long d) {
+    int limit = a.length-1;
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(byte[] a, byte[] b, long[] c, long[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(byte[] a, long[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_aln(byte[] a, long[] b, byte c, long d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(byte[] a, byte[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(byte[] a, byte[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(byte[] a, long[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_unaln(byte[] a, long[] b, byte c, long d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(byte[] a, byte[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(byte[] a, byte[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
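+  // The verify() overloads report a mismatch on stderr and return an error
+  // count instead of throwing, so a single run prints every failing element.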
+  static int verify(String text, int i, byte elem, byte val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestByteShortVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteShortVect
+ */
+
+public class TestByteShortVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
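+  // Note: OFFSET and SCALE are not referenced by any kernel in this file; they
+  // appear to be kept for symmetry with TestByteVect. ARRLEN = 997 is odd,
+  // presumably so every vectorized loop also leaves a scalar tail to handle.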
+
+  public static void main(String[] args) {
+    System.out.println("Testing Byte + Short vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    byte[] a1 = new byte[ARRLEN];
+    byte[] a2 = new byte[ARRLEN];
+    short[] b1 = new short[ARRLEN];
+    short[] b2 = new short[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (byte)123, (short)103);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (byte)123, (short)103);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (byte)123, (short)103);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (byte)123, (short)103);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (byte)123, (short)103);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
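+    // The warmup above runs every kernel ITERS times so that, with -Xbatch,
+    // the JIT compiles them before any result is checked below.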
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1;
+      b2[i] = -1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci: b1", i, b1[i], (short)-103);
+      }
+      test_vi(a2, b2, (byte)123, (short)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi: b2", i, b2[i], (short)103);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp: b1", i, b1[i], (short)103);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1;
+        b2[i] = -1;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], (short)-103);
+      }
+      test_vi_neg(a2, b2, (byte)123, (short)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], (short)103);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], (short)103);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1;
+        b2[i] = -1;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (byte)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], (short)-103);
+      }
+      test_vi_oppos(a2, b2, (byte)123, (short)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (byte)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], (short)103);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], (short)103);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1;
+        b2[i] = 123;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (short)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (short)-123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (short)123);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (short)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (short)-1);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_vi_aln(a1, b1, (byte)123, (short)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (short)103);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1;
+        b2[i] = 123;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (short)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (short)-123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (short)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (short)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (short)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_vi_unaln(a1, b1, (byte)123, (short)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (short)103);
+      }
+
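+      // The overlap tests below pass the same array as both source and
+      // destination, so the shifted copy loops become self-overlapping; a
+      // vectorizing compiler must still preserve the element-by-element
+      // semantics, which the repeating i%ALIGN_OFF pattern checks.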
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (byte)i;
+        b1[i] = (short)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (short)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (short)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (byte)i;
+        b1[i] = (short)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (short)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (short)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
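+    // The timing section is informational only: it prints elapsed milliseconds
+    // per kernel and makes no assertion about the measured values.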
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (byte)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (byte)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (byte)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (byte)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (byte)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
+  static void test_ci(byte[] a, short[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi(byte[] a, short[] b, byte c, short d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(byte[] a, byte[] b, short[] c, short[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(byte[] a, short[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_neg(byte[] a, short[] b, byte c, short d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(byte[] a, byte[] b, short[] c, short[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(byte[] a, short[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_oppos(byte[] a, short[] b, byte c, short d) {
+    int limit = a.length-1;
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(byte[] a, byte[] b, short[] c, short[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(byte[] a, short[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_aln(byte[] a, short[] b, byte c, short d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(byte[] a, byte[] b, short[] c, short[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(byte[] a, byte[] b, short[] c, short[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(byte[] a, short[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_unaln(byte[] a, short[] b, byte c, short d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(byte[] a, byte[] b, short[] c, short[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(byte[] a, byte[] b, short[] c, short[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
+  static int verify(String text, int i, byte elem, byte val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, short elem, short val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestByteVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,953 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteVect
+ */
+
+public class TestByteVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
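+  // Unlike the two-type tests, this file also uses OFFSET (fixed and
+  // loop-invariant index offsets) and SCALE (strided i*SCALE accesses).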
+
+  public static void main(String[] args) {
+    System.out.println("Testing Byte vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    byte[] a1 = new byte[ARRLEN];
+    byte[] a2 = new byte[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+      test_vi(a2, (byte)123);
+      test_cp(a1, a2);
+      test_2ci(a1, a2);
+      test_2vi(a1, a2, (byte)123, (byte)103);
+      test_ci_neg(a1);
+      test_vi_neg(a2, (byte)123);
+      test_cp_neg(a1, a2);
+      test_2ci_neg(a1, a2);
+      test_2vi_neg(a1, a2, (byte)123, (byte)103);
+      test_ci_oppos(a1);
+      test_vi_oppos(a2, (byte)123);
+      test_cp_oppos(a1, a2);
+      test_2ci_oppos(a1, a2);
+      test_2vi_oppos(a1, a2, (byte)123, (byte)103);
+      test_ci_off(a1);
+      test_vi_off(a2, (byte)123);
+      test_cp_off(a1, a2);
+      test_2ci_off(a1, a2);
+      test_2vi_off(a1, a2, (byte)123, (byte)103);
+      test_ci_inv(a1, OFFSET);
+      test_vi_inv(a2, (byte)123, OFFSET);
+      test_cp_inv(a1, a2, OFFSET);
+      test_2ci_inv(a1, a2, OFFSET);
+      test_2vi_inv(a1, a2, (byte)123, (byte)103, OFFSET);
+      test_ci_scl(a1);
+      test_vi_scl(a2, (byte)123);
+      test_cp_scl(a1, a2);
+      test_2ci_scl(a1, a2);
+      test_2vi_scl(a1, a2, (byte)123, (byte)103);
+      test_cp_alndst(a1, a2);
+      test_cp_alnsrc(a1, a2);
+      test_2ci_aln(a1, a2);
+      test_2vi_aln(a1, a2, (byte)123, (byte)103);
+      test_cp_unalndst(a1, a2);
+      test_cp_unalnsrc(a1, a2);
+      test_2ci_unaln(a1, a2);
+      test_2vi_unaln(a1, a2, (byte)123, (byte)103);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (byte)-123);
+      }
+      test_vi(a2, (byte)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (byte)123);
+      }
+      test_cp(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (byte)123);
+      }
+      test_2ci(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci: a1", i, a1[i], (byte)-123);
+        errn += verify("test_2ci: a2", i, a2[i], (byte)-103);
+      }
+      test_2vi(a1, a2, (byte)123, (byte)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi: a1", i, a1[i], (byte)123);
+        errn += verify("test_2vi: a2", i, a2[i], (byte)103);
+      }
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_neg(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (byte)-123);
+      }
+      test_vi_neg(a2, (byte)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (byte)123);
+      }
+      test_cp_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (byte)123);
+      }
+      test_2ci_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_neg: a1", i, a1[i], (byte)-123);
+        errn += verify("test_2ci_neg: a2", i, a2[i], (byte)-103);
+      }
+      test_2vi_neg(a1, a2, (byte)123, (byte)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_neg: a1", i, a1[i], (byte)123);
+        errn += verify("test_2vi_neg: a2", i, a2[i], (byte)103);
+      }
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_oppos(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (byte)-123);
+      }
+      test_vi_oppos(a2, (byte)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (byte)123);
+      }
+      test_cp_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (byte)123);
+      }
+      test_2ci_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_oppos: a1", i, a1[i], (byte)-123);
+        errn += verify("test_2ci_oppos: a2", i, a2[i], (byte)-103);
+      }
+      test_2vi_oppos(a1, a2, (byte)123, (byte)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_oppos: a1", i, a1[i], (byte)123);
+        errn += verify("test_2vi_oppos: a2", i, a2[i], (byte)103);
+      }
+      // Reset for indexing with offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_off(a1);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_off: a1", i, a1[i], (byte)-123);
+      }
+      test_vi_off(a2, (byte)123);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_off: a2", i, a2[i], (byte)123);
+      }
+      test_cp_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_off: a1", i, a1[i], (byte)123);
+      }
+      test_2ci_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_off: a1", i, a1[i], (byte)-123);
+        errn += verify("test_2ci_off: a2", i, a2[i], (byte)-103);
+      }
+      test_2vi_off(a1, a2, (byte)123, (byte)103);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], (byte)123);
+        errn += verify("test_2vi_off: a2", i, a2[i], (byte)103);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], (byte)-1);
+        errn += verify("test_2vi_off: a2", i, a2[i], (byte)-1);
+      }
+      // Reset for indexing with invariant offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_inv(a1, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_inv: a1", i, a1[i], (byte)-123);
+      }
+      test_vi_inv(a2, (byte)123, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_inv: a2", i, a2[i], (byte)123);
+      }
+      test_cp_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_inv: a1", i, a1[i], (byte)123);
+      }
+      test_2ci_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_inv: a1", i, a1[i], (byte)-123);
+        errn += verify("test_2ci_inv: a2", i, a2[i], (byte)-103);
+      }
+      test_2vi_inv(a1, a2, (byte)123, (byte)103, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], (byte)123);
+        errn += verify("test_2vi_inv: a2", i, a2[i], (byte)103);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], (byte)-1);
+        errn += verify("test_2vi_inv: a2", i, a2[i], (byte)-1);
+      }
+      // Reset for indexing with scale
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
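+      // The scaled kernels write only indices where i%SCALE == 0 (even slots,
+      // with SCALE = 2); odd slots must still hold the reset value -1.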
+      test_ci_scl(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : -123;
+        errn += verify("test_ci_scl: a1", i, a1[i], (byte)val);
+      }
+      test_vi_scl(a2, (byte)123);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_vi_scl: a2", i, a2[i], (byte)val);
+      }
+      test_cp_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_cp_scl: a1", i, a1[i], (byte)val);
+      }
+      test_2ci_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a1", i, a1[i], (byte)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], (byte)-123);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a2", i, a2[i], (byte)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], (byte)-103);
+        }
+      }
+      test_2vi_scl(a1, a2, (byte)123, (byte)103);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a1", i, a1[i], (byte)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], (byte)123);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a2", i, a2[i], (byte)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], (byte)103);
+        }
+      }
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
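+      // Refill a2 with a known value so the shifted copies below can be
+      // distinguished from the -1 reset left in the destination.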
+      test_vi(a2, (byte)123);
+      test_cp_alndst(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (byte)123);
+      }
+      test_vi(a2, (byte)-123);
+      test_cp_alnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2ci_aln(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], (byte)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], (byte)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2vi_aln(a1, a2, (byte)123, (byte)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], (byte)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], (byte)103);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_vi(a2, (byte)123);
+      test_cp_unalndst(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)123);
+      }
+      test_vi(a2, (byte)-123);
+      test_cp_unalnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2ci_unaln(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], (byte)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], (byte)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2vi_unaln(a1, a2, (byte)123, (byte)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], (byte)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], (byte)103);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (byte)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_cp_alndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (byte)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+      }
+      test_cp_alnsrc(a1, a1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2ci_aln(a1, a1);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (byte)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2vi_aln(a1, a1, (byte)123, (byte)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (byte)103);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (byte)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_cp_unalndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (byte)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+      }
+      test_cp_unalnsrc(a1, a1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2ci_unaln(a1, a1);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (byte)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (byte)-123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2vi_unaln(a1, a1, (byte)123, (byte)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (byte)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (byte)103);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, (byte)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi(a1, a2, (byte)123, (byte)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a2, (byte)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_neg(a1, a2, (byte)123, (byte)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_neg: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a2, (byte)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_oppos(a1, a2, (byte)123, (byte)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_oppos: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_off(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_off(a2, (byte)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_off(a1, a2, (byte)123, (byte)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_off: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_inv(a1, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_inv(a2, (byte)123, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_inv(a1, a2, (byte)123, (byte)103, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_inv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_scl(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_scl(a2, (byte)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_scl(a1, a2, (byte)123, (byte)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_scl: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_aln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_aln(a1, a2, (byte)123, (byte)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_aln: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_unaln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_unaln(a1, a2, (byte)123, (byte)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_unaln: " + (end - start));
+
+    return errn;
+  }
+
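+  // Additional kernel suffixes in this file: "_2ci"/"_2vi" write two arrays
+  // per iteration, "_off" uses a constant index offset, "_inv" takes the
+  // offset as a parameter so it is loop-invariant rather than a compile-time
+  // constant, and "_scl" strides by i*SCALE, presumably to exercise the
+  // non-unit-stride path.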
+  static void test_ci(byte[] a) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+    }
+  }
+  static void test_vi(byte[] a, byte b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp(byte[] a, byte[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci(byte[] a, byte[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi(byte[] a, byte[] b, byte c, byte d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_neg(byte[] a) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+    }
+  }
+  static void test_vi_neg(byte[] a, byte b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp_neg(byte[] a, byte[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci_neg(byte[] a, byte[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_neg(byte[] a, byte[] b, byte c, byte d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_oppos(byte[] a) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+    }
+  }
+  static void test_vi_oppos(byte[] a, byte b) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[limit-i] = b;
+    }
+  }
+  static void test_cp_oppos(byte[] a, byte[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+    }
+  }
+  static void test_2ci_oppos(byte[] a, byte[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_oppos(byte[] a, byte[] b, byte c, byte d) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_ci_off(byte[] a) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = -123;
+    }
+  }
+  static void test_vi_off(byte[] a, byte b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b;
+    }
+  }
+  static void test_cp_off(byte[] a, byte[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b[i+OFFSET];
+    }
+  }
+  static void test_2ci_off(byte[] a, byte[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = -123;
+      b[i+OFFSET] = -103;
+    }
+  }
+  static void test_2vi_off(byte[] a, byte[] b, byte c, byte d) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = c;
+      b[i+OFFSET] = d;
+    }
+  }
+  static void test_ci_inv(byte[] a, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = -123;
+    }
+  }
+  static void test_vi_inv(byte[] a, byte b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b;
+    }
+  }
+  static void test_cp_inv(byte[] a, byte[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b[i+k];
+    }
+  }
+  static void test_2ci_inv(byte[] a, byte[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = -123;
+      b[i+k] = -103;
+    }
+  }
+  static void test_2vi_inv(byte[] a, byte[] b, byte c, byte d, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = c;
+      b[i+k] = d;
+    }
+  }
+  static void test_ci_scl(byte[] a) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = -123;
+    }
+  }
+  static void test_vi_scl(byte[] a, byte b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b;
+    }
+  }
+  static void test_cp_scl(byte[] a, byte[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b[i*SCALE];
+    }
+  }
+  static void test_2ci_scl(byte[] a, byte[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = -123;
+      b[i*SCALE] = -103;
+    }
+  }
+  static void test_2vi_scl(byte[] a, byte[] b, byte c, byte d) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = c;
+      b[i*SCALE] = d;
+    }
+  }
+  static void test_cp_alndst(byte[] a, byte[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_alnsrc(byte[] a, byte[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+    }
+  }
+  static void test_2ci_aln(byte[] a, byte[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_aln(byte[] a, byte[] b, byte c, byte d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(byte[] a, byte[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_unalnsrc(byte[] a, byte[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+    }
+  }
+  static void test_2ci_unaln(byte[] a, byte[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_unaln(byte[] a, byte[] b, byte c, byte d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+
+  static int verify(String text, int i, byte elem, byte val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestCharShortVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestCharShortVect
+ */
+
+public class TestCharShortVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
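+  // char is Java's unsigned 16-bit type, so the (char)-1 resets below store
+  // 0xFFFF; the expected-value casts mirror the kernels' own casts.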
+
+  public static void main(String[] args) {
+    System.out.println("Testing Char + Short vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    char[] a1 = new char[ARRLEN];
+    char[] a2 = new char[ARRLEN];
+    short[] b1 = new short[ARRLEN];
+    short[] b2 = new short[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (char)123, (short)103);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (char)123, (short)103);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (char)123, (short)103);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (char)123, (short)103);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (char)123, (short)103);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = (char)-1;
+      a2[i] = (char)-1;
+      b1[i] = (short)-1;
+      b2[i] = (short)-1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (char)-123);
+        errn += verify("test_ci: b1", i, b1[i], (short)-103);
+      }
+      test_vi(a2, b2, (char)123, (short)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (char)123);
+        errn += verify("test_vi: b2", i, b2[i], (short)103);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (char)123);
+        errn += verify("test_cp: b1", i, b1[i], (short)103);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)-1;
+        b1[i] = (short)-1;
+        b2[i] = (short)-1;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (char)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], (short)-103);
+      }
+      test_vi_neg(a2, b2, (char)123, (short)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (char)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], (short)103);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (char)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], (short)103);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)-1;
+        b1[i] = (short)-1;
+        b2[i] = (short)-1;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (char)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], (short)-103);
+      }
+      test_vi_oppos(a2, b2, (char)123, (short)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (char)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], (short)103);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (char)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], (short)103);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)123;
+        b1[i] = (short)-1;
+        b2[i] = (short)123;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (char)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (char)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (short)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = (char)-123;
+        b2[i] = (short)-123;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (char)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (short)-123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (char)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (short)123);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        b1[i] = (short)-1;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (char)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (char)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (short)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (short)-1);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        b1[i] = (short)-1;
+      }
+      test_vi_aln(a1, b1, (char)123, (short)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (char)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (char)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (short)103);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)123;
+        b1[i] = (short)-1;
+        b2[i] = (short)123;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (char)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (char)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (short)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = (char)-123;
+        b2[i] = (short)-123;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (char)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (short)-123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (char)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (short)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        b1[i] = (short)-1;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (char)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (char)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (short)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (short)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        b1[i] = (short)-1;
+      }
+      test_vi_unaln(a1, b1, (char)123, (short)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (char)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (char)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (short)103);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (char)i;
+        b1[i] = (short)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        b1[i] = (short)-1;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (char)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (short)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = (char)-1;
+        b1[i+ALIGN_OFF] = (short)-1;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (char)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (char)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (short)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (char)i;
+        b1[i] = (short)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        b1[i] = (short)-1;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (char)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (short)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = (char)-1;
+        b1[i+UNALIGN_OFF] = (short)-1;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (char)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (char)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (short)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
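+    // Timing pass: per-kernel wall-clock times are printed for manual
+    // inspection (e.g. to spot a kernel that failed to vectorize); pass/fail
+    // is decided solely by the verification above.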
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (char)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (char)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (char)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (char)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (char)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
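+  // Kernel naming, as inferred from the bodies below: "ci" stores constants,
+  // "vi" stores the passed-in values, "cp" copies array to array; the "neg"
+  // variants iterate with a negative stride, "oppos" mixes ascending and
+  // descending indices, and "aln"/"unaln" shift one access by ALIGN_OFF or
+  // UNALIGN_OFF elements to exercise misaligned and overlapping vector cases.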
+  static void test_ci(char[] a, short[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = (char)-123;
+      b[i] = (short)-103;
+    }
+  }
+  static void test_vi(char[] a, short[] b, char c, short d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(char[] a, char[] b, short[] c, short[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(char[] a, short[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = (char)-123;
+      b[i] = (short)-103;
+    }
+  }
+  static void test_vi_neg(char[] a, short[] b, char c, short d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(char[] a, char[] b, short[] c, short[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(char[] a, short[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = (char)-123;
+      b[i] = (short)-103;
+    }
+  }
+  static void test_vi_oppos(char[] a, short[] b, char c, short d) {
+    int limit = a.length-1;
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(char[] a, char[] b, short[] c, short[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(char[] a, short[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = (char)-123;
+      b[i] = (short)-103;
+    }
+  }
+  static void test_vi_aln(char[] a, short[] b, char c, short d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(char[] a, char[] b, short[] c, short[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(char[] a, char[] b, short[] c, short[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(char[] a, short[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = (char)-123;
+      b[i] = (short)-103;
+    }
+  }
+  static void test_vi_unaln(char[] a, short[] b, char c, short d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(char[] a, char[] b, short[] c, short[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(char[] a, char[] b, short[] c, short[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
+  static int verify(String text, int i, char elem, char val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, short elem, short val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestCharVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,960 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestCharVect
+ */
+
+public class TestCharVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
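+  // As in TestCharShortVect, ARRLEN is prime, so the vectorized loops
+  // presumably always leave scalar pre-/post-loop tails to handle.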
+
+  public static void main(String[] args) {
+    System.out.println("Testing Char vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    char[] a1 = new char[ARRLEN];
+    char[] a2 = new char[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+      test_vi(a2, (char)123);
+      test_cp(a1, a2);
+      test_2ci(a1, a2);
+      test_2vi(a1, a2, (char)123, (char)103);
+      test_ci_neg(a1);
+      test_vi_neg(a2, (char)123);
+      test_cp_neg(a1, a2);
+      test_2ci_neg(a1, a2);
+      test_2vi_neg(a1, a2, (char)123, (char)103);
+      test_ci_oppos(a1);
+      test_vi_oppos(a2, (char)123);
+      test_cp_oppos(a1, a2);
+      test_2ci_oppos(a1, a2);
+      test_2vi_oppos(a1, a2, (char)123, (char)103);
+      test_ci_off(a1);
+      test_vi_off(a2, (char)123);
+      test_cp_off(a1, a2);
+      test_2ci_off(a1, a2);
+      test_2vi_off(a1, a2, (char)123, (char)103);
+      test_ci_inv(a1, OFFSET);
+      test_vi_inv(a2, (char)123, OFFSET);
+      test_cp_inv(a1, a2, OFFSET);
+      test_2ci_inv(a1, a2, OFFSET);
+      test_2vi_inv(a1, a2, (char)123, (char)103, OFFSET);
+      test_ci_scl(a1);
+      test_vi_scl(a2, (char)123);
+      test_cp_scl(a1, a2);
+      test_2ci_scl(a1, a2);
+      test_2vi_scl(a1, a2, (char)123, (char)103);
+      test_cp_alndst(a1, a2);
+      test_cp_alnsrc(a1, a2);
+      test_2ci_aln(a1, a2);
+      test_2vi_aln(a1, a2, (char)123, (char)103);
+      test_cp_unalndst(a1, a2);
+      test_cp_unalnsrc(a1, a2);
+      test_2ci_unaln(a1, a2);
+      test_2vi_unaln(a1, a2, (char)123, (char)103);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = (char)-1;
+      a2[i] = (char)-1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (char)-123);
+      }
+      test_vi(a2, (char)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (char)123);
+      }
+      test_cp(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (char)123);
+      }
+      test_2ci(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci: a1", i, a1[i], (char)-123);
+        errn += verify("test_2ci: a2", i, a2[i], (char)-103);
+      }
+      test_2vi(a1, a2, (char)123, (char)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi: a1", i, a1[i], (char)123);
+        errn += verify("test_2vi: a2", i, a2[i], (char)103);
+      }
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)-1;
+      }
+      test_ci_neg(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (char)-123);
+      }
+      test_vi_neg(a2, (char)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (char)123);
+      }
+      test_cp_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (char)123);
+      }
+      test_2ci_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_neg: a1", i, a1[i], (char)-123);
+        errn += verify("test_2ci_neg: a2", i, a2[i], (char)-103);
+      }
+      test_2vi_neg(a1, a2, (char)123, (char)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_neg: a1", i, a1[i], (char)123);
+        errn += verify("test_2vi_neg: a2", i, a2[i], (char)103);
+      }
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)-1;
+      }
+      test_ci_oppos(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (char)-123);
+      }
+      test_vi_oppos(a2, (char)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (char)123);
+      }
+      test_cp_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (char)123);
+      }
+      test_2ci_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_oppos: a1", i, a1[i], (char)-123);
+        errn += verify("test_2ci_oppos: a2", i, a2[i], (char)-103);
+      }
+      test_2vi_oppos(a1, a2, (char)123, (char)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_oppos: a1", i, a1[i], (char)123);
+        errn += verify("test_2vi_oppos: a2", i, a2[i], (char)103);
+      }
+      // Reset for indexing with offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)-1;
+      }
+      test_ci_off(a1);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_off: a1", i, a1[i], (char)-123);
+      }
+      test_vi_off(a2, (char)123);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_off: a2", i, a2[i], (char)123);
+      }
+      test_cp_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_off: a1", i, a1[i], (char)123);
+      }
+      test_2ci_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_off: a1", i, a1[i], (char)-123);
+        errn += verify("test_2ci_off: a2", i, a2[i], (char)-103);
+      }
+      test_2vi_off(a1, a2, (char)123, (char)103);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], (char)123);
+        errn += verify("test_2vi_off: a2", i, a2[i], (char)103);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], (char)-1);
+        errn += verify("test_2vi_off: a2", i, a2[i], (char)-1);
+      }
+      // Reset for indexing with invariant offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)-1;
+      }
+      test_ci_inv(a1, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_inv: a1", i, a1[i], (char)-123);
+      }
+      test_vi_inv(a2, (char)123, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_inv: a2", i, a2[i], (char)123);
+      }
+      test_cp_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_inv: a1", i, a1[i], (char)123);
+      }
+      test_2ci_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_inv: a1", i, a1[i], (char)-123);
+        errn += verify("test_2ci_inv: a2", i, a2[i], (char)-103);
+      }
+      test_2vi_inv(a1, a2, (char)123, (char)103, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], (char)123);
+        errn += verify("test_2vi_inv: a2", i, a2[i], (char)103);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], (char)-1);
+        errn += verify("test_2vi_inv: a2", i, a2[i], (char)-1);
+      }
+      // Reset for indexing with scale
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)-1;
+      }
+      test_ci_scl(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : -123;
+        errn += verify("test_ci_scl: a1", i, a1[i], (char)val);
+      }
+      test_vi_scl(a2, (char)123);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_vi_scl: a2", i, a2[i], (char)val);
+      }
+      test_cp_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_cp_scl: a1", i, a1[i], (char)val);
+      }
+      test_2ci_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a1", i, a1[i], (char)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], (char)-123);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a2", i, a2[i], (char)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], (char)-103);
+        }
+      }
+      test_2vi_scl(a1, a2, (char)123, (char)103);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a1", i, a1[i], (char)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], (char)123);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a2", i, a2[i], (char)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], (char)103);
+        }
+      }
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)-1;
+      }
+      test_vi(a2, (char)123);
+      test_cp_alndst(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (char)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (char)123);
+      }
+      test_vi(a2, (char)-123);
+      test_cp_alnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (char)-123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (char)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)-1;
+      }
+      test_2ci_aln(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], (char)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], (char)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], (char)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], (char)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)-1;
+      }
+      test_2vi_aln(a1, a2, (char)123, (char)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], (char)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], (char)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], (char)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], (char)103);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)-1;
+      }
+      test_vi(a2, (char)123);
+      test_cp_unalndst(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (char)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (char)123);
+      }
+      test_vi(a2, (char)-123);
+      test_cp_unalnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (char)-123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (char)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)-1;
+      }
+      test_2ci_unaln(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], (char)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], (char)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], (char)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], (char)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+        a2[i] = (char)-1;
+      }
+      test_2vi_unaln(a1, a2, (char)123, (char)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], (char)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], (char)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], (char)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], (char)103);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (char)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+      }
+      test_cp_alndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (char)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = (char)-1;
+      }
+      test_cp_alnsrc(a1, a1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (char)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (char)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+      }
+      test_2ci_aln(a1, a1);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (char)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (char)-123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+      }
+      test_2vi_aln(a1, a1, (char)123, (char)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (char)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (char)103);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (char)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+      }
+      test_cp_unalndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (char)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = (char)-1;
+      }
+      test_cp_unalnsrc(a1, a1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (char)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (char)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+      }
+      test_2ci_unaln(a1, a1);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (char)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (char)-123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = (char)-1;
+      }
+      test_2vi_unaln(a1, a1, (char)123, (char)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (char)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (char)103);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, (char)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi(a1, a2, (char)123, (char)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a2, (char)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_neg(a1, a2, (char)123, (char)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_neg: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a2, (char)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_oppos(a1, a2, (char)123, (char)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_oppos: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_off(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_off(a2, (char)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_off(a1, a2, (char)123, (char)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_off: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_inv(a1, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_inv(a2, (char)123, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_inv(a1, a2, (char)123, (char)103, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_inv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_scl(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_scl(a2, (char)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_scl(a1, a2, (char)123, (char)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_scl: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_aln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_aln(a1, a2, (char)123, (char)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_aln: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_unaln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_unaln(a1, a2, (char)123, (char)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_unaln: " + (end - start));
+
+    return errn;
+  }
+
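+  // Kernel naming, as inferred from the bodies below: "ci"/"vi"/"cp" are
+  // constant-store, value-store, and copy loops, and "2ci"/"2vi" touch two
+  // arrays per iteration; "neg", "oppos", "off", "inv", and "scl" vary the
+  // stride direction, add a constant or invariant offset, or scale the index;
+  // "aln"/"unaln" shift one access by ALIGN_OFF/UNALIGN_OFF elements.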
+  static void test_ci(char[] a) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = (char)-123;
+    }
+  }
+  static void test_vi(char[] a, char b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp(char[] a, char[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci(char[] a, char[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = (char)-123;
+      b[i] = (char)-103;
+    }
+  }
+  static void test_2vi(char[] a, char[] b, char c, char d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_neg(char[] a) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = (char)-123;
+    }
+  }
+  static void test_vi_neg(char[] a, char b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp_neg(char[] a, char[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci_neg(char[] a, char[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = (char)-123;
+      b[i] = (char)-103;
+    }
+  }
+  static void test_2vi_neg(char[] a, char[] b, char c, char d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_oppos(char[] a) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = (char)-123;
+    }
+  }
+  static void test_vi_oppos(char[] a, char b) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[limit-i] = b;
+    }
+  }
+  static void test_cp_oppos(char[] a, char[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+    }
+  }
+  static void test_2ci_oppos(char[] a, char[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = (char)-123;
+      b[i] = (char)-103;
+    }
+  }
+  static void test_2vi_oppos(char[] a, char[] b, char c, char d) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_ci_off(char[] a) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = (char)-123;
+    }
+  }
+  static void test_vi_off(char[] a, char b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b;
+    }
+  }
+  static void test_cp_off(char[] a, char[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b[i+OFFSET];
+    }
+  }
+  static void test_2ci_off(char[] a, char[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = (char)-123;
+      b[i+OFFSET] = (char)-103;
+    }
+  }
+  static void test_2vi_off(char[] a, char[] b, char c, char d) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = c;
+      b[i+OFFSET] = d;
+    }
+  }
+  static void test_ci_inv(char[] a, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = (char)-123;
+    }
+  }
+  static void test_vi_inv(char[] a, char b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b;
+    }
+  }
+  static void test_cp_inv(char[] a, char[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b[i+k];
+    }
+  }
+  static void test_2ci_inv(char[] a, char[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = (char)-123;
+      b[i+k] = (char)-103;
+    }
+  }
+  static void test_2vi_inv(char[] a, char[] b, char c, char d, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = c;
+      b[i+k] = d;
+    }
+  }
+  static void test_ci_scl(char[] a) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = (char)-123;
+    }
+  }
+  static void test_vi_scl(char[] a, char b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b;
+    }
+  }
+  static void test_cp_scl(char[] a, char[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b[i*SCALE];
+    }
+  }
+  static void test_2ci_scl(char[] a, char[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = (char)-123;
+      b[i*SCALE] = (char)-103;
+    }
+  }
+  static void test_2vi_scl(char[] a, char[] b, char c, char d) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = c;
+      b[i*SCALE] = d;
+    }
+  }
+  static void test_cp_alndst(char[] a, char[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_alnsrc(char[] a, char[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+    }
+  }
+  static void test_2ci_aln(char[] a, char[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = (char)-123;
+      b[i] = (char)-103;
+    }
+  }
+  static void test_2vi_aln(char[] a, char[] b, char c, char d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(char[] a, char[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_unalnsrc(char[] a, char[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+    }
+  }
+  static void test_2ci_unaln(char[] a, char[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = (char)-123;
+      b[i] = (char)-103;
+    }
+  }
+  static void test_2vi_unaln(char[] a, char[] b, char c, char d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+
+  static int verify(String text, int i, char elem, char val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestDoubleVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,955 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestDoubleVect
+ */
+
+public class TestDoubleVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
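+  // Same template as the tests above, but with 64-bit double lanes, so a
+  // 256-bit vector would hold four elements per operation.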
+
+  public static void main(String[] args) {
+    System.out.println("Testing Double vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    double[] a1 = new double[ARRLEN];
+    double[] a2 = new double[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+      test_vi(a2, 123.);
+      test_cp(a1, a2);
+      test_2ci(a1, a2);
+      test_2vi(a1, a2, 123., 103.);
+      test_ci_neg(a1);
+      test_vi_neg(a2, 123.);
+      test_cp_neg(a1, a2);
+      test_2ci_neg(a1, a2);
+      test_2vi_neg(a1, a2, 123., 103.);
+      test_ci_oppos(a1);
+      test_vi_oppos(a2, 123.);
+      test_cp_oppos(a1, a2);
+      test_2ci_oppos(a1, a2);
+      test_2vi_oppos(a1, a2, 123., 103.);
+      test_ci_off(a1);
+      test_vi_off(a2, 123.);
+      test_cp_off(a1, a2);
+      test_2ci_off(a1, a2);
+      test_2vi_off(a1, a2, 123., 103.);
+      test_ci_inv(a1, OFFSET);
+      test_vi_inv(a2, 123., OFFSET);
+      test_cp_inv(a1, a2, OFFSET);
+      test_2ci_inv(a1, a2, OFFSET);
+      test_2vi_inv(a1, a2, 123., 103., OFFSET);
+      test_ci_scl(a1);
+      test_vi_scl(a2, 123.);
+      test_cp_scl(a1, a2);
+      test_2ci_scl(a1, a2);
+      test_2vi_scl(a1, a2, 123., 103.);
+      test_cp_alndst(a1, a2);
+      test_cp_alnsrc(a1, a2);
+      test_2ci_aln(a1, a2);
+      test_2vi_aln(a1, a2, 123., 103.);
+      test_cp_unalndst(a1, a2);
+      test_cp_unalnsrc(a1, a2);
+      test_2ci_unaln(a1, a2);
+      test_2vi_unaln(a1, a2, 123., 103.);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], -123.);
+      }
+      test_vi(a2, 123.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], 123.);
+      }
+      test_cp(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], 123.);
+      }
+      test_2ci(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci: a1", i, a1[i], -123.);
+        errn += verify("test_2ci: a2", i, a2[i], -103.);
+      }
+      test_2vi(a1, a2, 123., 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi: a1", i, a1[i], 123.);
+        errn += verify("test_2vi: a2", i, a2[i], 103.);
+      }
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_neg(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], -123.);
+      }
+      test_vi_neg(a2, 123.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], 123.);
+      }
+      test_cp_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], 123.);
+      }
+      test_2ci_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_neg: a1", i, a1[i], -123.);
+        errn += verify("test_2ci_neg: a2", i, a2[i], -103.);
+      }
+      test_2vi_neg(a1, a2, 123., 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_neg: a1", i, a1[i], 123.);
+        errn += verify("test_2vi_neg: a2", i, a2[i], 103.);
+      }
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_oppos(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], -123.);
+      }
+      test_vi_oppos(a2, 123.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], 123.);
+      }
+      test_cp_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], 123.);
+      }
+      test_2ci_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_oppos: a1", i, a1[i], -123.);
+        errn += verify("test_2ci_oppos: a2", i, a2[i], -103.);
+      }
+      test_2vi_oppos(a1, a2, 123., 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_oppos: a1", i, a1[i], 123.);
+        errn += verify("test_2vi_oppos: a2", i, a2[i], 103.);
+      }
+      // Reset for indexing with offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_off(a1);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_off: a1", i, a1[i], -123.);
+      }
+      test_vi_off(a2, 123.);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_off: a2", i, a2[i], 123.);
+      }
+      test_cp_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_off: a1", i, a1[i], 123.);
+      }
+      test_2ci_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_off: a1", i, a1[i], -123.);
+        errn += verify("test_2ci_off: a2", i, a2[i], -103.);
+      }
+      test_2vi_off(a1, a2, 123., 103.);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], 123.);
+        errn += verify("test_2vi_off: a2", i, a2[i], 103.);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], -1.);
+        errn += verify("test_2vi_off: a2", i, a2[i], -1.);
+      }
+      // Reset for indexing with invariant offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_inv(a1, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_inv: a1", i, a1[i], -123.);
+      }
+      test_vi_inv(a2, 123., OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_inv: a2", i, a2[i], 123.);
+      }
+      test_cp_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_inv: a1", i, a1[i], 123.);
+      }
+      test_2ci_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_inv: a1", i, a1[i], -123.);
+        errn += verify("test_2ci_inv: a2", i, a2[i], -103.);
+      }
+      test_2vi_inv(a1, a2, 123., 103., OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], 123.);
+        errn += verify("test_2vi_inv: a2", i, a2[i], 103.);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], -1.);
+        errn += verify("test_2vi_inv: a2", i, a2[i], -1.);
+      }
+      // Reset for indexing with scale
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_scl(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : -123;
+        errn += verify("test_ci_scl: a1", i, a1[i], (double)val);
+      }
+      test_vi_scl(a2, 123.);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_vi_scl: a2", i, a2[i], (double)val);
+      }
+      test_cp_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_cp_scl: a1", i, a1[i], (double)val);
+      }
+      test_2ci_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a1", i, a1[i], -1.);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], -123.);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a2", i, a2[i], -1.);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], -103.);
+        }
+      }
+      test_2vi_scl(a1, a2, 123., 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a1", i, a1[i], -1.);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], 123.);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a2", i, a2[i], -1.);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], 103.);
+        }
+      }
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_vi(a2, 123.);
+      test_cp_alndst(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], 123.);
+      }
+      test_vi(a2, -123.);
+      test_cp_alnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], -123.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2ci_aln(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], -123.);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], -103.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], -1.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2vi_aln(a1, a2, 123., 103.);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], 123.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], -1.);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], 103.);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_vi(a2, 123.);
+      test_cp_unalndst(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], 123.);
+      }
+      test_vi(a2, -123.);
+      test_cp_unalnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], -123.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2ci_unaln(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], -123.);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], -103.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], -1.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2vi_unaln(a1, a2, 123., 103.);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], 123.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], -1.);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], 103.);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (double)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_cp_alndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (double)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+      }
+      test_cp_alnsrc(a1, a1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (double)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2ci_aln(a1, a1);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], -103.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], -123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2vi_aln(a1, a1, 123., 103.);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], 123.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], 103.);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (double)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_cp_unalndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (double)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+      }
+      test_cp_unalnsrc(a1, a1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (double)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2ci_unaln(a1, a1);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], -103.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], -123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2vi_unaln(a1, a1, 123., 103.);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], 123.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], 103.);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
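+    // Timing section: wall-clock times below are printed for manual inspection
+    // only; nothing is asserted, so the pass/fail verdict rests entirely on the
+    // verification block above.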
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, 123.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi(a1, a2, 123., 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a2, 123.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_neg(a1, a2, 123., 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_neg: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a2, 123.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_oppos(a1, a2, 123., 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_oppos: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_off(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_off(a2, 123.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_off(a1, a2, 123., 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_off: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_inv(a1, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_inv(a2, 123., OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_inv(a1, a2, 123., 103., OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_inv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_scl(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_scl(a2, 123.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_scl(a1, a2, 123., 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_scl: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_aln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_aln(a1, a2, 123., 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_aln: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_unaln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_unaln(a1, a2, 123., 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_unaln: " + (end - start));
+
+    return errn;
+  }
+
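+  // Method naming scheme (inferred from the loop bodies below):
+  //   ci = constant init, vi = variable init, cp = copy,
+  //   2ci/2vi = two arrays written in one loop,
+  //   neg = negative stride, oppos = opposite strides in one loop,
+  //   off/inv = constant/invariant index offset, scl = scaled (strided) index,
+  //   aln/unaln = arrays accessed at a relative aligned/unaligned offset.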
+  static void test_ci(double[] a) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123.;
+    }
+  }
+  static void test_vi(double[] a, double b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp(double[] a, double[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci(double[] a, double[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123.;
+      b[i] = -103.;
+    }
+  }
+  static void test_2vi(double[] a, double[] b, double c, double d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_neg(double[] a) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123.;
+    }
+  }
+  static void test_vi_neg(double[] a, double b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp_neg(double[] a, double[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci_neg(double[] a, double[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123.;
+      b[i] = -103.;
+    }
+  }
+  static void test_2vi_neg(double[] a, double[] b, double c, double d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_oppos(double[] a) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123.;
+    }
+  }
+  static void test_vi_oppos(double[] a, double b) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[limit-i] = b;
+    }
+  }
+  static void test_cp_oppos(double[] a, double[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+    }
+  }
+  static void test_2ci_oppos(double[] a, double[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123.;
+      b[i] = -103.;
+    }
+  }
+  static void test_2vi_oppos(double[] a, double[] b, double c, double d) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_ci_off(double[] a) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = -123.;
+    }
+  }
+  static void test_vi_off(double[] a, double b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b;
+    }
+  }
+  static void test_cp_off(double[] a, double[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b[i+OFFSET];
+    }
+  }
+  static void test_2ci_off(double[] a, double[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = -123.;
+      b[i+OFFSET] = -103.;
+    }
+  }
+  static void test_2vi_off(double[] a, double[] b, double c, double d) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = c;
+      b[i+OFFSET] = d;
+    }
+  }
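+  // The _inv variants below receive the offset as a parameter, so it is a
+  // loop invariant the compiler cannot constant-fold, unlike the compile-time
+  // constant OFFSET used by the _off variants above.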
+  static void test_ci_inv(double[] a, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = -123.;
+    }
+  }
+  static void test_vi_inv(double[] a, double b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b;
+    }
+  }
+  static void test_cp_inv(double[] a, double[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b[i+k];
+    }
+  }
+  static void test_2ci_inv(double[] a, double[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = -123.;
+      b[i+k] = -103.;
+    }
+  }
+  static void test_2vi_inv(double[] a, double[] b, double c, double d, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = c;
+      b[i+k] = d;
+    }
+  }
+  static void test_ci_scl(double[] a) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = -123.;
+    }
+  }
+  static void test_vi_scl(double[] a, double b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b;
+    }
+  }
+  static void test_cp_scl(double[] a, double[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b[i*SCALE];
+    }
+  }
+  static void test_2ci_scl(double[] a, double[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = -123.;
+      b[i*SCALE] = -103.;
+    }
+  }
+  static void test_2vi_scl(double[] a, double[] b, double c, double d) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = c;
+      b[i*SCALE] = d;
+    }
+  }
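+  // The aln/unaln copies below shift one access by a fixed element offset;
+  // they are also invoked later with a == b, which presumably checks that the
+  // vectorizer respects the memory dependence of overlapping regions.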
+  static void test_cp_alndst(double[] a, double[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_alnsrc(double[] a, double[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+    }
+  }
+  static void test_2ci_aln(double[] a, double[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123.;
+      b[i] = -103.;
+    }
+  }
+  static void test_2vi_aln(double[] a, double[] b, double c, double d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(double[] a, double[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_unalnsrc(double[] a, double[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+    }
+  }
+  static void test_2ci_unaln(double[] a, double[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123.;
+      b[i] = -103.;
+    }
+  }
+  static void test_2vi_unaln(double[] a, double[] b, double c, double d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+
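+  // Exact floating-point comparison is intentional here: every expected value
+  // (-1, +/-103, +/-123, and the small loop-index values) is exactly
+  // representable in a double, so no tolerance is needed.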
+  static int verify(String text, int i, double elem, double val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestFloatDoubleVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestFloatDoubleVect
+ */
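+// Flag notes: -Xbatch compiles methods synchronously so the timed loops run
+// compiled code; -XX:-TieredCompilation forces the C2 compiler (where the
+// SuperWord pass lives); -XX:-OptimizeFill keeps simple init loops out of the
+// fill intrinsic so they remain candidates for vectorization; and
+// -XX:+IgnoreUnrecognizedVMOptions lets the same command line run on VMs that
+// lack any of these flags.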
+
+public class TestFloatDoubleVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
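+  // Note: ARRLEN (997) is prime, so it is never a multiple of a vector length
+  // and every vectorized loop leaves a scalar remainder for a post-loop.
+  // ALIGN_OFF (8) and UNALIGN_OFF (5) appear to be chosen as element offsets
+  // that do and do not preserve relative vector alignment.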
+
+  public static void main(String[] args) {
+    System.out.println("Testing Float + Double vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    float[] a1 = new float[ARRLEN];
+    float[] a2 = new float[ARRLEN];
+    double[] b1 = new double[ARRLEN];
+    double[] b2 = new double[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, 123.f, 103.);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, 123.f, 103.);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, 123.f, 103.);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, 123.f, 103.);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, 123.f, 103.);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1.;
+      b2[i] = -1.;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], -123.f);
+        errn += verify("test_ci: b1", i, b1[i], -103.);
+      }
+      test_vi(a2, b2, 123.f, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], 123.f);
+        errn += verify("test_vi: b2", i, b2[i], 103.);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], 123.f);
+        errn += verify("test_cp: b1", i, b1[i], 103.);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.;
+        b2[i] = -1.;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], -123.f);
+        errn += verify("test_ci_neg: b1", i, b1[i], -103.);
+      }
+      test_vi_neg(a2, b2, 123.f, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], 123.f);
+        errn += verify("test_vi_neg: b2", i, b2[i], 103.);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], 123.f);
+        errn += verify("test_cp_neg: b1", i, b1[i], 103.);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.;
+        b2[i] = -1.;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], -123.f);
+        errn += verify("test_ci_oppos: b1", i, b1[i], -103.);
+      }
+      test_vi_oppos(a2, b2, 123.f, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], 123.f);
+        errn += verify("test_vi_oppos: b2", i, b2[i], 103.);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], 123.f);
+        errn += verify("test_cp_oppos: b1", i, b1[i], 103.);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.;
+        b2[i] = 123.;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], -1.f);
+        errn += verify("test_cp_alndst: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], 123.f);
+        errn += verify("test_cp_alndst: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], -123.f);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], 123.f);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], -123.f);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -103.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -1.);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_vi_aln(a1, b1, 123.f, 103.);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], 123.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], -1.f);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], 103.);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.;
+        b2[i] = 123.;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], -1.f);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], 123.f);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], -123.f);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], 123.f);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], -123.f);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -103.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -1.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_vi_unaln(a1, b1, 123.f, 103.);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], 123.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], -1.f);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], 103.);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (float)i;
+        b1[i] = (double)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (float)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (double)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1.;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], -1.f);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (float)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (double)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (float)i;
+        b1[i] = (double)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (float)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (double)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1.;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], -1.f);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (float)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (double)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, 123.f, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, 123.f, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, 123.f, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, 123.f, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, 123.f, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
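+  // Each method below writes a 4-byte float array and an 8-byte double array
+  // in the same loop body, which presumably exercises SuperWord packing of
+  // two different vector element sizes within a single loop.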
+  static void test_ci(float[] a, double[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123.f;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi(float[] a, double[] b, float c, double d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(float[] a, float[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(float[] a, double[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123.f;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_neg(float[] a, double[] b, float c, double d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(float[] a, float[] b, double[] c, double[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(float[] a, double[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123.f;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_oppos(float[] a, double[] b, float c, double d) {
+    int limit = a.length-1;
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(float[] a, float[] b, double[] c, double[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(float[] a, double[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123.f;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_aln(float[] a, double[] b, float c, double d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(float[] a, float[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(float[] a, float[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(float[] a, double[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123.f;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_unaln(float[] a, double[] b, float c, double d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(float[] a, float[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(float[] a, float[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
+  static int verify(String text, int i, float elem, float val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, double elem, double val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestFloatVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,953 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestFloatVect
+ */
+
+public class TestFloatVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
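+  // Same constants and test matrix as the double[] variant earlier in this
+  // changeset, applied here to 4-byte floats (twice the lanes per vector width).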
+
+  public static void main(String[] args) {
+    System.out.println("Testing Float vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    float[] a1 = new float[ARRLEN];
+    float[] a2 = new float[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+      test_vi(a2, 123.f);
+      test_cp(a1, a2);
+      test_2ci(a1, a2);
+      test_2vi(a1, a2, 123.f, 103.f);
+      test_ci_neg(a1);
+      test_vi_neg(a2, 123.f);
+      test_cp_neg(a1, a2);
+      test_2ci_neg(a1, a2);
+      test_2vi_neg(a1, a2, 123.f, 103.f);
+      test_ci_oppos(a1);
+      test_vi_oppos(a2, 123.f);
+      test_cp_oppos(a1, a2);
+      test_2ci_oppos(a1, a2);
+      test_2vi_oppos(a1, a2, 123.f, 103.f);
+      test_ci_off(a1);
+      test_vi_off(a2, 123.f);
+      test_cp_off(a1, a2);
+      test_2ci_off(a1, a2);
+      test_2vi_off(a1, a2, 123.f, 103.f);
+      test_ci_inv(a1, OFFSET);
+      test_vi_inv(a2, 123.f, OFFSET);
+      test_cp_inv(a1, a2, OFFSET);
+      test_2ci_inv(a1, a2, OFFSET);
+      test_2vi_inv(a1, a2, 123.f, 103.f, OFFSET);
+      test_ci_scl(a1);
+      test_vi_scl(a2, 123.f);
+      test_cp_scl(a1, a2);
+      test_2ci_scl(a1, a2);
+      test_2vi_scl(a1, a2, 123.f, 103.f);
+      test_cp_alndst(a1, a2);
+      test_cp_alnsrc(a1, a2);
+      test_2ci_aln(a1, a2);
+      test_2vi_aln(a1, a2, 123.f, 103.f);
+      test_cp_unalndst(a1, a2);
+      test_cp_unalnsrc(a1, a2);
+      test_2ci_unaln(a1, a2);
+      test_2vi_unaln(a1, a2, 123.f, 103.f);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], -123.f);
+      }
+      test_vi(a2, 123.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], 123.f);
+      }
+      test_cp(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], 123.f);
+      }
+      test_2ci(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci: a1", i, a1[i], -123.f);
+        errn += verify("test_2ci: a2", i, a2[i], -103.f);
+      }
+      test_2vi(a1, a2, 123.f, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi: a1", i, a1[i], 123.f);
+        errn += verify("test_2vi: a2", i, a2[i], 103.f);
+      }
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_neg(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], -123.f);
+      }
+      test_vi_neg(a2, 123.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], 123.f);
+      }
+      test_cp_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], 123.f);
+      }
+      test_2ci_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_neg: a1", i, a1[i], -123.f);
+        errn += verify("test_2ci_neg: a2", i, a2[i], -103.f);
+      }
+      test_2vi_neg(a1, a2, 123.f, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_neg: a1", i, a1[i], 123.f);
+        errn += verify("test_2vi_neg: a2", i, a2[i], 103.f);
+      }
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_oppos(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], -123.f);
+      }
+      test_vi_oppos(a2, 123.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], 123.f);
+      }
+      test_cp_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], 123.f);
+      }
+      test_2ci_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_oppos: a1", i, a1[i], -123.f);
+        errn += verify("test_2ci_oppos: a2", i, a2[i], -103.f);
+      }
+      test_2vi_oppos(a1, a2, 123.f, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_oppos: a1", i, a1[i], 123.f);
+        errn += verify("test_2vi_oppos: a2", i, a2[i], 103.f);
+      }
+      // Reset for indexing with offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_off(a1);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_off: a1", i, a1[i], -123.f);
+      }
+      test_vi_off(a2, 123.f);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_off: a2", i, a2[i], 123.f);
+      }
+      test_cp_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_off: a1", i, a1[i], 123.f);
+      }
+      test_2ci_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_off: a1", i, a1[i], -123.f);
+        errn += verify("test_2ci_off: a2", i, a2[i], -103.f);
+      }
+      test_2vi_off(a1, a2, 123.f, 103.f);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], 123.f);
+        errn += verify("test_2vi_off: a2", i, a2[i], 103.f);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], -1.f);
+        errn += verify("test_2vi_off: a2", i, a2[i], -1.f);
+      }
+      // Reset for indexing with invariant offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_inv(a1, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_inv: a1", i, a1[i], -123.f);
+      }
+      test_vi_inv(a2, 123.f, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_inv: a2", i, a2[i], 123.f);
+      }
+      test_cp_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_inv: a1", i, a1[i], 123.f);
+      }
+      test_2ci_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_inv: a1", i, a1[i], -123.f);
+        errn += verify("test_2ci_inv: a2", i, a2[i], -103.f);
+      }
+      test_2vi_inv(a1, a2, 123.f, 103.f, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], 123.f);
+        errn += verify("test_2vi_inv: a2", i, a2[i], 103.f);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], -1.f);
+        errn += verify("test_2vi_inv: a2", i, a2[i], -1.f);
+      }
+      // Reset for indexing with scale
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_scl(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : -123;
+        errn += verify("test_ci_scl: a1", i, a1[i], (float)val);
+      }
+      test_vi_scl(a2, 123.f);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_vi_scl: a2", i, a2[i], (float)val);
+      }
+      test_cp_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_cp_scl: a1", i, a1[i], (float)val);
+      }
+      test_2ci_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a1", i, a1[i], -1.f);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], -123.f);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a2", i, a2[i], -1.f);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], -103.f);
+        }
+      }
+      test_2vi_scl(a1, a2, 123.f, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a1", i, a1[i], -1.f);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], 123.f);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a2", i, a2[i], -1.f);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], 103.f);
+        }
+      }
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_vi(a2, 123.f);
+      test_cp_alndst(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], 123.f);
+      }
+      test_vi(a2, -123.f);
+      test_cp_alnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], -123.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2ci_aln(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], -123.f);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], -103.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], -1.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2vi_aln(a1, a2, 123.f, 103.f);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], 123.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], -1.f);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], 103.f);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_vi(a2, 123.f);
+      test_cp_unalndst(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], 123.f);
+      }
+      test_vi(a2, -123.f);
+      test_cp_unalnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], -123.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2ci_unaln(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], -123.f);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], -103.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], -1.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2vi_unaln(a1, a2, 123.f, 103.f);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], 123.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], -1.f);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], 103.f);
+      }
+
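+      // The overlap cases below pass the same array as both source and
+      // destination, so the expected result depends on copy direction:
+      // a forward copy into a shifted destination replicates the first
+      // ALIGN_OFF elements through the array (hence the i%ALIGN_OFF
+      // pattern checked below).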
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (float)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_cp_alndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (float)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+      }
+      test_cp_alnsrc(a1, a1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (float)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2ci_aln(a1, a1);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], -103.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], -123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2vi_aln(a1, a1, 123.f, 103.f);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], 123.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], 103.f);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (float)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_cp_unalndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (float)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+      }
+      test_cp_unalnsrc(a1, a1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (float)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2ci_unaln(a1, a1);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], -103.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], -123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2vi_unaln(a1, a1, 123.f, 103.f);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], 123.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], 103.f);
+      }
+
+    }
+
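+    // Skip the timing runs if verification failed; the timings printed
+    // below are informational only and are not checked.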
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, 123.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi(a1, a2, 123.f, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a2, 123.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_neg(a1, a2, 123.f, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_neg: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a2, 123.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_oppos(a1, a2, 123.f, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_oppos: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_off(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_off(a2, 123.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_off(a1, a2, 123.f, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_off: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_inv(a1, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_inv(a2, 123.f, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_inv(a1, a2, 123.f, 103.f, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_inv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_scl(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_scl(a2, 123.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_scl(a1, a2, 123.f, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_scl: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_aln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_aln(a1, a2, 123.f, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_aln: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_unaln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_unaln(a1, a2, 123.f, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_unaln: " + (end - start));
+
+    return errn;
+  }
+
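+  // Loop kernels. Each body is a single counted loop of plain array
+  // stores or copies, the shape the superword optimization is expected
+  // to vectorize.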
+  static void test_ci(float[] a) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123.f;
+    }
+  }
+  static void test_vi(float[] a, float b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp(float[] a, float[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci(float[] a, float[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123.f;
+      b[i] = -103.f;
+    }
+  }
+  static void test_2vi(float[] a, float[] b, float c, float d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_neg(float[] a) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123.f;
+    }
+  }
+  static void test_vi_neg(float[] a, float b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp_neg(float[] a, float[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci_neg(float[] a, float[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123.f;
+      b[i] = -103.f;
+    }
+  }
+  static void test_2vi_neg(float[] a, float[] b, float c, float d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_oppos(float[] a) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123.f;
+    }
+  }
+  static void test_vi_oppos(float[] a, float b) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[limit-i] = b;
+    }
+  }
+  static void test_cp_oppos(float[] a, float[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+    }
+  }
+  static void test_2ci_oppos(float[] a, float[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123.f;
+      b[i] = -103.f;
+    }
+  }
+  static void test_2vi_oppos(float[] a, float[] b, float c, float d) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_ci_off(float[] a) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = -123.f;
+    }
+  }
+  static void test_vi_off(float[] a, float b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b;
+    }
+  }
+  static void test_cp_off(float[] a, float[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b[i+OFFSET];
+    }
+  }
+  static void test_2ci_off(float[] a, float[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = -123.f;
+      b[i+OFFSET] = -103.f;
+    }
+  }
+  static void test_2vi_off(float[] a, float[] b, float c, float d) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = c;
+      b[i+OFFSET] = d;
+    }
+  }
+  static void test_ci_inv(float[] a, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = -123.f;
+    }
+  }
+  static void test_vi_inv(float[] a, float b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b;
+    }
+  }
+  static void test_cp_inv(float[] a, float[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b[i+k];
+    }
+  }
+  static void test_2ci_inv(float[] a, float[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = -123.f;
+      b[i+k] = -103.f;
+    }
+  }
+  static void test_2vi_inv(float[] a, float[] b, float c, float d, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = c;
+      b[i+k] = d;
+    }
+  }
+  static void test_ci_scl(float[] a) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = -123.f;
+    }
+  }
+  static void test_vi_scl(float[] a, float b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b;
+    }
+  }
+  static void test_cp_scl(float[] a, float[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b[i*SCALE];
+    }
+  }
+  static void test_2ci_scl(float[] a, float[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = -123.f;
+      b[i*SCALE] = -103.f;
+    }
+  }
+  static void test_2vi_scl(float[] a, float[] b, float c, float d) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = c;
+      b[i*SCALE] = d;
+    }
+  }
+  static void test_cp_alndst(float[] a, float[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_alnsrc(float[] a, float[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+    }
+  }
+  static void test_2ci_aln(float[] a, float[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123.f;
+      b[i] = -103.f;
+    }
+  }
+  static void test_2vi_aln(float[] a, float[] b, float c, float d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(float[] a, float[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_unalnsrc(float[] a, float[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+    }
+  }
+  static void test_2ci_unaln(float[] a, float[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123.f;
+      b[i] = -103.f;
+    }
+  }
+  static void test_2vi_unaln(float[] a, float[] b, float c, float d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+
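+  // Prints a diagnostic and returns 1 on mismatch, 0 otherwise, so that
+  // callers can accumulate the results into an error count.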
+  static int verify(String text, int i, float elem, float val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestIntDoubleVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestIntDoubleVect
+ */
+
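+// The test warms up each loop kernel for ITERS iterations so the JIT
+// compiles it, verifies the results element by element, and finally
+// prints informational per-kernel timings.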
+public class TestIntDoubleVect {
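+  // Shared parameters of the 7119644 test family. ARRLEN = 997 is prime,
+  // so vectorized loops always leave a scalar tail; ALIGN_OFF and
+  // UNALIGN_OFF pick offsets that preserve or break the relative alignment
+  // of the two accessed regions. OFFSET and SCALE belong to the common
+  // template and are unused in this variant.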
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
+
+  public static void main(String[] args) {
+    System.out.println("Testing Integer + Double vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    int[] a1 = new int[ARRLEN];
+    int[] a2 = new int[ARRLEN];
+    double[] b1 = new double[ARRLEN];
+    double[] b2 = new double[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (int)123, 103.);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (int)123, 103.);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (int)123, 103.);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (int)123, 103.);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (int)123, 103.);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1.;
+      b2[i] = -1.;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (int)-123);
+        errn += verify("test_ci: b1", i, b1[i], -103.);
+      }
+      test_vi(a2, b2, (int)123, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (int)123);
+        errn += verify("test_vi: b2", i, b2[i], 103.);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (int)123);
+        errn += verify("test_cp: b1", i, b1[i], 103.);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.;
+        b2[i] = -1.;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (int)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], -103.);
+      }
+      test_vi_neg(a2, b2, (int)123, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (int)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], 103.);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], 103.);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.;
+        b2[i] = -1.;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (int)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], -103.);
+      }
+      test_vi_oppos(a2, b2, (int)123, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (int)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], 103.);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], 103.);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.;
+        b2[i] = 123.;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (int)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (int)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -103.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -1.);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_vi_aln(a1, b1, (int)123, 103.);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (int)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], 103.);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.;
+        b2[i] = 123.;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (int)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (int)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -103.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -1.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_vi_unaln(a1, b1, (int)123, 103.);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (int)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], 103.);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (int)i;
+        b1[i] = (double)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (int)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (double)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1.;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (double)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (int)i;
+        b1[i] = (double)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (int)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (double)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1.;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (double)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (int)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (int)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (int)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (int)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (int)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
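+  // Kernel naming: ci = constant init, vi = value init, cp = copy; the
+  // _neg, _oppos, _aln and _unaln suffixes select reversed, opposing,
+  // aligned-offset and unaligned-offset index patterns respectively.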
+  static void test_ci(int[] a, double[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi(int[] a, double[] b, int c, double d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(int[] a, int[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(int[] a, double[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_neg(int[] a, double[] b, int c, double d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(int[] a, int[] b, double[] c, double[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(int[] a, double[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_oppos(int[] a, double[] b, int c, double d) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(int[] a, int[] b, double[] c, double[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(int[] a, double[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_aln(int[] a, double[] b, int c, double d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(int[] a, int[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(int[] a, int[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(int[] a, double[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_unaln(int[] a, double[] b, int c, double d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(int[] a, int[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(int[] a, int[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
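+  // One verify overload per element type; exact equality is safe here
+  // because the kernels only store or copy values, never compute with them.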
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, double elem, double val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestIntFloatVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestIntFloatVect
+ */
+
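+// Same harness as TestIntDoubleVect, with float as the second element type.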
+public class TestIntFloatVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
+
+  public static void main(String[] args) {
+    System.out.println("Testing Integer + Float vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    int[] a1 = new int[ARRLEN];
+    int[] a2 = new int[ARRLEN];
+    float[] b1 = new float[ARRLEN];
+    float[] b2 = new float[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (int)123, 103.f);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (int)123, 103.f);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (int)123, 103.f);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (int)123, 103.f);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (int)123, 103.f);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1.f;
+      b2[i] = -1.f;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (int)-123);
+        errn += verify("test_ci: b1", i, b1[i], -103.f);
+      }
+      test_vi(a2, b2, (int)123, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (int)123);
+        errn += verify("test_vi: b2", i, b2[i], 103.f);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (int)123);
+        errn += verify("test_cp: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.f;
+        b2[i] = -1.f;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (int)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], -103.f);
+      }
+      test_vi_neg(a2, b2, (int)123, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (int)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], 103.f);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.f;
+        b2[i] = -1.f;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (int)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], -103.f);
+      }
+      test_vi_oppos(a2, b2, (int)123, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (int)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], 103.f);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.f;
+        b2[i] = 123.f;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (int)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.f;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.f);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (int)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -103.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -1.f);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_vi_aln(a1, b1, (int)123, 103.f);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (int)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.f;
+        b2[i] = 123.f;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (int)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.f;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (int)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -103.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -1.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_vi_unaln(a1, b1, (int)123, 103.f);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (int)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (int)i;
+        b1[i] = (float)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (int)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (float)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1.f;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (float)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (int)i;
+        b1[i] = (float)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (int)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (float)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1.f;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (float)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (int)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (int)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (int)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (int)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (int)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
+  static void test_ci(int[] a, float[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi(int[] a, float[] b, int c, float d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(int[] a, int[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(int[] a, float[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_neg(int[] a, float[] b, int c, float d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(int[] a, int[] b, float[] c, float[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(int[] a, float[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_oppos(int[] a, float[] b, int c, float d) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(int[] a, int[] b, float[] c, float[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(int[] a, float[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_aln(int[] a, float[] b, int c, float d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(int[] a, int[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(int[] a, int[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(int[] a, float[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_unaln(int[] a, float[] b, int c, float d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(int[] a, int[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(int[] a, int[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, float elem, float val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestIntLongVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
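+ * The flags below (our reading): -Xbatch compiles methods synchronously on
+ * first use, -XX:-TieredCompilation forces the C2 compiler that hosts the
+ * SuperWord pass, -XX:-OptimizeFill keeps fill loops from being replaced by
+ * the fill intrinsic so SuperWord can vectorize them, and
+ * -XX:+IgnoreUnrecognizedVMOptions lets the test run on builds that lack
+ * any of these flags.
+ *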
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestIntLongVect
+ */
+
+public class TestIntLongVect {
+  private static final int ARRLEN = 997;    // prime, so trip counts never divide evenly into vectors (exercises pre/post loops)
+  private static final int ITERS  = 11000;  // enough iterations to trigger C2 compilation under -Xbatch
+  private static final int OFFSET = 3;      // unused in this test
+  private static final int SCALE = 2;       // unused in this test
+  private static final int ALIGN_OFF = 8;   // 8 elements: a multiple of any vector length used here
+  private static final int UNALIGN_OFF = 5; // 5 elements: never a vector-length multiple
+
+  public static void main(String[] args) {
+    System.out.println("Testing Integer + Long vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97); // 97: conventional failure exit status for these tests
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    int[] a1 = new int[ARRLEN];
+    int[] a2 = new int[ARRLEN];
+    long[] b1 = new long[ARRLEN];
+    long[] b2 = new long[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (int)123, (long)103);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (int)123, (long)103);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (int)123, (long)103);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (int)123, (long)103);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (int)123, (long)103);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1;
+      b2[i] = -1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
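+      // Each scenario below re-initializes the arrays, runs one kernel once
+      // (already compiled during warmup), and checks every element.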
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (int)-123);
+        errn += verify("test_ci: b1", i, b1[i], (long)-103);
+      }
+      test_vi(a2, b2, (int)123, (long)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (int)123);
+        errn += verify("test_vi: b2", i, b2[i], (long)103);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (int)123);
+        errn += verify("test_cp: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1;
+        b2[i] = -1;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (int)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], (long)-103);
+      }
+      test_vi_neg(a2, b2, (int)123, (long)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (int)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], (long)103);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1;
+        b2[i] = -1;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (int)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], (long)-103);
+      }
+      test_vi_oppos(a2, b2, (int)123, (long)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (int)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], (long)103);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1;
+        b2[i] = 123;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (int)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (long)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (long)-123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (long)123);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (int)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (long)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (long)-1);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_vi_aln(a1, b1, (int)123, (long)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (int)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1;
+        b2[i] = 123;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (int)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (long)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (long)-123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (long)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (int)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (long)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (long)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_vi_unaln(a1, b1, (int)123, (long)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (int)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for aligned overlap initialization
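+      // test_cp_alndst(a1, a1, b1, b1) copies within one array: each
+      // iteration re-reads a value written ALIGN_OFF iterations earlier, so
+      // the seed values in [0, ALIGN_OFF) propagate through the whole array
+      // and every element ends up as i % ALIGN_OFF. Vectorization must not
+      // break this read-after-write dependence.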
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (int)i;
+        b1[i] = (long)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (int)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (long)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (long)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (int)i;
+        b1[i] = (long)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (int)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (long)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (long)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
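+    // The timings below are informational only; nothing is asserted on them.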
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (int)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (int)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (int)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (int)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (int)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
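+  // Loop kernels: the same shapes as the int/float variant above, here
+  // pairing 32-bit int lanes with 64-bit long lanes in a single loop.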
+  static void test_ci(int[] a, long[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi(int[] a, long[] b, int c, long d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(int[] a, int[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(int[] a, long[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_neg(int[] a, long[] b, int c, long d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(int[] a, int[] b, long[] c, long[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(int[] a, long[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_oppos(int[] a, long[] b, int c, long d) {
+    int limit = a.length-1;
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(int[] a, int[] b, long[] c, long[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(int[] a, long[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_aln(int[] a, long[] b, int c, long d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(int[] a, int[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(int[] a, int[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(int[] a, long[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_unaln(int[] a, long[] b, int c, long d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(int[] a, int[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(int[] a, int[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestIntVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,953 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestIntVect
+ */
+
+public class TestIntVect {
+  private static final int ARRLEN = 997;    // prime, so trip counts never divide evenly into vectors (exercises pre/post loops)
+  private static final int ITERS  = 11000;  // enough iterations to trigger C2 compilation under -Xbatch
+  private static final int OFFSET = 3;      // constant/invariant index offset for the _off and _inv kernels
+  private static final int SCALE = 2;       // index scale for the _scl kernels
+  private static final int ALIGN_OFF = 8;   // 8 ints = 32 bytes, exactly one 256-bit vector
+  private static final int UNALIGN_OFF = 5; // 5 ints = 20 bytes, never a vector-width multiple
+
+  public static void main(String[] args) {
+    System.out.println("Testing Integer vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    int[] a1 = new int[ARRLEN];
+    int[] a2 = new int[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+      test_vi(a2, (int)123);
+      test_cp(a1, a2);
+      test_2ci(a1, a2);
+      test_2vi(a1, a2, (int)123, (int)103);
+      test_ci_neg(a1);
+      test_vi_neg(a2, (int)123);
+      test_cp_neg(a1, a2);
+      test_2ci_neg(a1, a2);
+      test_2vi_neg(a1, a2, (int)123, (int)103);
+      test_ci_oppos(a1);
+      test_vi_oppos(a2, (int)123);
+      test_cp_oppos(a1, a2);
+      test_2ci_oppos(a1, a2);
+      test_2vi_oppos(a1, a2, (int)123, (int)103);
+      test_ci_off(a1);
+      test_vi_off(a2, (int)123);
+      test_cp_off(a1, a2);
+      test_2ci_off(a1, a2);
+      test_2vi_off(a1, a2, (int)123, (int)103);
+      test_ci_inv(a1, OFFSET);
+      test_vi_inv(a2, (int)123, OFFSET);
+      test_cp_inv(a1, a2, OFFSET);
+      test_2ci_inv(a1, a2, OFFSET);
+      test_2vi_inv(a1, a2, (int)123, (int)103, OFFSET);
+      test_ci_scl(a1);
+      test_vi_scl(a2, (int)123);
+      test_cp_scl(a1, a2);
+      test_2ci_scl(a1, a2);
+      test_2vi_scl(a1, a2, (int)123, (int)103);
+      test_cp_alndst(a1, a2);
+      test_cp_alnsrc(a1, a2);
+      test_2ci_aln(a1, a2);
+      test_2vi_aln(a1, a2, (int)123, (int)103);
+      test_cp_unalndst(a1, a2);
+      test_cp_unalnsrc(a1, a2);
+      test_2ci_unaln(a1, a2);
+      test_2vi_unaln(a1, a2, (int)123, (int)103);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (int)-123);
+      }
+      test_vi(a2, (int)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (int)123);
+      }
+      test_cp(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (int)123);
+      }
+      test_2ci(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci: a1", i, a1[i], (int)-123);
+        errn += verify("test_2ci: a2", i, a2[i], (int)-103);
+      }
+      test_2vi(a1, a2, (int)123, (int)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi: a1", i, a1[i], (int)123);
+        errn += verify("test_2vi: a2", i, a2[i], (int)103);
+      }
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_neg(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (int)-123);
+      }
+      test_vi_neg(a2, (int)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (int)123);
+      }
+      test_cp_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (int)123);
+      }
+      test_2ci_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_neg: a1", i, a1[i], (int)-123);
+        errn += verify("test_2ci_neg: a2", i, a2[i], (int)-103);
+      }
+      test_2vi_neg(a1, a2, (int)123, (int)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_neg: a1", i, a1[i], (int)123);
+        errn += verify("test_2vi_neg: a2", i, a2[i], (int)103);
+      }
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_oppos(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (int)-123);
+      }
+      test_vi_oppos(a2, (int)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (int)123);
+      }
+      test_cp_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (int)123);
+      }
+      test_2ci_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_oppos: a1", i, a1[i], (int)-123);
+        errn += verify("test_2ci_oppos: a2", i, a2[i], (int)-103);
+      }
+      test_2vi_oppos(a1, a2, (int)123, (int)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_oppos: a1", i, a1[i], (int)123);
+        errn += verify("test_2vi_oppos: a2", i, a2[i], (int)103);
+      }
+      // Reset for indexing with offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_off(a1);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_off: a1", i, a1[i], (int)-123);
+      }
+      test_vi_off(a2, (int)123);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_off: a2", i, a2[i], (int)123);
+      }
+      test_cp_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_off: a1", i, a1[i], (int)123);
+      }
+      test_2ci_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_off: a1", i, a1[i], (int)-123);
+        errn += verify("test_2ci_off: a2", i, a2[i], (int)-103);
+      }
+      test_2vi_off(a1, a2, (int)123, (int)103);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], (int)123);
+        errn += verify("test_2vi_off: a2", i, a2[i], (int)103);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], (int)-1);
+        errn += verify("test_2vi_off: a2", i, a2[i], (int)-1);
+      }
+      // Reset for indexing with invariant offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_inv(a1, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_inv: a1", i, a1[i], (int)-123);
+      }
+      test_vi_inv(a2, (int)123, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_inv: a2", i, a2[i], (int)123);
+      }
+      test_cp_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_inv: a1", i, a1[i], (int)123);
+      }
+      test_2ci_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_inv: a1", i, a1[i], (int)-123);
+        errn += verify("test_2ci_inv: a2", i, a2[i], (int)-103);
+      }
+      test_2vi_inv(a1, a2, (int)123, (int)103, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], (int)123);
+        errn += verify("test_2vi_inv: a2", i, a2[i], (int)103);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], (int)-1);
+        errn += verify("test_2vi_inv: a2", i, a2[i], (int)-1);
+      }
+      // Reset for indexing with scale
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_scl(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : -123;
+        errn += verify("test_ci_scl: a1", i, a1[i], (int)val);
+      }
+      test_vi_scl(a2, (int)123);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_vi_scl: a2", i, a2[i], (int)val);
+      }
+      test_cp_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_cp_scl: a1", i, a1[i], (int)val);
+      }
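+      // The _scl kernels write only indices i*SCALE (even positions for
+      // SCALE=2); odd positions must still hold the initial -1.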
+      test_2ci_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a1", i, a1[i], (int)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], (int)-123);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a2", i, a2[i], (int)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], (int)-103);
+        }
+      }
+      test_2vi_scl(a1, a2, (int)123, (int)103);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a1", i, a1[i], (int)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], (int)123);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a2", i, a2[i], (int)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], (int)103);
+        }
+      }
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_vi(a2, (int)123);
+      test_cp_alndst(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (int)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (int)123);
+      }
+      test_vi(a2, (int)-123);
+      test_cp_alnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)-123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2ci_aln(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], (int)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], (int)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], (int)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2vi_aln(a1, a2, (int)123, (int)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], (int)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], (int)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], (int)103);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_vi(a2, (int)123);
+      test_cp_unalndst(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (int)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (int)123);
+      }
+      test_vi(a2, (int)-123);
+      test_cp_unalnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)-123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2ci_unaln(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], (int)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], (int)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], (int)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2vi_unaln(a1, a2, (int)123, (int)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], (int)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], (int)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], (int)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], (int)103);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (int)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_cp_alndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (int)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+      }
+      test_cp_alnsrc(a1, a1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
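+      // With both arguments aliased to a1, every index below
+      // ARRLEN-ALIGN_OFF is written last by the -103 store; only the final
+      // ALIGN_OFF elements keep -123.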
+      test_2ci_aln(a1, a1);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (int)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (int)-123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2vi_aln(a1, a1, (int)123, (int)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (int)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (int)103);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (int)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_cp_unalndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (int)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+      }
+      test_cp_unalnsrc(a1, a1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2ci_unaln(a1, a1);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (int)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (int)-123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2vi_unaln(a1, a1, (int)123, (int)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (int)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (int)103);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, (int)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi(a1, a2, (int)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a2, (int)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_neg(a1, a2, (int)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_neg: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a2, (int)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_oppos(a1, a2, (int)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_oppos: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_off(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_off(a2, (int)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_off(a1, a2, (int)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_off: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_inv(a1, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_inv(a2, (int)123, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_inv(a1, a2, (int)123, (int)103, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_inv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_scl(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_scl(a2, (int)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_scl(a1, a2, (int)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_scl: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_aln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_aln(a1, a2, (int)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_aln: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_unaln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_unaln(a1, a2, (int)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_unaln: " + (end - start));
+
+    return errn;
+  }
+
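+  // Loop kernels for the single-array int case. Beyond the shapes in the
+  // other tests, this file adds: 2ci/2vi (two arrays written in one loop),
+  // _off (constant index offset), _inv (loop-invariant offset passed as a
+  // parameter, so the compiler cannot fold it), and _scl (index scaled by
+  // SCALE, leaving gaps between written elements).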
+  static void test_ci(int[] a) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+    }
+  }
+  static void test_vi(int[] a, int b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp(int[] a, int[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci(int[] a, int[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi(int[] a, int[] b, int c, int d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_neg(int[] a) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+    }
+  }
+  static void test_vi_neg(int[] a, int b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp_neg(int[] a, int[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci_neg(int[] a, int[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_neg(int[] a, int[] b, int c, int d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_oppos(int[] a) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+    }
+  }
+  static void test_vi_oppos(int[] a, int b) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[limit-i] = b;
+    }
+  }
+  static void test_cp_oppos(int[] a, int[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+    }
+  }
+  static void test_2ci_oppos(int[] a, int[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_oppos(int[] a, int[] b, int c, int d) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_ci_off(int[] a) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = -123;
+    }
+  }
+  static void test_vi_off(int[] a, int b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b;
+    }
+  }
+  static void test_cp_off(int[] a, int[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b[i+OFFSET];
+    }
+  }
+  static void test_2ci_off(int[] a, int[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = -123;
+      b[i+OFFSET] = -103;
+    }
+  }
+  static void test_2vi_off(int[] a, int[] b, int c, int d) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = c;
+      b[i+OFFSET] = d;
+    }
+  }
+  static void test_ci_inv(int[] a, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = -123;
+    }
+  }
+  static void test_vi_inv(int[] a, int b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b;
+    }
+  }
+  static void test_cp_inv(int[] a, int[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b[i+k];
+    }
+  }
+  static void test_2ci_inv(int[] a, int[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = -123;
+      b[i+k] = -103;
+    }
+  }
+  static void test_2vi_inv(int[] a, int[] b, int c, int d, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = c;
+      b[i+k] = d;
+    }
+  }
+  static void test_ci_scl(int[] a) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = -123;
+    }
+  }
+  static void test_vi_scl(int[] a, int b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b;
+    }
+  }
+  static void test_cp_scl(int[] a, int[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b[i*SCALE];
+    }
+  }
+  static void test_2ci_scl(int[] a, int[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = -123;
+      b[i*SCALE] = -103;
+    }
+  }
+  static void test_2vi_scl(int[] a, int[] b, int c, int d) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = c;
+      b[i*SCALE] = d;
+    }
+  }
+  static void test_cp_alndst(int[] a, int[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_alnsrc(int[] a, int[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+    }
+  }
+  static void test_2ci_aln(int[] a, int[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_aln(int[] a, int[] b, int c, int d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(int[] a, int[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_unalnsrc(int[] a, int[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+    }
+  }
+  static void test_2ci_unaln(int[] a, int[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_unaln(int[] a, int[] b, int c, int d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestLongDoubleVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
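+ * This variant pairs the widest element types: 64-bit long and double
+ * lanes, four elements per 256-bit vector.
+ *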
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestLongDoubleVect
+ */
+
+public class TestLongDoubleVect {
+  private static final int ARRLEN = 997;    // prime, so trip counts never divide evenly into vectors (exercises pre/post loops)
+  private static final int ITERS  = 11000;  // enough iterations to trigger C2 compilation under -Xbatch
+  private static final int OFFSET = 3;      // unused in this test
+  private static final int SCALE = 2;       // unused in this test
+  private static final int ALIGN_OFF = 8;   // 8 longs/doubles = 64 bytes, two full 256-bit vectors
+  private static final int UNALIGN_OFF = 5; // 5 elements: never a vector-length multiple
+
+  public static void main(String[] args) {
+    System.out.println("Testing Long + Double vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    long[] a1 = new long[ARRLEN];
+    long[] a2 = new long[ARRLEN];
+    double[] b1 = new double[ARRLEN];
+    double[] b2 = new double[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (long)123, 103.);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (long)123, 103.);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (long)123, 103.);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (long)123, 103.);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (long)123, 103.);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1.;
+      b2[i] = -1.;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (long)-123);
+        errn += verify("test_ci: b1", i, b1[i], -103.);
+      }
+      test_vi(a2, b2, (long)123, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (long)123);
+        errn += verify("test_vi: b2", i, b2[i], 103.);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (long)123);
+        errn += verify("test_cp: b1", i, b1[i], 103.);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.;
+        b2[i] = -1.;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (long)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], -103.);
+      }
+      test_vi_neg(a2, b2, (long)123, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (long)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], 103.);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (long)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], 103.);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.;
+        b2[i] = -1.;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (long)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], -103.);
+      }
+      test_vi_oppos(a2, b2, (long)123, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (long)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], 103.);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (long)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], 103.);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.;
+        b2[i] = 123.;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (long)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (long)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (long)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (long)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (long)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -103.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -1.);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_vi_aln(a1, b1, (long)123, 103.);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (long)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (long)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], 103.);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.;
+        b2[i] = 123.;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (long)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (long)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (long)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (long)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (long)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -103.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -1.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_vi_unaln(a1, b1, (long)123, 103.);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (long)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (long)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], 103.);
+      }
+
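+      // The overlap runs below use the same array as source and destination.
+      // For a[i+OFF] = a[i] a sequential copy propagates the first OFF seed
+      // values periodically (element i ends up as i % OFF, OFF being
+      // ALIGN_OFF or UNALIGN_OFF); for a[i] = a[i+OFF] the reads stay ahead
+      // of the writes, so the seed pattern is shifted down instead.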
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (long)i;
+        b1[i] = (double)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (long)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (double)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1.;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (long)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (long)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (double)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (long)i;
+        b1[i] = (double)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (long)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (double)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1.;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (long)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (long)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (double)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
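+    // Timing pass: run each kernel ITERS more times and print the elapsed
+    // wall-clock time. The numbers are informational only; nothing below
+    // can fail the test.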
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (long)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (long)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (long)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (long)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (long)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
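+  // Loop kernels. Naming: _ci = init with a compile-time constant, _vi =
+  // init with a value passed in, _cp = element-wise copy; _neg = descending
+  // index, _oppos = ascending and descending accesses in the same loop;
+  // _aln*/_unaln* = accesses displaced by ALIGN_OFF/UNALIGN_OFF (on the
+  // destination for *dst, on the source for *src).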
+  static void test_ci(long[] a, double[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi(long[] a, double[] b, long c, double d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(long[] a, long[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(long[] a, double[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_neg(long[] a, double[] b, long c, double d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(long[] a, long[] b, double[] c, double[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(long[] a, double[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_oppos(long[] a, double[] b, long c, double d) {
+    int limit = a.length-1;
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(long[] a, long[] b, double[] c, double[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(long[] a, double[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_aln(long[] a, double[] b, long c, double d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(long[] a, long[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(long[] a, long[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(long[] a, double[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_unaln(long[] a, double[] b, long c, double d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(long[] a, long[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(long[] a, long[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
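+  // Print one diagnostic line per mismatch and return 1 so callers can sum
+  // the failure count.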
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, double elem, double val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestLongFloatVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestLongFloatVect
+ */
+
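+// Flags in the @run line above: -Xbatch compiles hot methods synchronously so
+// the measured loops execute compiled code; -XX:-TieredCompilation forces the
+// C2 (server) compiler, whose SuperWord pass this test targets;
+// -XX:-OptimizeFill keeps simple fill loops as loops instead of calling the
+// fill stub, leaving them eligible for vectorization; and
+// -XX:+IgnoreUnrecognizedVMOptions lets the test run on builds that lack any
+// of these flags.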
+public class TestLongFloatVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;   // not used by this test
+  private static final int SCALE = 2;    // not used by this test
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
+
+  public static void main(String[] args) {
+    System.out.println("Testing Long + Float vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    long[] a1 = new long[ARRLEN];
+    long[] a2 = new long[ARRLEN];
+    float[] b1 = new float[ARRLEN];
+    float[] b2 = new float[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (long)123, 103.f);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (long)123, 103.f);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (long)123, 103.f);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (long)123, 103.f);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (long)123, 103.f);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1.f;
+      b2[i] = -1.f;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (long)-123);
+        errn += verify("test_ci: b1", i, b1[i], -103.f);
+      }
+      test_vi(a2, b2, (long)123, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (long)123);
+        errn += verify("test_vi: b2", i, b2[i], 103.f);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (long)123);
+        errn += verify("test_cp: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.f;
+        b2[i] = -1.f;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (long)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], -103.f);
+      }
+      test_vi_neg(a2, b2, (long)123, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (long)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], 103.f);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (long)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.f;
+        b2[i] = -1.f;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (long)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], -103.f);
+      }
+      test_vi_oppos(a2, b2, (long)123, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (long)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], 103.f);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (long)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.f;
+        b2[i] = 123.f;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (long)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (long)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.f;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (long)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (long)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.f);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (long)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -103.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -1.f);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_vi_aln(a1, b1, (long)123, 103.f);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (long)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (long)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.f;
+        b2[i] = 123.f;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (long)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (long)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.f;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (long)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (long)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (long)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -103.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -1.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_vi_unaln(a1, b1, (long)123, 103.f);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (long)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (long)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], 103.f);
+      }
+
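+      // In-place a[i+OFF] = a[i] copies propagate the OFF seed values, so
+      // element i is expected to equal i % OFF below.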
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (long)i;
+        b1[i] = (float)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (long)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (float)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1.f;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (long)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (long)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (float)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (long)i;
+        b1[i] = (float)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (long)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (float)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1.f;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (long)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (long)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (float)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
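+    // Informational timing pass; elapsed times are printed, not asserted.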
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (long)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (long)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (long)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (long)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (long)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
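+  // Same kernel shapes as above, specialized to long[] plus float[].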
+  static void test_ci(long[] a, float[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi(long[] a, float[] b, long c, float d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(long[] a, long[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(long[] a, float[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_neg(long[] a, float[] b, long c, float d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(long[] a, long[] b, float[] c, float[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(long[] a, float[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_oppos(long[] a, float[] b, long c, float d) {
+    int limit = a.length-1;
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(long[] a, long[] b, float[] c, float[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(long[] a, float[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_aln(long[] a, float[] b, long c, float d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(long[] a, long[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(long[] a, long[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(long[] a, float[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_unaln(long[] a, float[] b, long c, float d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(long[] a, long[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(long[] a, long[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, float elem, float val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestLongVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,953 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestLongVect
+ */
+
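+// This long-only variant adds more indexing shapes: _off (constant index
+// offset), _inv (loop-invariant offset parameter), _scl (scaled index), and
+// _2ci/_2vi (two arrays updated in one loop), plus overlap runs where both
+// arguments alias the same array.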
+public class TestLongVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
+
+  public static void main(String[] args) {
+    System.out.println("Testing Long vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    long[] a1 = new long[ARRLEN];
+    long[] a2 = new long[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+      test_vi(a2, (long)123);
+      test_cp(a1, a2);
+      test_2ci(a1, a2);
+      test_2vi(a1, a2, (long)123, (long)103);
+      test_ci_neg(a1);
+      test_vi_neg(a2, (long)123);
+      test_cp_neg(a1, a2);
+      test_2ci_neg(a1, a2);
+      test_2vi_neg(a1, a2, (long)123, (long)103);
+      test_ci_oppos(a1);
+      test_vi_oppos(a2, (long)123);
+      test_cp_oppos(a1, a2);
+      test_2ci_oppos(a1, a2);
+      test_2vi_oppos(a1, a2, (long)123, (long)103);
+      test_ci_off(a1);
+      test_vi_off(a2, (long)123);
+      test_cp_off(a1, a2);
+      test_2ci_off(a1, a2);
+      test_2vi_off(a1, a2, (long)123, (long)103);
+      test_ci_inv(a1, OFFSET);
+      test_vi_inv(a2, (long)123, OFFSET);
+      test_cp_inv(a1, a2, OFFSET);
+      test_2ci_inv(a1, a2, OFFSET);
+      test_2vi_inv(a1, a2, (long)123, (long)103, OFFSET);
+      test_ci_scl(a1);
+      test_vi_scl(a2, (long)123);
+      test_cp_scl(a1, a2);
+      test_2ci_scl(a1, a2);
+      test_2vi_scl(a1, a2, (long)123, (long)103);
+      test_cp_alndst(a1, a2);
+      test_cp_alnsrc(a1, a2);
+      test_2ci_aln(a1, a2);
+      test_2vi_aln(a1, a2, (long)123, (long)103);
+      test_cp_unalndst(a1, a2);
+      test_cp_unalnsrc(a1, a2);
+      test_2ci_unaln(a1, a2);
+      test_2vi_unaln(a1, a2, (long)123, (long)103);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (long)-123);
+      }
+      test_vi(a2, (long)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (long)123);
+      }
+      test_cp(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (long)123);
+      }
+      test_2ci(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci: a1", i, a1[i], (long)-123);
+        errn += verify("test_2ci: a2", i, a2[i], (long)-103);
+      }
+      test_2vi(a1, a2, (long)123, (long)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi: a1", i, a1[i], (long)123);
+        errn += verify("test_2vi: a2", i, a2[i], (long)103);
+      }
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_neg(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (long)-123);
+      }
+      test_vi_neg(a2, (long)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (long)123);
+      }
+      test_cp_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (long)123);
+      }
+      test_2ci_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_neg: a1", i, a1[i], (long)-123);
+        errn += verify("test_2ci_neg: a2", i, a2[i], (long)-103);
+      }
+      test_2vi_neg(a1, a2, (long)123, (long)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_neg: a1", i, a1[i], (long)123);
+        errn += verify("test_2vi_neg: a2", i, a2[i], (long)103);
+      }
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_oppos(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (long)-123);
+      }
+      test_vi_oppos(a2, (long)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (long)123);
+      }
+      test_cp_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (long)123);
+      }
+      test_2ci_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_oppos: a1", i, a1[i], (long)-123);
+        errn += verify("test_2ci_oppos: a2", i, a2[i], (long)-103);
+      }
+      test_2vi_oppos(a1, a2, (long)123, (long)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_oppos: a1", i, a1[i], (long)123);
+        errn += verify("test_2vi_oppos: a2", i, a2[i], (long)103);
+      }
+      // Reset for indexing with offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_off(a1);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_off: a1", i, a1[i], (long)-123);
+      }
+      test_vi_off(a2, (long)123);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_off: a2", i, a2[i], (long)123);
+      }
+      test_cp_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_off: a1", i, a1[i], (long)123);
+      }
+      test_2ci_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_off: a1", i, a1[i], (long)-123);
+        errn += verify("test_2ci_off: a2", i, a2[i], (long)-103);
+      }
+      test_2vi_off(a1, a2, (long)123, (long)103);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], (long)123);
+        errn += verify("test_2vi_off: a2", i, a2[i], (long)103);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], (long)-1);
+        errn += verify("test_2vi_off: a2", i, a2[i], (long)-1);
+      }
+      // Reset for indexing with invariant offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_inv(a1, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_inv: a1", i, a1[i], (long)-123);
+      }
+      test_vi_inv(a2, (long)123, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_inv: a2", i, a2[i], (long)123);
+      }
+      test_cp_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_inv: a1", i, a1[i], (long)123);
+      }
+      test_2ci_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_inv: a1", i, a1[i], (long)-123);
+        errn += verify("test_2ci_inv: a2", i, a2[i], (long)-103);
+      }
+      test_2vi_inv(a1, a2, (long)123, (long)103, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], (long)123);
+        errn += verify("test_2vi_inv: a2", i, a2[i], (long)103);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], (long)-1);
+        errn += verify("test_2vi_inv: a2", i, a2[i], (long)-1);
+      }
+      // Reset for indexing with scale
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_scl(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : -123;
+        errn += verify("test_ci_scl: a1", i, a1[i], (long)val);
+      }
+      test_vi_scl(a2, (long)123);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_vi_scl: a2", i, a2[i], (long)val);
+      }
+      test_cp_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_cp_scl: a1", i, a1[i], (long)val);
+      }
+      test_2ci_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a1", i, a1[i], (long)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], (long)-123);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a2", i, a2[i], (long)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], (long)-103);
+        }
+      }
+      test_2vi_scl(a1, a2, (long)123, (long)103);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a1", i, a1[i], (long)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], (long)123);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a2", i, a2[i], (long)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], (long)103);
+        }
+      }
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_vi(a2, (long)123);
+      test_cp_alndst(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (long)123);
+      }
+      test_vi(a2, (long)-123);
+      test_cp_alnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (long)-123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (long)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2ci_aln(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], (long)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], (long)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], (long)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2vi_aln(a1, a2, (long)123, (long)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], (long)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], (long)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], (long)103);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_vi(a2, (long)123);
+      test_cp_unalndst(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (long)123);
+      }
+      test_vi(a2, (long)-123);
+      test_cp_unalnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (long)-123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (long)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2ci_unaln(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], (long)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], (long)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], (long)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2vi_unaln(a1, a2, (long)123, (long)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], (long)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], (long)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], (long)103);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (long)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_cp_alndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (long)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+      }
+      test_cp_alnsrc(a1, a1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (long)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2ci_aln(a1, a1);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (long)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (long)-123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2vi_aln(a1, a1, (long)123, (long)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (long)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (long)103);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (long)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_cp_unalndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (long)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+      }
+      test_cp_unalnsrc(a1, a1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (long)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2ci_unaln(a1, a1);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (long)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (long)-123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2vi_unaln(a1, a1, (long)123, (long)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (long)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (long)103);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, (long)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi(a1, a2, (long)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a2, (long)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_neg(a1, a2, (long)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_neg: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a2, (long)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_oppos(a1, a2, (long)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_oppos: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_off(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_off(a2, (long)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_off(a1, a2, (long)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_off: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_inv(a1, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_inv(a2, (long)123, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_inv(a1, a2, (long)123, (long)103, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_inv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_scl(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_scl(a2, (long)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_scl(a1, a2, (long)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_scl: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_aln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_aln(a1, a2, (long)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_aln: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_unaln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_unaln(a1, a2, (long)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_unaln: " + (end - start));
+
+    return errn;
+  }
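+
+  // Helper naming scheme, as read from the loop bodies below: ci = constant
+  // init, vi = variable init, cp = copy; neg = descending index, oppos =
+  // opposing directions, off = constant offset, inv = loop-invariant offset,
+  // scl = scaled index, aln/unaln = (un)aligned relative offset. Each shape
+  // exercises a different case for C2's SuperWord vectorizer.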
+
+  static void test_ci(long[] a) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+    }
+  }
+  static void test_vi(long[] a, long b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp(long[] a, long[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci(long[] a, long[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi(long[] a, long[] b, long c, long d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_neg(long[] a) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+    }
+  }
+  static void test_vi_neg(long[] a, long b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp_neg(long[] a, long[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci_neg(long[] a, long[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_neg(long[] a, long[] b, long c, long d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_oppos(long[] a) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+    }
+  }
+  static void test_vi_oppos(long[] a, long b) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[limit-i] = b;
+    }
+  }
+  static void test_cp_oppos(long[] a, long[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+    }
+  }
+  static void test_2ci_oppos(long[] a, long[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_oppos(long[] a, long[] b, long c, long d) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_ci_off(long[] a) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = -123;
+    }
+  }
+  static void test_vi_off(long[] a, long b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b;
+    }
+  }
+  static void test_cp_off(long[] a, long[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b[i+OFFSET];
+    }
+  }
+  static void test_2ci_off(long[] a, long[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = -123;
+      b[i+OFFSET] = -103;
+    }
+  }
+  static void test_2vi_off(long[] a, long[] b, long c, long d) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = c;
+      b[i+OFFSET] = d;
+    }
+  }
+  static void test_ci_inv(long[] a, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = -123;
+    }
+  }
+  static void test_vi_inv(long[] a, long b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b;
+    }
+  }
+  static void test_cp_inv(long[] a, long[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b[i+k];
+    }
+  }
+  static void test_2ci_inv(long[] a, long[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = -123;
+      b[i+k] = -103;
+    }
+  }
+  static void test_2vi_inv(long[] a, long[] b, long c, long d, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = c;
+      b[i+k] = d;
+    }
+  }
+  static void test_ci_scl(long[] a) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = -123;
+    }
+  }
+  static void test_vi_scl(long[] a, long b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b;
+    }
+  }
+  static void test_cp_scl(long[] a, long[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b[i*SCALE];
+    }
+  }
+  static void test_2ci_scl(long[] a, long[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = -123;
+      b[i*SCALE] = -103;
+    }
+  }
+  static void test_2vi_scl(long[] a, long[] b, long c, long d) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = c;
+      b[i*SCALE] = d;
+    }
+  }
+  static void test_cp_alndst(long[] a, long[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_alnsrc(long[] a, long[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+    }
+  }
+  static void test_2ci_aln(long[] a, long[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_aln(long[] a, long[] b, long c, long d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(long[] a, long[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_unalnsrc(long[] a, long[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+    }
+  }
+  static void test_2ci_unaln(long[] a, long[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_unaln(long[] a, long[] b, long c, long d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+
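+  // Reports one mismatch on stderr and returns 1 so callers can tally errors.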
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestShortDoubleVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortDoubleVect
+ */
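+// A note on the flags above (an informal reading, not part of the jtreg tags):
+// -Xbatch compiles methods synchronously so the timed loops below measure
+// compiled code, -XX:-TieredCompilation sends methods straight to C2, and
+// -XX:-OptimizeFill keeps simple fill loops out of the fill-stub intrinsic so
+// they remain candidates for the SuperWord vectorization this test exercises.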
+
+public class TestShortDoubleVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
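+
+  // ARRLEN is odd (997), presumably so vectorized loops always leave a scalar
+  // pre/post tail to verify; ALIGN_OFF (8) and UNALIGN_OFF (5) pick relative
+  // offsets that are friendly and hostile to vector alignment, respectively.
+  // OFFSET and SCALE are declared for parity with the other tests in this
+  // suite but are not used by the short+double variant.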
+
+  public static void main(String[] args) {
+    System.out.println("Testing Short + Double vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    short[] a1 = new short[ARRLEN];
+    short[] a2 = new short[ARRLEN];
+    double[] b1 = new double[ARRLEN];
+    double[] b2 = new double[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (short)123, 103.);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (short)123, 103.);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (short)123, 103.);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (short)123, 103.);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (short)123, 103.);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1.;
+      b2[i] = -1.;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (short)-123);
+        errn += verify("test_ci: b1", i, b1[i], -103.);
+      }
+      test_vi(a2, b2, (short)123, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (short)123);
+        errn += verify("test_vi: b2", i, b2[i], 103.);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (short)123);
+        errn += verify("test_cp: b1", i, b1[i], 103.);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.;
+        b2[i] = -1.;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (short)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], -103.);
+      }
+      test_vi_neg(a2, b2, (short)123, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (short)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], 103.);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], 103.);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.;
+        b2[i] = -1.;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (short)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], -103.);
+      }
+      test_vi_oppos(a2, b2, (short)123, 103.);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (short)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], 103.);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], 103.);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.;
+        b2[i] = 123.;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (short)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -103.);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -1.);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_vi_aln(a1, b1, (short)123, 103.);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (short)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], 103.);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.;
+        b2[i] = 123.;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (short)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -103.);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -1.);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_vi_unaln(a1, b1, (short)123, 103.);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (short)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], 103.);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (short)i;
+        b1[i] = (double)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
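+      // Self-copy: with dst == src shifted by ALIGN_OFF, the scalar loop
+      // propagates the first ALIGN_OFF elements through the whole array, so
+      // a1[i] == i % ALIGN_OFF afterwards. A vector copy that ignored the
+      // overlap would break this pattern, which is what the checks catch.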
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (double)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1.;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (double)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (short)i;
+        b1[i] = (double)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (double)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1.;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (double)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
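+    // Timing results below are printed for inspection only; nothing asserts them.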
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (short)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (short)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (short)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (short)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (short)123, 103.);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
+  static void test_ci(short[] a, double[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi(short[] a, double[] b, short c, double d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(short[] a, short[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(short[] a, double[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_neg(short[] a, double[] b, short c, double d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(short[] a, short[] b, double[] c, double[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(short[] a, double[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_oppos(short[] a, double[] b, short c, double d) {
+    int limit = a.length-1;
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(short[] a, short[] b, double[] c, double[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(short[] a, double[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_aln(short[] a, double[] b, short c, double d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(short[] a, short[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(short[] a, short[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(short[] a, double[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103.;
+    }
+  }
+  static void test_vi_unaln(short[] a, double[] b, short c, double d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(short[] a, short[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(short[] a, short[] b, double[] c, double[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
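+  // Exact (!=) comparison is sound for the double overload: every expected
+  // value used here (e.g. -1., 103., 123.) is exactly representable.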
+  static int verify(String text, int i, short elem, short val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, double elem, double val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestShortFloatVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortFloatVect
+ */
+
+public class TestShortFloatVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
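+
+  // Same harness as TestShortDoubleVect (see the flag notes there),
+  // specialized for short+float array pairs.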
+
+  public static void main(String[] args) {
+    System.out.println("Testing Short + Float vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    short[] a1 = new short[ARRLEN];
+    short[] a2 = new short[ARRLEN];
+    float[] b1 = new float[ARRLEN];
+    float[] b2 = new float[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (short)123, 103.f);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (short)123, 103.f);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (short)123, 103.f);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (short)123, 103.f);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (short)123, 103.f);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1.f;
+      b2[i] = -1.f;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (short)-123);
+        errn += verify("test_ci: b1", i, b1[i], -103.f);
+      }
+      test_vi(a2, b2, (short)123, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (short)123);
+        errn += verify("test_vi: b2", i, b2[i], 103.f);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (short)123);
+        errn += verify("test_cp: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.f;
+        b2[i] = -1.f;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (short)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], -103.f);
+      }
+      test_vi_neg(a2, b2, (short)123, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (short)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], 103.f);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1.f;
+        b2[i] = -1.f;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (short)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], -103.f);
+      }
+      test_vi_oppos(a2, b2, (short)123, 103.f);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (short)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], 103.f);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.f;
+        b2[i] = 123.f;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.f;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.f);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (short)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -103.f);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], -1.f);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_vi_aln(a1, b1, (short)123, 103.f);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (short)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1.f;
+        b2[i] = 123.f;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123.f;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (short)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -103.f);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], -1.f);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_vi_unaln(a1, b1, (short)123, 103.f);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (short)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], 103.f);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (short)i;
+        b1[i] = (float)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (float)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1.f;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.f);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (float)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (short)i;
+        b1[i] = (float)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1.f;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (float)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1.f;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.f);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (float)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (short)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (short)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (short)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (short)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (short)123, 103.f);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
+  static void test_ci(short[] a, float[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi(short[] a, float[] b, short c, float d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(short[] a, short[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(short[] a, float[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_neg(short[] a, float[] b, short c, float d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(short[] a, short[] b, float[] c, float[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(short[] a, float[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_oppos(short[] a, float[] b, short c, float d) {
+    int limit = a.length-1;
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(short[] a, short[] b, float[] c, float[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(short[] a, float[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_aln(short[] a, float[] b, short c, float d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(short[] a, short[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(short[] a, short[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(short[] a, float[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103.f;
+    }
+  }
+  static void test_vi_unaln(short[] a, float[] b, short c, float d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(short[] a, short[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(short[] a, short[] b, float[] c, float[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
+  static int verify(String text, int i, short elem, short val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, float elem, float val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestShortIntVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortIntVect
+ */
+
+public class TestShortIntVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
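+
+  // Same harness as TestShortDoubleVect, specialized for short+int pairs.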
+
+  public static void main(String[] args) {
+    System.out.println("Testing Short + Integer vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    short[] a1 = new short[ARRLEN];
+    short[] a2 = new short[ARRLEN];
+    int[] b1 = new int[ARRLEN];
+    int[] b2 = new int[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (short)123, (int)103);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (short)123, (int)103);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (short)123, (int)103);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (short)123, (int)103);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (short)123, (int)103);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1;
+      b2[i] = -1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (short)-123);
+        errn += verify("test_ci: b1", i, b1[i], (int)-103);
+      }
+      test_vi(a2, b2, (short)123, (int)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (short)123);
+        errn += verify("test_vi: b2", i, b2[i], (int)103);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (short)123);
+        errn += verify("test_cp: b1", i, b1[i], (int)103);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1;
+        b2[i] = -1;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (short)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], (int)-103);
+      }
+      test_vi_neg(a2, b2, (short)123, (int)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (short)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], (int)103);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], (int)103);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1;
+        b2[i] = -1;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (short)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], (int)-103);
+      }
+      test_vi_oppos(a2, b2, (short)123, (int)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (short)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], (int)103);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], (int)103);
+      }
+
+      // Reset for 2 arrays with relative aligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1;
+        b2[i] = 123;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (int)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (int)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (int)-123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (int)123);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (short)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (int)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (int)-1);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_vi_aln(a1, b1, (short)123, (int)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (short)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (int)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (int)103);
+      }
+
+      // Reset for 2 arrays with relative unaligned offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1;
+        b2[i] = 123;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (int)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (int)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (int)-123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (int)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (short)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (int)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (int)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_vi_unaln(a1, b1, (short)123, (int)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (short)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (int)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (int)103);
+      }
+
+      // Reset for aligned overlap initialization
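+      // Seed the first ALIGN_OFF elements, then copy the array onto itself with
+      // the destination ALIGN_OFF ahead of the source: sequential semantics
+      // propagate the seed pattern, so a correct copy leaves a1[i] == i % ALIGN_OFF.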
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (short)i;
+        b1[i] = (int)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (int)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (int)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (int)v);
+      }
+
+      // Reset for unaligned overlap initialization
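+      // Same self-copy check with UNALIGN_OFF (5), so the overlap distance
+      // does not line up with a whole vector.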
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (short)i;
+        b1[i] = (int)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (int)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (int)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (int)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
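+    // The timing loops below are informational only; the test verdict comes
+    // from the verification errors counted above.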
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (short)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (short)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (short)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (short)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (short)123, (int)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
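+  // Naming key: "ci" = constant init, "vi" = variable (parameter) init,
+  // "cp" = copy; suffixes: "neg" = descending loop, "oppos" = the two arrays
+  // are walked in opposite directions, "aln"/"unaln" = indices shifted by
+  // ALIGN_OFF (8) or UNALIGN_OFF (5). Each body is a simple counted loop that
+  // the SuperWord pass is expected to vectorize.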
+  static void test_ci(short[] a, int[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi(short[] a, int[] b, short c, int d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(short[] a, short[] b, int[] c, int[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(short[] a, int[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_neg(short[] a, int[] b, short c, int d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(short[] a, short[] b, int[] c, int[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(short[] a, int[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_oppos(short[] a, int[] b, short c, int d) {
+    int limit = a.length-1;
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(short[] a, short[] b, int[] c, int[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(short[] a, int[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_aln(short[] a, int[] b, short c, int d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(short[] a, short[] b, int[] c, int[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(short[] a, short[] b, int[] c, int[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(short[] a, int[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_unaln(short[] a, int[] b, short c, int d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(short[] a, short[] b, int[] c, int[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(short[] a, short[] b, int[] c, int[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
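+  // verify() reports a mismatch and returns 1 instead of throwing, so a run
+  // counts every bad element rather than stopping at the first failure.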
+  static int verify(String text, int i, short elem, short val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestShortLongVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortLongVect
+ */
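+// Flag notes: -Xbatch compiles hot methods in the foreground,
+// -XX:-TieredCompilation sends them straight to the C2 compiler, and
+// -XX:-OptimizeFill keeps simple fill loops from being replaced by the fill
+// intrinsic so they remain candidates for SuperWord vectorization;
+// -XX:+IgnoreUnrecognizedVMOptions lets the same command line work on builds
+// that lack any of these flags.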
+
+public class TestShortLongVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;   // not used in this test
+  private static final int SCALE = 2;    // not used in this test
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
+
+  public static void main(String[] args) {
+    System.out.println("Testing Short + Long vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    short[] a1 = new short[ARRLEN];
+    short[] a2 = new short[ARRLEN];
+    long[] b1 = new long[ARRLEN];
+    long[] b2 = new long[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+      test_vi(a2, b2, (short)123, (long)103);
+      test_cp(a1, a2, b1, b2);
+      test_ci_neg(a1, b1);
+      test_vi_neg(a1, b1, (short)123, (long)103);
+      test_cp_neg(a1, a2, b1, b2);
+      test_ci_oppos(a1, b1);
+      test_vi_oppos(a1, b1, (short)123, (long)103);
+      test_cp_oppos(a1, a2, b1, b2);
+      test_ci_aln(a1, b1);
+      test_vi_aln(a1, b1, (short)123, (long)103);
+      test_cp_alndst(a1, a2, b1, b2);
+      test_cp_alnsrc(a1, a2, b1, b2);
+      test_ci_unaln(a1, b1);
+      test_vi_unaln(a1, b1, (short)123, (long)103);
+      test_cp_unalndst(a1, a2, b1, b2);
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+      b1[i] = -1;
+      b2[i] = -1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (short)-123);
+        errn += verify("test_ci: b1", i, b1[i], (long)-103);
+      }
+      test_vi(a2, b2, (short)123, (long)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (short)123);
+        errn += verify("test_vi: b2", i, b2[i], (long)103);
+      }
+      test_cp(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (short)123);
+        errn += verify("test_cp: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1;
+        b2[i] = -1;
+      }
+      test_ci_neg(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (short)-123);
+        errn += verify("test_ci_neg: b1", i, b1[i], (long)-103);
+      }
+      test_vi_neg(a2, b2, (short)123, (long)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (short)123);
+        errn += verify("test_vi_neg: b2", i, b2[i], (long)103);
+      }
+      test_cp_neg(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_neg: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+        b1[i] = -1;
+        b2[i] = -1;
+      }
+      test_ci_oppos(a1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (short)-123);
+        errn += verify("test_ci_oppos: b1", i, b1[i], (long)-103);
+      }
+      test_vi_oppos(a2, b2, (short)123, (long)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (short)123);
+        errn += verify("test_vi_oppos: b2", i, b2[i], (long)103);
+      }
+      test_cp_oppos(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_oppos: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for two arrays copied at an aligned relative offset (ALIGN_OFF)
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1;
+        b2[i] = 123;
+      }
+      test_cp_alndst(a1, a2, b1, b2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_alndst: b1", i, b1[i], (long)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123;
+      }
+      test_cp_alnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)-123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (long)-123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_alnsrc: b1", i, b1[i], (long)123);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_ci_aln(a1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: a1", i, a1[i], (short)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (long)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_aln: b1", i, b1[i], (long)-1);
+      }
+
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_vi_aln(a1, b1, (short)123, (long)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (short)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_aln: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for two arrays copied at an unaligned relative offset (UNALIGN_OFF)
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = 123;
+        b1[i] = -1;
+        b2[i] = 123;
+      }
+      test_cp_unalndst(a1, a2, b1, b2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_unalndst: b1", i, b1[i], (long)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a2[i] = -123;
+        b2[i] = -123;
+      }
+      test_cp_unalnsrc(a1, a2, b1, b2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)-123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (long)-123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)123);
+        errn += verify("test_cp_unalnsrc: b1", i, b1[i], (long)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_ci_unaln(a1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: a1", i, a1[i], (short)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (long)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_ci_unaln: b1", i, b1[i], (long)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_vi_unaln(a1, b1, (short)123, (long)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (short)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_vi_unaln: b1", i, b1[i], (long)103);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (short)i;
+        b1[i] = (long)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_cp_alndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (long)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+        b1[i+ALIGN_OFF] = -1;
+      }
+      test_cp_alnsrc(a1, a1, b1, b1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (long)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (long)v);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (short)i;
+        b1[i] = (long)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+        b1[i] = -1;
+      }
+      test_cp_unalndst(a1, a1, b1, b1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (long)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+        b1[i+UNALIGN_OFF] = -1;
+      }
+      test_cp_unalnsrc(a1, a1, b1, b1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)-1);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (long)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)v);
+        errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (long)v);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, b2, (short)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a1, b1, (short)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a1, b1, (short)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_aln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_aln(a1, b1, (short)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_unaln(a1, b1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_unaln(a1, b1, (short)123, (long)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2, b1, b2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    return errn;
+  }
+
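+  // Every loop below stores both a short[] (2-byte lanes) and a long[] (8-byte
+  // lanes) in the same body, so vectorization has to cope with two different
+  // element sizes at once.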
+  static void test_ci(short[] a, long[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi(short[] a, long[] b, short c, long d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp(short[] a, short[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_neg(short[] a, long[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_neg(short[] a, long[] b, short c, long d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_cp_neg(short[] a, short[] b, long[] c, long[] d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+      c[i] = d[i];
+    }
+  }
+  static void test_ci_oppos(short[] a, long[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_oppos(short[] a, long[] b, short c, long d) {
+    int limit = a.length-1;
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_cp_oppos(short[] a, short[] b, long[] c, long[] d) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+      c[limit-i] = d[i];
+    }
+  }
+  static void test_ci_aln(short[] a, long[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_aln(short[] a, long[] b, short c, long d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_alndst(short[] a, short[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+      c[i+ALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_alnsrc(short[] a, short[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+      c[i] = d[i+ALIGN_OFF];
+    }
+  }
+  static void test_ci_unaln(short[] a, long[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_vi_unaln(short[] a, long[] b, short c, long d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(short[] a, short[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+      c[i+UNALIGN_OFF] = d[i];
+    }
+  }
+  static void test_cp_unalnsrc(short[] a, short[] b, long[] c, long[] d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+      c[i] = d[i+UNALIGN_OFF];
+    }
+  }
+
+  static int verify(String text, int i, short elem, short val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7119644/TestShortVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,953 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortVect
+ */
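+// This single-type variant adds the "2ci"/"2vi" (two arrays per loop), "off"
+// (constant index offset), "inv" (invariant offset parameter), and "scl"
+// (scaled index) loop shapes on top of the patterns in the mixed-type tests.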
+
+public class TestShortVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int OFFSET = 3;
+  private static final int SCALE = 2;
+  private static final int ALIGN_OFF = 8;
+  private static final int UNALIGN_OFF = 5;
+
+  public static void main(String[] args) {
+    System.out.println("Testing Short vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    short[] a1 = new short[ARRLEN];
+    short[] a2 = new short[ARRLEN];
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+      test_vi(a2, (short)123);
+      test_cp(a1, a2);
+      test_2ci(a1, a2);
+      test_2vi(a1, a2, (short)123, (short)103);
+      test_ci_neg(a1);
+      test_vi_neg(a2, (short)123);
+      test_cp_neg(a1, a2);
+      test_2ci_neg(a1, a2);
+      test_2vi_neg(a1, a2, (short)123, (short)103);
+      test_ci_oppos(a1);
+      test_vi_oppos(a2, (short)123);
+      test_cp_oppos(a1, a2);
+      test_2ci_oppos(a1, a2);
+      test_2vi_oppos(a1, a2, (short)123, (short)103);
+      test_ci_off(a1);
+      test_vi_off(a2, (short)123);
+      test_cp_off(a1, a2);
+      test_2ci_off(a1, a2);
+      test_2vi_off(a1, a2, (short)123, (short)103);
+      test_ci_inv(a1, OFFSET);
+      test_vi_inv(a2, (short)123, OFFSET);
+      test_cp_inv(a1, a2, OFFSET);
+      test_2ci_inv(a1, a2, OFFSET);
+      test_2vi_inv(a1, a2, (short)123, (short)103, OFFSET);
+      test_ci_scl(a1);
+      test_vi_scl(a2, (short)123);
+      test_cp_scl(a1, a2);
+      test_2ci_scl(a1, a2);
+      test_2vi_scl(a1, a2, (short)123, (short)103);
+      test_cp_alndst(a1, a2);
+      test_cp_alnsrc(a1, a2);
+      test_2ci_aln(a1, a2);
+      test_2vi_aln(a1, a2, (short)123, (short)103);
+      test_cp_unalndst(a1, a2);
+      test_cp_unalnsrc(a1, a2);
+      test_2ci_unaln(a1, a2);
+      test_2vi_unaln(a1, a2, (short)123, (short)103);
+    }
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = -1;
+      a2[i] = -1;
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_ci(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci: a1", i, a1[i], (short)-123);
+      }
+      test_vi(a2, (short)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi: a2", i, a2[i], (short)123);
+      }
+      test_cp(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp: a1", i, a1[i], (short)123);
+      }
+      test_2ci(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci: a1", i, a1[i], (short)-123);
+        errn += verify("test_2ci: a2", i, a2[i], (short)-103);
+      }
+      test_2vi(a1, a2, (short)123, (short)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi: a1", i, a1[i], (short)123);
+        errn += verify("test_2vi: a2", i, a2[i], (short)103);
+      }
+      // Reset for negative stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_neg(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_neg: a1", i, a1[i], (short)-123);
+      }
+      test_vi_neg(a2, (short)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_neg: a2", i, a2[i], (short)123);
+      }
+      test_cp_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_neg: a1", i, a1[i], (short)123);
+      }
+      test_2ci_neg(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_neg: a1", i, a1[i], (short)-123);
+        errn += verify("test_2ci_neg: a2", i, a2[i], (short)-103);
+      }
+      test_2vi_neg(a1, a2, (short)123, (short)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_neg: a1", i, a1[i], (short)123);
+        errn += verify("test_2vi_neg: a2", i, a2[i], (short)103);
+      }
+      // Reset for opposite stride
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_oppos(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ci_oppos: a1", i, a1[i], (short)-123);
+      }
+      test_vi_oppos(a2, (short)123);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_vi_oppos: a2", i, a2[i], (short)123);
+      }
+      test_cp_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_cp_oppos: a1", i, a1[i], (short)123);
+      }
+      test_2ci_oppos(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2ci_oppos: a1", i, a1[i], (short)-123);
+        errn += verify("test_2ci_oppos: a2", i, a2[i], (short)-103);
+      }
+      test_2vi_oppos(a1, a2, (short)123, (short)103);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_2vi_oppos: a1", i, a1[i], (short)123);
+        errn += verify("test_2vi_oppos: a2", i, a2[i], (short)103);
+      }
+      // Reset for indexing with offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_off(a1);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_off: a1", i, a1[i], (short)-123);
+      }
+      test_vi_off(a2, (short)123);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_off: a2", i, a2[i], (short)123);
+      }
+      test_cp_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_off: a1", i, a1[i], (short)123);
+      }
+      test_2ci_off(a1, a2);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_off: a1", i, a1[i], (short)-123);
+        errn += verify("test_2ci_off: a2", i, a2[i], (short)-103);
+      }
+      test_2vi_off(a1, a2, (short)123, (short)103);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], (short)123);
+        errn += verify("test_2vi_off: a2", i, a2[i], (short)103);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_off: a1", i, a1[i], (short)-1);
+        errn += verify("test_2vi_off: a2", i, a2[i], (short)-1);
+      }
+      // Reset for indexing with invariant offset
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_inv(a1, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_ci_inv: a1", i, a1[i], (short)-123);
+      }
+      test_vi_inv(a2, (short)123, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_vi_inv: a2", i, a2[i], (short)123);
+      }
+      test_cp_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_cp_inv: a1", i, a1[i], (short)123);
+      }
+      test_2ci_inv(a1, a2, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2ci_inv: a1", i, a1[i], (short)-123);
+        errn += verify("test_2ci_inv: a2", i, a2[i], (short)-103);
+      }
+      test_2vi_inv(a1, a2, (short)123, (short)103, OFFSET);
+      for (int i=OFFSET; i<ARRLEN; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], (short)123);
+        errn += verify("test_2vi_inv: a2", i, a2[i], (short)103);
+      }
+      for (int i=0; i<OFFSET; i++) {
+        errn += verify("test_2vi_inv: a1", i, a1[i], (short)-1);
+        errn += verify("test_2vi_inv: a2", i, a2[i], (short)-1);
+      }
+      // Reset for indexing with scale
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_ci_scl(a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : -123;
+        errn += verify("test_ci_scl: a1", i, a1[i], (short)val);
+      }
+      test_vi_scl(a2, (short)123);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_vi_scl: a2", i, a2[i], (short)val);
+      }
+      test_cp_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        int val = (i%SCALE != 0) ? -1 : 123;
+        errn += verify("test_cp_scl: a1", i, a1[i], (short)val);
+      }
+      test_2ci_scl(a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a1", i, a1[i], (short)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], (short)-123);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2ci_scl: a2", i, a2[i], (short)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], (short)-103);
+        }
+      }
+      test_2vi_scl(a1, a2, (short)123, (short)103);
+      for (int i=0; i<ARRLEN; i++) {
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a1", i, a1[i], (short)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], (short)123);
+        }
+        if (i%SCALE != 0) {
+          errn += verify("test_2vi_scl: a2", i, a2[i], (short)-1);
+        } else if (i*SCALE < ARRLEN) {
+          errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], (short)103);
+        }
+      }
+      // Reset for two arrays copied at an aligned relative offset (ALIGN_OFF)
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_vi(a2, (short)123);
+      test_cp_alndst(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alndst: a1", i, a1[i], (short)123);
+      }
+      test_vi(a2, (short)-123);
+      test_cp_alnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)-123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2ci_aln(a1, a2);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a1", i, a1[i], (short)-123);
+      }
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], (short)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln: a2", i, a2[i], (short)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2vi_aln(a1, a2, (short)123, (short)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], (short)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln: a2", i, a2[i], (short)103);
+      }
+
+      // Reset for two arrays copied at an unaligned relative offset (UNALIGN_OFF)
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_vi(a2, (short)123);
+      test_cp_unalndst(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalndst: a1", i, a1[i], (short)123);
+      }
+      test_vi(a2, (short)-123);
+      test_cp_unalnsrc(a1, a2);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)-123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2ci_unaln(a1, a2);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a1", i, a1[i], (short)-123);
+      }
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], (short)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln: a2", i, a2[i], (short)-1);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+        a2[i] = -1;
+      }
+      test_2vi_unaln(a1, a2, (short)123, (short)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], (short)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a1", i, a1[i], (short)-1);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln: a2", i, a2[i], (short)103);
+      }
+
+      // Reset for aligned overlap initialization
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i] = (short)i;
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_cp_alndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (short)v);
+      }
+      for (int i=0; i<ALIGN_OFF; i++) {
+        a1[i+ALIGN_OFF] = -1;
+      }
+      test_cp_alnsrc(a1, a1);
+      for (int i=0; i<ALIGN_OFF; i++) {
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)-1);
+      }
+      for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%ALIGN_OFF;
+        errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
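+      // With a == b, each iteration stores a[i+ALIGN_OFF] = -123 and then
+      // a[i] = -103, so the later store wins everywhere except the final
+      // ALIGN_OFF elements; a vectorized version must preserve that order.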
+      test_2ci_aln(a1, a1);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (short)-103);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (short)-123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2vi_aln(a1, a1, (short)123, (short)103);
+      for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (short)123);
+      }
+      for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (short)103);
+      }
+
+      // Reset for unaligned overlap initialization
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i] = (short)i;
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_cp_unalndst(a1, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (short)v);
+      }
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        a1[i+UNALIGN_OFF] = -1;
+      }
+      test_cp_unalnsrc(a1, a1);
+      for (int i=0; i<UNALIGN_OFF; i++) {
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)-1);
+      }
+      for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+        int v = i%UNALIGN_OFF;
+        errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)v);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2ci_unaln(a1, a1);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (short)-103);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (short)-123);
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a1[i] = -1;
+      }
+      test_2vi_unaln(a1, a1, (short)123, (short)103);
+      for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (short)123);
+      }
+      for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+        errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (short)103);
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi(a2, (short)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi(a1, a2, (short)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_neg(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_neg(a2, (short)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_neg(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_neg: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_neg(a1, a2, (short)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_neg: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_oppos(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_oppos(a2, (short)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_oppos(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_oppos: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_oppos(a1, a2, (short)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_oppos: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_off(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_off(a2, (short)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_off(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_off: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_off(a1, a2, (short)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_off: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_inv(a1, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_inv(a2, (short)123, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_inv(a1, a2, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_inv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_inv(a1, a2, (short)123, (short)103, OFFSET);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_inv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ci_scl(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_vi_scl(a2, (short)123);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_vi_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_scl(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_scl: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_scl(a1, a2, (short)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_scl: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_alnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_alnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_aln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_aln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_aln(a1, a2, (short)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_aln: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalndst(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalndst: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_cp_unalnsrc(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_cp_unalnsrc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2ci_unaln(a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2ci_unaln: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_2vi_unaln(a1, a2, (short)123, (short)103);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_2vi_unaln: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_ci(short[] a) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+    }
+  }
+  static void test_vi(short[] a, short b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp(short[] a, short[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci(short[] a, short[] b) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi(short[] a, short[] b, short c, short d) {
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_neg(short[] a) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+    }
+  }
+  static void test_vi_neg(short[] a, short b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b;
+    }
+  }
+  static void test_cp_neg(short[] a, short[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = b[i];
+    }
+  }
+  static void test_2ci_neg(short[] a, short[] b) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_neg(short[] a, short[] b, short c, short d) {
+    for (int i = a.length-1; i >= 0; i-=1) {
+      a[i] = c;
+      b[i] = d;
+    }
+  }
+  static void test_ci_oppos(short[] a) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+    }
+  }
+  static void test_vi_oppos(short[] a, short b) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[limit-i] = b;
+    }
+  }
+  static void test_cp_oppos(short[] a, short[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[i] = b[limit-i];
+    }
+  }
+  static void test_2ci_oppos(short[] a, short[] b) {
+    int limit = a.length-1;
+    for (int i = 0; i < a.length; i+=1) {
+      a[limit-i] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_oppos(short[] a, short[] b, short c, short d) {
+    int limit = a.length-1;
+    for (int i = limit; i >= 0; i-=1) {
+      a[i] = c;
+      b[limit-i] = d;
+    }
+  }
+  static void test_ci_off(short[] a) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = -123;
+    }
+  }
+  static void test_vi_off(short[] a, short b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b;
+    }
+  }
+  static void test_cp_off(short[] a, short[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = b[i+OFFSET];
+    }
+  }
+  static void test_2ci_off(short[] a, short[] b) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = -123;
+      b[i+OFFSET] = -103;
+    }
+  }
+  static void test_2vi_off(short[] a, short[] b, short c, short d) {
+    for (int i = 0; i < a.length-OFFSET; i+=1) {
+      a[i+OFFSET] = c;
+      b[i+OFFSET] = d;
+    }
+  }
+  static void test_ci_inv(short[] a, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = -123;
+    }
+  }
+  static void test_vi_inv(short[] a, short b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b;
+    }
+  }
+  static void test_cp_inv(short[] a, short[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = b[i+k];
+    }
+  }
+  static void test_2ci_inv(short[] a, short[] b, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = -123;
+      b[i+k] = -103;
+    }
+  }
+  static void test_2vi_inv(short[] a, short[] b, short c, short d, int k) {
+    for (int i = 0; i < a.length-k; i+=1) {
+      a[i+k] = c;
+      b[i+k] = d;
+    }
+  }
+  static void test_ci_scl(short[] a) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = -123;
+    }
+  }
+  static void test_vi_scl(short[] a, short b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b;
+    }
+  }
+  static void test_cp_scl(short[] a, short[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = b[i*SCALE];
+    }
+  }
+  static void test_2ci_scl(short[] a, short[] b) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = -123;
+      b[i*SCALE] = -103;
+    }
+  }
+  static void test_2vi_scl(short[] a, short[] b, short c, short d) {
+    for (int i = 0; i*SCALE < a.length; i+=1) {
+      a[i*SCALE] = c;
+      b[i*SCALE] = d;
+    }
+  }
+  static void test_cp_alndst(short[] a, short[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_alnsrc(short[] a, short[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = b[i+ALIGN_OFF];
+    }
+  }
+  static void test_2ci_aln(short[] a, short[] b) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i+ALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_aln(short[] a, short[] b, short c, short d) {
+    for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+ALIGN_OFF] = d;
+    }
+  }
+  static void test_cp_unalndst(short[] a, short[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = b[i];
+    }
+  }
+  static void test_cp_unalnsrc(short[] a, short[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = b[i+UNALIGN_OFF];
+    }
+  }
+  static void test_2ci_unaln(short[] a, short[] b) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i+UNALIGN_OFF] = -123;
+      b[i] = -103;
+    }
+  }
+  static void test_2vi_unaln(short[] a, short[] b, short c, short d) {
+    for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+      a[i] = c;
+      b[i+UNALIGN_OFF] = d;
+    }
+  }
+
+  static int verify(String text, int i, short elem, short val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
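Editor's note on the kernels above, as read from their bodies: ci/vi store a constant or a passed-in value, cp copies one array into another, and the 2ci/2vi variants write two arrays per iteration; the suffixes then vary the loop shape — _neg iterates downward, _oppos runs the index against the store direction, _off and _inv add a compile-time constant (OFFSET) or a loop-invariant argument, _scl strides by SCALE, and _aln/_unaln shift one stream by ALIGN_OFF or UNALIGN_OFF to (mis)align the accesses. The timing section repeats one idiom per kernel; a hypothetical helper (not part of the test) that captures that idiom:

    // Sketch only: time one kernel for ITERS iterations
    // (ITERS and the kernel names are assumed from the test above).
    static void time(String name, Runnable kernel) {
      long start = System.currentTimeMillis();
      for (int i = 0; i < ITERS; i++) {
        kernel.run();
      }
      long end = System.currentTimeMillis();
      System.out.println(name + ": " + (end - start));
    }
    // Usage (captured arrays must be final or effectively final):
    //   time("test_ci", new Runnable() { public void run() { test_ci(a1); } });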
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7160610/Test7160610.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7160610
+ * @summary Unknown Native Code compilation issue.
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-OptimizeFill Test7160610
+ */
+
+public class Test7160610 {
+  private static final byte[] BYTE_ARRAY = new byte[7];
+  private static int[] anIntArray1190 = new int[32768];
+  private static int[] anIntArray1191 = new int[32768];
+
+  public static void main(String arg[]) {
+    int i = 256;
+    for(int j = BYTE_ARRAY[2]; j < anIntArray1190.length; j++) {
+      anIntArray1190[j] = BYTE_ARRAY[2];
+    }
+
+    for(int k = BYTE_ARRAY[2]; (k ^ BYTE_ARRAY[1]) > -5001; k++) {
+      int i1 = (int)(Math.random() * 128D * (double)i);
+      anIntArray1190[i1] = (int)(Math.random() * 256D);
+    }
+
+    for(int l = BYTE_ARRAY[2]; (l ^ BYTE_ARRAY[1]) > -21; l++) {
+      for(int j1 = BYTE_ARRAY[0]; j1 < i + -BYTE_ARRAY[0]; j1++) {
+        for(int k1 = BYTE_ARRAY[0]; (k1 ^ BYTE_ARRAY[1]) > -128; k1++) {
+          int l1 = k1 - -(j1 << 0x26cb6487);
+          anIntArray1191[l1] = (anIntArray1190[l1 + -BYTE_ARRAY[0]] - -anIntArray1190[l1 - -BYTE_ARRAY[0]] - -anIntArray1190[-128 + l1] - -anIntArray1190[128 + l1]) / BYTE_ARRAY[6];
+        }
+      }
+      int ai[] = anIntArray1190;
+      anIntArray1190 = anIntArray1191;
+      anIntArray1191 = ai;
+    }
+  }
+
+  static {
+    BYTE_ARRAY[6] = 4;
+    BYTE_ARRAY[5] = 5;
+    BYTE_ARRAY[4] = 3;
+    BYTE_ARRAY[3] = 2;
+    BYTE_ARRAY[2] = 0;
+    BYTE_ARRAY[1] = -1;
+    BYTE_ARRAY[0] = 1;
+  }
+}
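The interesting flags in the @run line above are -XX:-OptimizeFill, which disables C2's recognition of simple fill loops (normally replaced by an intrinsic fill stub), and -XX:+IgnoreUnrecognizedVMOptions, which keeps the run from failing on VMs that do not know the first flag. For contrast with the deliberately obfuscated loops in the test, here is a sketch of the canonical fill shape that OptimizeFill targets (illustrative only, not part of the test):

    // Sketch: the straightforward fill idiom C2's OptimizeFill recognizes;
    // Test7160610's loops intentionally avoid this shape.
    static void fill(int[] a, int v) {
      for (int i = 0; i < a.length; i++) {
        a[i] = v;
      }
    }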
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7177917/Test7177917.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * Micro-benchmark for Math.pow() and Math.exp()
+ */
+
+import java.util.*;
+
+public class Test7177917 {
+
+  static double d;
+
+  static Random r = new Random(0);
+
+  static long  m_pow(double[][] values) {
+    double res = 0;
+    long start = System.nanoTime();
+    for (int i = 0; i < values.length; i++) {
+      res += Math.pow(values[i][0], values[i][1]);
+    }
+    long stop = System.nanoTime();
+    d = res;
+    return (stop - start) / 1000;
+  }
+
+  static long  m_exp(double[] values) {
+    double res = 0;
+    long start = System.nanoTime();
+    for (int i = 0; i < values.length; i++) {
+      res += Math.exp(values[i]);
+    }
+    long stop = System.nanoTime();
+    d = res;
+    return (stop - start) / 1000;
+  }
+
+  static double[][] pow_values(int nb) {
+    double[][] res = new double[nb][2];
+    for (int i = 0; i < nb; i++) {
+      double ylogx = (1 + (r.nextDouble() * 2045)) - 1023; // 2045 rather than 2046 as a safety margin
+      double x = Math.abs(Double.longBitsToDouble(r.nextLong()));
+      while (x != x) { // x != x is true only for NaN: redraw until x is not NaN
+        x = Math.abs(Double.longBitsToDouble(r.nextLong()));
+      }
+      double logx = Math.log(x) / Math.log(2);
+      double y = ylogx / logx;
+
+      res[i][0] = x;
+      res[i][1] = y;
+    }
+    return res;
+  }
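+
+  // Note on the generator above: it draws a target t = y*log2(x)
+  // uniformly from roughly (-1022, 1023) so that pow(x, y) = 2^t stays a
+  // finite, normal double (hence "2045 rather than 2046 as a safety
+  // margin"), then solves for the exponent:
+  //   y = t / log2(x),  where log2(x) = log(x) / log(2).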
+
+  static double[] exp_values(int nb) {
+    double[] res = new double[nb];
+    for (int i = 0; i < nb; i++) {
+      double ylogx = (1 + (r.nextDouble() * 2045)) - 1023; // 2045 rather than 2046 as a safety margin
+      double x = Math.E;
+      double logx = Math.log(x) / Math.log(2);
+      double y = ylogx / logx;
+      res[i] = y;
+    }
+    return res;
+  }
+
+  public static void main(String[] args) {
+    {
+      // warmup
+      double[][] warmup_values = pow_values(10);
+      m_pow(warmup_values);
+
+      for (int i = 0; i < 20000; i++) {
+        m_pow(warmup_values);
+      }
+      // test pow perf
+      double[][] values = pow_values(1000000);
+      System.out.println("==> POW " + m_pow(values));
+
+      // force uncommon trap
+      double[][] nan_values = new double[1][2];
+      nan_values[0][0] = Double.NaN;
+      nan_values[0][1] = Double.NaN;
+      m_pow(nan_values);
+
+      // force recompilation
+      for (int i = 0; i < 20000; i++) {
+        m_pow(warmup_values);
+      }
+
+      // test pow perf again
+      System.out.println("==> POW " + m_pow(values));
+    }
+    {
+      // warmup
+      double[] warmup_values = exp_values(10);
+      m_exp(warmup_values);
+
+      for (int i = 0; i < 20000; i++) {
+        m_exp(warmup_values);
+      }
+
+      // test exp perf
+      double[] values = exp_values(1000000);
+      System.out.println("==> EXP " + m_exp(values));
+
+      // force uncommon trap
+      double[] nan_values = new double[1];
+      nan_values[0] = Double.NaN;
+      m_exp(nan_values);
+
+      // force recompilation
+      for (int i = 0; i < 20000; i++) {
+        m_exp(warmup_values);
+      }
+
+      // test exp perf again
+      System.out.println("==> EXP " + m_exp(values));
+    }
+  }
+}
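The structure of main() above is the point of the benchmark: warm up so the JIT compiles m_pow/m_exp, time a large input, feed NaN to drive the intrinsic down a slow path and force an uncommon trap (deoptimization), warm up again to trigger recompilation, and time once more — presumably the two "==> POW"/"==> EXP" figures should be comparable if the recompiled code keeps the fast path. Note also the static field d: storing the accumulated result keeps the timed loops from being dead-code eliminated. A minimal sketch of the same pattern (names hypothetical):

    // Sketch: measure with a result sink, as in m_pow/m_exp above.
    static double sink;                        // defeats dead-code elimination
    static long timeIt(double[] xs) {
      long t0 = System.nanoTime();
      double r = 0;
      for (int i = 0; i < xs.length; i++) r += Math.exp(xs[i]);
      sink = r;
      return (System.nanoTime() - t0) / 1000;  // report microseconds
    }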
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7190310/Test7190310.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * Manual test
+ */
+
+import java.lang.ref.*;
+
+public class Test7190310 {
+  private static Object str = new Object() {
+    public String toString() {
+      return "The Object";
+    }
+
+    protected void finalize() throws Throwable {
+      System.out.println("The Object is being finalized");
+      super.finalize();
+    }
+  };
+  private static final ReferenceQueue<Object> rq =
+      new ReferenceQueue<Object>();
+  private static final WeakReference<Object> wr =
+      new WeakReference<Object>(str, rq);
+
+  public static void main(String[] args)
+      throws InterruptedException {
+    Thread reader = new Thread() {
+      public void run() {
+        while (wr.get() != null) {
+        }
+        System.out.println("wr.get() returned null");
+      }
+    };
+
+    Thread queueReader = new Thread() {
+      public void run() {
+        try {
+          Reference<? extends Object> ref = rq.remove();
+          System.out.println(ref);
+          System.out.println("queueReader returned, ref==wr is "
+              + (ref == wr));
+        } catch (InterruptedException e) {
+          System.err.println("remove() interrupted - exiting");
+        }
+      }
+    };
+
+    reader.start();
+    queueReader.start();
+
+    Thread.sleep(1000);
+    str = null;
+    System.gc();
+  }
+}
+
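This manual test encodes the scenario named in the @summary of the companion test below: the reader thread spins on wr.get(), and if the compiler hoists the load of Reference.referent out of the loop, the loop never observes the null that GC installs and never terminates. In comment form, the broken transformation would amount to (illustrative only):

    // What an (incorrect) hoist of the referent load would look like:
    // Object o = wr.get();      // loaded once, outside the loop
    // while (o != null) { }     // never re-reads referent -> spins forever

As we read it, the expected hand-run outcome is that after str = null and System.gc(), "wr.get() returned null" and the queueReader lines print and the VM exits.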
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7190310/Test7190310_unsafe.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 7190310
+ * @summary Inlining WeakReference.get(), and hoisting $referent may lead to non-terminating loops
+ * @run main/othervm -Xbatch Test7190310_unsafe
+ */
+
+import java.lang.ref.*;
+import java.lang.reflect.*;
+import sun.misc.Unsafe;
+
+public class Test7190310_unsafe {
+
+  static class TestObject {
+    public String toString() {
+      return "TestObject";
+    }
+  };
+
+  private static TestObject str = new TestObject();
+  private static final WeakReference ref = new WeakReference(str);
+
+  private TestObject obj;
+
+  public static void main(String[] args) throws Exception {
+    Class c = Test7190310_unsafe.class.getClassLoader().loadClass("sun.misc.Unsafe");
+    Field f = c.getDeclaredField("theUnsafe");
+    f.setAccessible(true);
+    Unsafe unsafe = (Unsafe)f.get(c);
+
+    f = Reference.class.getDeclaredField("referent");
+    f.setAccessible(true);
+    long referent_offset = unsafe.objectFieldOffset(f);
+
+    Test7190310_unsafe t = new Test7190310_unsafe();
+    TestObject o = new TestObject();
+    t.obj = o;
+
+    // Warmup (compile methods)
+    System.err.println("Warmup");
+    Object obj = null;
+    for (int i = 0; i < 11000; i++) {
+      obj = getRef0(ref);
+    }
+    for (int i = 0; i < 11000; i++) {
+      obj = getRef1(unsafe, ref, referent_offset);
+    }
+    for (int i = 0; i < 11000; i++) {
+      obj = getRef2(unsafe, ref, referent_offset);
+    }
+    for (int i = 0; i < 11000; i++) {
+      obj = getRef3(unsafe, ref, referent_offset);
+    }
+    for (int i = 0; i < 11000; i++) {
+      obj = getRef4(unsafe, t, referent_offset);
+    }
+
+    // Access verification
+    System.err.println("Verification");
+    if (!verifyGet(referent_offset, unsafe)) {
+      System.exit(97);
+    }
+
+    obj = getRef3(unsafe, t, referent_offset);
+    if (obj != o) {
+      System.out.println("FAILED: unsafe.getObject(Object, " + referent_offset + ") " + obj + " != " + o);
+      System.exit(97);
+    }
+    obj = getRef4(unsafe, t, referent_offset);
+    if (obj != o) {
+      System.out.println("FAILED: unsafe.getObject(Test7190310, " + referent_offset + ") " + obj + " != " + o);
+      System.exit(97);
+    }
+  }
+
+  static boolean verifyGet(long referent_offset, Unsafe unsafe) throws Exception {
+    // Access verification
+    System.out.println("referent: " + str);
+    Object obj = getRef0(ref);
+    if (obj != str) {
+      System.out.println("FAILED: weakRef.get() " + obj + " != " + str);
+      return false;
+    }
+    obj = getRef1(unsafe, ref, referent_offset);
+    if (obj != str) {
+      System.out.println("FAILED: unsafe.getObject(weakRef, " + referent_offset + ") " + obj + " != " + str);
+      return false;
+    }
+    obj = getRef2(unsafe, ref, referent_offset);
+    if (obj != str) {
+      System.out.println("FAILED: unsafe.getObject(abstRef, " + referent_offset + ") " + obj + " != " + str);
+      return false;
+    }
+    obj = getRef3(unsafe, ref, referent_offset);
+    if (obj != str) {
+      System.out.println("FAILED: unsafe.getObject(Object, " + referent_offset + ") " + obj + " != " + str);
+      return false;
+    }
+    return true;
+  }
+
+  static Object getRef0(WeakReference ref) throws Exception {
+    return ref.get();
+  }
+  static Object getRef1(Unsafe unsafe, WeakReference ref, long referent_offset) throws Exception {
+    return unsafe.getObject(ref, referent_offset);
+  }
+  static Object getRef2(Unsafe unsafe, Reference ref, long referent_offset) throws Exception {
+    return unsafe.getObject(ref, referent_offset);
+  }
+  static Object getRef3(Unsafe unsafe, Object ref, long referent_offset) throws Exception {
+    return unsafe.getObject(ref, referent_offset);
+  }
+  static Object getRef4(Unsafe unsafe, Test7190310_unsafe ref, long referent_offset) throws Exception {
+    return unsafe.getObject(ref, referent_offset);
+  }
+}
+
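The reflection preamble above shows the general recipe for raw field access through sun.misc.Unsafe, and the getRef0..getRef4 variants then perform the same load with progressively weaker static receiver types (WeakReference, Reference, Object, an unrelated class) — presumably to exercise the compiler's handling of unsafe loads that may or may not alias Reference.referent. The core recipe, extracted as a fragment (assumes an Unsafe instance obtained via the theUnsafe field as in main, inside a method declared to throw Exception):

    // Sketch: read a field "raw" via its offset.
    Field f = java.lang.ref.Reference.class.getDeclaredField("referent");
    f.setAccessible(true);
    long off = unsafe.objectFieldOffset(f);            // offset is stable for this JVM run
    Object referent = unsafe.getObject(someRef, off);  // bypasses Reference.get()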
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7192963/TestByteVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7192963
+ * @summary assert(_in[req-1] == this) failed: Must pass arg count to 'new'
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestByteVect
+ */
+
+public class TestByteVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  public static void main(String args[]) {
+    System.out.println("Testing Byte vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    byte[] a0 = new byte[ARRLEN];
+    byte[] a1 = new byte[ARRLEN];
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = (byte)i;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+      test_addi(a0, a1);
+      test_lsai(a0, a1);
+      test_unrl_init(a0);
+      test_unrl_addi(a0, a1);
+      test_unrl_lsai(a0, a1);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_init: ", i, a0[i], (byte)(i&3));
+      }
+      test_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addi: ", i, a0[i], (byte)(i+(i&3)));
+      }
+      test_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_lsai: ", i, a0[i], (byte)(i<<(i&3)));
+      }
+      test_unrl_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_init: ", i, a0[i], (byte)(i&3));
+      }
+      test_unrl_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_addi: ", i, a0[i], (byte)(i+(i&3)));
+      }
+      test_unrl_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_lsai: ", i, a0[i], (byte)(i<<(i&3)));
+      }
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_lsai: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_lsai: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_init(byte[] a0) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(i&3);
+    }
+  }
+  static void test_addi(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]+(i&3));
+    }
+  }
+  static void test_lsai(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]<<(i&3));
+    }
+  }
+  static void test_unrl_init(byte[] a0) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = 0;
+      a0[i+1] = 1;
+      a0[i+2] = 2;
+      a0[i+3] = 3;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (byte)(i&3);
+    }
+  }
+  static void test_unrl_addi(byte[] a0, byte[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = (byte)(a1[i+0]+0);
+      a0[i+1] = (byte)(a1[i+1]+1);
+      a0[i+2] = (byte)(a1[i+2]+2);
+      a0[i+3] = (byte)(a1[i+3]+3);
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (byte)(a1[i]+(i&3));
+    }
+  }
+  static void test_unrl_lsai(byte[] a0, byte[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = (byte)(a1[i+0]<<0);
+      a0[i+1] = (byte)(a1[i+1]<<1);
+      a0[i+2] = (byte)(a1[i+2]<<2);
+      a0[i+3] = (byte)(a1[i+3]<<3);
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (byte)(a1[i]<<(i&3));
+    }
+  }
+
+  static int verify(String text, int i, byte elem, byte val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
+
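TestByteVect fixes the template instantiated by the five sibling files that follow (double, float, int, long, short): three straight-line kernels (init with i&3, add a1[i] plus i&3, shift or divide by a small per-index amount) plus unrl_ variants that hand-unroll by four with a scalar tail loop — the shape that provoked the assert quoted in the @summary. The unroll pattern, reduced to a skeleton (sketch only):

    // Sketch: unroll-by-4 with scalar epilogue, as in the test_unrl_* kernels.
    static void unrolled(int[] a) {
      int i = 0;
      for (; i < a.length - 4; i += 4) {  // main body: 4 elements per trip
        a[i + 0] = 0; a[i + 1] = 1; a[i + 2] = 2; a[i + 3] = 3;
      }
      for (; i < a.length; i++) {         // tail: the remaining 0..4 elements
        a[i] = i & 3;
      }
    }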
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7192963/TestDoubleVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7192963
+ * @summary assert(_in[req-1] == this) failed: Must pass arg count to 'new'
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestDoubleVect
+ */
+
+public class TestDoubleVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  public static void main(String args[]) {
+    System.out.println("Testing Double vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    double[] a0 = new double[ARRLEN];
+    double[] a1 = new double[ARRLEN];
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = (double)i;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+      test_addi(a0, a1);
+      test_divi(a0, a1);
+      test_unrl_init(a0);
+      test_unrl_addi(a0, a1);
+      test_unrl_divi(a0, a1);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_init: ", i, a0[i], (double)(i&3));
+      }
+      test_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addi: ", i, a0[i], (double)(i+(i&3)));
+      }
+      test_divi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divi: ", i, a0[i], (double)i/(double)((i&3)+1));
+      }
+      test_unrl_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_init: ", i, a0[i], (double)(i&3));
+      }
+      test_unrl_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_addi: ", i, a0[i], (double)(i+(i&3)));
+      }
+      test_unrl_divi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_divi: ", i, a0[i], (double)i/(double)((i&3)+1));
+      }
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_divi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_divi: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_init(double[] a0) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (double)(i&3);
+    }
+  }
+  static void test_addi(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = a1[i]+(double)(i&3);
+    }
+  }
+  static void test_divi(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = a1[i]/(double)((i&3)+1);
+    }
+  }
+  static void test_unrl_init(double[] a0) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = 0.;
+      a0[i+1] = 1.;
+      a0[i+2] = 2.;
+      a0[i+3] = 3.;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (double)(i&3);
+    }
+  }
+  static void test_unrl_addi(double[] a0, double[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = a1[i+0]+0.;
+      a0[i+1] = a1[i+1]+1.;
+      a0[i+2] = a1[i+2]+2.;
+      a0[i+3] = a1[i+3]+3.;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = a1[i]+(double)(i&3);
+    }
+  }
+  static void test_unrl_divi(double[] a0, double[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = a1[i+0]/1.;
+      a0[i+1] = a1[i+1]/2.;
+      a0[i+2] = a1[i+2]/3.;
+      a0[i+3] = a1[i+3]/4.;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = a1[i]/(double)((i&3)+1);
+    }
+  }
+
+  static int verify(String text, int i, double elem, double val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7192963/TestFloatVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7192963
+ * @summary assert(_in[req-1] == this) failed: Must pass arg count to 'new'
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestFloatVect
+ */
+
+public class TestFloatVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  public static void main(String args[]) {
+    System.out.println("Testing Float vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    float[] a0 = new float[ARRLEN];
+    float[] a1 = new float[ARRLEN];
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = (float)i;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+      test_addi(a0, a1);
+      test_divi(a0, a1);
+      test_unrl_init(a0);
+      test_unrl_addi(a0, a1);
+      test_unrl_divi(a0, a1);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_init: ", i, a0[i], (float)(i&3));
+      }
+      test_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addi: ", i, a0[i], (float)(i+(i&3)));
+      }
+      test_divi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divi: ", i, a0[i], (float)i/(float)((i&3)+1));
+      }
+      test_unrl_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_init: ", i, a0[i], (float)(i&3));
+      }
+      test_unrl_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_addi: ", i, a0[i], (float)(i+(i&3)));
+      }
+      test_unrl_divi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_divi: ", i, a0[i], (float)i/(float)((i&3)+1));
+      }
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_divi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_divi: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_init(float[] a0) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (float)(i&3);
+    }
+  }
+  static void test_addi(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = a1[i]+(float)(i&3);
+    }
+  }
+  static void test_divi(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = a1[i]/(float)((i&3)+1);
+    }
+  }
+  static void test_unrl_init(float[] a0) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = 0.f;
+      a0[i+1] = 1.f;
+      a0[i+2] = 2.f;
+      a0[i+3] = 3.f;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (float)(i&3);
+    }
+  }
+  static void test_unrl_addi(float[] a0, float[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = a1[i+0]+0.f;
+      a0[i+1] = a1[i+1]+1.f;
+      a0[i+2] = a1[i+2]+2.f;
+      a0[i+3] = a1[i+3]+3.f;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = a1[i]+(float)(i&3);
+    }
+  }
+  static void test_unrl_divi(float[] a0, float[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = a1[i+0]/1.f;
+      a0[i+1] = a1[i+1]/2.f;
+      a0[i+2] = a1[i+2]/3.f;
+      a0[i+3] = a1[i+3]/4.f;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = a1[i]/(float)((i&3)+1);
+    }
+  }
+
+  static int verify(String text, int i, float elem, float val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7192963/TestIntVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7192963
+ * @summary assert(_in[req-1] == this) failed: Must pass arg count to 'new'
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestIntVect
+ */
+
+public class TestIntVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  public static void main(String args[]) {
+    System.out.println("Testing Integer vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    int[] a0 = new int[ARRLEN];
+    int[] a1 = new int[ARRLEN];
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = i;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+      test_addi(a0, a1);
+      test_lsai(a0, a1);
+      test_unrl_init(a0);
+      test_unrl_addi(a0, a1);
+      test_unrl_lsai(a0, a1);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_init: ", i, a0[i], (i&3));
+      }
+      test_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addi: ", i, a0[i], (i+(i&3)));
+      }
+      test_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_lsai: ", i, a0[i], (i<<(i&3)));
+      }
+      test_unrl_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_init: ", i, a0[i], (i&3));
+      }
+      test_unrl_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_addi: ", i, a0[i], (i+(i&3)));
+      }
+      test_unrl_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_lsai: ", i, a0[i], (i<<(i&3)));
+      }
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_lsai: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_lsai: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_init(int[] a0) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (i&3);
+    }
+  }
+  static void test_addi(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = a1[i]+(i&3);
+    }
+  }
+  static void test_lsai(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = a1[i]<<(i&3);
+    }
+  }
+  static void test_unrl_init(int[] a0) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = 0;
+      a0[i+1] = 1;
+      a0[i+2] = 2;
+      a0[i+3] = 3;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (i&3);
+    }
+  }
+  static void test_unrl_addi(int[] a0, int[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = a1[i+0]+0;
+      a0[i+1] = a1[i+1]+1;
+      a0[i+2] = a1[i+2]+2;
+      a0[i+3] = a1[i+3]+3;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = a1[i]+(i&3);
+    }
+  }
+  static void test_unrl_lsai(int[] a0, int[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = a1[i+0]<<0;
+      a0[i+1] = a1[i+1]<<1;
+      a0[i+2] = a1[i+2]<<2;
+      a0[i+3] = a1[i+3]<<3;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = a1[i]<<(i&3);
+    }
+  }
+
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7192963/TestLongVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7192963
+ * @summary assert(_in[req-1] == this) failed: Must pass arg count to 'new'
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestLongVect
+ */
+
+public class TestLongVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  public static void main(String args[]) {
+    System.out.println("Testing Long vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    long[] a0 = new long[ARRLEN];
+    long[] a1 = new long[ARRLEN];
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = (long)i;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+      test_addi(a0, a1);
+      test_lsai(a0, a1);
+      test_unrl_init(a0);
+      test_unrl_addi(a0, a1);
+      test_unrl_lsai(a0, a1);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_init: ", i, a0[i], (long)(i&3));
+      }
+      test_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addi: ", i, a0[i], (long)(i+(i&3)));
+      }
+      test_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_lsai: ", i, a0[i], (long)(i<<(i&3)));
+      }
+      test_unrl_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_init: ", i, a0[i], (long)(i&3));
+      }
+      test_unrl_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_addi: ", i, a0[i], (long)(i+(i&3)));
+      }
+      test_unrl_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_lsai: ", i, a0[i], (long)(i<<(i&3)));
+      }
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_lsai: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_lsai: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_init(long[] a0) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(i&3);
+    }
+  }
+  static void test_addi(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]+(i&3));
+    }
+  }
+  static void test_lsai(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]<<(i&3));
+    }
+  }
+  static void test_unrl_init(long[] a0) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = 0;
+      a0[i+1] = 1;
+      a0[i+2] = 2;
+      a0[i+3] = 3;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (long)(i&3);
+    }
+  }
+  static void test_unrl_addi(long[] a0, long[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = (long)(a1[i+0]+0);
+      a0[i+1] = (long)(a1[i+1]+1);
+      a0[i+2] = (long)(a1[i+2]+2);
+      a0[i+3] = (long)(a1[i+3]+3);
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (long)(a1[i]+(i&3));
+    }
+  }
+  static void test_unrl_lsai(long[] a0, long[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = (long)(a1[i+0]<<0);
+      a0[i+1] = (long)(a1[i+1]<<1);
+      a0[i+2] = (long)(a1[i+2]<<2);
+      a0[i+3] = (long)(a1[i+3]<<3);
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (long)(a1[i]<<(i&3));
+    }
+  }
+
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7192963/TestShortVect.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7192963
+ * @summary assert(_in[req-1] == this) failed: Must pass arg count to 'new'
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestShortVect
+ */
+
+public class TestShortVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  public static void main(String args[]) {
+    System.out.println("Testing Short vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    short[] a0 = new short[ARRLEN];
+    short[] a1 = new short[ARRLEN];
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = (short)i;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+      test_addi(a0, a1);
+      test_lsai(a0, a1);
+      test_unrl_init(a0);
+      test_unrl_addi(a0, a1);
+      test_unrl_lsai(a0, a1);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_init: ", i, a0[i], (short)(i&3));
+      }
+      test_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addi: ", i, a0[i], (short)(i+(i&3)));
+      }
+      test_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_lsai: ", i, a0[i], (short)(i<<(i&3)));
+      }
+      test_unrl_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_init: ", i, a0[i], (short)(i&3));
+      }
+      test_unrl_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_addi: ", i, a0[i], (short)(i+(i&3)));
+      }
+      test_unrl_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_lsai: ", i, a0[i], (short)(i<<(i&3)));
+      }
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_lsai: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_lsai: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_init(short[] a0) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(i&3);
+    }
+  }
+  static void test_addi(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]+(i&3));
+    }
+  }
+  static void test_lsai(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]<<(i&3));
+    }
+  }
+  static void test_unrl_init(short[] a0) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = 0;
+      a0[i+1] = 1;
+      a0[i+2] = 2;
+      a0[i+3] = 3;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (short)(i&3);
+    }
+  }
+  static void test_unrl_addi(short[] a0, short[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = (short)(a1[i+0]+0);
+      a0[i+1] = (short)(a1[i+1]+1);
+      a0[i+2] = (short)(a1[i+2]+2);
+      a0[i+3] = (short)(a1[i+3]+3);
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (short)(a1[i]+(i&3));
+    }
+  }
+  static void test_unrl_lsai(short[] a0, short[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = (short)(a1[i+0]<<0);
+      a0[i+1] = (short)(a1[i+1]<<1);
+      a0[i+2] = (short)(a1[i+2]<<2);
+      a0[i+3] = (short)(a1[i+3]<<3);
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (short)(a1[i]<<(i&3));
+    }
+  }
+
+  static int verify(String text, int i, short elem, short val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
+
--- a/test/gc/6941923/test6941923.sh	Wed Sep 26 21:45:41 2012 -0700
+++ b/test/gc/6941923/test6941923.sh	Sat Oct 20 00:08:35 2012 -0700
@@ -9,12 +9,12 @@
 ## skip on windows
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     NULL=/dev/null
     PS=":"
     FS="/"
     ;;
-  Windows_* )
+  Windows_* | CYGWIN_* )
     echo "Test skipped for Windows"
     exit 0 
     ;;
@@ -30,7 +30,7 @@
   exit 0
 fi
 
-$JAVA_HOME/bin/java -version > $NULL 2>&1
+$JAVA_HOME/bin/java ${TESTVMOPTS} -version > $NULL 2>&1
 
 if [ $? != 0 ]; then
   echo "Wrong JAVA_HOME? JAVA_HOME: $JAVA_HOME"
@@ -119,7 +119,7 @@
 
 options="-Xloggc:$logfile -XX:+UseConcMarkSweepGC -XX:+PrintGC -XX:+PrintGCDetails -XX:+UseGCLogFileRotation  -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=$gclogsize"
 echo "Test gc log rotation in same file, wait for $tts minutes ...."
-$JAVA_HOME/bin/java $options $testname $tts
+$JAVA_HOME/bin/java ${TESTVMOPTS} $options $testname $tts
 if [ $? != 0 ]; then
   echo "$msgfail"
   exit -1
@@ -148,7 +148,7 @@
 numoffiles=3
 options="-Xloggc:$logfile -XX:+UseConcMarkSweepGC -XX:+PrintGC -XX:+PrintGCDetails -XX:+UseGCLogFileRotation  -XX:NumberOfGCLogFiles=$numoffiles -XX:GCLogFileSize=$gclogsize"
 echo "Test gc log rotation in $numoffiles files, wait for $tts minutes ...."
-$JAVA_HOME/bin/java $options $testname $tts
+$JAVA_HOME/bin/java ${TESTVMOPTS} $options $testname $tts
 if [ $? != 0 ]; then
   echo "$msgfail"
   exit -1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/7168848/HumongousAlloc.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test Humongous.java
+ * @bug 7168848
+ * @summary G1: humongous object allocations should initiate marking cycles when necessary
+ * @run main/othervm -Xms100m -Xmx100m -XX:+PrintGC -XX:G1HeapRegionSize=1m -XX:+UseG1GC  HumongousAlloc
+ *
+ */
+import java.lang.management.GarbageCollectorMXBean;
+import java.lang.management.ManagementFactory;
+import java.util.List;
+
+public class HumongousAlloc {
+
+    public static byte[] dummy;
+    private static int sleepFreq = 40;
+    private static int sleepTime = 1000;
+    private static double size = 0.75;
+    private static int iterations = 50;
+    private static int MB = 1024 * 1024;
+
+    public static void allocate(int size, int sleepTime, int sleepFreq) throws InterruptedException {
+        System.out.println("Will allocate objects of size: " + size
+                + " bytes and sleep for " + sleepTime
+                + " ms after every " + sleepFreq + "th allocation.");
+        int count = 0;
+        while (count < iterations) {
+            for (int i = 0; i < sleepFreq; i++) {
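+                // Allocate 'size - 16' bytes; the 16 bytes presumably leave
+                // room for the array object header, keeping each allocation
+                // at roughly 'size' bytes total.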
+                dummy = new byte[size - 16];
+            }
+            Thread.sleep(sleepTime);
+            count++;
+        }
+    }
+
+    public static void main(String[] args) throws InterruptedException {
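+        // With -XX:G1HeapRegionSize=1m, a ~0.75 MB array exceeds half a
+        // region and is therefore allocated as a humongous object.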
+        allocate((int) (size * MB), sleepTime, sleepFreq);
+        List<GarbageCollectorMXBean> collectors = ManagementFactory.getGarbageCollectorMXBeans();
+        for (GarbageCollectorMXBean collector : collectors) {
+            if (collector.getName().contains("G1 Old")) {
+                long count = collector.getCollectionCount();
+                if (count > 0) {
+                    throw new RuntimeException("Failed: full GCs should not have happened. Number of full GCs run: " + count);
+                } else {
+                    System.out.println("Passed.");
+                }
+            }
+        }
+    }
+}
+
--- a/test/runtime/6626217/Test6626217.sh	Wed Sep 26 21:45:41 2012 -0700
+++ b/test/runtime/6626217/Test6626217.sh	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 # 
-#  Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+#  Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
 #  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 # 
 #  This code is free software; you can redistribute it and/or modify it
@@ -46,24 +46,16 @@
   exit 1
 fi
 
-BIT_FLAG=""
-
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     NULL=/dev/null
     PS=":"
     FS="/"
     RM=/bin/rm
     CP=/bin/cp
     MV=/bin/mv
-    ## for solaris, linux it's HOME
-    FILE_LOCATION=$HOME
-    if [ -f ${FILE_LOCATION}${FS}JDK64BIT -a ${OS} = "SunOS" ]
-    then
-        BIT_FLAG=`cat ${FILE_LOCATION}${FS}JDK64BIT`
-    fi
     ;;
   Windows_* )
     NULL=NUL
@@ -73,6 +65,14 @@
     CP=cp
     MV=mv
     ;;
+  CYGWIN_* )
+    NULL=/dev/null
+    PS=";"
+    FS="/"
+    RM=rm
+    CP=cp
+    MV=mv
+    ;;
   * )
     echo "Unrecognized system!"
     exit 1;
@@ -87,7 +87,7 @@
 JAVA=${TESTJAVA}${FS}bin${FS}java
 JAVAC=${TESTJAVA}${FS}bin${FS}javac
 
-${JAVA} ${BIT_FLAG} -version
+${JAVA} ${TESTVMOPTS} -version
 
 # Current directory is scratch directory, copy all the test source there
 # (for the subsequent moves to work).
@@ -113,7 +113,7 @@
 ${MV} many_loader.impl1 many_loader.class
 ${RM} many_loader.java
 
-${JAVA} ${BIT_FLAG} -Xverify -Xint -cp . bug_21227 >test.out 2>&1
+${JAVA} ${TESTVMOPTS} -Xverify -Xint -cp . bug_21227 >test.out 2>&1
 grep "loader constraint" test.out
 exit $?
 
--- a/test/runtime/6878713/Test6878713.sh	Wed Sep 26 21:45:41 2012 -0700
+++ b/test/runtime/6878713/Test6878713.sh	Sat Oct 20 00:08:35 2012 -0700
@@ -25,27 +25,24 @@
   exit 1
 fi
 
-BIT_FLAG=""
-
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     NULL=/dev/null
     PS=":"
     FS="/"
-    ## for solaris, linux it's HOME
-    FILE_LOCATION=$HOME
-    if [ -f ${FILE_LOCATION}${FS}JDK64BIT -a ${OS} = "SunOS" ]
-    then
-        BIT_FLAG=`cat ${FILE_LOCATION}${FS}JDK64BIT | grep -v '^#'`
-    fi
     ;;
   Windows_* )
     NULL=NUL
     PS=";"
     FS="\\"
     ;;
+  CYGWIN_* )
+    NULL=/dev/null
+    PS=";"
+    FS="/"
+    ;;
   * )
     echo "Unrecognized system!"
     exit 1;
@@ -57,11 +54,11 @@
 
 THIS_DIR=`pwd`
 
-${TESTJAVA}${FS}bin${FS}java ${BIT_FLAG} -version
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} -version
 
 ${TESTJAVA}${FS}bin${FS}jar xvf ${TESTSRC}${FS}testcase.jar
 
-${TESTJAVA}${FS}bin${FS}java ${BIT_FLAG} OOMCrashClass1960_2 > test.out 2>&1
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} OOMCrashClass1960_2 > test.out 2>&1
 
 if [ -s core -o -s "hs_*.log" ]
 then
--- a/test/runtime/6929067/Test6929067.sh	Wed Sep 26 21:45:41 2012 -0700
+++ b/test/runtime/6929067/Test6929067.sh	Sat Oct 20 00:08:35 2012 -0700
@@ -19,8 +19,6 @@
   echo "If this is incorrect, try setting the variable manually."
 fi
 
-BIT_FLAG=""
-
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
@@ -29,20 +27,25 @@
     PS=":"
     FS="/"
     ;;
-  SunOS | Windows_* | *BSD)
-    NULL=NUL
-    PS=";"
-    FS="\\"
+  * )
     echo "Test passed; only valid for Linux"
     exit 0;
     ;;
-  * )
-    echo "Unrecognized system!"
-    exit 1;
-    ;;
 esac
 
-LD_LIBRARY_PATH=.:${TESTJAVA}/jre/lib/i386/client:/usr/openwin/lib:/usr/dt/lib:/usr/lib:$LD_LIBRARY_PATH
+# Choose arch: i386 or amd64 (test is Linux-specific)
+# Cannot simply look at TESTVMOPTS as -d64 is not
+# passed if there is only a 64-bit JVM available.
+
+${TESTJAVA}/bin/java ${TESTVMOPTS} -version 2>&1 | grep "64-Bit" >/dev/null
+if [ "$?" = "0" ]
+then
+  ARCH=amd64
+else
+  ARCH=i386
+fi
+
+LD_LIBRARY_PATH=.:${TESTJAVA}/jre/lib/${ARCH}/client:/usr/openwin/lib:/usr/dt/lib:/usr/lib:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH
 
 THIS_DIR=`pwd`
@@ -51,10 +54,10 @@
 cp ${TESTSRC}${FS}T.java ${THIS_DIR}
 
 
-${TESTJAVA}${FS}bin${FS}java ${BIT_FLAG} -fullversion
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} -fullversion
 
 ${TESTJAVA}${FS}bin${FS}javac T.java
 
-gcc -o invoke -I${TESTJAVA}/include -I${TESTJAVA}/include/linux invoke.c ${TESTJAVA}/jre/lib/i386/client/libjvm.so
+gcc -o invoke -I${TESTJAVA}/include -I${TESTJAVA}/include/linux invoke.c ${TESTJAVA}/jre/lib/${ARCH}/client/libjvm.so
 ./invoke
 exit $?
--- a/test/runtime/7020373/Test7020373.sh	Wed Sep 26 21:45:41 2012 -0700
+++ b/test/runtime/7020373/Test7020373.sh	Sat Oct 20 00:08:35 2012 -0700
@@ -2,10 +2,10 @@
 
 ##
 ## @test
-## @bug 7020373 7055247
+## @bug 7020373 7055247 7053586 7185550
 ## @key cte_test
 ## @summary JSR rewriting can overflow memory address size variables
-## @ignore Ignore it until 7053586 fixed
+## @ignore Ignore it as the 7053586 test uses lots of memory. See bug report for details.
 ## @run shell Test7020373.sh
 ##
 
@@ -27,27 +27,24 @@
   exit 1
 fi
 
-BIT_FLAG=""
-
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     NULL=/dev/null
     PS=":"
     FS="/"
-    ## for solaris, linux it's HOME
-    FILE_LOCATION=$HOME
-    if [ -f ${FILE_LOCATION}${FS}JDK64BIT -a ${OS} = "SunOS" ]
-    then
-        BIT_FLAG=`cat ${FILE_LOCATION}${FS}JDK64BIT | grep -v '^#'`
-    fi
     ;;
   Windows_* )
     NULL=NUL
     PS=";"
     FS="\\"
     ;;
+  CYGWIN_* )
+    NULL=/dev/null
+    PS=";"
+    FS="/"
+    ;;
   * )
     echo "Unrecognized system!"
     exit 1;
@@ -59,11 +56,11 @@
 
 THIS_DIR=`pwd`
 
-${TESTJAVA}${FS}bin${FS}java ${BIT_FLAG} -version
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} -version
 
 ${TESTJAVA}${FS}bin${FS}jar xvf ${TESTSRC}${FS}testcase.jar
 
-${TESTJAVA}${FS}bin${FS}java ${BIT_FLAG} OOMCrashClass4000_1 > test.out 2>&1
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} OOMCrashClass4000_1 > test.out 2>&1
 
 cat test.out
 
@@ -74,7 +71,7 @@
     echo "Test Failed"
     exit 1
 else
-    grep "java.lang.LinkageError" test.out
+    egrep "java.lang.LinkageError|java.lang.NoSuchMethodError|Main method not found in class OOMCrashClass4000_1|insufficient memory" test.out
     if [ $? = 0 ]
     then
         echo "Test Passed"
--- a/test/runtime/7051189/Xchecksig.sh	Wed Sep 26 21:45:41 2012 -0700
+++ b/test/runtime/7051189/Xchecksig.sh	Sat Oct 20 00:08:35 2012 -0700
@@ -1,5 +1,5 @@
 # 
-#  Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+#  Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
 #  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 # 
 #  This code is free software; you can redistribute it and/or modify it
@@ -41,20 +41,12 @@
 fi
 
 
-BIT_FLAG=""
-
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     FS="/"
-    ## for solaris, linux it's HOME
-    FILE_LOCATION=$HOME
-    if [ -f ${FILE_LOCATION}${FS}JDK64BIT -a ${OS} = "SunOS" ]
-    then
-        BIT_FLAG=`cat ${FILE_LOCATION}${FS}JDK64BIT`
-    fi
     ;;
-  Windows_* )
+  Windows_* | CYGWIN_* )
     printf "Not testing libjsig.so on Windows. PASSED.\n "
     exit 0
     ;;
@@ -69,20 +61,16 @@
 
 # LD_PRELOAD arch needs to match the binary we run, so run the java
 # 64-bit binary directly if we are testing 64-bit (bin/ARCH/java).
-
-# However JPRT runs: .../solaris_x64_5.10-debug/bin/java
-# ..which is 32-bit, when it has built the 64-bit version to test.
-#
-# How does this script know we are meant to run the 64-bit version?
-# Can check for the path of the binary containing "x64" on Solaris.
+# Check if TESTVMOPTS contains -d64, but we cannot use
+# java ${TESTVMOPTS} to run "java -d64" with LD_PRELOAD.
 
 if [ "${OS}" = "SunOS" ]
 then
-  printf  "SunOS test JAVA=${JAVA}"
-  printf ${JAVA} | grep x64 > /dev/null
+  printf  "SunOS test TESTVMOPTS = ${TESTVMOPTS}"
+  printf ${TESTVMOPTS} | grep d64 > /dev/null
   if [ $? -eq 0 ]
   then
-    printf "SunOS x64 test, forcing -d64\n"
+    printf "SunOS 64-bit test\n"
     BIT_FLAG=-d64
   fi
 fi
@@ -127,20 +115,19 @@
   printf "Skipping test: libjsig missing for given architecture: ${LIBJSIG}\n"
   exit 0
 fi
-# Use java -version to test, java version info appeas on stderr,
+# Use java -version to test, java version info appears on stderr,
 # the libjsig message we are removing appears on stdout.
 
 # grep returns zero meaning found, non-zero means not found:
 
-LD_PRELOAD=${LIBJSIG} ${JAVA} ${BIT_FLAG} -Xcheck:jni -version 2>&1  | grep "libjsig is activated"
-
+LD_PRELOAD=${LIBJSIG} ${JAVA} ${TESTVMOPTS} -Xcheck:jni -version 2>&1  | grep "libjsig is activated"
 if [ $? -eq 0 ]; then
   printf "Failed: -Xcheck:jni prints message when libjsig.so is loaded.\n"
   exit 1
 fi
 
 
-LD_PRELOAD=${LIBJSIG} ${JAVA} ${BIT_FLAG} -Xcheck:jni -verbose:jni -version 2>&1 | grep "libjsig is activated"
+LD_PRELOAD=${LIBJSIG} ${JAVA} ${TESTVMOPTS} -Xcheck:jni -verbose:jni -version 2>&1 | grep "libjsig is activated"
 if [ $? != 0 ]; then
   printf "Failed: -Xcheck:jni does not print message when libjsig.so is loaded and -verbose:jni is set.\n"
   exit 1
--- a/test/runtime/7110720/Test7110720.sh	Wed Sep 26 21:45:41 2012 -0700
+++ b/test/runtime/7110720/Test7110720.sh	Sat Oct 20 00:08:35 2012 -0700
@@ -37,7 +37,7 @@
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     FS="/"
     RM=/bin/rm
     CP=/bin/cp
@@ -55,6 +55,12 @@
     CP=cp
     MV=mv
     ;;
+  CYGWIN_* )
+    FS="/"
+    RM=rm
+    CP=cp
+    MV=mv
+    ;;
   * )
     echo "Unrecognized system!"
     exit 1;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/7116786/Test7116786.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,486 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test Test7116786
+ * @summary verify that VerifyError messages are as expected
+ * @library testcases.jar
+ * @run main/othervm -Xverify:all Test7116786
+ */
+
+
+/**
+ * This class contains information regarding when a VerifyError is thrown
+ * in the verifier.  Most of the data is informational only, and can be
+ * used to track down where and why VerifyErrors are thrown.  As such it
+ * is possible for the information to go out of date.
+ *
+ * The only fields used for the purpose of testing are 'caseName' and
+ * 'message'.  The 'caseName' corresponds to a classfile which exhibits
+ * the VerifyError, and the 'message' is a prefix that we expect the
+ * verify error message to start with.  If the 'message' doesn't match what
+ * we expect, it warrants investigation to see if we are still triggering
+ * the VerifyError that we expect.  It could simply be that the message
+ * changed, which is fine.
+ *
+ * Some cases are not testable, either because the code is probably unreachable
+ * or the test classfile would be too onerous to create.  These cases are
+ * marked with 'testable' == false, and the test runner will skip them.
+ */
+class Case {
+    private String caseName;    // Name of the case
+    private String file;        // Source file where VerifyError is thrown
+    private String location;    // enclosing function or switch case
+    private String description; // What causes this VerifyError
+    private String message;     // The VerifyError message used.
+
+    private boolean testable;   // Whether this case is testable or not.
+
+    public Case(String caseName, String file, boolean testable,
+                String location, String description, String message) {
+        this.caseName = caseName;
+        this.file = file;
+        this.testable = testable;
+        this.location = location;
+        this.description = description;
+        this.message = message;
+    }
+
+    String getCaseName() { return this.caseName; }
+    String getFile() { return this.file; }
+    String getLocation() { return this.location; }
+    String getDescription() { return this.description; }
+    String getMessage() { return this.message; }
+
+    boolean isTestable() { return this.testable; }
+}
+
+/**
+ * These are the locations in the source code where VerifyErrors are thrown
+ * as of today, 2012/07/18.  These may change as the verification code is
+ * modified, which is ok.  This test is trying to provide coverage for all
+ * VerifyErrors (just to make sure there are no crashes) and it's probably
+ * not necessary to update it every time the VM changes.
+ */
+class VerifyErrorCases {
+    public static final Case[] cases = {
+
+        new Case("case00", "stackMapFrame.cpp", true, "pop_stack_ex",
+                 "stack underflow",
+                 "Operand stack underflow"),
+
+        new Case("case01", "stackMapFrame.cpp", true, "pop_stack_ex",
+                 "stack pop not assignable to expected",
+                 "Bad type on operand stack"),
+
+        new Case("case02", "stackMapFrame.cpp", true, "get_local",
+                 "local index out-of-bounds",
+                 "Local variable table overflow"),
+
+        new Case("case03", "stackMapFrame.cpp", true, "get_local",
+                 "local not assignable to expected",
+                 "Bad local variable type"),
+
+        new Case("case04", "stackMapFrame.cpp", true, "get_local_2",
+                 "local index out-of-bounds [type2]",
+                 "get long/double overflows locals"),
+
+        new Case("case05", "stackMapFrame.cpp", true, "get_local_2",
+                 "local not assignabled to expected [type2]",
+                 "Bad local variable type"),
+
+        /* Unreachable: Can't split long/double on stack */
+        new Case("case06", "stackMapFrame.cpp", false, "get_local_2",
+                 "local second-word not assignabled to expected",
+                 "Bad local variable type"),
+
+        new Case("case07", "stackMapFrame.cpp", true, "set_local",
+                 "local index out-of-bounds",
+                 "Local variable table overflow"),
+
+        new Case("case08", "stackMapFrame.cpp", true, "set_local_2",
+                 "local index out-of-bounds [type2]",
+                 "Local variable table overflow"),
+
+        new Case("case09", "stackMapFrame.hpp", true, "push_stack",
+                 "stack overflow",
+                 "Operand stack overflow"),
+
+        new Case("case10", "stackMapFrame.hpp", true, "push_stack_2",
+                 "stack overflow [type2]",
+                 "Operand stack overflow"),
+
+        new Case("case11", "stackMapFrame.hpp", true, "pop_stack",
+                 "stack underflow",
+                 "Operand stack underflow"),
+
+        new Case("case12", "stackMapTable.cpp", true, "StackMapTable ctor",
+                 "stackmap offset beyond code size",
+                 "StackMapTable error: bad offset"),
+
+        new Case("case13", "stackMapTable.cpp", true, "match_stackmap",
+                 "no stackmap frame at expected location",
+                 "Expecting a stackmap frame at branch target "),
+
+        new Case("case14", "stackMapTable.cpp", true, "check_jump_target",
+                 "no stackmap frame at jump location or bad jump",
+                 "Inconsistent stackmap frames at branch target "),
+
+        new Case("case15", "stackMapTable.cpp", true, "check_new_object",
+                 "backward jump with uninit",
+                 "Uninitialized object exists on backward branch "),
+
+        /* Unreachable: wide instructions verified during bytecode analysis */
+        new Case("case16", "verifier.cpp", false, "loop header",
+                 "bad op in wide instruction",
+                 "Bad wide instruction"),
+
+        new Case("case17", "verifier.cpp", true, "case iaload",
+                 "TOS not X array",
+                 "Bad type on operand stack in iaload"),
+
+        new Case("case18", "verifier.cpp", true, "case baload",
+                 "TOS not X array",
+                 "Bad type on operand stack in baload"),
+
+        new Case("case19", "verifier.cpp", true, "case caload",
+                 "TOS not X array",
+                 "Bad type on operand stack in caload"),
+
+        new Case("case20", "verifier.cpp", true, "case saload",
+                 "TOS not X array",
+                 "Bad type on operand stack in saload"),
+
+        new Case("case21", "verifier.cpp", true, "case laload",
+                 "TOS not X array",
+                 "Bad type on operand stack in laload"),
+
+        new Case("case22", "verifier.cpp", true, "case faload",
+                 "TOS not X array",
+                 "Bad type on operand stack in faload"),
+
+        new Case("case23", "verifier.cpp", true, "case daload",
+                 "TOS not X array",
+                 "Bad type on operand stack in daload"),
+
+        new Case("case24", "verifier.cpp", true, "case aaload",
+                 "TOS not X array",
+                 "Bad type on operand stack in aaload"),
+
+        new Case("case25", "verifier.cpp", true, "case iastore",
+                 "TOS not int array",
+                 "Bad type on operand stack in iastore"),
+
+        new Case("case26", "verifier.cpp", true, "case bastore",
+                 "TOS not byte array",
+                 "Bad type on operand stack in bastore"),
+
+        new Case("case27", "verifier.cpp", true, "case castore",
+                 "TOS not char array",
+                 "Bad type on operand stack in castore"),
+
+        new Case("case28", "verifier.cpp", true, "case sastore",
+                 "TOS not short array",
+                 "Bad type on operand stack in sastore"),
+
+        new Case("case29", "verifier.cpp", true, "case lastore",
+                 "TOS not long array",
+                 "Bad type on operand stack in lastore"),
+
+        new Case("case30", "verifier.cpp", true, "case fastore",
+                 "TOS not float array",
+                 "Bad type on operand stack in fastore"),
+
+        new Case("case31", "verifier.cpp", true, "case dastore",
+                 "TOS not double array",
+                 "Bad type on operand stack in dastore"),
+
+        new Case("case32", "verifier.cpp", true, "case aastore",
+                 "TOS not object array",
+                 "Bad type on operand stack in aastore"),
+
+        /* Unreachable: In order to hit this case, we would need a
+         * category2_1st at TOS which is not possible. */
+        new Case("case33", "verifier.cpp", false, "case pop2",
+                 "TOS is category2_1st (would split)",
+                 "Bad type on operand stack in pop2"),
+
+        /* Unreachable: In order to hit this case, we would need a
+         * category2_1st at stack depth 2 with category_1 on TOS which is not
+         * possible. */
+        new Case("case34", "verifier.cpp", false, "case dup_x2",
+                 "TOS-1 is category2_1st (would split)",
+                 "Bad type on operand stack in dup_x2"),
+
+        /* Unreachable: In order to hit this case, we would need a
+         * category2_1st at TOS which is not possible. */
+        new Case("case35", "verifier.cpp", false, "case dup2",
+                 "TOS-1 is category2_1st (would split)",
+                 "Bad type on operand stack in dup2"),
+
+        /* Unreachable: In order to hit this case, we would need a
+         * category2_1st at TOS which is not possible. */
+        new Case("case36", "verifier.cpp", false, "case dup2_x1",
+                 "TOS-1 is category2_1st (would split)",
+                 "Bad type on operand stack in dup2_x1"),
+
+        /* Unreachable: In order to hit this case, we would need a
+         * category2_1st at TOS which is not possible. */
+        new Case("case37", "verifier.cpp", false, "case dup2_x2",
+                 "TOS-1 is category2_1st (would split)",
+                 "Bad type on operand stack in dup2_x2"),
+
+        /* Unreachable: In order to hit this case, we would need a
+         * category2_1st at stack depth 3 with either 2 category_1 or 1
+         * category_2 on TOS, which is not possible. */
+        new Case("case38", "verifier.cpp", false, "case dup2_x2",
+                 "TOS-3 is category2_1st (would split)",
+                 "Bad type on operand stack in dup2_x2"),
+
+        new Case("case39", "verifier.cpp", true, "case return",
+                 "return type of method is not void",
+                 "Method expects a return value"),
+
+        new Case("case40", "verifier.cpp", true, "case return",
+                 "return with uninitialized this ",
+                 "Constructor must call super() or this() before return"),
+
+        new Case("case41", "verifier.cpp", true, "case new",
+                 "cp index not a class type",
+                 "Illegal new instruction"),
+
+        new Case("case42", "verifier.cpp", true, "case arraylength",
+                 "TOS is not an array",
+                 "Bad type on operand stack in arraylength"),
+
+        new Case("case43", "verifier.cpp", true, "case multianewarray",
+                 "CP index does not refer to array type",
+                 "Illegal constant pool index in multianewarray instruction"),
+
+        new Case("case44", "verifier.cpp", true, "case multianewarray",
+                 "Bad dimension (<1) or does not match CP signature",
+                 "Illegal dimension in multianewarray instruction: "),
+
+        new Case("case45", "verifier.cpp", true, "case default",
+                 "Unrecognized bytecode",
+                 "Bad instruction: "),
+
+        new Case("case46", "verifier.cpp", true, "loop end",
+                 "control flow falls off method",
+                 "Control flow falls through code end"),
+
+        new Case("case47", "verifier.cpp", true, "generate_code_data",
+                 "illegal bytecode via RawBytecodeStream (breakpoint)",
+                 "Bad instruction"),
+
+        new Case("case48", "verifier.cpp", true, "generate_code_data",
+                 "illegal bytecode via RawBytecodeStream (other illegal)",
+                 "Bad instruction"),
+
+        new Case("case49", "verifier.cpp", true,
+                 "verify_exception_handler_table",
+                 "catch_type is not throwable",
+                 "Catch type is not a subclass of Throwable in " +
+                 "exception handler "),
+
+        new Case("case50", "verifier.cpp", true, "verify_stackmap_table",
+                 "missing a stack map frame @ target location (mid table)",
+                 "Expecting a stack map frame"),
+
+        new Case("case51", "verifier.cpp", true, "verify_stackmap_table",
+                 "stack map does not match?",
+                 "Instruction type does not match stack map"),
+
+        new Case("case52", "verifier.cpp", true, "verify_stackmap_table",
+                 "missing a stack map frame @ target location (end of table)",
+                 "Expecting a stack map frame"),
+
+        new Case("case53", "verifier.cpp", true,
+                 "verify_exception_handler_targets",
+                 "stackmap mismatch at exception handler",
+                 "Stack map does not match the one at exception handler "),
+
+        new Case("case54", "verifier.cpp", true, "verify_cp_index",
+                 "constant pool index is out-of-bounds",
+                 "Illegal constant pool index "),
+
+        new Case("case55", "verifier.cpp", true, "verify_cp_type",
+                 "constant pool entry is not expected type",
+                 "Illegal type at constant pool entry "),
+
+        new Case("case56", "verifier.cpp", true, "verify_cp_class_type",
+                 "constant pool entry is not an object type",
+                 "Illegal type at constant pool entry "),
+
+        /* Unreachable: verify_cp_type gates this case */
+        new Case("case57", "verifier.cpp", false, "verify_ldc",
+                 "invalid constant pool index in ldc",
+                 "Invalid index in ldc"),
+
+        new Case("case58", "verifier.cpp", true, "verify_switch",
+                 "bad switch padding",
+                 "Nonzero padding byte in lookswitch or tableswitch"),
+
+        new Case("case59", "verifier.cpp", true, "verify_switch",
+                 "tableswitch low is greater than high",
+                 "low must be less than or equal to high in tableswitch"),
+
+        /* Unreachable on 64-bit?  Only way to get here is to overflow
+         * the 'keys' variable which can't happen on 64-bit since we're dealing
+         * with 32-bit values.  Perhaps reachable on 32-bit but the
+         * triggering class would be quite large. */
+        new Case("case60", "verifier.cpp", false, "verify_switch",
+                 "high - low + 1 < 0 (overflow?)",
+                 "too many keys in tableswitch"),
+
+        /* Would have to create a 16G classfile to trip this.  Possible but
+         * not reasonable to do in a test.  */
+        new Case("case61", "verifier.cpp", false, "verify_switch",
+                 "lookupswitch keys < 0",
+                 "number of keys in lookupswitch less than 0"),
+
+        new Case("case62", "verifier.cpp", true, "verify_switch",
+                 "lookupswitch keys out-of-order",
+                 "Bad lookupswitch instruction"),
+
+        /* Unreachable: Class file parser verifies Fieldref contents */
+        new Case("case63", "verifier.cpp", false, "verify_field_instructions",
+                 "referenced class is not an CP object",
+                 "Expecting reference to class in class "),
+
+        new Case("case64", "verifier.cpp", true, "verify_field_instructions",
+                 "TOS not assignable to field type in putfield",
+                 "Bad type on operand stack in putfield"),
+
+        new Case("case65", "verifier.cpp", true, "verify_field_instructions",
+                 "TOS not assignable to class when accessing protected field",
+                 "Bad access to protected data in getfield"),
+
+        new Case("case66", "verifier.cpp", true, "verify_invoke_init",
+                 "Uninit_this is not of the current type or it's supertype",
+                 "Bad <init> method call"),
+
+        /* Unreachable:  Stack map parsing ensures valid type and new
+         * instructions have a valid BCI. */
+        new Case("case67", "verifier.cpp", false, "verify_invoke_init",
+                 "Uninit type with bad new instruction index",
+                 "Expecting new instruction"),
+
+        new Case("case68", "verifier.cpp", true, "verify_invoke_init",
+                 "calling other class's <init> method",
+                 "Call to wrong <init> method"),
+
+        new Case("case69", "verifier.cpp", true, "verify_invoke_init",
+                 "Calling protected <init> and type unassignable from current",
+                 "Bad access to protected <init> method"),
+
+        new Case("case70", "verifier.cpp", true, "verify_invoke_init",
+                 "TOS is not an uninitialized (or Uninit_this) type",
+                 "Bad operand type when invoking <init>"),
+
+        new Case("case71", "verifier.cpp", true, "verify_invoke_instructions",
+                 "Arg count in instruction doesn't match signature",
+                 "Inconsistent args count operand in invokeinterface"),
+
+        new Case("case72", "verifier.cpp", true, "verify_invoke_instructions",
+                 "Non-zero pad in invokeinterface",
+                 "Fourth operand byte of invokeinterface must be zero"),
+
+        new Case("case73", "verifier.cpp", true, "verify_invoke_instructions",
+                 "Non-zero pad in invokedynamic",
+                 "Third and fourth operand bytes of " +
+                 "invokedynamic must be zero"),
+
+        new Case("case74", "verifier.cpp", true, "verify_invoke_instructions",
+                 "Non-invokespecial trying to invoke a '<' method",
+                 "Illegal call to internal method"),
+
+        new Case("case75", "verifier.cpp", true, "verify_invoke_instructions",
+                 "invokespecial and current unassignable from referenced type",
+                 "Bad invokespecial instruction: current class isn't " +
+                 "assignable to reference class."),
+
+        new Case("case76", "verifier.cpp", true, "verify_invoke_instructions",
+                 "TOS not assignable to current when calling protected method",
+                 "Bad access to protected data in invokevirtual"),
+
+        /* Unreachable:  class file parser enforces void signature */
+        new Case("case77", "verifier.cpp", false, "verify_invoke_instructions",
+                 "<init> method is not void return",
+                 "Return type must be void in <init> method"),
+
+        new Case("case78", "verifier.cpp", true, "get_newarray_type",
+                 "newarray type invalid",
+                 "Illegal newarray instruction"),
+
+        new Case("case79", "verifier.cpp", true, "verify_return_value",
+                 "void return from method which has a return value",
+                 "Method expects a return value"),
+
+        new Case("case80", "verifier.cpp", true, "verify_return_value",
+                 "TOS type does not match signature",
+                 "Bad return type"),
+
+        new Case("case81", "verifier.cpp", true, "verify_stackmap_table",
+                 "stack map does not match (flags)",
+                 "Instruction type does not match stack map")
+    };
+}
+
+public class Test7116786 {
+    public static void main(String argv[]) throws Exception {
+        for (Case c : VerifyErrorCases.cases) {
+            System.out.println("******** " + c.getCaseName() + " ********");
+            if (c.isTestable()) {
+                try {
+                    ClassLoader cl = Test7116786.class.getClassLoader();
+                    Class<?> cls = Class.forName(c.getCaseName(), true, cl);
+                    throw new RuntimeException(
+                        "++ FAIL: No verify error encountered");
+                } catch (VerifyError ve) {
+                    String message = c.getMessage();
+                    String veMessage = ve.getMessage();
+                    System.out.print(veMessage);
+                    if (!veMessage.startsWith(message)) {
+                        // We're not seeing the message we expect.  Could be
+                        // that we've gotten the wrong VerifyError case, or
+                        // maybe the message changed.
+                        System.out.println("++ FAIL? " +
+                            "Message does not match what was expected: " +
+                            message);
+                        continue;
+                    }
+                    if (!veMessage.contains("Exception Details:") &&
+                        !veMessage.contains("Reason:")) {
+                        System.out.println("++ FAIL: No details found");
+                        throw new RuntimeException("FAIL: No details found");
+                    }
+                    System.out.println("++ PASS");
+                }
+            } else {
+               System.out.println("++ SKIPPED");
+            }
+        }
+    }
+}
Binary file test/runtime/7116786/testcases.jar has changed
--- a/test/runtime/7158800/Test7158800.sh	Wed Sep 26 21:45:41 2012 -0700
+++ b/test/runtime/7158800/Test7158800.sh	Sat Oct 20 00:08:35 2012 -0700
@@ -46,7 +46,7 @@
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     NULL=/dev/null
     PS=":"
     FS="/"
@@ -56,6 +56,11 @@
     PS=";"
     FS="\\"
     ;;
+  CYGWIN_* )
+    NULL=/dev/null
+    PS=";"
+    FS="/"
+    ;;
   * )
     echo "Unrecognized system!"
     exit 1;
@@ -67,13 +72,13 @@
 
 THIS_DIR=`pwd`
 
-${TESTJAVA}${FS}bin${FS}java -fullversion
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} -fullversion
 
 ${TESTJAVA}${FS}bin${FS}javac -d . ${TESTSRC}${FS}InternTest.java
 
 cp ${TESTSRC}${FS}badstrings.txt .
 
-${TESTJAVA}${FS}bin${FS}java -XX:+PrintStringTableStatistics -XX:+TraceSafepointCleanupTime InternTest bad > test.out 2>&1 &
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} -XX:+PrintStringTableStatistics -XX:+TraceSafepointCleanupTime InternTest bad > test.out 2>&1 &
 C_PID=$!
 
 sleep 60
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/7158988/FieldMonitor.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2012 SAP AG.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test FieldMonitor.java
+ * @bug 7158988
+ * @summary verify jvm does not crash while debugging
+ * @run shell TestFieldMonitor.sh
+ * @author axel.siebenborn@sap.com
+ */
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import com.sun.jdi.Bootstrap;
+import com.sun.jdi.Field;
+import com.sun.jdi.ReferenceType;
+import com.sun.jdi.VirtualMachine;
+import com.sun.jdi.connect.Connector;
+import com.sun.jdi.connect.IllegalConnectorArgumentsException;
+import com.sun.jdi.connect.LaunchingConnector;
+import com.sun.jdi.connect.VMStartException;
+import com.sun.jdi.event.ClassPrepareEvent;
+import com.sun.jdi.event.Event;
+import com.sun.jdi.event.EventQueue;
+import com.sun.jdi.event.EventSet;
+import com.sun.jdi.event.ModificationWatchpointEvent;
+import com.sun.jdi.event.VMDeathEvent;
+import com.sun.jdi.event.VMDisconnectEvent;
+import com.sun.jdi.request.ClassPrepareRequest;
+import com.sun.jdi.request.EventRequest;
+import com.sun.jdi.request.EventRequestManager;
+import com.sun.jdi.request.ModificationWatchpointRequest;
+
+public class FieldMonitor {
+
+  public static final String CLASS_NAME = "TestPostFieldModification";
+  public static final String FIELD_NAME = "value";
+  public static final String ARGUMENTS = "-Xshare:off -XX:+PrintGC";
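+  // Options passed to the debuggee VM through the launching connector's
+  // "options" argument (see connectorArguments below).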
+
+  public static void main(String[] args)
+      throws IOException, InterruptedException {
+
+    StringBuffer sb = new StringBuffer();
+
+    for (int i=0; i < args.length; i++) {
+        sb.append(' ');
+        sb.append(args[i]);
+    }
+    //VirtualMachine vm = launchTarget(sb.toString());
+    VirtualMachine vm = launchTarget(CLASS_NAME);
+
+    System.out.println("Vm launched");
+    // set watch field on already loaded classes
+    List<ReferenceType> referenceTypes = vm
+        .classesByName(CLASS_NAME);
+    for (ReferenceType refType : referenceTypes) {
+      addFieldWatch(vm, refType);
+    }
+    // watch for loaded classes
+    addClassWatch(vm);
+
+    // process events
+    EventQueue eventQueue = vm.eventQueue();
+    // (the vm is resumed below, once output redirection is set up)
+
+    Process process = vm.process();
+
+
+    // Copy target's output and error to our output and error.
+    Thread outThread = new StreamRedirectThread("out reader", process.getInputStream());
+    Thread errThread = new StreamRedirectThread("error reader", process.getErrorStream());
+
+    errThread.start();
+    outThread.start();
+
+
+    vm.resume();
+    boolean connected = true;
+    while (connected) {
+      EventSet eventSet = eventQueue.remove();
+      for (Event event : eventSet) {
+        if (event instanceof VMDeathEvent
+            || event instanceof VMDisconnectEvent) {
+          // exit
+          connected = false;
+        } else if (event instanceof ClassPrepareEvent) {
+          // watch field on loaded class
+          System.out.println("ClassPrepareEvent");
+          ClassPrepareEvent classPrepEvent = (ClassPrepareEvent) event;
+          ReferenceType refType = classPrepEvent
+              .referenceType();
+          addFieldWatch(vm, refType);
+        } else if (event instanceof ModificationWatchpointEvent) {
+          System.out.println("sleep for 500 ms");
+          Thread.sleep(500);
+          System.out.println("resume...");
+
+          ModificationWatchpointEvent modEvent = (ModificationWatchpointEvent) event;
+          System.out.println("old="
+              + modEvent.valueCurrent());
+          System.out.println("new=" + modEvent.valueToBe());
+          System.out.println();
+        }
+      }
+      eventSet.resume();
+    }
+    // Shutdown begins when event thread terminates
+    try {
+        errThread.join(); // Make sure output is forwarded
+        outThread.join();
+    } catch (InterruptedException exc) {
+        // we don't interrupt
+    }
+  }
+
+  /**
+   * Find a com.sun.jdi.CommandLineLaunch connector
+   */
+  static LaunchingConnector findLaunchingConnector() {
+    List <Connector> connectors = Bootstrap.virtualMachineManager().allConnectors();
+    Iterator <Connector> iter = connectors.iterator();
+    while (iter.hasNext()) {
+      Connector connector = iter.next();
+      if (connector.name().equals("com.sun.jdi.CommandLineLaunch")) {
+        return (LaunchingConnector)connector;
+      }
+    }
+    throw new Error("No launching connector");
+  }
+  /**
+   * Return the launching connector's arguments.
+   */
+  static Map<String, Connector.Argument> connectorArguments(LaunchingConnector connector, String mainArgs) {
+      Map<String,Connector.Argument> arguments = connector.defaultArguments();
+      for (String key : arguments.keySet()) {
+        System.out.println(key);
+      }
+
+      Connector.Argument mainArg = (Connector.Argument)arguments.get("main");
+      if (mainArg == null) {
+          throw new Error("Bad launching connector");
+      }
+      mainArg.setValue(mainArgs);
+
+      Connector.Argument optionsArg = (Connector.Argument)arguments.get("options");
+      if (optionsArg == null) {
+        throw new Error("Bad launching connector");
+      }
+      optionsArg.setValue(ARGUMENTS);
+      return arguments;
+  }
+
+  static VirtualMachine launchTarget(String mainArgs) {
+    LaunchingConnector connector = findLaunchingConnector();
+    Map<String, Connector.Argument> arguments = connectorArguments(connector, mainArgs);
+    try {
+        return (VirtualMachine) connector.launch(arguments);
+    } catch (IOException exc) {
+        throw new Error("Unable to launch target VM: " + exc);
+    } catch (IllegalConnectorArgumentsException exc) {
+        throw new Error("Internal error: " + exc);
+    } catch (VMStartException exc) {
+        throw new Error("Target VM failed to initialize: " +
+                        exc.getMessage());
+    }
+  }
+
+
+  private static void addClassWatch(VirtualMachine vm) {
+    EventRequestManager erm = vm.eventRequestManager();
+    ClassPrepareRequest classPrepareRequest = erm
+        .createClassPrepareRequest();
+    classPrepareRequest.addClassFilter(CLASS_NAME);
+    classPrepareRequest.setEnabled(true);
+  }
+
+
+  private static void addFieldWatch(VirtualMachine vm,
+      ReferenceType refType) {
+    EventRequestManager erm = vm.eventRequestManager();
+    Field field = refType.fieldByName(FIELD_NAME);
+    ModificationWatchpointRequest modificationWatchpointRequest = erm
+        .createModificationWatchpointRequest(field);
+    modificationWatchpointRequest.setSuspendPolicy(EventRequest.SUSPEND_EVENT_THREAD);
+    modificationWatchpointRequest.setEnabled(true);
+  }
+}
+
+class StreamRedirectThread extends Thread {
+
+  private final BufferedReader in;
+
+  private static final int BUFFER_SIZE = 2048;
+
+  /**
+   * Set up for copy.
+   * @param name  Name of the thread
+   * @param in    Stream to copy from
+   */
+  StreamRedirectThread(String name, InputStream in) {
+    super(name);
+    this.in = new BufferedReader(new InputStreamReader(in));
+  }
+
+  /**
+   * Copy.
+   */
+  public void run() {
+    try {
+      String line;
+      while ((line = in.readLine()) != null) {
+        System.out.println("testvm: " + line);
+      }
+      System.out.flush();
+    } catch (IOException exc) {
+      System.err.println("Child I/O Transfer - " + exc);
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/7158988/TestFieldMonitor.sh	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,75 @@
+#!/bin/sh
+
+if [ "${TESTSRC}" = "" ]
+then TESTSRC=.
+fi
+
+if [ "${TESTJAVA}" = "" ]
+then
+  PARENT=`dirname \`which java\``
+  TESTJAVA=`dirname ${PARENT}`
+  echo "TESTJAVA not set, selecting " ${TESTJAVA}
+  echo "If this is incorrect, try setting the variable manually."
+fi
+
+if [ "${TESTCLASSES}" = "" ]
+then
+  echo "TESTCLASSES not set.  Test cannot execute.  Failed."
+  exit 1
+fi
+
+# set platform-dependent variables
+OS=`uname -s`
+case "$OS" in
+  SunOS | Linux | Darwin)
+    NULL=/dev/null
+    PS=":"
+    FS="/"
+    ;;
+  Windows_95 | Windows_98 | Windows_ME )
+    NULL=NUL
+    PS=";"
+    FS="\\"
+    echo "Test skipped, only for WinNT"
+    exit 0
+    ;;
+  Windows_NT )
+    NULL=NUL
+    PS=";"
+    FS="\\"
+    ;;
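+  # CYGWIN_NT* must come before the more general CYGWIN_* pattern below,
+  # since the first matching case wins.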
+  CYGWIN_NT* )
+    NULL=/dev/null
+    PS=";"
+    FS="/"
+    ;;
+  CYGWIN_* )
+    NULL=/dev/null
+    PS=";"
+    FS="/"
+    echo "Test skipped, only for WinNT"
+    exit 0
+    ;;
+  * )
+    echo "Unrecognized system!"
+    exit 1;
+    ;;
+esac
+
+#CLASSPATH=.${PS}${TESTCLASSES} ; export CLASSPATH
+
+cp ${TESTSRC}${FS}*.java .
+
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} -fullversion
+
+${TESTJAVA}${FS}bin${FS}javac -classpath .${PS}$TESTJAVA${FS}lib${FS}tools.jar *.java
+
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} -classpath .${PS}$TESTJAVA${FS}lib${FS}tools.jar FieldMonitor > test.out
+
+grep "A fatal error has been detected" test.out > ${NULL}
+if [ $? = 0 ]; then
+    cat test.out
+    STATUS=1
+fi
+
+exit $STATUS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/7158988/TestPostFieldModification.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2012 SAP AG.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public class TestPostFieldModification {
+
+  public String value;  // watch modification of value
+
+  public static void main(String[] args){
+
+    System.out.println("Start threads");
+    // this thread modifies the field 'value'
+    new Thread() {
+      TestPostFieldModification test = new TestPostFieldModification();
+      public void run() {
+        test.value="test";
+        for(int i = 0; i < 10; i++) {
+          test.value += new String("_test");
+        }
+      }
+    }.start();
+
+    // this thread is used to trigger a gc
+    Thread d = new Thread() {
+      public void run() {
+        while(true) {
+          try {
+            Thread.sleep(100);
+          } catch (InterruptedException e) {
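+            // Ignore interrupts; the daemon loop simply keeps triggering GCs.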
+
+          }
+          System.gc();
+        }
+      }
+    };
+    d.setDaemon(true);
+    d.start();
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/7167069/PrintAsFlag.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * Note that in the run command below the only important flag is PrintCommandLineFlags.
+ * The others are just flags of all types; bool, intx, uintx, uint64_t, double and ccstr.
+ *
+ * @test PrintAsFlag
+ * @summary verify that Flag::print_as_flag() works correctly. This is used by "jinfo -flag" and -XX:+PrintCommandLineFlags.
+ * @run main/othervm -XX:+PrintCommandLineFlags -XX:-ShowMessageBoxOnError -XX:BiasedLockingStartupDelay=4000 -XX:ParallelGCThreads=4 -XX:MaxRAM=1G -XX:CMSSmallCoalSurplusPercent=1.05 -XX:ErrorFile="file" PrintAsFlag
+ */
+
+public class PrintAsFlag {
+  public static void main(String... args) {
+    System.out.printf("Done");
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/sanity/WBApi.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,13 @@
+/*
+ * @test WBApi
+ * @summary verify that whitebox functions can be linked and executed
+ * @run compile -J-XX:+UnlockDiagnosticVMOptions -J-XX:+WhiteBoxAPI WBApi.java
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI WBApi
+ */
+
+import sun.hotspot.WhiteBox;
+public class WBApi {
+    public static void main(String... args) {
+        System.out.printf("args at: %x\n",WhiteBox.getWhiteBox().getObjectAddress(args));
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/serviceability/ParserTest.java	Sat Oct 20 00:08:35 2012 -0700
@@ -0,0 +1,180 @@
+/*
+ * @test ParserTest
+ * @summary verify that diagnostic command arguments are parsed correctly
+ * @run compile -J-XX:+UnlockDiagnosticVMOptions -J-XX:+WhiteBoxAPI ParserTest.java
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI ParserTest
+ */
+
+import java.math.BigInteger;
+
+import sun.hotspot.parser.DiagnosticCommand;
+import sun.hotspot.parser.DiagnosticCommand.DiagnosticArgumentType;
+import sun.hotspot.WhiteBox;
+
+public class ParserTest {
+    WhiteBox wb;
+
+    public ParserTest() throws Exception {
+        wb = WhiteBox.getWhiteBox();
+
+        testNanoTime();
+        testJLong();
+        testBool();
+        testQuotes();
+        testMemorySize();
+    }
+
+    public static void main(String... args) throws Exception  {
+         new ParserTest();
+    }
+
+    public void testNanoTime() throws Exception {
+        String name = "name";
+        DiagnosticCommand arg = new DiagnosticCommand(name,
+                "desc", DiagnosticArgumentType.NANOTIME,
+                false, "0");
+        DiagnosticCommand[] args = {arg};
+
+        BigInteger bi = new BigInteger("7");
+        //These should work
+        parse(name, bi.toString(), name + "=7ns", args);
+
+        bi = bi.multiply(BigInteger.valueOf(1000));
+        parse(name, bi.toString(), name + "=7us", args);
+
+        bi = bi.multiply(BigInteger.valueOf(1000));
+        parse(name, bi.toString(), name + "=7ms", args);
+
+        bi = bi.multiply(BigInteger.valueOf(1000));
+        parse(name, bi.toString(), name + "=7s", args);
+
+        bi = bi.multiply(BigInteger.valueOf(60));
+        parse(name, bi.toString() , name + "=7m", args);
+
+        bi = bi.multiply(BigInteger.valueOf(60));
+        parse(name, bi.toString() , name + "=7h", args);
+
+        bi = bi.multiply(BigInteger.valueOf(24));
+        parse(name, bi.toString() , name + "=7d", args);
+
+        parse(name, "0", name + "=0", args);
+
+        shouldFail(name + "=7xs", args);
+        shouldFail(name + "=7mms", args);
+        shouldFail(name + "=7f", args);
+        //Currently, only value 0 is allowed without unit
+        shouldFail(name + "=7", args);
+    }
+
+    public void testJLong() throws Exception {
+        String name = "name";
+        DiagnosticCommand arg = new DiagnosticCommand(name,
+                "desc", DiagnosticArgumentType.JLONG,
+                false, "0");
+        DiagnosticCommand[] args = {arg};
+
+        wb.parseCommandLine(name + "=10", args);
+        parse(name, "10", name + "=10", args);
+        parse(name, "-5", name + "=-5", args);
+
+        //shouldFail(name + "=12m", args); <-- should fail, doesn't
+    }
+
+    public void testBool() throws Exception {
+        String name = "name";
+        DiagnosticCommand arg = new DiagnosticCommand(name,
+                "desc", DiagnosticArgumentType.BOOLEAN,
+                false, "false");
+        DiagnosticCommand[] args = {arg};
+
+        parse(name, "true", name + "=true", args);
+        parse(name, "false", name + "=false", args);
+        parse(name, "true", name, args);
+
+        //Parsing an empty command line exercises the default value
+        //of the parameter "name"
+        parse(name, "false", "", args);
+    }
+
+    public void testQuotes() throws Exception {
+        String name = "name";
+        DiagnosticCommand arg1 = new DiagnosticCommand(name,
+                "desc", DiagnosticArgumentType.STRING,
+                false, null);
+        DiagnosticCommand arg2 = new DiagnosticCommand("arg",
+                "desc", DiagnosticArgumentType.STRING,
+                false, null);
+        DiagnosticCommand[] args = {arg1, arg2};
+
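+        // Quoting allows a value (or an argument name) to contain spaces.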
+        // try with a quoted value
+        parse(name, "Recording 1", name + "=\"Recording 1\"", args);
+        // try with a quoted argument
+        parse(name, "myrec", "\"" + name + "\"" + "=myrec", args);
+        // try with both a quoted value and a quoted argument
+        parse(name, "Recording 1", "\"" + name + "\"" + "=\"Recording 1\"", args);
+
+        // now the same thing but with other arguments after
+
+        // try with a quoted value
+        parse(name, "Recording 1", name + "=\"Recording 1\",arg=value", args);
+        // try with a quoted argument
+        parse(name, "myrec", "\"" + name + "\"" + "=myrec,arg=value", args);
+        // try with both a quoted value and a quoted argument
+        parse(name, "Recording 1", "\"" + name + "\"" + "=\"Recording 1\",arg=value", args);
+    }
+
+    public void testMemorySize() throws Exception {
+        String name = "name";
+        String defaultValue = "1024";
+        DiagnosticCommand arg = new DiagnosticCommand(name,
+                "desc", DiagnosticArgumentType.MEMORYSIZE,
+                false, defaultValue);
+        DiagnosticCommand[] args = {arg};
+
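+        //Memory sizes use binary units: each step below scales the expected
+        //value by 1024 (b -> k -> m -> g)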
+        BigInteger bi = new BigInteger("7");
+        parse(name, bi.toString(), name + "=7b", args);
+
+        bi = bi.multiply(BigInteger.valueOf(1024));
+        parse(name, bi.toString(), name + "=7k", args);
+
+        bi = bi.multiply(BigInteger.valueOf(1024));
+        parse(name, bi.toString(), name + "=7m", args);
+
+        bi = bi.multiply(BigInteger.valueOf(1024));
+        parse(name, bi.toString(), name + "=7g", args);
+        parse(name, defaultValue, "", args);
+
+        //Known parser bugs: both malformed sizes should be rejected but are
+        //currently accepted, so these checks stay disabled:
+        //shouldFail(name + "=7gg", args);
+        //shouldFail(name + "=7t", args);
+    }
+
+    public void parse(String searchName, String expectedValue,
+            String cmdLine, DiagnosticCommand[] argumentTypes) throws Exception {
+        //parseCommandLine will return a flat object array of the form
+        //{<name of parsed argument>, <value of parsed argument>, ... }
+        Object[] res = wb.parseCommandLine(cmdLine, argumentTypes);
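+        //The result comes in (name, value) pairs, so step through it two
+        //entries at a time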
+        for (int i = 0; i < res.length-1; i+=2) {
+            String parsedName = (String) res[i];
+            if (searchName.equals(parsedName)) {
+                String parsedValue = (String) res[i+1];
+                if (expectedValue.equals(parsedValue)) {
+                    return;
+                } else {
+                    throw new Exception("Parsing of cmdline '" + cmdLine + "' failed!\n"
+                            + searchName + " parsed as " + parsedValue
+                            + "! Expected: " + expectedValue);
+                }
+            }
+        }
+        throw new Exception(searchName + " was not found among the parsed arguments!");
+    }
+
+    private void shouldFail(String argument, DiagnosticCommand[] argumentTypes) throws Exception {
+        try {
+            wb.parseCommandLine(argument, argumentTypes);
+            throw new Exception("Parser accepted argument: " + argument);
+        } catch (IllegalArgumentException e) {
+            //expected: the parser rejects the malformed argument
+        }
+    }
+}