changeset 645:bd441136a5ce

Merge
author kvn
date Thu, 19 Mar 2009 09:13:24 -0700
parents ba50942c8138 (current diff) 039a914095f4 (diff)
children 59f139e8a8d1 60bfce711da4 c89f86385056
files src/cpu/sparc/vm/sparc.ad src/cpu/x86/vm/assembler_x86.cpp src/cpu/x86/vm/c1_LIRAssembler_x86.cpp src/cpu/x86/vm/c1_Runtime1_x86.cpp src/cpu/x86/vm/interp_masm_x86_32.cpp src/cpu/x86/vm/interp_masm_x86_64.cpp src/cpu/x86/vm/stubGenerator_x86_32.cpp src/cpu/x86/vm/stubGenerator_x86_64.cpp src/cpu/x86/vm/x86_32.ad src/cpu/x86/vm/x86_64.ad src/os/linux/vm/os_linux.cpp src/os/solaris/vm/os_solaris.cpp src/os/windows/vm/os_windows.cpp src/share/vm/classfile/vmSymbols.hpp src/share/vm/gc_implementation/g1/concurrentMark.cpp src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp src/share/vm/includeDB_core src/share/vm/memory/genCollectedHeap.cpp src/share/vm/memory/universe.cpp src/share/vm/memory/universe.hpp src/share/vm/oops/oop.inline.hpp src/share/vm/opto/classes.hpp src/share/vm/opto/compile.cpp src/share/vm/opto/graphKit.cpp src/share/vm/opto/lcm.cpp src/share/vm/opto/matcher.cpp src/share/vm/runtime/arguments.cpp src/share/vm/runtime/globals.hpp src/share/vm/runtime/os.hpp
diffstat 81 files changed, 2366 insertions(+), 959 deletions(-) [+]
line wrap: on
line diff
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/Debugger.java	Wed Mar 18 11:37:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/Debugger.java	Thu Mar 19 09:13:24 2009 -0700
@@ -118,9 +118,9 @@
   public long getJIntSize();
   public long getJLongSize();
   public long getJShortSize();
-  public long getHeapBase();
   public long getHeapOopSize();
-  public long getLogMinObjAlignmentInBytes();
+  public long getNarrowOopBase();
+  public int  getNarrowOopShift();
 
   public ReadResult readBytesFromProcess(long address, long numBytes)
     throws DebuggerException;
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java	Wed Mar 18 11:37:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java	Thu Mar 19 09:13:24 2009 -0700
@@ -56,8 +56,8 @@
   // heap data.
   protected long oopSize;
   protected long heapOopSize;
-  protected long heapBase;                 // heap base for compressed oops.
-  protected long logMinObjAlignmentInBytes; // Used to decode compressed oops.
+  protected long narrowOopBase;  // heap base for compressed oops.
+  protected int  narrowOopShift; // shift to decode compressed oops.
   // Should be initialized if desired by calling initCache()
   private PageCache cache;
 
@@ -159,10 +159,10 @@
     javaPrimitiveTypesConfigured = true;
   }
 
-  public void putHeapConst(long heapBase, long heapOopSize, long logMinObjAlignmentInBytes) {
-    this.heapBase = heapBase;
+  public void putHeapConst(long heapOopSize, long narrowOopBase, int narrowOopShift) {
     this.heapOopSize = heapOopSize;
-    this.logMinObjAlignmentInBytes = logMinObjAlignmentInBytes;
+    this.narrowOopBase = narrowOopBase;
+    this.narrowOopShift = narrowOopShift;
   }
 
   /** May be called by subclasses if desired to initialize the page
@@ -459,7 +459,7 @@
     long value = readCInteger(address, getHeapOopSize(), true);
     if (value != 0) {
       // See oop.inline.hpp decode_heap_oop
-      value = (long)(heapBase + (long)(value << logMinObjAlignmentInBytes));
+      value = (long)(narrowOopBase + (long)(value << narrowOopShift));
     }
     return value;
   }
@@ -545,10 +545,10 @@
     return heapOopSize;
   }
 
-  public long getHeapBase() {
-    return heapBase;
+  public long getNarrowOopBase() {
+    return narrowOopBase;
   }
-  public long getLogMinObjAlignmentInBytes() {
-    return logMinObjAlignmentInBytes;
+  public int getNarrowOopShift() {
+    return narrowOopShift;
   }
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/JVMDebugger.java	Wed Mar 18 11:37:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/JVMDebugger.java	Thu Mar 19 09:13:24 2009 -0700
@@ -42,5 +42,5 @@
                                               long jintSize,
                                               long jlongSize,
                                               long jshortSize);
-  public void putHeapConst(long heapBase, long heapOopSize, long logMinObjAlignment);
+  public void putHeapConst(long heapOopSize, long narrowOopBase, int narrowOopShift);
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebugger.java	Wed Mar 18 11:37:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebugger.java	Thu Mar 19 09:13:24 2009 -0700
@@ -65,9 +65,10 @@
   public long      getJIntSize() throws RemoteException;
   public long      getJLongSize() throws RemoteException;
   public long      getJShortSize() throws RemoteException;
-  public long      getHeapBase() throws RemoteException;
   public long      getHeapOopSize() throws RemoteException;
-  public long      getLogMinObjAlignmentInBytes() throws RemoteException;
+  public long      getNarrowOopBase() throws RemoteException;
+  public int       getNarrowOopShift() throws RemoteException;
+
   public boolean   areThreadsEqual(long addrOrId1, boolean isAddress1,
                                    long addrOrId2, boolean isAddress2) throws RemoteException;
   public int       getThreadHashCode(long addrOrId, boolean isAddress) throws RemoteException;
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Wed Mar 18 11:37:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Thu Mar 19 09:13:24 2009 -0700
@@ -85,9 +85,9 @@
       jlongSize    = remoteDebugger.getJLongSize();
       jshortSize   = remoteDebugger.getJShortSize();
       javaPrimitiveTypesConfigured = true;
-      heapBase     = remoteDebugger.getHeapBase();
+      narrowOopBase  = remoteDebugger.getNarrowOopBase();
+      narrowOopShift = remoteDebugger.getNarrowOopShift();
       heapOopSize  = remoteDebugger.getHeapOopSize();
-      logMinObjAlignmentInBytes  = remoteDebugger.getLogMinObjAlignmentInBytes();
     }
     catch (RemoteException e) {
       throw new DebuggerException(e);
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerServer.java	Wed Mar 18 11:37:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerServer.java	Thu Mar 19 09:13:24 2009 -0700
@@ -114,17 +114,18 @@
     return debugger.getJShortSize();
   }
 
-  public long getHeapBase() throws RemoteException {
-    return debugger.getHeapBase();
-  }
-
   public long getHeapOopSize() throws RemoteException {
     return debugger.getHeapOopSize();
   }
 
-  public long getLogMinObjAlignmentInBytes() throws RemoteException {
-    return debugger.getLogMinObjAlignmentInBytes();
+  public long getNarrowOopBase() throws RemoteException {
+    return debugger.getNarrowOopBase();
   }
+
+  public int  getNarrowOopShift() throws RemoteException {
+    return debugger.getNarrowOopShift();
+  }
+
   public boolean   areThreadsEqual(long addrOrId1, boolean isAddress1,
                                    long addrOrId2, boolean isAddress2) throws RemoteException {
     ThreadProxy t1 = getThreadProxy(addrOrId1, isAddress1);
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java	Wed Mar 18 11:37:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java	Thu Mar 19 09:13:24 2009 -0700
@@ -53,7 +53,8 @@
   // system obj array klass object
   private static sun.jvm.hotspot.types.OopField systemObjArrayKlassObjField;
 
-  private static AddressField heapBaseField;
+  private static AddressField narrowOopBaseField;
+  private static CIntegerField narrowOopShiftField;
 
   static {
     VM.registerVMInitializedObserver(new Observer() {
@@ -86,7 +87,8 @@
 
     systemObjArrayKlassObjField = type.getOopField("_systemObjArrayKlassObj");
 
-    heapBaseField = type.getAddressField("_heap_base");
+    narrowOopBaseField = type.getAddressField("_narrow_oop._base");
+    narrowOopShiftField = type.getCIntegerField("_narrow_oop._shift");
   }
 
   public Universe() {
@@ -100,14 +102,18 @@
     }
   }
 
-  public static long getHeapBase() {
-    if (heapBaseField.getValue() == null) {
+  public static long getNarrowOopBase() {
+    if (narrowOopBaseField.getValue() == null) {
       return 0;
     } else {
-      return heapBaseField.getValue().minus(null);
+      return narrowOopBaseField.getValue().minus(null);
     }
   }
 
+  public static int getNarrowOopShift() {
+    return (int)narrowOopShiftField.getValue();
+  }
+
   /** Returns "TRUE" iff "p" points into the allocated area of the heap. */
   public boolean isIn(Address p) {
     return heap().isIn(p);
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Wed Mar 18 11:37:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Thu Mar 19 09:13:24 2009 -0700
@@ -342,8 +342,8 @@
       throw new RuntimeException("Attempt to initialize VM twice");
     }
     soleInstance = new VM(db, debugger, debugger.getMachineDescription().isBigEndian());
-    debugger.putHeapConst(Universe.getHeapBase(), soleInstance.getHeapOopSize(),
-                          soleInstance.logMinObjAlignmentInBytes);
+    debugger.putHeapConst(soleInstance.getHeapOopSize(), Universe.getNarrowOopBase(),
+                          Universe.getNarrowOopShift());
     for (Iterator iter = vmInitializedObservers.iterator(); iter.hasNext(); ) {
       ((Observer) iter.next()).update(null, null);
     }
--- a/make/windows/get_msc_ver.sh	Wed Mar 18 11:37:48 2009 -0400
+++ b/make/windows/get_msc_ver.sh	Thu Mar 19 09:13:24 2009 -0700
@@ -29,6 +29,7 @@
 # cl version 13.10.3077 returns "MSC_VER=1310"
 # cl version 14.00.30701 returns "MSC_VER=1399" (OLD_MSSDK version)
 # cl version 14.00.40310.41 returns "MSC_VER=1400"
+# cl version 15.00.21022.8 returns "MSC_VER=1500"
 
 # Note that we currently do not have a way to set HotSpotMksHome in
 # the batch build, but so far this has not seemed to be a problem. The
--- a/make/windows/makefiles/compile.make	Wed Mar 18 11:37:48 2009 -0400
+++ b/make/windows/makefiles/compile.make	Thu Mar 19 09:13:24 2009 -0700
@@ -170,11 +170,9 @@
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
-!if "$(BUILDARCH)" == "i486"
-# VS2005 on x86 restricts the use of certain libc functions without this
+# VS2005 and later restricts the use of certain libc functions without this
 CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_DEPRECATE
 !endif
-!endif
 
 !if "$(COMPILER_NAME)" == "VS2008"
 PRODUCT_OPT_OPTION   = /O2 /Oy-
@@ -185,11 +183,9 @@
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
-!if "$(BUILDARCH)" == "i486"
-# VS2005 on x86 restricts the use of certain libc functions without this
+# VS2005 and later restricts the use of certain libc functions without this
 CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_DEPRECATE
 !endif
-!endif
 
 # Compile for space above time.
 !if "$(Variant)" == "kernel"
--- a/make/windows/makefiles/sa.make	Wed Mar 18 11:37:48 2009 -0400
+++ b/make/windows/makefiles/sa.make	Thu Mar 19 09:13:24 2009 -0700
@@ -89,9 +89,11 @@
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
 !elseif "$(BUILDARCH)" == "amd64"
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+!if "$(COMPILER_NAME)" == "VS2005"
 # On amd64, VS2005 compiler requires bufferoverflowU.lib on the link command line, 
 # otherwise we get missing __security_check_cookie externals at link time. 
 SA_LINK_FLAGS = bufferoverflowU.lib
+!endif
 !else
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 /Gm $(GX_OPTION) /ZI /Od /D "WIN32" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
 !endif
--- a/make/windows/makefiles/sanity.make	Wed Mar 18 11:37:48 2009 -0400
+++ b/make/windows/makefiles/sanity.make	Thu Mar 19 09:13:24 2009 -0700
@@ -27,9 +27,9 @@
 all: checkCL checkLink
 
 checkCL:
-	@ if "$(MSC_VER)" NEQ "1310" if "$(MSC_VER)" NEQ "1399" if "$(MSC_VER)" NEQ "1400" \
+	@ if "$(MSC_VER)" NEQ "1310" if "$(MSC_VER)" NEQ "1399" if "$(MSC_VER)" NEQ "1400" if "$(MSC_VER)" NEQ "1500" \
 	echo *** WARNING *** unrecognized cl.exe version $(MSC_VER) ($(RAW_MSC_VER)).  Use FORCE_MSC_VER to override automatic detection.
 
 checkLink:
-	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" \
+	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" if "$(LINK_VER)" NEQ "900" \
 	echo *** WARNING *** unrecognized link.exe version $(LINK_VER) ($(RAW_LINK_VER)).  Use FORCE_LINK_VER to override automatic detection.
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -2767,6 +2767,268 @@
 }
 
 
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register temp_reg,
+                                         Register temp2_reg,
+                                         Label& L_success) {
+  Label L_failure, L_pop_to_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass,
+                                temp_reg, temp2_reg,
+                                &L_success, &L_failure, NULL);
+  Register sub_2 = sub_klass;
+  Register sup_2 = super_klass;
+  if (!sub_2->is_global())  sub_2 = L0;
+  if (!sup_2->is_global())  sup_2 = L1;
+
+  save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
+  check_klass_subtype_slow_path(sub_2, sup_2,
+                                L2, L3, L4, L5,
+                                NULL, &L_pop_to_failure);
+
+  // on success:
+  restore();
+  ba(false, L_success);
+  delayed()->nop();
+
+  // on failure:
+  bind(L_pop_to_failure);
+  restore();
+  bind(L_failure);
+}
+
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path,
+                                        RegisterConstant super_check_offset,
+                                        Register instanceof_hack) {
+  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_super_cache_offset_in_bytes());
+  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
+                    Klass::super_check_offset_offset_in_bytes());
+
+  bool must_load_sco  = (super_check_offset.constant_or_zero() == -1);
+  bool need_slow_path = (must_load_sco ||
+                         super_check_offset.constant_or_zero() == sco_offset);
+
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  if (super_check_offset.is_register()) {
+    assert_different_registers(sub_klass, super_klass,
+                               super_check_offset.as_register());
+  } else if (must_load_sco) {
+    assert(temp2_reg != noreg, "supply either a temp or a register offset");
+  }
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1 || instanceof_hack != noreg ||
+         (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
+         "at most one NULL in the batch, usually");
+
+  // Support for the instanceof hack, which uses delay slots to
+  // set a destination register to zero or one.
+  bool do_bool_sets = (instanceof_hack != noreg);
+#define BOOL_SET(bool_value)                            \
+  if (do_bool_sets && bool_value >= 0)                  \
+    set(bool_value, instanceof_hack)
+#define DELAYED_BOOL_SET(bool_value)                    \
+  if (do_bool_sets && bool_value >= 0)                  \
+    delayed()->set(bool_value, instanceof_hack);        \
+  else delayed()->nop()
+  // Hacked ba(), which may only be used just before L_fallthrough.
+#define FINAL_JUMP(label, bool_value)                   \
+  if (&(label) == &L_fallthrough) {                     \
+    BOOL_SET(bool_value);                               \
+  } else {                                              \
+    ba((do_bool_sets && bool_value >= 0), label);       \
+    DELAYED_BOOL_SET(bool_value);                       \
+  }
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface.  Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  cmp(super_klass, sub_klass);
+  brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
+  DELAYED_BOOL_SET(1);
+
+  // Check the supertype display:
+  if (must_load_sco) {
+    // The super check offset is always positive...
+    lduw(super_klass, sco_offset, temp2_reg);
+    super_check_offset = RegisterConstant(temp2_reg);
+  }
+  ld_ptr(sub_klass, super_check_offset, temp_reg);
+  cmp(super_klass, temp_reg);
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  if (super_check_offset.is_register()) {
+    brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
+    delayed(); if (do_bool_sets)  BOOL_SET(1);
+    // if !do_bool_sets, sneak the next cmp into the delay slot:
+    cmp(super_check_offset.as_register(), sc_offset);
+
+    if (L_failure == &L_fallthrough) {
+      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_slow_path);
+      delayed()->nop();
+      BOOL_SET(0);  // fallthrough on failure
+    } else {
+      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
+      DELAYED_BOOL_SET(0);
+      FINAL_JUMP(*L_slow_path, -1);  // -1 => vanilla delay slot
+    }
+  } else if (super_check_offset.as_constant() == sc_offset) {
+    // Need a slow path; fast failure is impossible.
+    if (L_slow_path == &L_fallthrough) {
+      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
+      DELAYED_BOOL_SET(1);
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path);
+      delayed()->nop();
+      FINAL_JUMP(*L_success, 1);
+    }
+  } else {
+    // No slow path; it's a fast decision.
+    if (L_failure == &L_fallthrough) {
+      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
+      DELAYED_BOOL_SET(1);
+      BOOL_SET(0);
+    } else {
+      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
+      DELAYED_BOOL_SET(0);
+      FINAL_JUMP(*L_success, 1);
+    }
+  }
+
+  bind(L_fallthrough);
+
+#undef final_jump
+#undef bool_set
+#undef DELAYED_BOOL_SET
+#undef final_jump
+}
+
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register count_temp,
+                                                   Register scan_temp,
+                                                   Register scratch_reg,
+                                                   Register coop_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure) {
+  assert_different_registers(sub_klass, super_klass,
+                             count_temp, scan_temp, scratch_reg, coop_reg);
+
+  Label L_fallthrough, L_loop;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_supers_offset_in_bytes());
+  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_super_cache_offset_in_bytes());
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+
+#ifndef PRODUCT
+  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
+  inc_counter((address) pst_counter, count_temp, scan_temp);
+#endif
+
+  // We will consult the secondary-super array.
+  ld_ptr(sub_klass, ss_offset, scan_temp);
+
+  // Compress superclass if necessary.
+  Register search_key = super_klass;
+  bool decode_super_klass = false;
+  if (UseCompressedOops) {
+    if (coop_reg != noreg) {
+      encode_heap_oop_not_null(super_klass, coop_reg);
+      search_key = coop_reg;
+    } else {
+      encode_heap_oop_not_null(super_klass);
+      decode_super_klass = true; // scarce temps!
+    }
+    // The superclass is never null; it would be a basic system error if a null
+    // pointer were to sneak in here.  Note that we have already loaded the
+    // Klass::super_check_offset from the super_klass in the fast path,
+    // so if there is a null in that register, we are already in the afterlife.
+  }
+
+  // Load the array length.  (Positive movl does right thing on LP64.)
+  lduw(scan_temp, arrayOopDesc::length_offset_in_bytes(), count_temp);
+
+  // Check for empty secondary super list
+  tst(count_temp);
+
+  // Top of search loop
+  bind(L_loop);
+  br(Assembler::equal, false, Assembler::pn, *L_failure);
+  delayed()->add(scan_temp, heapOopSize, scan_temp);
+  assert(heapOopSize != 0, "heapOopSize should be initialized");
+
+  // Skip the array header in all array accesses.
+  int elem_offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
+  elem_offset -= heapOopSize;   // the scan pointer was pre-incremented also
+
+  // Load next super to check
+  if (UseCompressedOops) {
+    // Don't use load_heap_oop; we don't want to decode the element.
+    lduw(   scan_temp, elem_offset, scratch_reg );
+  } else {
+    ld_ptr( scan_temp, elem_offset, scratch_reg );
+  }
+
+  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
+  cmp(scratch_reg, search_key);
+
+  // A miss means we are NOT a subtype and need to keep looping
+  brx(Assembler::notEqual, false, Assembler::pn, L_loop);
+  delayed()->deccc(count_temp); // decrement trip counter in delay slot
+
+  // Falling out the bottom means we found a hit; we ARE a subtype
+  if (decode_super_klass) decode_heap_oop(super_klass);
+
+  // Success.  Cache the super we found and proceed in triumph.
+  st_ptr(super_klass, sub_klass, sc_offset);
+
+  if (L_success != &L_fallthrough) {
+    ba(false, *L_success);
+    delayed()->nop();
+  }
+
+  bind(L_fallthrough);
+}
+
+
+
+
 void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
                                           Register temp_reg,
                                           Label& done, Label* slow_case,
@@ -4316,7 +4578,13 @@
 
 void MacroAssembler::encode_heap_oop(Register src, Register dst) {
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   verify_oop(src);
+  if (Universe::narrow_oop_base() == NULL) {
+    srlx(src, LogMinObjAlignmentInBytes, dst);
+    return;
+  }
   Label done;
   if (src == dst) {
     // optimize for frequent case src == dst
@@ -4338,26 +4606,39 @@
 
 void MacroAssembler::encode_heap_oop_not_null(Register r) {
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   verify_oop(r);
-  sub(r, G6_heapbase, r);
+  if (Universe::narrow_oop_base() != NULL)
+    sub(r, G6_heapbase, r);
   srlx(r, LogMinObjAlignmentInBytes, r);
 }
 
 void MacroAssembler::encode_heap_oop_not_null(Register src, Register dst) {
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   verify_oop(src);
-  sub(src, G6_heapbase, dst);
-  srlx(dst, LogMinObjAlignmentInBytes, dst);
+  if (Universe::narrow_oop_base() == NULL) {
+    srlx(src, LogMinObjAlignmentInBytes, dst);
+  } else {
+    sub(src, G6_heapbase, dst);
+    srlx(dst, LogMinObjAlignmentInBytes, dst);
+  }
 }
 
 // Same algorithm as oops.inline.hpp decode_heap_oop.
 void  MacroAssembler::decode_heap_oop(Register src, Register dst) {
   assert (UseCompressedOops, "must be compressed");
-  Label done;
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   sllx(src, LogMinObjAlignmentInBytes, dst);
-  bpr(rc_nz, true, Assembler::pt, dst, done);
-  delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken
-  bind(done);
+  if (Universe::narrow_oop_base() != NULL) {
+    Label done;
+    bpr(rc_nz, true, Assembler::pt, dst, done);
+    delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken
+    bind(done);
+  }
   verify_oop(dst);
 }
 
@@ -4366,8 +4647,11 @@
   // pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   sllx(r, LogMinObjAlignmentInBytes, r);
-  add(r, G6_heapbase, r);
+  if (Universe::narrow_oop_base() != NULL)
+    add(r, G6_heapbase, r);
 }
 
 void  MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) {
@@ -4375,14 +4659,17 @@
   // pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   sllx(src, LogMinObjAlignmentInBytes, dst);
-  add(dst, G6_heapbase, dst);
+  if (Universe::narrow_oop_base() != NULL)
+    add(dst, G6_heapbase, dst);
 }
 
 void MacroAssembler::reinit_heapbase() {
   if (UseCompressedOops) {
     // call indirectly to solve generation ordering problem
-    Address base(G6_heapbase, (address)Universe::heap_base_addr());
+    Address base(G6_heapbase, (address)Universe::narrow_oop_base_addr());
     load_ptr_contents(base, G6_heapbase);
   }
 }
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -2327,6 +2327,46 @@
                                Register temp_reg, Register temp2_reg,
                                Label& no_such_interface);
 
+  // Test sub_klass against super_klass, with fast and slow paths.
+
+  // The fast path produces a tri-state answer: yes / no / maybe-slow.
+  // One of the three labels can be NULL, meaning take the fall-through.
+  // If super_check_offset is -1, the value is loaded up from super_klass.
+  // No registers are killed, except temp_reg and temp2_reg.
+  // If super_check_offset is not -1, temp2_reg is not used and can be noreg.
+  void check_klass_subtype_fast_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     Label* L_slow_path,
+                RegisterConstant super_check_offset = RegisterConstant(-1),
+                Register instanceof_hack = noreg);
+
+  // The rest of the type check; must be wired to a corresponding fast path.
+  // It does not repeat the fast path logic, so don't use it standalone.
+  // The temp_reg can be noreg, if no temps are available.
+  // It can also be sub_klass or super_klass, meaning it's OK to kill that one.
+  // Updates the sub's secondary super cache as necessary.
+  void check_klass_subtype_slow_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Register temp3_reg,
+                                     Register temp4_reg,
+                                     Label* L_success,
+                                     Label* L_failure);
+
+  // Simplified, combined version, good for typical uses.
+  // Falls through on failure.
+  void check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp_reg,
+                           Register temp2_reg,
+                           Label& L_success);
+
+
   // Stack overflow checking
 
   // Note: this clobbers G3_scratch
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -2393,23 +2393,11 @@
 
     // get instance klass
     load(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc), k_RInfo, T_OBJECT, NULL);
-    // get super_check_offset
-    load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
-    // See if we get an immediate positive hit
-    __ ld_ptr(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr->as_register());
-    __ cmp(k_RInfo, O7);
-    __ br(Assembler::equal, false, Assembler::pn, done);
-    __ delayed()->nop();
-    // check for immediate negative hit
-    __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-    __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
-    __ delayed()->nop();
-    // check for self
-    __ cmp(klass_RInfo, k_RInfo);
-    __ br(Assembler::equal, false, Assembler::pn, done);
-    __ delayed()->nop();
-
-    // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
+    // perform the fast part of the checking logic
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, &done, stub->entry(), NULL);
+
+    // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+    assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
     __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
     __ delayed()->nop();
     __ cmp(G3, 0);
@@ -2493,58 +2481,30 @@
       __ delayed()->nop();
       __ bind(done);
     } else {
+      bool need_slow_path = true;
       if (k->is_loaded()) {
-        load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
-
-        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
-          // See if we get an immediate positive hit
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
-          __ delayed()->nop();
-        } else {
-          // See if we get an immediate positive hit
-          assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::equal, false, Assembler::pn, done);
-          // check for self
-          __ delayed()->cmp(klass_RInfo, k_RInfo);
-          __ br(Assembler::equal, false, Assembler::pn, done);
-          __ delayed()->nop();
-
-          // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
-          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
-          __ delayed()->nop();
-          __ cmp(G3, 0);
-          __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
-          __ delayed()->nop();
-        }
-        __ bind(done);
+        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
+          need_slow_path = false;
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
+                                         (need_slow_path ? &done : NULL),
+                                         stub->entry(), NULL,
+                                         RegisterConstant(k->super_check_offset()));
       } else {
-        assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
-
-        load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
-        // See if we get an immediate positive hit
-        load(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr, T_OBJECT);
-        __ cmp(k_RInfo, O7);
-        __ br(Assembler::equal, false, Assembler::pn, done);
-        __ delayed()->nop();
-        // check for immediate negative hit
-        __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-        __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
-        // check for self
-        __ delayed()->cmp(klass_RInfo, k_RInfo);
-        __ br(Assembler::equal, false, Assembler::pn, done);
-        __ delayed()->nop();
-
-        // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7,
+                                         &done, stub->entry(), NULL);
+      }
+      if (need_slow_path) {
+        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
         __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
         __ delayed()->nop();
         __ cmp(G3, 0);
         __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
         __ delayed()->nop();
-        __ bind(done);
       }
-
+      __ bind(done);
     }
     __ mov(obj, dst);
   } else if (code == lir_instanceof) {
@@ -2582,58 +2542,32 @@
       __ set(0, dst);
       __ bind(done);
     } else {
+      bool need_slow_path = true;
       if (k->is_loaded()) {
-        assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
-        load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
-
-        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
-          // See if we get an immediate positive hit
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::equal, true, Assembler::pt, done);
-          __ delayed()->set(1, dst);
-          __ set(0, dst);
-          __ bind(done);
-        } else {
-          // See if we get an immediate positive hit
-          assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::equal, true, Assembler::pt, done);
-          __ delayed()->set(1, dst);
-          // check for self
-          __ cmp(klass_RInfo, k_RInfo);
-          __ br(Assembler::equal, true, Assembler::pt, done);
-          __ delayed()->set(1, dst);
-
-          // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
-          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
-          __ delayed()->nop();
-          __ mov(G3, dst);
-          __ bind(done);
-        }
+        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
+          need_slow_path = false;
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, noreg,
+                                         (need_slow_path ? &done : NULL),
+                                         (need_slow_path ? &done : NULL), NULL,
+                                         RegisterConstant(k->super_check_offset()),
+                                         dst);
       } else {
         assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
-
-        load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), dst, T_INT, NULL);
-        // See if we get an immediate positive hit
-        load(klass_RInfo, dst, FrameMap::O7_oop_opr, T_OBJECT);
-        __ cmp(k_RInfo, O7);
-        __ br(Assembler::equal, true, Assembler::pt, done);
-        __ delayed()->set(1, dst);
-        // check for immediate negative hit
-        __ cmp(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-        __ br(Assembler::notEqual, true, Assembler::pt, done);
-        __ delayed()->set(0, dst);
-        // check for self
-        __ cmp(klass_RInfo, k_RInfo);
-        __ br(Assembler::equal, true, Assembler::pt, done);
-        __ delayed()->set(1, dst);
-
-        // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, dst,
+                                         &done, &done, NULL,
+                                         RegisterConstant(-1),
+                                         dst);
+      }
+      if (need_slow_path) {
+        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
         __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
         __ delayed()->nop();
         __ mov(G3, dst);
-        __ bind(done);
       }
+      __ bind(done);
     }
   } else {
     ShouldNotReachHere();
--- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -714,38 +714,19 @@
         //      sub  : G3, argument, destroyed
         //      super: G1, argument, not changed
         //      raddr: O7, blown by call
-        Label loop, miss;
+        Label miss;
 
         __ save_frame(0);               // Blow no registers!
 
-        __ ld_ptr( G3, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
-        __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0); // length in l0
-        __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1); // ptr into array
-        __ clr(L4);                     // Index
-        // Load a little early; will load 1 off the end of the array.
-        // Ok for now; revisit if we have other uses of this routine.
-        __ ld_ptr(L1,0,L2);             // Will load a little early
-
-        // The scan loop
-        __ bind(loop);
-        __ add(L1,wordSize,L1); // Bump by OOP size
-        __ cmp(L4,L0);
-        __ br(Assembler::equal,false,Assembler::pn,miss);
-        __ delayed()->inc(L4);  // Bump index
-        __ subcc(L2,G1,L3);             // Check for match; zero in L3 for a hit
-        __ brx( Assembler::notEqual, false, Assembler::pt, loop );
-        __ delayed()->ld_ptr(L1,0,L2); // Will load a little early
-
-        // Got a hit; report success; set cache
-        __ st_ptr( G1, G3, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
+        __ check_klass_subtype_slow_path(G3, G1, L0, L1, L2, L4, NULL, &miss);
 
         __ mov(1, G3);
-        __ ret();                       // Result in G5 is ok; flags set
+        __ ret();                       // Result in G5 is 'true'
         __ delayed()->restore();        // free copy or add can go here
 
         __ bind(miss);
         __ mov(0, G3);
-        __ ret();                       // Result in G5 is ok; flags set
+        __ ret();                       // Result in G5 is 'false'
         __ delayed()->restore();        // free copy or add can go here
       }
 
--- a/src/cpu/sparc/vm/interp_masm_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -866,65 +866,18 @@
                                                   Register Rtmp2,
                                                   Register Rtmp3,
                                                   Label &ok_is_subtype ) {
-  Label not_subtype, loop;
+  Label not_subtype;
 
   // Profile the not-null value's klass.
   profile_typecheck(Rsub_klass, Rtmp1);
 
-  // Load the super-klass's check offset into Rtmp1
-  ld( Rsuper_klass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1 );
-  // Load from the sub-klass's super-class display list, or a 1-word cache of
-  // the secondary superclass list, or a failing value with a sentinel offset
-  // if the super-klass is an interface or exceptionally deep in the Java
-  // hierarchy and we have to scan the secondary superclass list the hard way.
-  ld_ptr( Rsub_klass, Rtmp1, Rtmp2 );
-  // See if we get an immediate positive hit
-  cmp( Rtmp2, Rsuper_klass );
-  brx( Assembler::equal, false, Assembler::pt, ok_is_subtype );
-  // In the delay slot, check for immediate negative hit
-  delayed()->cmp( Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
-  br( Assembler::notEqual, false, Assembler::pt, not_subtype );
-  // In the delay slot, check for self
-  delayed()->cmp( Rsub_klass, Rsuper_klass );
-  brx( Assembler::equal, false, Assembler::pt, ok_is_subtype );
-
-  // Now do a linear scan of the secondary super-klass chain.
-  delayed()->ld_ptr( Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), Rtmp2 );
-
-  // compress superclass
-  if (UseCompressedOops) encode_heap_oop(Rsuper_klass);
-
-  // Rtmp2 holds the objArrayOop of secondary supers.
-  ld( Rtmp2, arrayOopDesc::length_offset_in_bytes(), Rtmp1 );// Load the array length
-  // Check for empty secondary super list
-  tst(Rtmp1);
-
-  // Top of search loop
-  bind( loop );
-  br( Assembler::equal, false, Assembler::pn, not_subtype );
-  delayed()->nop();
-
-  // load next super to check
-  if (UseCompressedOops) {
-    lduw( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3);
-    // Bump array pointer forward one oop
-    add( Rtmp2, 4, Rtmp2 );
-  } else {
-    ld_ptr( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3);
-    // Bump array pointer forward one oop
-    add( Rtmp2, wordSize, Rtmp2);
-  }
-  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
-  cmp( Rtmp3, Rsuper_klass );
-  // A miss means we are NOT a subtype and need to keep looping
-  brx( Assembler::notEqual, false, Assembler::pt, loop );
-  delayed()->deccc( Rtmp1 );    // dec trip counter in delay slot
-  // Falling out the bottom means we found a hit; we ARE a subtype
-  if (UseCompressedOops) decode_heap_oop(Rsuper_klass);
-  br( Assembler::always, false, Assembler::pt, ok_is_subtype );
-  // Update the cache
-  delayed()->st_ptr( Rsuper_klass, Rsub_klass,
-                     sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
+  check_klass_subtype_fast_path(Rsub_klass, Rsuper_klass,
+                                Rtmp1, Rtmp2,
+                                &ok_is_subtype, &not_subtype, NULL);
+
+  check_klass_subtype_slow_path(Rsub_klass, Rsuper_klass,
+                                Rtmp1, Rtmp2, Rtmp3, /*hack:*/ noreg,
+                                &ok_is_subtype, NULL);
 
   bind(not_subtype);
   profile_typecheck_failed(Rtmp1);
--- a/src/cpu/sparc/vm/sparc.ad	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/sparc/vm/sparc.ad	Thu Mar 19 09:13:24 2009 -0700
@@ -547,7 +547,11 @@
     int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();
     int klass_load_size;
     if (UseCompressedOops) {
-      klass_load_size = 3*BytesPerInstWord; // see MacroAssembler::load_klass()
+      assert(Universe::heap() != NULL, "java heap should be initialized");
+      if (Universe::narrow_oop_base() == NULL)
+        klass_load_size = 2*BytesPerInstWord; // see MacroAssembler::load_klass()
+      else
+        klass_load_size = 3*BytesPerInstWord;
     } else {
       klass_load_size = 1*BytesPerInstWord;
     }
@@ -1601,9 +1605,11 @@
   st->print_cr("\nUEP:");
 #ifdef    _LP64
   if (UseCompressedOops) {
+    assert(Universe::heap() != NULL, "java heap should be initialized");
     st->print_cr("\tLDUW   [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check - compressed klass");
     st->print_cr("\tSLL    R_G5,3,R_G5");
-    st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
+    if (Universe::narrow_oop_base() != NULL)
+      st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
   } else {
     st->print_cr("\tLDX    [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check");
   }
@@ -2502,7 +2508,11 @@
       __ load_klass(O0, G3_scratch);
       int klass_load_size;
       if (UseCompressedOops) {
-        klass_load_size = 3*BytesPerInstWord;
+        assert(Universe::heap() != NULL, "java heap should be initialized");
+        if (Universe::narrow_oop_base() == NULL)
+          klass_load_size = 2*BytesPerInstWord;
+        else
+          klass_load_size = 3*BytesPerInstWord;
       } else {
         klass_load_size = 1*BytesPerInstWord;
       }
@@ -9005,6 +9015,33 @@
   ins_pipe(long_memory_op);
 %}
 
+
+//---------- Population Count Instructions -------------------------------------
+
+instruct popCountI(iRegI dst, iRegI src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+
+  format %{ "POPC   $src, $dst" %}
+  ins_encode %{
+    __ popc($src$$Register, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL(iRegI dst, iRegL src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL src));
+
+  format %{ "POPC   $src, $dst" %}
+  ins_encode %{
+    __ popc($src$$Register, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+
 // ============================================================================
 //------------Bytes reverse--------------------------------------------------
 
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -900,19 +900,7 @@
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
     address start = __ pc();
-    Label loop, miss;
-
-    // Compare super with sub directly, since super is not in its own SSA.
-    // The compiler used to emit this test, but we fold it in here,
-    // to increase overall code density, with no real loss of speed.
-    { Label L;
-      __ cmp(O1, O2);
-      __ brx(Assembler::notEqual, false, Assembler::pt, L);
-      __ delayed()->nop();
-      __ retl();
-      __ delayed()->addcc(G0,0,O0); // set Z flags, zero result
-      __ bind(L);
-    }
+    Label miss;
 
 #if defined(COMPILER2) && !defined(_LP64)
     // Do not use a 'save' because it blows the 64-bit O registers.
@@ -936,56 +924,12 @@
     Register L2_super   = L2;
     Register L3_index   = L3;
 
-#ifdef _LP64
-    Register L4_ooptmp  = L4;
-
-    if (UseCompressedOops) {
-      // this must be under UseCompressedOops check, as we rely upon fact
-      // that L4 not clobbered in C2 on 32-bit platforms, where we do explicit save
-      // on stack, see several lines above
-      __ encode_heap_oop(Rsuper, L4_ooptmp);
-    }
-#endif
-
-    inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1);
-
-    __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
-    __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len);
-    __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr);
-    __ clr(L3_index);           // zero index
-    // Load a little early; will load 1 off the end of the array.
-    // Ok for now; revisit if we have other uses of this routine.
-    if (UseCompressedOops) {
-      __ lduw(L1_ary_ptr,0,L2_super);// Will load a little early
-    } else {
-      __ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
-    }
-
-    assert(heapOopSize != 0, "heapOopSize should be initialized");
-    // The scan loop
-    __ BIND(loop);
-    __ add(L1_ary_ptr, heapOopSize, L1_ary_ptr); // Bump by OOP size
-    __ cmp(L3_index,L0_ary_len);
-    __ br(Assembler::equal,false,Assembler::pn,miss);
-    __ delayed()->inc(L3_index); // Bump index
-
-    if (UseCompressedOops) {
-#ifdef  _LP64
-      __ subcc(L2_super,L4_ooptmp,Rret);   // Check for match; zero in Rret for a hit
-      __ br( Assembler::notEqual, false, Assembler::pt, loop );
-      __ delayed()->lduw(L1_ary_ptr,0,L2_super);// Will load a little early
-#else
-      ShouldNotReachHere();
-#endif
-    } else {
-      __ subcc(L2_super,Rsuper,Rret);   // Check for match; zero in Rret for a hit
-      __ brx( Assembler::notEqual, false, Assembler::pt, loop );
-      __ delayed()->ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
-    }
-
-    // Got a hit; report success; set cache.  Cache load doesn't
-    // happen here; for speed it is directly emitted by the compiler.
-    __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
+    __ check_klass_subtype_slow_path(Rsub, Rsuper,
+                                     L0, L1, L2, L3,
+                                     NULL, &miss);
+
+    // Match falls through here.
+    __ addcc(G0,0,Rret);        // set Z flags, Z result
 
 #if defined(COMPILER2) && !defined(_LP64)
     __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
@@ -999,7 +943,6 @@
     __ delayed()->restore();
 #endif
 
-    // Hit or miss falls through here
     __ BIND(miss);
     __ addcc(G0,1,Rret);        // set NZ flags, NZ result
 
@@ -2330,51 +2273,31 @@
                            Register super_check_offset,
                            Register super_klass,
                            Register temp,
-                           Label& L_success,
-                           Register deccc_hack = noreg) {
+                           Label& L_success) {
     assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
 
     BLOCK_COMMENT("type_check:");
 
-    Label L_miss;
+    Label L_miss, L_pop_to_miss;
 
     assert_clean_int(super_check_offset, temp);
 
-    // maybe decrement caller's trip count:
-#define DELAY_SLOT delayed();   \
-    { if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); }
-
-    // if the pointers are equal, we are done (e.g., String[] elements)
-    __ cmp(sub_klass, super_klass);
-    __ brx(Assembler::equal, true, Assembler::pt, L_success);
-    __ DELAY_SLOT;
-
-    // check the supertype display:
-    __ ld_ptr(sub_klass, super_check_offset, temp); // query the super type
-    __ cmp(super_klass,                      temp); // test the super type
-    __ brx(Assembler::equal, true, Assembler::pt, L_success);
-    __ DELAY_SLOT;
-
-    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                     Klass::secondary_super_cache_offset_in_bytes());
-    __ cmp(super_klass, sc_offset);
-    __ brx(Assembler::notEqual, true, Assembler::pt, L_miss);
-    __ delayed()->nop();
-
+    __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
+                                     &L_success, &L_miss, NULL,
+                                     super_check_offset);
+
+    BLOCK_COMMENT("type_check_slow_path:");
     __ save_frame(0);
-    __ mov(sub_klass->after_save(), O1);
-    // mov(super_klass->after_save(), O2); //fill delay slot
-    assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation");
-    __ call(StubRoutines::Sparc::_partial_subtype_check);
-    __ delayed()->mov(super_klass->after_save(), O2);
+    __ check_klass_subtype_slow_path(sub_klass->after_save(),
+                                     super_klass->after_save(),
+                                     L0, L1, L2, L4,
+                                     NULL, &L_pop_to_miss);
+    __ ba(false, L_success);
+    __ delayed()->restore();
+
+    __ bind(L_pop_to_miss);
     __ restore();
 
-    // Upon return, the condition codes are already set.
-    __ brx(Assembler::equal, true, Assembler::pt, L_success);
-    __ DELAY_SLOT;
-
-#undef DELAY_SLOT
-
     // Fall through on failure!
     __ BIND(L_miss);
   }
@@ -2411,7 +2334,7 @@
     gen_write_ref_array_pre_barrier(O1, O2);
 
 #ifdef ASSERT
-    // We sometimes save a frame (see partial_subtype_check below).
+    // We sometimes save a frame (see generate_type_check below).
     // If this will cause trouble, let's fail now instead of later.
     __ save_frame(0);
     __ restore();
@@ -2455,41 +2378,39 @@
     //   G3, G4, G5 --- current oop, oop.klass, oop.klass.super
     __ align(16);
 
-    __ bind(store_element);
-    // deccc(G1_remain);                // decrement the count (hoisted)
+    __ BIND(store_element);
+    __ deccc(G1_remain);                // decrement the count
     __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
     __ inc(O5_offset, heapOopSize);     // step to next offset
     __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
     __ delayed()->set(0, O0);           // return -1 on success
 
     // ======== loop entry is here ========
-    __ bind(load_element);
+    __ BIND(load_element);
     __ load_heap_oop(O0_from, O5_offset, G3_oop);  // load the oop
     __ br_null(G3_oop, true, Assembler::pt, store_element);
-    __ delayed()->deccc(G1_remain);     // decrement the count
+    __ delayed()->nop();
 
     __ load_klass(G3_oop, G4_klass); // query the object klass
 
     generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
                         // branch to this on success:
-                        store_element,
-                        // decrement this on success:
-                        G1_remain);
+                        store_element);
     // ======== end loop ========
 
     // It was a real error; we must depend on the caller to finish the job.
     // Register G1 has number of *remaining* oops, O2 number of *total* oops.
     // Emit GC store barriers for the oops we have copied (O2 minus G1),
     // and report their number to the caller.
-    __ bind(fail);
+    __ BIND(fail);
     __ subcc(O2_count, G1_remain, O2_count);
     __ brx(Assembler::zero, false, Assembler::pt, done);
     __ delayed()->not1(O2_count, O0);   // report (-1^K) to caller
 
-    __ bind(do_card_marks);
+    __ BIND(do_card_marks);
     gen_write_ref_array_post_barrier(O1_to, O2_count, O3);   // store check on O1[0..O2]
 
-    __ bind(done);
+    __ BIND(done);
     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
     __ retl();
     __ delayed()->nop();             // return value in 00
@@ -2942,14 +2863,15 @@
     StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
     StubRoutines::_fence_entry               = generate_fence();
 #endif  // COMPILER2 !=> _LP64
-
-    StubRoutines::Sparc::_partial_subtype_check                = generate_partial_subtype_check();
   }
 
 
   void generate_all() {
     // Generates all stubs and initializes the entry points
 
+    // Generate partial_subtype_check first here since its code depends on
+    // UseZeroBaseCompressedOops which is defined after heap initialization.
+    StubRoutines::Sparc::_partial_subtype_check                = generate_partial_subtype_check();
     // These entry points require SharedInfo::stack0 to be set up in non-core builds
     StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
     StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -72,6 +72,9 @@
         FLAG_SET_ERGO(bool, UseCompressedOops, false);
       }
     }
+    // 32-bit oops don't make sense for the 64-bit VM on sparc
+    // since the 32-bit VM has the same registers and smaller objects.
+    Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
 #endif // _LP64
 #ifdef COMPILER2
     // Indirect branch is the same cost as direct
@@ -89,16 +92,26 @@
 #endif
   }
 
+  // Use hardware population count instruction if available.
+  if (has_hardware_popc()) {
+    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+      UsePopCountInstruction = true;
+    }
+  }
+
   char buf[512];
-  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_v8() ? ", has_v8" : ""),
                (has_v9() ? ", has_v9" : ""),
+               (has_hardware_popc() ? ", popc" : ""),
                (has_vis1() ? ", has_vis1" : ""),
                (has_vis2() ? ", has_vis2" : ""),
                (is_ultra3() ? ", is_ultra3" : ""),
                (is_sun4v() ? ", is_sun4v" : ""),
                (is_niagara1() ? ", is_niagara1" : ""),
-               (!has_hardware_int_muldiv() ? ", no-muldiv" : ""),
+               (is_niagara1_plus() ? ", is_niagara1_plus" : ""),
+               (!has_hardware_mul32() ? ", no-mul32" : ""),
+               (!has_hardware_div32() ? ", no-div32" : ""),
                (!has_hardware_fsmuld() ? ", no-fsmuld" : ""));
 
   // buf is started with ", " or is empty
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,34 +25,38 @@
 class VM_Version: public Abstract_VM_Version {
 protected:
   enum Feature_Flag {
-    v8_instructions     = 0,
-    hardware_int_muldiv = 1,
-    hardware_fsmuld     = 2,
-    v9_instructions     = 3,
-    vis1_instructions   = 4,
-    vis2_instructions   = 5,
-    sun4v_instructions  = 6
+    v8_instructions    = 0,
+    hardware_mul32     = 1,
+    hardware_div32     = 2,
+    hardware_fsmuld    = 3,
+    hardware_popc      = 4,
+    v9_instructions    = 5,
+    vis1_instructions  = 6,
+    vis2_instructions  = 7,
+    sun4v_instructions = 8
   };
 
   enum Feature_Flag_Set {
-    unknown_m             = 0,
-    all_features_m        = -1,
+    unknown_m           = 0,
+    all_features_m      = -1,
 
-    v8_instructions_m     = 1 << v8_instructions,
-    hardware_int_muldiv_m = 1 << hardware_int_muldiv,
-    hardware_fsmuld_m     = 1 << hardware_fsmuld,
-    v9_instructions_m     = 1 << v9_instructions,
-    vis1_instructions_m   = 1 << vis1_instructions,
-    vis2_instructions_m   = 1 << vis2_instructions,
-    sun4v_m               = 1 << sun4v_instructions,
+    v8_instructions_m   = 1 << v8_instructions,
+    hardware_mul32_m    = 1 << hardware_mul32,
+    hardware_div32_m    = 1 << hardware_div32,
+    hardware_fsmuld_m   = 1 << hardware_fsmuld,
+    hardware_popc_m     = 1 << hardware_popc,
+    v9_instructions_m   = 1 << v9_instructions,
+    vis1_instructions_m = 1 << vis1_instructions,
+    vis2_instructions_m = 1 << vis2_instructions,
+    sun4v_m             = 1 << sun4v_instructions,
 
-    generic_v8_m          = v8_instructions_m | hardware_int_muldiv_m | hardware_fsmuld_m,
-    generic_v9_m          = generic_v8_m | v9_instructions_m | vis1_instructions_m,
-    ultra3_m              = generic_v9_m | vis2_instructions_m,
+    generic_v8_m        = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
+    generic_v9_m        = generic_v8_m | v9_instructions_m,
+    ultra3_m            = generic_v9_m | vis1_instructions_m | vis2_instructions_m,
 
     // Temporary until we have something more accurate
-    niagara1_unique_m     = sun4v_m,
-    niagara1_m            = generic_v9_m | niagara1_unique_m
+    niagara1_unique_m   = sun4v_m,
+    niagara1_m          = generic_v9_m | niagara1_unique_m
   };
 
   static int  _features;
@@ -62,7 +66,7 @@
   static int  determine_features();
   static int  platform_features(int features);
 
-  static bool is_niagara1(int features) { return (features & niagara1_m) == niagara1_m; }
+  static bool is_niagara1(int features) { return (features & sun4v_m) != 0; }
 
   static int maximum_niagara1_processor_count() { return 32; }
   // Returns true if the platform is in the niagara line and
@@ -76,8 +80,10 @@
   // Instruction support
   static bool has_v8()                  { return (_features & v8_instructions_m) != 0; }
   static bool has_v9()                  { return (_features & v9_instructions_m) != 0; }
-  static bool has_hardware_int_muldiv() { return (_features & hardware_int_muldiv_m) != 0; }
+  static bool has_hardware_mul32()      { return (_features & hardware_mul32_m) != 0; }
+  static bool has_hardware_div32()      { return (_features & hardware_div32_m) != 0; }
   static bool has_hardware_fsmuld()     { return (_features & hardware_fsmuld_m) != 0; }
+  static bool has_hardware_popc()       { return (_features & hardware_popc_m) != 0; }
   static bool has_vis1()                { return (_features & vis1_instructions_m) != 0; }
   static bool has_vis2()                { return (_features & vis2_instructions_m) != 0; }
 
--- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -221,13 +221,15 @@
     if (is_vtable_stub) {
       // ld;ld;ld,jmp,nop
       const int basic = 5*BytesPerInstWord +
-                        // shift;add for load_klass
-                        (UseCompressedOops ? 2*BytesPerInstWord : 0);
+                        // shift;add for load_klass (only shift with zero heap based)
+                        (UseCompressedOops ?
+                         ((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
       return basic + slop;
     } else {
       const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord +
-                        // shift;add for load_klass
-                        (UseCompressedOops ? 2*BytesPerInstWord : 0);
+                        // shift;add for load_klass (only shift with zero heap based)
+                        (UseCompressedOops ?
+                         ((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
       return (basic + slop);
     }
   }
--- a/src/cpu/x86/vm/assembler_x86.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -727,7 +727,7 @@
   }
 
 #ifdef _LP64
-  assert(false, "fix locate_operand");
+  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
 #else
   assert(which == imm_operand, "instruction has only an imm field");
 #endif // LP64
@@ -2193,6 +2193,25 @@
   emit_byte(0x58 | encode);
 }
 
+void Assembler::popcntl(Register dst, Address src) {
+  assert(VM_Version::supports_popcnt(), "must support");
+  InstructionMark im(this);
+  emit_byte(0xF3);
+  prefix(src, dst);
+  emit_byte(0x0F);
+  emit_byte(0xB8);
+  emit_operand(dst, src);
+}
+
+void Assembler::popcntl(Register dst, Register src) {
+  assert(VM_Version::supports_popcnt(), "must support");
+  emit_byte(0xF3);
+  int encode = prefix_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0xB8);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::popf() {
   emit_byte(0x9D);
 }
@@ -3224,12 +3243,6 @@
   emit_byte(0xF1);
 }
 
-void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format) {
-  InstructionMark im(this);
-  int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
-  emit_data((int)imm32, rspec, format);
-}
 
 #ifndef _LP64
 
@@ -3249,6 +3262,12 @@
   emit_data((int)imm32, rspec, 0);
 }
 
+void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
+  InstructionMark im(this);
+  int encode = prefix_and_encode(dst->encoding());
+  emit_byte(0xB8 | encode);
+  emit_data((int)imm32, rspec, 0);
+}
 
 void Assembler::popa() { // 32bit
   emit_byte(0x61);
@@ -3857,6 +3876,37 @@
   emit_data64(imm64, rspec);
 }
 
+void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
+  InstructionMark im(this);
+  int encode = prefix_and_encode(dst->encoding());
+  emit_byte(0xB8 | encode);
+  emit_data((int)imm32, rspec, narrow_oop_operand);
+}
+
+void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
+  InstructionMark im(this);
+  prefix(dst);
+  emit_byte(0xC7);
+  emit_operand(rax, dst, 4);
+  emit_data((int)imm32, rspec, narrow_oop_operand);
+}
+
+void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
+  InstructionMark im(this);
+  int encode = prefix_and_encode(src1->encoding());
+  emit_byte(0x81);
+  emit_byte(0xF8 | encode);
+  emit_data((int)imm32, rspec, narrow_oop_operand);
+}
+
+void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
+  InstructionMark im(this);
+  prefix(src1);
+  emit_byte(0x81);
+  emit_operand(rax, src1, 4);
+  emit_data((int)imm32, rspec, narrow_oop_operand);
+}
+
 void Assembler::movdq(XMMRegister dst, Register src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
@@ -4049,6 +4099,25 @@
   addq(rsp, 16 * wordSize);
 }
 
+void Assembler::popcntq(Register dst, Address src) {
+  assert(VM_Version::supports_popcnt(), "must support");
+  InstructionMark im(this);
+  emit_byte(0xF3);
+  prefixq(src, dst);
+  emit_byte(0x0F);
+  emit_byte(0xB8);
+  emit_operand(dst, src);
+}
+
+void Assembler::popcntq(Register dst, Register src) {
+  assert(VM_Version::supports_popcnt(), "must support");
+  emit_byte(0xF3);
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0xB8);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::popq(Address dst) {
   InstructionMark im(this);
   prefixq(dst);
@@ -7217,6 +7286,225 @@
 }
 
 
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp_reg,
+                           Label& L_success) {
+  Label L_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
+  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
+  bind(L_failure);
+}
+
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path,
+                                        RegisterConstant super_check_offset) {
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
+  if (super_check_offset.is_register()) {
+    assert_different_registers(sub_klass, super_klass,
+                               super_check_offset.as_register());
+  } else if (must_load_sco) {
+    assert(temp_reg != noreg, "supply either a temp or a register offset");
+  }
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_super_cache_offset_in_bytes());
+  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
+                    Klass::super_check_offset_offset_in_bytes());
+  Address super_check_offset_addr(super_klass, sco_offset);
+
+  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
+  // range of a jccb.  If this routine grows larger, reconsider at
+  // least some of these.
+#define local_jcc(assembler_cond, label)                                \
+  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
+  else                             jcc( assembler_cond, label) /*omit semi*/
+
+  // Hacked jmp, which may only be used just before L_fallthrough.
+#define final_jmp(label)                                                \
+  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
+  else                            jmp(label)                /*omit semi*/
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface.  Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  cmpptr(sub_klass, super_klass);
+  local_jcc(Assembler::equal, *L_success);
+
+  // Check the supertype display:
+  if (must_load_sco) {
+    // Positive movl does right thing on LP64.
+    movl(temp_reg, super_check_offset_addr);
+    super_check_offset = RegisterConstant(temp_reg);
+  }
+  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
+  cmpptr(super_klass, super_check_addr); // load displayed supertype
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  if (super_check_offset.is_register()) {
+    local_jcc(Assembler::equal, *L_success);
+    cmpl(super_check_offset.as_register(), sc_offset);
+    if (L_failure == &L_fallthrough) {
+      local_jcc(Assembler::equal, *L_slow_path);
+    } else {
+      local_jcc(Assembler::notEqual, *L_failure);
+      final_jmp(*L_slow_path);
+    }
+  } else if (super_check_offset.as_constant() == sc_offset) {
+    // Need a slow path; fast failure is impossible.
+    if (L_slow_path == &L_fallthrough) {
+      local_jcc(Assembler::equal, *L_success);
+    } else {
+      local_jcc(Assembler::notEqual, *L_slow_path);
+      final_jmp(*L_success);
+    }
+  } else {
+    // No slow path; it's a fast decision.
+    if (L_failure == &L_fallthrough) {
+      local_jcc(Assembler::equal, *L_success);
+    } else {
+      local_jcc(Assembler::notEqual, *L_failure);
+      final_jmp(*L_success);
+    }
+  }
+
+  bind(L_fallthrough);
+
+#undef local_jcc
+#undef final_jmp
+}
+
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   bool set_cond_codes) {
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  if (temp2_reg != noreg)
+    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
+#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_supers_offset_in_bytes());
+  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_super_cache_offset_in_bytes());
+  Address secondary_supers_addr(sub_klass, ss_offset);
+  Address super_cache_addr(     sub_klass, sc_offset);
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+  // The repne_scan instruction uses fixed registers, which we must spill.
+  // Don't worry too much about pre-existing connections with the input regs.
+
+  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
+  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
+
+  // Get super_klass value into rax (even if it was in rdi or rcx).
+  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
+  if (super_klass != rax || UseCompressedOops) {
+    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
+    mov(rax, super_klass);
+  }
+  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
+  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
+
+#ifndef PRODUCT
+  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
+  ExternalAddress pst_counter_addr((address) pst_counter);
+  NOT_LP64(  incrementl(pst_counter_addr) );
+  LP64_ONLY( lea(rcx, pst_counter_addr) );
+  LP64_ONLY( incrementl(Address(rcx, 0)) );
+#endif //PRODUCT
+
+  // We will consult the secondary-super array.
+  movptr(rdi, secondary_supers_addr);
+  // Load the array length.  (Positive movl does right thing on LP64.)
+  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
+  // Skip to start of data.
+  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
+  // Scan RCX words at [RDI] for an occurrence of RAX.
+  // Set NZ/Z based on last compare.
+#ifdef _LP64
+  // This part is tricky, as values in supers array could be 32 or 64 bit wide
+  // and we store values in objArrays always encoded, thus we need to encode
+  // the value of rax before repne.  Note that rax is dead after the repne.
+  if (UseCompressedOops) {
+    encode_heap_oop_not_null(rax);
+    // The superclass is never null; it would be a basic system error if a null
+    // pointer were to sneak in here.  Note that we have already loaded the
+    // Klass::super_check_offset from the super_klass in the fast path,
+    // so if there is a null in that register, we are already in the afterlife.
+    repne_scanl();
+  } else
+#endif // _LP64
+    repne_scan();
+
+  // Unspill the temp. registers:
+  if (pushed_rdi)  pop(rdi);
+  if (pushed_rcx)  pop(rcx);
+  if (pushed_rax)  pop(rax);
+
+  if (set_cond_codes) {
+    // Special hack for the AD files:  rdi is guaranteed non-zero.
+    assert(!pushed_rdi, "rdi must be left non-NULL");
+    // Also, the condition codes are properly set Z/NZ on succeed/failure.
+  }
+
+  if (L_failure == &L_fallthrough)
+        jccb(Assembler::notEqual, *L_failure);
+  else  jcc(Assembler::notEqual, *L_failure);
+
+  // Success.  Cache the super we found and proceed in triumph.
+  movptr(super_cache_addr, super_klass);
+
+  if (L_success != &L_fallthrough) {
+    jmp(*L_success);
+  }
+
+#undef IS_A_TEMP
+
+  bind(L_fallthrough);
+}
+
+
 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
   ucomisd(dst, as_Address(src));
 }
@@ -7710,14 +7998,21 @@
 void MacroAssembler::load_prototype_header(Register dst, Register src) {
 #ifdef _LP64
   if (UseCompressedOops) {
+    assert (Universe::heap() != NULL, "java heap should be initialized");
     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-    movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    if (Universe::narrow_oop_shift() != 0) {
+      assert(Address::times_8 == LogMinObjAlignmentInBytes &&
+             Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
+      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    } else {
+      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    }
   } else
 #endif
-    {
-      movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-      movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
-    }
+  {
+    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+  }
 }
 
 void MacroAssembler::store_klass(Register dst, Register src) {
@@ -7760,11 +8055,20 @@
 // Algorithm must match oop.inline.hpp encode_heap_oop.
 void MacroAssembler::encode_heap_oop(Register r) {
   assert (UseCompressedOops, "should be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  if (Universe::narrow_oop_base() == NULL) {
+    verify_oop(r, "broken oop in encode_heap_oop");
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      shrq(r, LogMinObjAlignmentInBytes);
+    }
+    return;
+  }
 #ifdef ASSERT
   if (CheckCompressedOops) {
     Label ok;
     push(rscratch1); // cmpptr trashes rscratch1
-    cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
+    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
     jcc(Assembler::equal, ok);
     stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
     bind(ok);
@@ -7780,6 +8084,7 @@
 
 void MacroAssembler::encode_heap_oop_not_null(Register r) {
   assert (UseCompressedOops, "should be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
 #ifdef ASSERT
   if (CheckCompressedOops) {
     Label ok;
@@ -7790,12 +8095,18 @@
   }
 #endif
   verify_oop(r, "broken oop in encode_heap_oop_not_null");
-  subq(r, r12_heapbase);
-  shrq(r, LogMinObjAlignmentInBytes);
+  if (Universe::narrow_oop_base() != NULL) {
+    subq(r, r12_heapbase);
+  }
+  if (Universe::narrow_oop_shift() != 0) {
+    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    shrq(r, LogMinObjAlignmentInBytes);
+  }
 }
 
 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
   assert (UseCompressedOops, "should be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
 #ifdef ASSERT
   if (CheckCompressedOops) {
     Label ok;
@@ -7809,18 +8120,32 @@
   if (dst != src) {
     movq(dst, src);
   }
-  subq(dst, r12_heapbase);
-  shrq(dst, LogMinObjAlignmentInBytes);
+  if (Universe::narrow_oop_base() != NULL) {
+    subq(dst, r12_heapbase);
+  }
+  if (Universe::narrow_oop_shift() != 0) {
+    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    shrq(dst, LogMinObjAlignmentInBytes);
+  }
 }
 
 void  MacroAssembler::decode_heap_oop(Register r) {
   assert (UseCompressedOops, "should be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  if (Universe::narrow_oop_base() == NULL) {
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      shlq(r, LogMinObjAlignmentInBytes);
+    }
+    verify_oop(r, "broken oop in decode_heap_oop");
+    return;
+  }
 #ifdef ASSERT
   if (CheckCompressedOops) {
     Label ok;
     push(rscratch1);
     cmpptr(r12_heapbase,
-           ExternalAddress((address)Universe::heap_base_addr()));
+           ExternalAddress((address)Universe::narrow_oop_base_addr()));
     jcc(Assembler::equal, ok);
     stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
     bind(ok);
@@ -7844,32 +8169,76 @@
 
 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
   assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
   // Cannot assert, unverified entry point counts instructions (see .ad file)
   // vtableStubs also counts instructions in pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
-  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
-  leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
+  if (Universe::narrow_oop_base() == NULL) {
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      shlq(r, LogMinObjAlignmentInBytes);
+    }
+  } else {
+      assert (Address::times_8 == LogMinObjAlignmentInBytes &&
+              Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
+    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
+  }
 }
 
 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
   assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
   // Cannot assert, unverified entry point counts instructions (see .ad file)
   // vtableStubs also counts instructions in pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
-  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
-  leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+  if (Universe::narrow_oop_shift() != 0) {
+    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
+            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
+    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+  } else if (dst != src) {
+    movq(dst, src);
+  }
 }
 
 void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
-  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  mov_narrow_oop(dst, oop_index, rspec);
+}
+
+void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
   int oop_index = oop_recorder()->find_index(obj);
   RelocationHolder rspec = oop_Relocation::spec(oop_index);
-  mov_literal32(dst, oop_index, rspec, narrow_oop_operand);
+  mov_narrow_oop(dst, oop_index, rspec);
+}
+
+void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
+}
+
+void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
 }
 
 void MacroAssembler::reinit_heapbase() {
   if (UseCompressedOops) {
-    movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
+    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
   }
 }
 #endif // _LP64
--- a/src/cpu/x86/vm/assembler_x86.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -578,20 +578,25 @@
 
   // These are all easily abused and hence protected
 
-  void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format = 0);
-
   // 32BIT ONLY SECTION
 #ifndef _LP64
   // Make these disappear in 64bit mode since they would never be correct
   void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec);   // 32BIT ONLY
   void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
 
+  void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
   void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec);     // 32BIT ONLY
 
   void push_literal32(int32_t imm32, RelocationHolder const& rspec);                 // 32BIT ONLY
 #else
   // 64BIT ONLY SECTION
   void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec);   // 64BIT ONLY
+
+  void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
+  void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
+
+  void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
+  void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
 #endif // _LP64
 
   // These are unique in that we are ensured by the caller that the 32bit
@@ -1219,6 +1224,14 @@
   void popq(Address dst);
 #endif
 
+  void popcntl(Register dst, Address src);
+  void popcntl(Register dst, Register src);
+
+#ifdef _LP64
+  void popcntq(Register dst, Address src);
+  void popcntq(Register dst, Register src);
+#endif
+
   // Prefetches (SSE, SSE2, 3DNOW only)
 
   void prefetchnta(Address src);
@@ -1647,6 +1660,9 @@
   void decode_heap_oop_not_null(Register dst, Register src);
 
   void set_narrow_oop(Register dst, jobject obj);
+  void set_narrow_oop(Address dst, jobject obj);
+  void cmp_narrow_oop(Register dst, jobject obj);
+  void cmp_narrow_oop(Address dst, jobject obj);
 
   // if heap base register is used - reinit it with the correct value
   void reinit_heapbase();
@@ -1791,6 +1807,40 @@
                                Register scan_temp,
                                Label& no_such_interface);
 
+  // Test sub_klass against super_klass, with fast and slow paths.
+
+  // The fast path produces a tri-state answer: yes / no / maybe-slow.
+  // One of the three labels can be NULL, meaning take the fall-through.
+  // If super_check_offset is -1, the value is loaded up from super_klass.
+  // No registers are killed, except temp_reg.
+  void check_klass_subtype_fast_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     Label* L_slow_path,
+                RegisterConstant super_check_offset = RegisterConstant(-1));
+
+  // The rest of the type check; must be wired to a corresponding fast path.
+  // It does not repeat the fast path logic, so don't use it standalone.
+  // The temp_reg and temp2_reg can be noreg, if no temps are available.
+  // Updates the sub's secondary super cache as necessary.
+  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
+  void check_klass_subtype_slow_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     bool set_cond_codes = false);
+
+  // Simplified, combined version, good for typical uses.
+  // Falls through on failure.
+  void check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp_reg,
+                           Label& L_success);
+
   //----
   void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
 
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -1598,18 +1598,9 @@
 
     // get instance klass
     __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
-    // get super_check_offset
-    __ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
-    // See if we get an immediate positive hit
-    __ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1));
-    __ jcc(Assembler::equal, done);
-    // check for immediate negative hit
-    __ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-    __ jcc(Assembler::notEqual, *stub->entry());
-    // check for self
-    __ cmpptr(klass_RInfo, k_RInfo);
-    __ jcc(Assembler::equal, done);
-
+    // perform the fast part of the checking logic
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL);
+    // call out-of-line instance of __ check_klass_subtype_slow_path(...):
     __ push(klass_RInfo);
     __ push(k_RInfo);
     __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
@@ -1735,17 +1726,9 @@
         }
         __ bind(done);
       } else {
-        __ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
-        // See if we get an immediate positive hit
-        __ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1));
-        __ jcc(Assembler::equal, done);
-        // check for immediate negative hit
-        __ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-        __ jcc(Assembler::notEqual, *stub->entry());
-        // check for self
-        __ cmpptr(klass_RInfo, k_RInfo);
-        __ jcc(Assembler::equal, done);
-
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL);
+        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
         __ push(klass_RInfo);
         __ push(k_RInfo);
         __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
@@ -1821,23 +1804,15 @@
           __ pop(dst);
           __ jmp(done);
         }
-      } else {
-#else
-      { // YUCK
+      }
+        else // next block is unconditional if LP64:
 #endif // LP64
+      {
         assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
 
-        __ movl(dst, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
-        // See if we get an immediate positive hit
-        __ cmpptr(k_RInfo, Address(klass_RInfo, dst, Address::times_1));
-        __ jcc(Assembler::equal, one);
-        // check for immediate negative hit
-        __ cmpl(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-        __ jcc(Assembler::notEqual, zero);
-        // check for self
-        __ cmpptr(klass_RInfo, k_RInfo);
-        __ jcc(Assembler::equal, one);
-
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, dst, &one, &zero, NULL);
+        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
         __ push(klass_RInfo);
         __ push(k_RInfo);
         __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -1354,6 +1354,13 @@
 
     case slow_subtype_check_id:
       {
+        // Typical calling sequence:
+        // __ push(klass_RInfo);  // object klass or other subclass
+        // __ push(sup_k_RInfo);  // array element klass or other superclass
+        // __ call(slow_subtype_check);
+        // Note that the subclass is pushed first, and is therefore deepest.
+        // Previous versions of this code reversed the names 'sub' and 'super'.
+        // This was operationally harmless but made the code unreadable.
         enum layout {
           rax_off, SLOT2(raxH_off)
           rcx_off, SLOT2(rcxH_off)
@@ -1361,9 +1368,10 @@
           rdi_off, SLOT2(rdiH_off)
           // saved_rbp_off, SLOT2(saved_rbpH_off)
           return_off, SLOT2(returnH_off)
-          sub_off, SLOT2(subH_off)
-          super_off, SLOT2(superH_off)
-          framesize
+          sup_k_off, SLOT2(sup_kH_off)
+          klass_off, SLOT2(superH_off)
+          framesize,
+          result_off = klass_off  // deepest argument is also the return value
         };
 
         __ set_info("slow_subtype_check", dont_gc_arguments);
@@ -1373,19 +1381,14 @@
         __ push(rax);
 
         // This is called by pushing args and not with C abi
-        __ movptr(rsi, Address(rsp, (super_off) * VMRegImpl::stack_slot_size)); // super
-        __ movptr(rax, Address(rsp, (sub_off  ) * VMRegImpl::stack_slot_size)); // sub
-
-        __ movptr(rdi,Address(rsi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
-        // since size is postive movl does right thing on 64bit
-        __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
-        __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+        __ movptr(rsi, Address(rsp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass
+        __ movptr(rax, Address(rsp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass
 
         Label miss;
-        __ repne_scan();
-        __ jcc(Assembler::notEqual, miss);
-        __ movptr(Address(rsi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax);
-        __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), 1); // result
+        __ check_klass_subtype_slow_path(rsi, rax, rcx, rdi, NULL, &miss);
+
+        // fallthrough on success:
+        __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), 1); // result
         __ pop(rax);
         __ pop(rcx);
         __ pop(rsi);
@@ -1393,7 +1396,7 @@
         __ ret(0);
 
         __ bind(miss);
-        __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result
+        __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result
         __ pop(rax);
         __ pop(rcx);
         __ pop(rsi);
--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -219,47 +219,16 @@
   // Resets EDI to locals.  Register sub_klass cannot be any of the above.
 void InterpreterMacroAssembler::gen_subtype_check( Register Rsub_klass, Label &ok_is_subtype ) {
   assert( Rsub_klass != rax, "rax, holds superklass" );
-  assert( Rsub_klass != rcx, "rcx holds 2ndary super array length" );
-  assert( Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr" );
-  Label not_subtype, loop;
+  assert( Rsub_klass != rcx, "used as a temp" );
+  assert( Rsub_klass != rdi, "used as a temp, restored from locals" );
 
   // Profile the not-null value's klass.
-  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi
-
-  // Load the super-klass's check offset into ECX
-  movl( rcx, Address(rax, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() ) );
-  // Load from the sub-klass's super-class display list, or a 1-word cache of
-  // the secondary superclass list, or a failing value with a sentinel offset
-  // if the super-klass is an interface or exceptionally deep in the Java
-  // hierarchy and we have to scan the secondary superclass list the hard way.
-  // See if we get an immediate positive hit
-  cmpptr( rax, Address(Rsub_klass,rcx,Address::times_1) );
-  jcc( Assembler::equal,ok_is_subtype );
+  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi
 
-  // Check for immediate negative hit
-  cmpl( rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
-  jcc( Assembler::notEqual, not_subtype );
-  // Check for self
-  cmpptr( Rsub_klass, rax );
-  jcc( Assembler::equal, ok_is_subtype );
+  // Do the check.
+  check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx
 
-  // Now do a linear scan of the secondary super-klass chain.
-  movptr( rdi, Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()) );
-  // EDI holds the objArrayOop of secondary supers.
-  movl( rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));// Load the array length
-  // Skip to start of data; also clear Z flag incase ECX is zero
-  addptr( rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT) );
-  // Scan ECX words at [EDI] for occurance of EAX
-  // Set NZ/Z based on last compare
-  repne_scan();
-  restore_locals();           // Restore EDI; Must not blow flags
-  // Not equal?
-  jcc( Assembler::notEqual, not_subtype );
-  // Must be equal but missed in cache.  Update cache.
-  movptr( Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax );
-  jmp( ok_is_subtype );
-
-  bind(not_subtype);
+  // Profile the failure of the check.
   profile_typecheck_failed(rcx); // blows rcx
 }
 
--- a/src/cpu/x86/vm/interp_masm_x86_64.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -232,65 +232,13 @@
   assert(Rsub_klass != rcx, "rcx holds 2ndary super array length");
   assert(Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr");
 
-  Label not_subtype, not_subtype_pop, loop;
-
   // Profile the not-null value's klass.
-  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi
-
-  // Load the super-klass's check offset into rcx
-  movl(rcx, Address(rax, sizeof(oopDesc) +
-                    Klass::super_check_offset_offset_in_bytes()));
-  // Load from the sub-klass's super-class display list, or a 1-word
-  // cache of the secondary superclass list, or a failing value with a
-  // sentinel offset if the super-klass is an interface or
-  // exceptionally deep in the Java hierarchy and we have to scan the
-  // secondary superclass list the hard way.  See if we get an
-  // immediate positive hit
-  cmpptr(rax, Address(Rsub_klass, rcx, Address::times_1));
-  jcc(Assembler::equal,ok_is_subtype);
-
-  // Check for immediate negative hit
-  cmpl(rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-  jcc( Assembler::notEqual, not_subtype );
-  // Check for self
-  cmpptr(Rsub_klass, rax);
-  jcc(Assembler::equal, ok_is_subtype);
+  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi
 
-  // Now do a linear scan of the secondary super-klass chain.
-  movptr(rdi, Address(Rsub_klass, sizeof(oopDesc) +
-                      Klass::secondary_supers_offset_in_bytes()));
-  // rdi holds the objArrayOop of secondary supers.
-  // Load the array length
-  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
-  // Skip to start of data; also clear Z flag incase rcx is zero
-  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-  // Scan rcx words at [rdi] for occurance of rax
-  // Set NZ/Z based on last compare
+  // Do the check.
+  check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx
 
-  // this part is kind tricky, as values in supers array could be 32 or 64 bit wide
-  // and we store values in objArrays always encoded, thus we need to encode value
-  // before repne
-  if (UseCompressedOops) {
-    push(rax);
-    encode_heap_oop(rax);
-    repne_scanl();
-    // Not equal?
-    jcc(Assembler::notEqual, not_subtype_pop);
-    // restore heap oop here for movq
-    pop(rax);
-  } else {
-    repne_scan();
-    jcc(Assembler::notEqual, not_subtype);
-  }
-  // Must be equal but missed in cache.  Update cache.
-  movptr(Address(Rsub_klass, sizeof(oopDesc) +
-               Klass::secondary_super_cache_offset_in_bytes()), rax);
-  jmp(ok_is_subtype);
-
-  bind(not_subtype_pop);
-  // restore heap oop here for miss
-  if (UseCompressedOops) pop(rax);
-  bind(not_subtype);
+  // Profile the failure of the check.
   profile_typecheck_failed(rcx); // blows rcx
 }
 
--- a/src/cpu/x86/vm/interpreterRT_x86_64.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/interpreterRT_x86_64.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -349,7 +349,7 @@
 
     if (_num_args < Argument::n_float_register_parameters_c-1) {
       *_reg_args++ = from_obj;
-      *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float
+      *_fp_identifiers |= (intptr_t)(0x01 << (_num_args*2)); // mark as float
       _num_args++;
     } else {
       *_to++ = from_obj;
@@ -364,7 +364,7 @@
 
     if (_num_args < Argument::n_float_register_parameters_c-1) {
       *_reg_args++ = from_obj;
-      *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double
+      *_fp_identifiers |= (intptr_t)(0x3 << (_num_args*2)); // mark as double
       _num_args++;
     } else {
       *_to++ = from_obj;
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -1310,81 +1310,51 @@
                            Address& super_check_offset_addr,
                            Address& super_klass_addr,
                            Register temp,
-                           Label* L_success_ptr, Label* L_failure_ptr) {
+                           Label* L_success, Label* L_failure) {
     BLOCK_COMMENT("type_check:");
 
     Label L_fallthrough;
-    bool fall_through_on_success = (L_success_ptr == NULL);
-    if (fall_through_on_success) {
-      L_success_ptr = &L_fallthrough;
-    } else {
-      L_failure_ptr = &L_fallthrough;
-    }
-    Label& L_success = *L_success_ptr;
-    Label& L_failure = *L_failure_ptr;
+#define LOCAL_JCC(assembler_con, label_ptr)                             \
+    if (label_ptr != NULL)  __ jcc(assembler_con, *(label_ptr));        \
+    else                    __ jcc(assembler_con, L_fallthrough) /*omit semi*/
 
+    // The following is a strange variation of the fast path which requires
+    // one less register, because needed values are on the argument stack.
+    // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp,
+    //                                  L_success, L_failure, NULL);
     assert_different_registers(sub_klass, temp);
 
-    // a couple of useful fields in sub_klass:
-    int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
-                     Klass::secondary_supers_offset_in_bytes());
     int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                      Klass::secondary_super_cache_offset_in_bytes());
-    Address secondary_supers_addr(sub_klass, ss_offset);
-    Address super_cache_addr(     sub_klass, sc_offset);
 
     // if the pointers are equal, we are done (e.g., String[] elements)
     __ cmpptr(sub_klass, super_klass_addr);
-    __ jcc(Assembler::equal, L_success);
+    LOCAL_JCC(Assembler::equal, L_success);
 
     // check the supertype display:
     __ movl2ptr(temp, super_check_offset_addr);
     Address super_check_addr(sub_klass, temp, Address::times_1, 0);
     __ movptr(temp, super_check_addr); // load displayed supertype
     __ cmpptr(temp, super_klass_addr); // test the super type
-    __ jcc(Assembler::equal, L_success);
+    LOCAL_JCC(Assembler::equal, L_success);
 
     // if it was a primary super, we can just fail immediately
     __ cmpl(super_check_offset_addr, sc_offset);
-    __ jcc(Assembler::notEqual, L_failure);
-
-    // Now do a linear scan of the secondary super-klass chain.
-    // This code is rarely used, so simplicity is a virtue here.
-    inc_counter_np(SharedRuntime::_partial_subtype_ctr);
-    {
-      // The repne_scan instruction uses fixed registers, which we must spill.
-      // (We need a couple more temps in any case.)
-      __ push(rax);
-      __ push(rcx);
-      __ push(rdi);
-      assert_different_registers(sub_klass, rax, rcx, rdi);
+    LOCAL_JCC(Assembler::notEqual, L_failure);
 
-      __ movptr(rdi, secondary_supers_addr);
-      // Load the array length.
-      __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
-      // Skip to start of data.
-      __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-      // Scan rcx words at [edi] for occurance of rax,
-      // Set NZ/Z based on last compare
-      __ movptr(rax, super_klass_addr);
-      __ repne_scan();
+    // The repne_scan instruction uses fixed registers, which will get spilled.
+    // We happen to know this works best when super_klass is in rax.
+    Register super_klass = temp;
+    __ movptr(super_klass, super_klass_addr);
+    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg,
+                                     L_success, L_failure);
 
-      // Unspill the temp. registers:
-      __ pop(rdi);
-      __ pop(rcx);
-      __ pop(rax);
-    }
-    __ jcc(Assembler::notEqual, L_failure);
+    __ bind(L_fallthrough);
 
-    // Success.  Cache the super we found and proceed in triumph.
-    __ movptr(temp, super_klass_addr); // note: rax, is dead
-    __ movptr(super_cache_addr, temp);
+    if (L_success == NULL) { BLOCK_COMMENT("L_success:"); }
+    if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); }
 
-    if (!fall_through_on_success)
-      __ jmp(L_success);
-
-    // Fall through on failure!
-    __ bind(L_fallthrough);
+#undef LOCAL_JCC
   }
 
   //
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -2091,66 +2091,9 @@
 
     Label L_miss;
 
-    // a couple of useful fields in sub_klass:
-    int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
-                     Klass::secondary_supers_offset_in_bytes());
-    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                     Klass::secondary_super_cache_offset_in_bytes());
-    Address secondary_supers_addr(sub_klass, ss_offset);
-    Address super_cache_addr(     sub_klass, sc_offset);
-
-    // if the pointers are equal, we are done (e.g., String[] elements)
-    __ cmpptr(super_klass, sub_klass);
-    __ jcc(Assembler::equal, L_success);
-
-    // check the supertype display:
-    Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
-    __ cmpptr(super_klass, super_check_addr); // test the super type
-    __ jcc(Assembler::equal, L_success);
-
-    // if it was a primary super, we can just fail immediately
-    __ cmpl(super_check_offset, sc_offset);
-    __ jcc(Assembler::notEqual, L_miss);
-
-    // Now do a linear scan of the secondary super-klass chain.
-    // The repne_scan instruction uses fixed registers, which we must spill.
-    // (We need a couple more temps in any case.)
-    // This code is rarely used, so simplicity is a virtue here.
-    inc_counter_np(SharedRuntime::_partial_subtype_ctr);
-    {
-      __ push(rax);
-      __ push(rcx);
-      __ push(rdi);
-      assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);
-
-      __ movptr(rdi, secondary_supers_addr);
-      // Load the array length.
-      __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
-      // Skip to start of data.
-      __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-      // Scan rcx words at [rdi] for occurance of rax
-      // Set NZ/Z based on last compare
-      __ movptr(rax, super_klass);
-      if (UseCompressedOops) {
-        // Compare against compressed form.  Don't need to uncompress because
-        // looks like orig rax is restored in popq below.
-        __ encode_heap_oop(rax);
-        __ repne_scanl();
-      } else {
-        __ repne_scan();
-      }
-
-      // Unspill the temp. registers:
-      __ pop(rdi);
-      __ pop(rcx);
-      __ pop(rax);
-
-      __ jcc(Assembler::notEqual, L_miss);
-    }
-
-    // Success.  Cache the super we found and proceed in triumph.
-    __ movptr(super_cache_addr, super_klass); // note: rax is dead
-    __ jmp(L_success);
+    __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg,        &L_success, &L_miss, NULL,
+                                     super_check_offset);
+    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
 
     // Fall through on failure!
     __ BIND(L_miss);
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -284,7 +284,7 @@
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -297,6 +297,7 @@
                (supports_ssse3()? ", ssse3": ""),
                (supports_sse4_1() ? ", sse4.1" : ""),
                (supports_sse4_2() ? ", sse4.2" : ""),
+               (supports_popcnt() ? ", popcnt" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow()   ? ", 3dnow"  : ""),
                (supports_3dnow2()  ? ", 3dnowext" : ""),
@@ -410,6 +411,13 @@
     }
   }
 
+  // Use population count instruction if available.
+  if (supports_popcnt()) {
+    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+      UsePopCountInstruction = true;
+    }
+  }
+
   assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
   assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
 
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -70,7 +70,9 @@
                dca      : 1,
                sse4_1   : 1,
                sse4_2   : 1,
-                        : 11;
+                        : 2,
+               popcnt   : 1,
+                        : 8;
     } bits;
   };
 
@@ -179,7 +181,8 @@
      CPU_SSSE3  = (1 << 9),
      CPU_SSE4A  = (1 << 10),
      CPU_SSE4_1 = (1 << 11),
-     CPU_SSE4_2 = (1 << 12)
+     CPU_SSE4_2 = (1 << 12),
+     CPU_POPCNT = (1 << 13)
    } cpuFeatureFlags;
 
   // cpuid information block.  All info derived from executing cpuid with
@@ -290,6 +293,8 @@
       result |= CPU_SSE4_1;
     if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
       result |= CPU_SSE4_2;
+    if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
+      result |= CPU_POPCNT;
     return result;
   }
 
@@ -379,6 +384,7 @@
   static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
   static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
   static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
+  static bool supports_popcnt()   { return (_cpuFeatures & CPU_POPCNT) != 0; }
   //
   // AMD features
   //
--- a/src/cpu/x86/vm/x86_32.ad	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/x86_32.ad	Thu Mar 19 09:13:24 2009 -0700
@@ -1483,16 +1483,20 @@
   // main source block for now.  In future, we can generalize this by
   // adding a syntax that specifies the sizes of fields in an order,
   // so that the adlc can build the emit functions automagically
-  enc_class OpcP %{             // Emit opcode
-    emit_opcode(cbuf,$primary);
-  %}
-
-  enc_class OpcS %{             // Emit opcode
-    emit_opcode(cbuf,$secondary);
-  %}
-
-  enc_class Opcode(immI d8 ) %{ // Emit opcode
-    emit_opcode(cbuf,$d8$$constant);
+
+  // Emit primary opcode
+  enc_class OpcP %{
+    emit_opcode(cbuf, $primary);
+  %}
+
+  // Emit secondary opcode
+  enc_class OpcS %{
+    emit_opcode(cbuf, $secondary);
+  %}
+
+  // Emit opcode directly
+  enc_class Opcode(immI d8) %{
+    emit_opcode(cbuf, $d8$$constant);
   %}
 
   enc_class SizePrefix %{
@@ -1688,26 +1692,15 @@
     Register Reax = as_Register(EAX_enc); // super class
     Register Recx = as_Register(ECX_enc); // killed
     Register Resi = as_Register(ESI_enc); // sub class
-    Label hit, miss;
+    Label miss;
 
     MacroAssembler _masm(&cbuf);
-    // Compare super with sub directly, since super is not in its own SSA.
-    // The compiler used to emit this test, but we fold it in here,
-    // to allow platform-specific tweaking on sparc.
-    __ cmpptr(Reax, Resi);
-    __ jcc(Assembler::equal, hit);
-#ifndef PRODUCT
-    __ incrementl(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
-#endif //PRODUCT
-    __ movptr(Redi,Address(Resi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
-    __ movl(Recx,Address(Redi,arrayOopDesc::length_offset_in_bytes()));
-    __ addptr(Redi,arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-    __ repne_scan();
-    __ jcc(Assembler::notEqual, miss);
-    __ movptr(Address(Resi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()),Reax);
-    __ bind(hit);
-    if( $primary )
-      __ xorptr(Redi,Redi);
+    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
+                                     NULL, &miss,
+                                     /*set_cond_codes:*/ true);
+    if ($primary) {
+      __ xorptr(Redi, Redi);
+    }
     __ bind(miss);
   %}
 
@@ -6387,6 +6380,67 @@
 %}
 
 
+//---------- Population Count Instructions -------------------------------------
+
+instruct popCountI(eRegI dst, eRegI src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+
+  format %{ "POPCNT $dst, $src" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct popCountI_mem(eRegI dst, memory mem) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI (LoadI mem)));
+
+  format %{ "POPCNT $dst, $mem" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL src));
+  effect(KILL cr, TEMP tmp, TEMP dst);
+
+  format %{ "POPCNT $dst, $src.lo\n\t"
+            "POPCNT $tmp, $src.hi\n\t"
+            "ADD    $dst, $tmp" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $src$$Register);
+    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
+    __ addl($dst$$Register, $tmp$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL (LoadL mem)));
+  effect(KILL cr, TEMP tmp, TEMP dst);
+
+  format %{ "POPCNT $dst, $mem\n\t"
+            "POPCNT $tmp, $mem+4\n\t"
+            "ADD    $dst, $tmp" %}
+  ins_encode %{
+    //__ popcntl($dst$$Register, $mem$$Address$$first);
+    //__ popcntl($tmp$$Register, $mem$$Address$$second);
+    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
+    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
+    __ addl($dst$$Register, $tmp$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+
 //----------Load/Store/Move Instructions---------------------------------------
 //----------Load Instructions--------------------------------------------------
 // Load Byte (8bit signed)
@@ -12501,15 +12555,12 @@
   effect( KILL rcx, KILL cr );
 
   ins_cost(1100);  // slightly larger than the next version
-  format %{ "CMPL   EAX,ESI\n\t"
-            "JEQ,s  hit\n\t"
-            "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
+  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
             "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
             "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
-     "hit:\n\t"
             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
      "miss:\t" %}
 
@@ -12523,9 +12574,7 @@
   effect( KILL rcx, KILL result );
 
   ins_cost(1000);
-  format %{ "CMPL   EAX,ESI\n\t"
-            "JEQ,s  miss\t# Actually a hit; we are done.\n\t"
-            "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
+  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
             "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
             "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
--- a/src/cpu/x86/vm/x86_64.ad	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/x86_64.ad	Thu Mar 19 09:13:24 2009 -0700
@@ -326,7 +326,6 @@
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
-                         R12, R12_H,
                          R13, R13_H,
                          R14, R14_H);
 
@@ -340,7 +339,6 @@
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
-                         R12, R12_H,
                          R13, R13_H,
                          R14, R14_H);
 
@@ -354,7 +352,6 @@
                              R9,  R9_H,
                              R10, R10_H,
                              R11, R11_H,
-                             R12, R12_H,
                              R13, R13_H,
                              R14, R14_H);
 
@@ -444,9 +441,6 @@
 // Singleton class for RDX long register
 reg_class long_rdx_reg(RDX, RDX_H);
 
-// Singleton class for R12 long register
-reg_class long_r12_reg(R12, R12_H);
-
 // Class for all int registers (except RSP)
 reg_class int_reg(RAX,
                   RDX,
@@ -1842,7 +1836,9 @@
 {
   if (UseCompressedOops) {
     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
-    st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
+    if (Universe::narrow_oop_shift() != 0) {
+      st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
+    }
     st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
   } else {
     st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
@@ -1891,7 +1887,11 @@
 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 {
   if (UseCompressedOops) {
-    return OptoBreakpoint ? 19 : 20;
+    if (Universe::narrow_oop_shift() == 0) {
+      return OptoBreakpoint ? 15 : 16;
+    } else {
+      return OptoBreakpoint ? 19 : 20;
+    }
   } else {
     return OptoBreakpoint ? 11 : 12;
   }
@@ -2575,45 +2575,13 @@
     Register Rrax = as_Register(RAX_enc); // super class
     Register Rrcx = as_Register(RCX_enc); // killed
     Register Rrsi = as_Register(RSI_enc); // sub class
-    Label hit, miss, cmiss;
+    Label miss;
+    const bool set_cond_codes = true;
 
     MacroAssembler _masm(&cbuf);
-    // Compare super with sub directly, since super is not in its own SSA.
-    // The compiler used to emit this test, but we fold it in here,
-    // to allow platform-specific tweaking on sparc.
-    __ cmpptr(Rrax, Rrsi);
-    __ jcc(Assembler::equal, hit);
-#ifndef PRODUCT
-    __ lea(Rrcx, ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
-    __ incrementl(Address(Rrcx, 0));
-#endif //PRODUCT
-    __ movptr(Rrdi, Address(Rrsi, 
-                          sizeof(oopDesc) + 
-                          Klass::secondary_supers_offset_in_bytes()));
-    __ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes()));
-    __ addptr(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-    if (UseCompressedOops) {
-      __ encode_heap_oop(Rrax);
-      __ repne_scanl();
-      __ jcc(Assembler::notEqual, cmiss);
-      __ decode_heap_oop(Rrax);
-      __ movptr(Address(Rrsi,
-                      sizeof(oopDesc) +
-                      Klass::secondary_super_cache_offset_in_bytes()),
-              Rrax);
-      __ jmp(hit);
-      __ bind(cmiss);
-      __ decode_heap_oop(Rrax);
-      __ jmp(miss);
-    } else {
-      __ repne_scan();
-      __ jcc(Assembler::notEqual, miss);
-      __ movptr(Address(Rrsi,
-                      sizeof(oopDesc) +
-                      Klass::secondary_super_cache_offset_in_bytes()),
-              Rrax);
-    }
-    __ bind(hit);
+    __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
+                                     NULL, &miss,
+                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ xorptr(Rrdi, Rrdi);
     }
@@ -4906,15 +4874,6 @@
   interface(REG_INTER);
 %}
 
-
-operand r12RegL() %{
-  constraint(ALLOC_IN_RC(long_r12_reg));
-  match(RegL);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
 operand rRegN() %{
   constraint(ALLOC_IN_RC(int_reg));
   match(RegN);
@@ -5289,21 +5248,6 @@
   %}
 %}
 
-// Indirect Narrow Oop Plus Offset Operand
-operand indNarrowOopOffset(rRegN src, immL32 off) %{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeN src) off);
-
-  op_cost(10);
-  format %{"[R12 + $src << 3 + $off] (compressed oop addressing)" %}
-  interface(MEMORY_INTER) %{
-    base(0xc); // R12
-    index($src);
-    scale(0x3);
-    disp($off);
-  %}
-%}
-
 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 %{
@@ -5321,6 +5265,158 @@
   %}
 %}
 
+// Indirect Narrow Oop Plus Offset Operand
+// Note: x86 architecture doesn't support "scale * index + offset" without a base
+// we can't free r12 even with Universe::narrow_oop_base() == NULL.
+operand indCompressedOopOffset(rRegN reg, immL32 off) %{
+  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+
+  op_cost(10);
+  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
+  interface(MEMORY_INTER) %{
+    base(0xc); // R12
+    index($reg);
+    scale(0x3);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Operand
+operand indirectNarrow(rRegN reg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(DecodeN reg);
+
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Plus Short Offset Operand
+operand indOffset8Narrow(rRegN reg, immL8 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+
+  format %{ "[$reg + $off (8-bit)]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Long Offset Operand
+operand indOffset32Narrow(rRegN reg, immL32 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+
+  format %{ "[$reg + $off (32-bit)]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Index Register Plus Offset Operand
+operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) lreg) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $lreg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Index Register Plus Offset Operand
+operand indIndexNarrow(rRegN reg, rRegL lreg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) lreg);
+
+  op_cost(10);
+  format %{"[$reg + $lreg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Times Scale Plus Index Register
+operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) (LShiftL lreg scale));
+
+  op_cost(10);
+  format %{"[$reg + $lreg << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
+operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $lreg << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
+operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
+  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $idx << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($idx);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+
 //----------Special Memory Operands--------------------------------------------
 // Stack Slot Operand - This operand is used for loading and storing temporary
 //                      values on the stack where a match requires a value to
@@ -5488,7 +5584,10 @@
 
 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
-               indNarrowOopOffset);
+               indCompressedOopOffset,
+               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
+               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
+               indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
 
 //----------PIPELINE-----------------------------------------------------------
 // Rules which define the behavior of the target architectures pipeline.
@@ -6234,9 +6333,7 @@
    ins_cost(125); // XXX
    format %{ "movl    $dst, $mem\t# compressed ptr" %}
    ins_encode %{
-     Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-     Register dst = as_Register($dst$$reg);
-     __ movl(dst, addr);
+     __ movl($dst$$Register, $mem$$Address);
    %}
    ins_pipe(ialu_reg_mem); // XXX
 %}
@@ -6262,9 +6359,7 @@
   ins_cost(125); // XXX
   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
   ins_encode %{
-    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    Register dst = as_Register($dst$$reg);
-    __ movl(dst, addr);
+    __ movl($dst$$Register, $mem$$Address);
   %}
   ins_pipe(ialu_reg_mem); // XXX
 %}
@@ -6418,6 +6513,102 @@
   ins_pipe(ialu_reg_reg_fat);
 %}
 
+instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
+%{
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+// Load Effective Address which uses Narrow (32-bits) oop
+instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110); // XXX
+  format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
 instruct loadConI(rRegI dst, immI src)
 %{
   match(Set dst src);
@@ -6528,8 +6719,7 @@
   effect(KILL cr);
   format %{ "xorq    $dst, $src\t# compressed NULL ptr" %}
   ins_encode %{
-    Register dst = $dst$$Register;
-    __ xorq(dst, dst);
+    __ xorq($dst$$Register, $dst$$Register);
   %}
   ins_pipe(ialu_reg);
 %}
@@ -6541,11 +6731,10 @@
   format %{ "movl    $dst, $src\t# compressed ptr" %}
   ins_encode %{
     address con = (address)$src$$constant;
-    Register dst = $dst$$Register;
     if (con == NULL) {
       ShouldNotReachHere();
     } else {
-      __ set_narrow_oop(dst, (jobject)$src$$constant);
+      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
     }
   %}
   ins_pipe(ialu_reg_fat); // XXX
@@ -6794,12 +6983,25 @@
   ins_pipe(ialu_mem_reg);
 %}
 
+instruct storeImmP0(memory mem, immP0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreP mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movq($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 // Store NULL Pointer, mark word, or other simple pointer constant.
 instruct storeImmP(memory mem, immP31 src)
 %{
   match(Set mem (StoreP mem src));
 
-  ins_cost(125); // XXX
+  ins_cost(150); // XXX
   format %{ "movq    $mem, $src\t# ptr" %}
   opcode(0xC7); /* C7 /0 */
   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
@@ -6814,14 +7016,55 @@
   ins_cost(125); // XXX
   format %{ "movl    $mem, $src\t# compressed ptr" %}
   ins_encode %{
-    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    Register src = as_Register($src$$reg);
-    __ movl(addr, src);
+    __ movl($mem$$Address, $src$$Register);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
+instruct storeImmN0(memory mem, immN0 zero)
+%{
+  predicate(Universe::narrow_oop_base() == NULL);
+  match(Set mem (StoreN mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movl($mem$$Address, r12);
   %}
   ins_pipe(ialu_mem_reg);
 %}
 
+instruct storeImmN(memory mem, immN src)
+%{
+  match(Set mem (StoreN mem src));
+
+  ins_cost(150); // XXX
+  format %{ "movl    $mem, $src\t# compressed ptr" %}
+  ins_encode %{
+    address con = (address)$src$$constant;
+    if (con == NULL) {
+      __ movl($mem$$Address, (int32_t)0);
+    } else {
+      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
+    }
+  %}
+  ins_pipe(ialu_mem_imm);
+%}
+
 // Store Integer Immediate
+instruct storeImmI0(memory mem, immI0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreI mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movl($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmI(memory mem, immI src)
 %{
   match(Set mem (StoreI mem src));
@@ -6834,6 +7077,19 @@
 %}
 
 // Store Long Immediate
+instruct storeImmL0(memory mem, immL0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreL mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movq($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmL(memory mem, immL32 src)
 %{
   match(Set mem (StoreL mem src));
@@ -6846,6 +7102,19 @@
 %}
 
 // Store Short/Char Immediate
+instruct storeImmC0(memory mem, immI0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreC mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movw($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmI16(memory mem, immI16 src)
 %{
   predicate(UseStoreImmI16);
@@ -6859,6 +7128,19 @@
 %}
 
 // Store Byte Immediate
+instruct storeImmB0(memory mem, immI0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreB mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movb($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmB(memory mem, immI8 src)
 %{
   match(Set mem (StoreB mem src));
@@ -6898,6 +7180,19 @@
 %}
 
 // Store CMS card-mark Immediate
+instruct storeImmCM0_reg(memory mem, immI0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreCM mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movb($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmCM0(memory mem, immI0 src)
 %{
   match(Set mem (StoreCM mem src));
@@ -6931,6 +7226,19 @@
 %}
 
 // Store immediate Float value (it is faster than store from XMM register)
+instruct storeF0(memory mem, immF0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreF mem zero));
+
+  ins_cost(25); // XXX
+  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movl($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeF_imm(memory mem, immF src)
 %{
   match(Set mem (StoreF mem src));
@@ -6957,6 +7265,7 @@
 // Store immediate double 0.0 (it is faster than store from XMM register)
 instruct storeD0_imm(memory mem, immD0 src)
 %{
+  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
   match(Set mem (StoreD mem src));
 
   ins_cost(50);
@@ -6966,6 +7275,19 @@
   ins_pipe(ialu_mem_imm);
 %}
 
+instruct storeD0(memory mem, immD0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreD mem zero));
+
+  ins_cost(25); // XXX
+  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movq($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeSSI(stackSlotI dst, rRegI src)
 %{
   match(Set dst src);
@@ -7077,6 +7399,56 @@
   ins_pipe( ialu_mem_reg );
 %}
 
+
+//---------- Population Count Instructions -------------------------------------
+
+instruct popCountI(rRegI dst, rRegI src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+
+  format %{ "popcnt  $dst, $src" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct popCountI_mem(rRegI dst, memory mem) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI (LoadI mem)));
+
+  format %{ "popcnt  $dst, $mem" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL(rRegI dst, rRegL src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL src));
+
+  format %{ "popcnt  $dst, $src" %}
+  ins_encode %{
+    __ popcntq($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL_mem(rRegI dst, memory mem) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL (LoadL mem)));
+
+  format %{ "popcnt  $dst, $mem" %}
+  ins_encode %{
+    __ popcntq($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+
 //----------MemBar Instructions-----------------------------------------------
 // Memory barrier flavors
 
@@ -7192,9 +7564,7 @@
   effect(KILL cr);
   format %{ "encode_heap_oop_not_null $dst,$src" %}
   ins_encode %{
-    Register s = $src$$Register;
-    Register d = $dst$$Register;
-    __ encode_heap_oop_not_null(d, s);
+    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
   %}
   ins_pipe(ialu_reg_long);
 %}
@@ -7224,7 +7594,11 @@
   ins_encode %{
     Register s = $src$$Register;
     Register d = $dst$$Register;
-    __ decode_heap_oop_not_null(d, s);
+    if (s != d) {
+      __ decode_heap_oop_not_null(d, s);
+    } else {
+      __ decode_heap_oop_not_null(d);
+    }
   %}
   ins_pipe(ialu_reg_long);
 %}
@@ -11389,8 +11763,9 @@
 
 // This will generate a signed flags result. This should be OK since
 // any compare to a zero should be eq/neq.
-instruct testP_reg_mem(rFlagsReg cr, memory op, immP0 zero)
-%{
+instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
+%{
+  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
   match(Set cr (CmpP (LoadP op) zero));
 
   ins_cost(500); // XXX
@@ -11401,13 +11776,24 @@
   ins_pipe(ialu_cr_reg_imm);
 %}
 
+instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set cr (CmpP (LoadP mem) zero));
+
+  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
+  ins_encode %{
+    __ cmpq(r12, $mem$$Address);
+  %}
+  ins_pipe(ialu_cr_reg_mem);
+%}
 
 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
 %{
   match(Set cr (CmpN op1 op2));
 
   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
-  ins_encode %{ __ cmpl(as_Register($op1$$reg), as_Register($op2$$reg)); %}
+  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
   ins_pipe(ialu_cr_reg_reg);
 %}
 
@@ -11415,11 +11801,30 @@
 %{
   match(Set cr (CmpN src (LoadN mem)));
 
-  ins_cost(500); // XXX
-  format %{ "cmpl    $src, mem\t# compressed ptr" %}
+  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
+  ins_encode %{
+    __ cmpl($src$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_cr_reg_mem);
+%}
+
+instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
+  match(Set cr (CmpN op1 op2));
+
+  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
   ins_encode %{
-    Address adr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ cmpl(as_Register($src$$reg), adr);
+    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
+  %}
+  ins_pipe(ialu_cr_reg_imm);
+%}
+
+instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
+%{
+  match(Set cr (CmpN src (LoadN mem)));
+
+  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
+  ins_encode %{
+    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
   %}
   ins_pipe(ialu_cr_reg_mem);
 %}
@@ -11432,15 +11837,27 @@
   ins_pipe(ialu_cr_reg_imm);
 %}
 
-instruct testN_reg_mem(rFlagsReg cr, memory mem, immN0 zero)
-%{
+instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
+%{
+  predicate(Universe::narrow_oop_base() != NULL);
   match(Set cr (CmpN (LoadN mem) zero));
 
   ins_cost(500); // XXX
   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
   ins_encode %{
-    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ cmpl(addr, (int)0xFFFFFFFF);
+    __ cmpl($mem$$Address, (int)0xFFFFFFFF);
+  %}
+  ins_pipe(ialu_cr_reg_mem);
+%}
+
+instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
+%{
+  predicate(Universe::narrow_oop_base() == NULL);
+  match(Set cr (CmpN (LoadN mem) zero));
+
+  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
+  ins_encode %{
+    __ cmpl(r12, $mem$$Address);
   %}
   ins_pipe(ialu_cr_reg_mem);
 %}
@@ -11472,7 +11889,6 @@
 %{
   match(Set cr (CmpL op1 (LoadL op2)));
 
-  ins_cost(500); // XXX
   format %{ "cmpq    $op1, $op2" %}
   opcode(0x3B); /* Opcode 3B /r */
   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
@@ -11733,15 +12149,12 @@
   effect(KILL rcx, KILL cr);
 
   ins_cost(1100);  // slightly larger than the next version
-  format %{ "cmpq    rax, rsi\n\t"
-            "jeq,s   hit\n\t"
-            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
-    "hit:\n\t"
             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
     "miss:\t" %}
 
@@ -11756,13 +12169,10 @@
                                      rdi_RegP result)
 %{
   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
-  predicate(!UseCompressedOops); // decoding oop kills condition codes
   effect(KILL rcx, KILL result);
 
   ins_cost(1000);
-  format %{ "cmpq    rax, rsi\n\t"
-            "jeq,s   miss\t# Actually a hit; we are done.\n\t"
-            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
--- a/src/os/linux/vm/os_linux.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os/linux/vm/os_linux.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -2582,7 +2582,7 @@
 #define SHM_HUGETLB 04000
 #endif
 
-char* os::reserve_memory_special(size_t bytes) {
+char* os::reserve_memory_special(size_t bytes, char* req_addr) {
   assert(UseLargePages, "only for large pages");
 
   key_t key = IPC_PRIVATE;
--- a/src/os/solaris/dtrace/generateJvmOffsets.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os/solaris/dtrace/generateJvmOffsets.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -249,6 +249,10 @@
 
   printf("\n");
 
+  GEN_OFFS(NarrowOopStruct, _base);
+  GEN_OFFS(NarrowOopStruct, _shift);
+  printf("\n");
+
   GEN_VALUE(SIZE_HeapBlockHeader, sizeof(HeapBlock::Header));
   GEN_SIZE(oopDesc);
   GEN_SIZE(constantPoolOopDesc);
--- a/src/os/solaris/dtrace/jhelper.d	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os/solaris/dtrace/jhelper.d	Thu Mar 19 09:13:24 2009 -0700
@@ -46,7 +46,10 @@
 extern pointer __1cJCodeCacheF_heap_;
 extern pointer __1cIUniverseP_methodKlassObj_;
 extern pointer __1cIUniverseO_collectedHeap_;
-extern pointer __1cIUniverseK_heap_base_;
+extern pointer __1cIUniverseL_narrow_oop_;
+#ifdef _LP64
+extern pointer UseCompressedOops;
+#endif
 
 extern pointer __1cHnmethodG__vtbl_;
 extern pointer __1cKBufferBlobG__vtbl_;
@@ -56,6 +59,7 @@
 #define copyin_uint16(ADDR) *(uint16_t*) copyin((pointer) (ADDR), sizeof(uint16_t))
 #define copyin_uint32(ADDR) *(uint32_t*) copyin((pointer) (ADDR), sizeof(uint32_t))
 #define copyin_int32(ADDR)  *(int32_t*)  copyin((pointer) (ADDR), sizeof(int32_t))
+#define copyin_uint8(ADDR)  *(uint8_t*)  copyin((pointer) (ADDR), sizeof(uint8_t))
 
 #define SAME(x) x
 #define copyin_offset(JVM_CONST)  JVM_CONST = \
@@ -132,6 +136,9 @@
   copyin_offset(SIZE_oopDesc);
   copyin_offset(SIZE_constantPoolOopDesc);
 
+  copyin_offset(OFFSET_NarrowOopStruct_base);
+  copyin_offset(OFFSET_NarrowOopStruct_shift);
+
   /*
    * The PC to translate is in arg0.
    */
@@ -151,9 +158,19 @@
 
   this->Universe_methodKlassOop = copyin_ptr(&``__1cIUniverseP_methodKlassObj_);
   this->CodeCache_heap_address = copyin_ptr(&``__1cJCodeCacheF_heap_);
-  this->Universe_heap_base = copyin_ptr(&``__1cIUniverseK_heap_base_);
 
   /* Reading volatile values */
+#ifdef _LP64
+  this->Use_Compressed_Oops  = copyin_uint8(&``UseCompressedOops);
+#else
+  this->Use_Compressed_Oops  = 0;
+#endif
+
+  this->Universe_narrow_oop_base  = copyin_ptr(&``__1cIUniverseL_narrow_oop_ +
+                                               OFFSET_NarrowOopStruct_base);
+  this->Universe_narrow_oop_shift = copyin_int32(&``__1cIUniverseL_narrow_oop_ +
+                                                 OFFSET_NarrowOopStruct_shift);
+
   this->CodeCache_low = copyin_ptr(this->CodeCache_heap_address + 
       OFFSET_CodeHeap_memory + OFFSET_VirtualSpace_low);
 
@@ -295,7 +312,7 @@
 
 dtrace:helper:ustack:
 /!this->done && this->vtbl == this->BufferBlob_vtbl &&
-this->Universe_heap_base == NULL &&
+this->Use_Compressed_Oops == 0 &&
 this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/
 {
   MARK_LINE;
@@ -306,7 +323,7 @@
 
 dtrace:helper:ustack:
 /!this->done && this->vtbl == this->BufferBlob_vtbl &&
-this->Universe_heap_base != NULL &&
+this->Use_Compressed_Oops != 0 &&
 this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/
 {
   MARK_LINE;
@@ -314,8 +331,8 @@
    * Read compressed pointer and  decode heap oop, same as oop.inline.hpp
    */
   this->cklass = copyin_uint32(this->methodOopPtr + OFFSET_oopDesc_metadata);
-  this->klass = (uint64_t)((uintptr_t)this->Universe_heap_base +
-                ((uintptr_t)this->cklass << 3));
+  this->klass = (uint64_t)((uintptr_t)this->Universe_narrow_oop_base +
+                ((uintptr_t)this->cklass << this->Universe_narrow_oop_shift));
   this->methodOop = this->klass == this->Universe_methodKlassOop;
   this->done = !this->methodOop;
 }
--- a/src/os/solaris/dtrace/libjvm_db.c	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os/solaris/dtrace/libjvm_db.c	Thu Mar 19 09:13:24 2009 -0700
@@ -146,13 +146,17 @@
   uint64_t BufferBlob_vtbl;
   uint64_t RuntimeStub_vtbl;
 
+  uint64_t Use_Compressed_Oops_address;
   uint64_t Universe_methodKlassObj_address;
+  uint64_t Universe_narrow_oop_base_address;
+  uint64_t Universe_narrow_oop_shift_address;
   uint64_t CodeCache_heap_address;
-  uint64_t Universe_heap_base_address;
 
   /* Volatiles */
+  uint8_t  Use_Compressed_Oops;
   uint64_t Universe_methodKlassObj;
-  uint64_t Universe_heap_base;
+  uint64_t Universe_narrow_oop_base;
+  uint32_t Universe_narrow_oop_shift;
   uint64_t CodeCache_low;
   uint64_t CodeCache_high;
   uint64_t CodeCache_segmap_low;
@@ -279,8 +283,11 @@
       if (strcmp("_methodKlassObj", vmp->fieldName) == 0) {
         J->Universe_methodKlassObj_address = vmp->address;
       }
-      if (strcmp("_heap_base", vmp->fieldName) == 0) {
-        J->Universe_heap_base_address = vmp->address;
+      if (strcmp("_narrow_oop._base", vmp->fieldName) == 0) {
+        J->Universe_narrow_oop_base_address = vmp->address;
+      }
+      if (strcmp("_narrow_oop._shift", vmp->fieldName) == 0) {
+        J->Universe_narrow_oop_shift_address = vmp->address;
       }
     }
     CHECK_FAIL(err);
@@ -298,14 +305,39 @@
   return -1;
 }
 
+static int find_symbol(jvm_agent_t* J, const char *name, uint64_t* valuep) {
+  psaddr_t sym_addr;
+  int err;
+
+  err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
+  if (err != PS_OK) goto fail;
+  *valuep = sym_addr;
+  return PS_OK;
+
+ fail:
+  return err;
+}
+
 static int read_volatiles(jvm_agent_t* J) {
   uint64_t ptr;
   int err;
 
+  err = find_symbol(J, "UseCompressedOops", &J->Use_Compressed_Oops_address);
+  if (err == PS_OK) {
+    err = ps_pread(J->P,  J->Use_Compressed_Oops_address, &J->Use_Compressed_Oops, sizeof(uint8_t));
+    CHECK_FAIL(err);
+  } else {
+    J->Use_Compressed_Oops = 0;
+  }
+
   err = read_pointer(J, J->Universe_methodKlassObj_address, &J->Universe_methodKlassObj);
   CHECK_FAIL(err);
-  err = read_pointer(J, J->Universe_heap_base_address, &J->Universe_heap_base);
+
+  err = read_pointer(J, J->Universe_narrow_oop_base_address, &J->Universe_narrow_oop_base);
   CHECK_FAIL(err);
+  err = ps_pread(J->P,  J->Universe_narrow_oop_shift_address, &J->Universe_narrow_oop_shift, sizeof(uint32_t));
+  CHECK_FAIL(err);
+
   err = read_pointer(J, J->CodeCache_heap_address + OFFSET_CodeHeap_memory +
                      OFFSET_VirtualSpace_low, &J->CodeCache_low);
   CHECK_FAIL(err);
@@ -374,19 +406,6 @@
   return -1;
 }
 
-static int find_symbol(jvm_agent_t* J, const char *name, uint64_t* valuep) {
-  psaddr_t sym_addr;
-  int err;
-
-  err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
-  if (err != PS_OK) goto fail;
-  *valuep = sym_addr;
-  return PS_OK;
-
- fail:
-  return err;
-}
-
 static int find_jlong_constant(jvm_agent_t* J, const char *name, uint64_t* valuep) {
   psaddr_t sym_addr;
   int err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
@@ -458,14 +477,14 @@
 static int is_methodOop(jvm_agent_t* J, uint64_t methodOopPtr) {
   uint64_t klass;
   int err;
-  // If heap_base is nonnull, this was a compressed oop.
-  if (J->Universe_heap_base != NULL) {
+  // If UseCompressedOops, this was a compressed oop.
+  if (J->Use_Compressed_Oops != 0) {
     uint32_t cklass;
     err = read_compressed_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata,
           &cklass);
     // decode heap oop, same as oop.inline.hpp
-    klass = (uint64_t)((uintptr_t)J->Universe_heap_base +
-            ((uintptr_t)cklass << 3));
+    klass = (uint64_t)((uintptr_t)J->Universe_narrow_oop_base +
+            ((uintptr_t)cklass << J->Universe_narrow_oop_shift));
   } else {
     err = read_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata, &klass);
   }
--- a/src/os/solaris/vm/os_solaris.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os/solaris/vm/os_solaris.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -3220,7 +3220,7 @@
   return true;
 }
 
-char* os::reserve_memory_special(size_t bytes) {
+char* os::reserve_memory_special(size_t bytes, char* addr) {
   assert(UseLargePages && UseISM, "only for ISM large pages");
 
   size_t size = bytes;
@@ -4451,6 +4451,9 @@
 int_fnP_thread_t os::Solaris::_thr_suspend_mutator;
 int_fnP_thread_t os::Solaris::_thr_continue_mutator;
 
+// (Static) wrapper for getisax(2) call.
+os::Solaris::getisax_func_t os::Solaris::_getisax = 0;
+
 // (Static) wrappers for the liblgrp API
 os::Solaris::lgrp_home_func_t os::Solaris::_lgrp_home;
 os::Solaris::lgrp_init_func_t os::Solaris::_lgrp_init;
@@ -4465,16 +4468,19 @@
 // (Static) wrapper for meminfo() call.
 os::Solaris::meminfo_func_t os::Solaris::_meminfo = 0;
 
-static address resolve_symbol(const char *name) {
-  address addr;
-
-  addr = (address) dlsym(RTLD_DEFAULT, name);
+static address resolve_symbol_lazy(const char* name) {
+  address addr = (address) dlsym(RTLD_DEFAULT, name);
   if(addr == NULL) {
     // RTLD_DEFAULT was not defined on some early versions of 2.5.1
     addr = (address) dlsym(RTLD_NEXT, name);
-    if(addr == NULL) {
-      fatal(dlerror());
-    }
+  }
+  return addr;
+}
+
+static address resolve_symbol(const char* name) {
+  address addr = resolve_symbol_lazy(name);
+  if(addr == NULL) {
+    fatal(dlerror());
   }
   return addr;
 }
@@ -4673,15 +4679,26 @@
 }
 
 void os::Solaris::misc_sym_init() {
-  address func = (address)dlsym(RTLD_DEFAULT, "meminfo");
-  if(func == NULL) {
-    func = (address) dlsym(RTLD_NEXT, "meminfo");
-  }
+  address func;
+
+  // getisax
+  func = resolve_symbol_lazy("getisax");
+  if (func != NULL) {
+    os::Solaris::_getisax = CAST_TO_FN_PTR(getisax_func_t, func);
+  }
+
+  // meminfo
+  func = resolve_symbol_lazy("meminfo");
   if (func != NULL) {
     os::Solaris::set_meminfo(CAST_TO_FN_PTR(meminfo_func_t, func));
   }
 }
 
+uint_t os::Solaris::getisax(uint32_t* array, uint_t n) {
+  assert(_getisax != NULL, "_getisax not set");
+  return _getisax(array, n);
+}
+
 // Symbol doesn't exist in Solaris 8 pset.h
 #ifndef PS_MYID
 #define PS_MYID -3
@@ -4716,6 +4733,10 @@
 
   Solaris::initialize_system_info();
 
+  // Initialize misc. symbols as soon as possible, so we can use them
+  // if we need them.
+  Solaris::misc_sym_init();
+
   int fd = open("/dev/zero", O_RDWR);
   if (fd < 0) {
     fatal1("os::init: cannot open /dev/zero (%s)", strerror(errno));
@@ -4857,7 +4878,6 @@
     }
   }
 
-  Solaris::misc_sym_init();
   Solaris::signal_sets_init();
   Solaris::init_signal_mem();
   Solaris::install_signal_handlers();
--- a/src/os/solaris/vm/os_solaris.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os/solaris/vm/os_solaris.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -72,6 +72,8 @@
         LGRP_VIEW_OS            /* what's available to operating system */
   } lgrp_view_t;
 
+  typedef uint_t (*getisax_func_t)(uint32_t* array, uint_t n);
+
   typedef lgrp_id_t (*lgrp_home_func_t)(idtype_t idtype, id_t id);
   typedef lgrp_cookie_t (*lgrp_init_func_t)(lgrp_view_t view);
   typedef int (*lgrp_fini_func_t)(lgrp_cookie_t cookie);
@@ -87,6 +89,8 @@
                                 const uint_t  info_req[],  int info_count,
                                 uint64_t  outdata[], uint_t validity[]);
 
+  static getisax_func_t _getisax;
+
   static lgrp_home_func_t _lgrp_home;
   static lgrp_init_func_t _lgrp_init;
   static lgrp_fini_func_t _lgrp_fini;
@@ -283,6 +287,9 @@
   }
   static lgrp_cookie_t lgrp_cookie()                 { return _lgrp_cookie; }
 
+  static bool supports_getisax()                     { return _getisax != NULL; }
+  static uint_t getisax(uint32_t* array, uint_t n);
+
   static void set_meminfo(meminfo_func_t func)       { _meminfo = func; }
   static int meminfo (const uint64_t inaddr[],   int addr_count,
                      const uint_t  info_req[],  int info_count,
--- a/src/os/windows/vm/os_windows.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os/windows/vm/os_windows.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -2595,7 +2595,7 @@
   return true;
 }
 
-char* os::reserve_memory_special(size_t bytes) {
+char* os::reserve_memory_special(size_t bytes, char* addr) {
 
   if (UseLargePagesIndividualAllocation) {
     if (TracePageSizes && Verbose) {
@@ -2615,7 +2615,7 @@
         "use -XX:-UseLargePagesIndividualAllocation to turn off");
       return NULL;
     }
-    p_buf = (char *) VirtualAlloc(NULL,
+    p_buf = (char *) VirtualAlloc(addr,
                                  size_of_reserve,  // size of Reserve
                                  MEM_RESERVE,
                                  PAGE_EXECUTE_READWRITE);
--- a/src/os_cpu/linux_sparc/vm/globals_linux_sparc.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os_cpu/linux_sparc/vm/globals_linux_sparc.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -30,5 +30,7 @@
 define_pd_global(uintx, JVMInvokeMethodSlack,    12288);
 define_pd_global(intx, CompilerThreadStackSize,  0);
 
+// Only used on 64 bit platforms
+define_pd_global(uintx, HeapBaseMinAddress,      4*G);
 // Only used on 64 bit Windows platforms
 define_pd_global(bool, UseVectoredExceptions, false);
--- a/src/os_cpu/linux_x86/vm/globals_linux_x86.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os_cpu/linux_x86/vm/globals_linux_x86.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -43,5 +43,7 @@
 
 define_pd_global(uintx, JVMInvokeMethodSlack,    8192);
 
+// Only used on 64 bit platforms
+define_pd_global(uintx, HeapBaseMinAddress,      2*G);
 // Only used on 64 bit Windows platforms
 define_pd_global(bool, UseVectoredExceptions,    false);
--- a/src/os_cpu/solaris_sparc/vm/globals_solaris_sparc.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os_cpu/solaris_sparc/vm/globals_solaris_sparc.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -30,5 +30,9 @@
 define_pd_global(uintx, JVMInvokeMethodSlack,    12288);
 define_pd_global(intx, CompilerThreadStackSize,  0);
 
+// Only used on 64 bit platforms
+define_pd_global(uintx, HeapBaseMinAddress,      4*G);
 // Only used on 64 bit Windows platforms
 define_pd_global(bool, UseVectoredExceptions,    false);
+
+
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2006-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,58 +25,107 @@
 # include "incls/_precompiled.incl"
 # include "incls/_vm_version_solaris_sparc.cpp.incl"
 
+# include <sys/auxv.h>
+# include <sys/auxv_SPARC.h>
 # include <sys/systeminfo.h>
 
+// We need to keep these here as long as we have to build on Solaris
+// versions before 10.
+#ifndef SI_ARCHITECTURE_32
+#define SI_ARCHITECTURE_32      516     /* basic 32-bit SI_ARCHITECTURE */
+#endif
+
+#ifndef SI_ARCHITECTURE_64
+#define SI_ARCHITECTURE_64      517     /* basic 64-bit SI_ARCHITECTURE */
+#endif
+
+static void do_sysinfo(int si, const char* string, int* features, int mask) {
+  char   tmp;
+  size_t bufsize = sysinfo(si, &tmp, 1);
+
+  // All SI defines used below must be supported.
+  guarantee(bufsize != -1, "must be supported");
+
+  char* buf = (char*) malloc(bufsize);
+
+  if (buf == NULL)
+    return;
+
+  if (sysinfo(si, buf, bufsize) == bufsize) {
+    // Compare the string.
+    if (strcmp(buf, string) == 0) {
+      *features |= mask;
+    }
+  }
+
+  free(buf);
+}
+
 int VM_Version::platform_features(int features) {
-  // We determine what sort of hardware we have via sysinfo(SI_ISALIST, ...).
-  // This isn't the best of all possible ways because there's not enough
-  // detail in the isa list it returns, but it's a bit less arcane than
-  // generating assembly code and an illegal instruction handler.  We used
-  // to generate a getpsr trap, but that's even more arcane.
-  //
-  // Another possibility would be to use sysinfo(SI_PLATFORM, ...), but
-  // that would require more knowledge here than is wise.
+  // getisax(2), SI_ARCHITECTURE_32, and SI_ARCHITECTURE_64 are
+  // supported on Solaris 10 and later.
+  if (os::Solaris::supports_getisax()) {
+#ifndef PRODUCT
+    if (PrintMiscellaneous && Verbose)
+      tty->print_cr("getisax(2) supported.");
+#endif
 
-  // isalist spec via 'man isalist' as of 01-Aug-2001
+    // Check 32-bit architecture.
+    do_sysinfo(SI_ARCHITECTURE_32, "sparc", &features, v8_instructions_m);
+
+    // Check 64-bit architecture.
+    do_sysinfo(SI_ARCHITECTURE_64, "sparcv9", &features, generic_v9_m);
+
+    // Extract valid instruction set extensions.
+    uint_t av;
+    uint_t avn = os::Solaris::getisax(&av, 1);
+    assert(avn == 1, "should only return one av");
 
-  char   tmp;
-  size_t bufsize  = sysinfo(SI_ISALIST, &tmp, 1);
-  char*  buf      = (char*)malloc(bufsize);
+    if (av & AV_SPARC_MUL32)  features |= hardware_mul32_m;
+    if (av & AV_SPARC_DIV32)  features |= hardware_div32_m;
+    if (av & AV_SPARC_FSMULD) features |= hardware_fsmuld_m;
+    if (av & AV_SPARC_V8PLUS) features |= v9_instructions_m;
+    if (av & AV_SPARC_POPC)   features |= hardware_popc_m;
+    if (av & AV_SPARC_VIS)    features |= vis1_instructions_m;
+    if (av & AV_SPARC_VIS2)   features |= vis2_instructions_m;
+  } else {
+    // getisax(2) failed, use the old legacy code.
+#ifndef PRODUCT
+    if (PrintMiscellaneous && Verbose)
+      tty->print_cr("getisax(2) not supported.");
+#endif
+
+    char   tmp;
+    size_t bufsize = sysinfo(SI_ISALIST, &tmp, 1);
+    char*  buf     = (char*) malloc(bufsize);
 
-  if (buf != NULL) {
-    if (sysinfo(SI_ISALIST, buf, bufsize) == bufsize) {
-      // Figure out what kind of sparc we have
-      char *sparc_string = strstr(buf, "sparc");
-      if (sparc_string != NULL) {            features |= v8_instructions_m;
-        if (sparc_string[5] == 'v') {
-          if (sparc_string[6] == '8') {
-            if (sparc_string[7] == '-')      features |= hardware_int_muldiv_m;
-            else if (sparc_string[7] == 'p') features |= generic_v9_m;
-            else                      features |= generic_v8_m;
-          } else if (sparc_string[6] == '9') features |= generic_v9_m;
+    if (buf != NULL) {
+      if (sysinfo(SI_ISALIST, buf, bufsize) == bufsize) {
+        // Figure out what kind of sparc we have
+        char *sparc_string = strstr(buf, "sparc");
+        if (sparc_string != NULL) {              features |= v8_instructions_m;
+          if (sparc_string[5] == 'v') {
+            if (sparc_string[6] == '8') {
+              if (sparc_string[7] == '-') {      features |= hardware_mul32_m;
+                                                 features |= hardware_div32_m;
+              } else if (sparc_string[7] == 'p') features |= generic_v9_m;
+              else                               features |= generic_v8_m;
+            } else if (sparc_string[6] == '9')   features |= generic_v9_m;
+          }
+        }
+
+        // Check for visualization instructions
+        char *vis = strstr(buf, "vis");
+        if (vis != NULL) {                       features |= vis1_instructions_m;
+          if (vis[3] == '2')                     features |= vis2_instructions_m;
         }
       }
-
-      // Check for visualization instructions
-      char *vis = strstr(buf, "vis");
-      if (vis != NULL) {              features |= vis1_instructions_m;
-        if (vis[3] == '2')            features |= vis2_instructions_m;
-      }
+      free(buf);
     }
-    free(buf);
   }
 
-  bufsize = sysinfo(SI_MACHINE, &tmp, 1);
-  buf     = (char*)malloc(bufsize);
-
-  if (buf != NULL) {
-    if (sysinfo(SI_MACHINE, buf, bufsize) == bufsize) {
-      if (strstr(buf, "sun4v") != NULL) {
-        features |= sun4v_m;
-      }
-    }
-    free(buf);
-  }
+  // Determine the machine type.
+  do_sysinfo(SI_MACHINE, "sun4v", &features, sun4v_m);
 
   return features;
 }
--- a/src/os_cpu/solaris_x86/vm/globals_solaris_x86.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os_cpu/solaris_x86/vm/globals_solaris_x86.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -46,5 +46,7 @@
 
 define_pd_global(intx, CompilerThreadStackSize,  0);
 
+// Only used on 64 bit platforms
+define_pd_global(uintx, HeapBaseMinAddress,      256*M);
 // Only used on 64 bit Windows platforms
 define_pd_global(bool, UseVectoredExceptions,    false);
--- a/src/os_cpu/windows_x86/vm/globals_windows_x86.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os_cpu/windows_x86/vm/globals_windows_x86.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -45,5 +45,7 @@
 
 define_pd_global(uintx, JVMInvokeMethodSlack,    8192);
 
+// Only used on 64 bit platforms
+define_pd_global(uintx, HeapBaseMinAddress,      2*G);
 // Only used on 64 bit Windows platforms
 define_pd_global(bool, UseVectoredExceptions,    false);
--- a/src/os_cpu/windows_x86/vm/unwind_windows_x86.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/os_cpu/windows_x86/vm/unwind_windows_x86.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -68,6 +68,9 @@
     PVOID HandlerData;
 } DISPATCHER_CONTEXT, *PDISPATCHER_CONTEXT;
 
+#if MSC_VER < 1500
+
+/* Not needed for VS2008 compiler, comes from winnt.h. */
 typedef EXCEPTION_DISPOSITION (*PEXCEPTION_ROUTINE) (
     IN PEXCEPTION_RECORD ExceptionRecord,
     IN ULONG64 EstablisherFrame,
@@ -75,4 +78,6 @@
     IN OUT PDISPATCHER_CONTEXT DispatcherContext
 );
 
+#endif
+
 #endif // AMD64
--- a/src/share/vm/adlc/adlc.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/adlc/adlc.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -44,7 +44,7 @@
 #error "Something is wrong with the detection of MSC_VER in the makefiles"
 #endif
 
-#if _MSC_VER >= 1400 && !defined(_WIN64)
+#if _MSC_VER >= 1400
 #define strdup _strdup
 #endif
 
--- a/src/share/vm/asm/assembler.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/asm/assembler.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -321,16 +321,19 @@
 bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
   // Exception handler checks the nmethod's implicit null checks table
   // only when this method returns false.
-  if (UseCompressedOops) {
+#ifdef _LP64
+  if (UseCompressedOops && Universe::narrow_oop_base() != NULL) {
+    assert (Universe::heap() != NULL, "java heap should be initialized");
     // The first page after heap_base is unmapped and
     // the 'offset' is equal to [heap_base + offset] for
     // narrow oop implicit null checks.
-    uintptr_t heap_base = (uintptr_t)Universe::heap_base();
-    if ((uintptr_t)offset >= heap_base) {
+    uintptr_t base = (uintptr_t)Universe::narrow_oop_base();
+    if ((uintptr_t)offset >= base) {
       // Normalize offset for the next check.
-      offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
+      offset = (intptr_t)(pointer_delta((void*)offset, (void*)base, 1));
     }
   }
+#endif
   return offset < 0 || os::vm_page_size() <= offset;
 }
 
--- a/src/share/vm/ci/ciMethodBlocks.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/ci/ciMethodBlocks.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -284,6 +284,11 @@
       //
       int ex_start = handler->start();
       int ex_end = handler->limit();
+      // ensure a block at the start of exception range and start of following code
+      (void) make_block_at(ex_start);
+      if (ex_end < _code_size)
+        (void) make_block_at(ex_end);
+
       if (eb->is_handler()) {
         // Extend old handler exception range to cover additional range.
         int old_ex_start = eb->ex_start_bci();
@@ -295,10 +300,6 @@
         eb->clear_exception_handler(); // Reset exception information
       }
       eb->set_exception_range(ex_start, ex_end);
-      // ensure a block at the start of exception range and start of following code
-      (void) make_block_at(ex_start);
-      if (ex_end < _code_size)
-        (void) make_block_at(ex_end);
     }
   }
 
--- a/src/share/vm/classfile/vmSymbols.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/classfile/vmSymbols.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -284,6 +284,7 @@
   template(value_name,                                "value")                                    \
   template(frontCacheEnabled_name,                    "frontCacheEnabled")                        \
   template(stringCacheEnabled_name,                   "stringCacheEnabled")                       \
+  template(bitCount_name,                             "bitCount")                                 \
                                                                                                   \
   /* non-intrinsic name/signature pairs: */                                                       \
   template(register_method_name,                      "register")                                 \
@@ -304,6 +305,7 @@
   template(double_long_signature,                     "(D)J")                                     \
   template(double_double_signature,                   "(D)D")                                     \
   template(int_float_signature,                       "(I)F")                                     \
+  template(long_int_signature,                        "(J)I")                                     \
   template(long_long_signature,                       "(J)J")                                     \
   template(long_double_signature,                     "(J)D")                                     \
   template(byte_signature,                            "B")                                        \
@@ -507,6 +509,10 @@
    do_name(     doubleToLongBits_name,                           "doubleToLongBits")                                    \
   do_intrinsic(_longBitsToDouble,         java_lang_Double,       longBitsToDouble_name,    long_double_signature, F_S) \
    do_name(     longBitsToDouble_name,                           "longBitsToDouble")                                    \
+                                                                                                                        \
+  do_intrinsic(_bitCount_i,               java_lang_Integer,      bitCount_name,            int_int_signature,   F_S)   \
+  do_intrinsic(_bitCount_l,               java_lang_Long,         bitCount_name,            long_int_signature,  F_S)   \
+                                                                                                                        \
   do_intrinsic(_reverseBytes_i,           java_lang_Integer,      reverseBytes_name,        int_int_signature,   F_S)   \
    do_name(     reverseBytes_name,                               "reverseBytes")                                        \
   do_intrinsic(_reverseBytes_l,           java_lang_Long,         reverseBytes_name,        long_long_signature, F_S)   \
@@ -696,7 +702,6 @@
   do_signature(putShort_raw_signature,    "(JS)V")                                                                      \
   do_signature(getChar_raw_signature,     "(J)C")                                                                       \
   do_signature(putChar_raw_signature,     "(JC)V")                                                                      \
-  do_signature(getInt_raw_signature,      "(J)I")                                                                       \
   do_signature(putInt_raw_signature,      "(JI)V")                                                                      \
       do_alias(getLong_raw_signature,    /*(J)J*/ long_long_signature)                                                  \
       do_alias(putLong_raw_signature,    /*(JJ)V*/ long_long_void_signature)                                            \
@@ -713,7 +718,7 @@
   do_intrinsic(_getByte_raw,              sun_misc_Unsafe,        getByte_name, getByte_raw_signature,           F_RN)  \
   do_intrinsic(_getShort_raw,             sun_misc_Unsafe,        getShort_name, getShort_raw_signature,         F_RN)  \
   do_intrinsic(_getChar_raw,              sun_misc_Unsafe,        getChar_name, getChar_raw_signature,           F_RN)  \
-  do_intrinsic(_getInt_raw,               sun_misc_Unsafe,        getInt_name, getInt_raw_signature,             F_RN)  \
+  do_intrinsic(_getInt_raw,               sun_misc_Unsafe,        getInt_name, long_int_signature,               F_RN)  \
   do_intrinsic(_getLong_raw,              sun_misc_Unsafe,        getLong_name, getLong_raw_signature,           F_RN)  \
   do_intrinsic(_getFloat_raw,             sun_misc_Unsafe,        getFloat_name, getFloat_raw_signature,         F_RN)  \
   do_intrinsic(_getDouble_raw,            sun_misc_Unsafe,        getDouble_name, getDouble_raw_signature,       F_RN)  \
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -107,7 +107,7 @@
 #ifndef PRODUCT
 bool CMBitMapRO::covers(ReservedSpace rs) const {
   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
-  assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
+  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
          "size inconsistency");
   return _bmStartWord == (HeapWord*)(rs.base()) &&
          _bmWordSize  == rs.size()>>LogHeapWordSize;
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -1422,9 +1422,34 @@
   // Reserve the maximum.
   PermanentGenerationSpec* pgs = collector_policy()->permanent_generation();
   // Includes the perm-gen.
+
+  const size_t total_reserved = max_byte_size + pgs->max_size();
+  char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
+
   ReservedSpace heap_rs(max_byte_size + pgs->max_size(),
                         HeapRegion::GrainBytes,
-                        false /*ism*/);
+                        false /*ism*/, addr);
+
+  if (UseCompressedOops) {
+    if (addr != NULL && !heap_rs.is_reserved()) {
+      // Failed to reserve at specified address - the requested memory
+      // region is taken already, for example, by 'java' launcher.
+      // Try again to reserver heap higher.
+      addr = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
+      ReservedSpace heap_rs0(total_reserved, HeapRegion::GrainBytes,
+                             false /*ism*/, addr);
+      if (addr != NULL && !heap_rs0.is_reserved()) {
+        // Failed to reserve at specified address again - give up.
+        addr = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
+        assert(addr == NULL, "");
+        ReservedSpace heap_rs1(total_reserved, HeapRegion::GrainBytes,
+                               false /*ism*/, addr);
+        heap_rs = heap_rs1;
+      } else {
+        heap_rs = heap_rs0;
+      }
+    }
+  }
 
   if (!heap_rs.is_reserved()) {
     vm_exit_during_initialization("Could not reserve enough space for object heap");
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -508,7 +508,7 @@
   typedef PosParPRT* PosParPRTPtr;
   if (_max_fine_entries == 0) {
     assert(_mod_max_fine_entries_mask == 0, "Both or none.");
-    _max_fine_entries = (1 << G1LogRSRegionEntries);
+    _max_fine_entries = (size_t)(1 << G1LogRSRegionEntries);
     _mod_max_fine_entries_mask = _max_fine_entries - 1;
 #if SAMPLE_FOR_EVICTION
     assert(_fine_eviction_sample_size == 0
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -104,12 +104,38 @@
                   og_min_size, og_max_size,
                   yg_min_size, yg_max_size);
 
+  const size_t total_reserved = pg_max_size + og_max_size + yg_max_size;
+  char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
+
   // The main part of the heap (old gen + young gen) can often use a larger page
   // size than is needed or wanted for the perm gen.  Use the "compound
   // alignment" ReservedSpace ctor to avoid having to use the same page size for
   // all gens.
+
   ReservedHeapSpace heap_rs(pg_max_size, pg_align, og_max_size + yg_max_size,
-                            og_align);
+                            og_align, addr);
+
+  if (UseCompressedOops) {
+    if (addr != NULL && !heap_rs.is_reserved()) {
+      // Failed to reserve at specified address - the requested memory
+      // region is taken already, for example, by 'java' launcher.
+      // Try again to reserver heap higher.
+      addr = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
+      ReservedHeapSpace heap_rs0(pg_max_size, pg_align, og_max_size + yg_max_size,
+                                 og_align, addr);
+      if (addr != NULL && !heap_rs0.is_reserved()) {
+        // Failed to reserve at specified address again - give up.
+        addr = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
+        assert(addr == NULL, "");
+        ReservedHeapSpace heap_rs1(pg_max_size, pg_align, og_max_size + yg_max_size,
+                                   og_align, addr);
+        heap_rs = heap_rs1;
+      } else {
+        heap_rs = heap_rs0;
+      }
+    }
+  }
+
   os::trace_page_sizes("ps perm", pg_min_size, pg_max_size, pg_page_sz,
                        heap_rs.base(), pg_max_size);
   os::trace_page_sizes("ps main", og_min_size + yg_min_size,
--- a/src/share/vm/includeDB_core	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/includeDB_core	Thu Mar 19 09:13:24 2009 -0700
@@ -4598,6 +4598,7 @@
 vm_version_<arch>.hpp                   globals_extension.hpp
 vm_version_<arch>.hpp                   vm_version.hpp
 
+vm_version_<os_arch>.cpp                os.hpp
 vm_version_<os_arch>.cpp                vm_version_<arch>.hpp
 
 vmreg.cpp                               assembler.hpp
--- a/src/share/vm/memory/blockOffsetTable.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/memory/blockOffsetTable.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -235,7 +235,7 @@
   };
 
   static size_t power_to_cards_back(uint i) {
-    return 1 << (LogBase * i);
+    return (size_t)(1 << (LogBase * i));
   }
   static size_t power_to_words_back(uint i) {
     return power_to_cards_back(i) * N_words;
--- a/src/share/vm/memory/genCollectedHeap.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -218,6 +218,31 @@
     heap_address -= total_reserved;
   } else {
     heap_address = NULL;  // any address will do.
+    if (UseCompressedOops) {
+      heap_address = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
+      *_total_reserved = total_reserved;
+      *_n_covered_regions = n_covered_regions;
+      *heap_rs = ReservedHeapSpace(total_reserved, alignment,
+                                   UseLargePages, heap_address);
+
+      if (heap_address != NULL && !heap_rs->is_reserved()) {
+        // Failed to reserve at specified address - the requested memory
+        // region is taken already, for example, by 'java' launcher.
+        // Try again to reserver heap higher.
+        heap_address = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
+        *heap_rs = ReservedHeapSpace(total_reserved, alignment,
+                                     UseLargePages, heap_address);
+
+        if (heap_address != NULL && !heap_rs->is_reserved()) {
+          // Failed to reserve at specified address again - give up.
+          heap_address = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
+          assert(heap_address == NULL, "");
+          *heap_rs = ReservedHeapSpace(total_reserved, alignment,
+                                       UseLargePages, heap_address);
+        }
+      }
+      return heap_address;
+    }
   }
 
   *_total_reserved = total_reserved;
--- a/src/share/vm/memory/universe.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/memory/universe.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -99,7 +99,8 @@
 size_t          Universe::_heap_used_at_last_gc = 0;
 
 CollectedHeap*  Universe::_collectedHeap = NULL;
-address         Universe::_heap_base = NULL;
+
+NarrowOopStruct Universe::_narrow_oop = { NULL, 0, true };
 
 
 void Universe::basic_type_classes_do(void f(klassOop)) {
@@ -729,6 +730,53 @@
   return JNI_OK;
 }
 
+// Choose the heap base address and oop encoding mode
+// when compressed oops are used:
+// Unscaled  - Use 32-bits oops without encoding when
+//     NarrowOopHeapBaseMin + heap_size < 4Gb
+// ZeroBased - Use zero based compressed oops with encoding when
+//     NarrowOopHeapBaseMin + heap_size < 32Gb
+// HeapBased - Use compressed oops with heap base + encoding.
+
+// 4Gb
+static const uint64_t NarrowOopHeapMax = (uint64_t(max_juint) + 1);
+// 32Gb
+static const uint64_t OopEncodingHeapMax = NarrowOopHeapMax << LogMinObjAlignmentInBytes;
+
+char* Universe::preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode) {
+#ifdef _LP64
+  if (UseCompressedOops) {
+    assert(mode == UnscaledNarrowOop  ||
+           mode == ZeroBasedNarrowOop ||
+           mode == HeapBasedNarrowOop, "mode is invalid");
+
+    const size_t total_size = heap_size + HeapBaseMinAddress;
+    if (total_size <= OopEncodingHeapMax && (mode != HeapBasedNarrowOop)) {
+      if (total_size <= NarrowOopHeapMax && (mode == UnscaledNarrowOop) &&
+          (Universe::narrow_oop_shift() == 0)) {
+        // Use 32-bits oops without encoding and
+        // place heap's top on the 4Gb boundary
+        return (char*)(NarrowOopHeapMax - heap_size);
+      } else {
+        // Can't reserve with NarrowOopShift == 0
+        Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
+        if (mode == UnscaledNarrowOop ||
+            mode == ZeroBasedNarrowOop && total_size <= NarrowOopHeapMax) {
+          // Use zero based compressed oops with encoding and
+          // place heap's top on the 32Gb boundary in case
+          // total_size > 4Gb or failed to reserve below 4Gb.
+          return (char*)(OopEncodingHeapMax - heap_size);
+        }
+      }
+    } else {
+      // Can't reserve below 32Gb.
+      Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
+    }
+  }
+#endif
+  return NULL; // also return NULL (don't care) for 32-bit VM
+}
+
 jint Universe::initialize_heap() {
 
   if (UseParallelGC) {
@@ -773,6 +821,8 @@
   if (status != JNI_OK) {
     return status;
   }
+
+#ifdef _LP64
   if (UseCompressedOops) {
     // Subtract a page because something can get allocated at heap base.
     // This also makes implicit null checking work, because the
@@ -780,8 +830,49 @@
     // See needs_explicit_null_check.
     // Only set the heap base for compressed oops because it indicates
     // compressed oops for pstack code.
-    Universe::_heap_base = Universe::heap()->base() - os::vm_page_size();
+    if (PrintCompressedOopsMode) {
+      tty->cr();
+      tty->print("heap address: "PTR_FORMAT, Universe::heap()->base());
+    }
+    if ((uint64_t)Universe::heap()->reserved_region().end() > OopEncodingHeapMax) {
+      // Can't reserve heap below 32Gb.
+      Universe::set_narrow_oop_base(Universe::heap()->base() - os::vm_page_size());
+      Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
+      if (PrintCompressedOopsMode) {
+        tty->print(", Compressed Oops with base: "PTR_FORMAT, Universe::narrow_oop_base());
+      }
+    } else {
+      Universe::set_narrow_oop_base(0);
+      if (PrintCompressedOopsMode) {
+        tty->print(", zero based Compressed Oops");
+      }
+#ifdef _WIN64
+      if (!Universe::narrow_oop_use_implicit_null_checks()) {
+        // Don't need guard page for implicit checks in indexed addressing
+        // mode with zero based Compressed Oops.
+        Universe::set_narrow_oop_use_implicit_null_checks(true);
+      }
+#endif //  _WIN64
+      if((uint64_t)Universe::heap()->reserved_region().end() > NarrowOopHeapMax) {
+        // Can't reserve heap below 4Gb.
+        Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
+      } else {
+        assert(Universe::narrow_oop_shift() == 0, "use unscaled narrow oop");
+        if (PrintCompressedOopsMode) {
+          tty->print(", 32-bits Oops");
+        }
+      }
+    }
+    if (PrintCompressedOopsMode) {
+      tty->cr();
+      tty->cr();
+    }
   }
+  assert(Universe::narrow_oop_base() == (Universe::heap()->base() - os::vm_page_size()) ||
+         Universe::narrow_oop_base() == NULL, "invalid value");
+  assert(Universe::narrow_oop_shift() == LogMinObjAlignmentInBytes ||
+         Universe::narrow_oop_shift() == 0, "invalid value");
+#endif
 
   // We will never reach the CATCH below since Exceptions::_throw will cause
   // the VM to exit if an exception is thrown during initialization
--- a/src/share/vm/memory/universe.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/memory/universe.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -90,6 +90,19 @@
   methodOop get_methodOop();
 };
 
+// For UseCompressedOops.
+struct NarrowOopStruct {
+  // Base address for oop-within-java-object materialization.
+  // NULL if using wide oops or zero based narrow oops.
+  address _base;
+  // Number of shift bits for encoding/decoding narrow oops.
+  // 0 if using wide oops or zero based unscaled narrow oops,
+  // LogMinObjAlignmentInBytes otherwise.
+  int     _shift;
+  // Generate code with implicit null checks for narrow oops.
+  bool    _use_implicit_null_checks;
+};
+
 
 class Universe: AllStatic {
   // Ugh.  Universe is much too friendly.
@@ -181,9 +194,9 @@
 
   // The particular choice of collected heap.
   static CollectedHeap* _collectedHeap;
-  // Base address for oop-within-java-object materialization.
-  // NULL if using wide oops.  Doubles as heap oop null value.
-  static address        _heap_base;
+
+  // For UseCompressedOops.
+  static struct NarrowOopStruct _narrow_oop;
 
   // array of dummy objects used with +FullGCAlot
   debug_only(static objArrayOop _fullgc_alot_dummy_array;)
@@ -328,8 +341,25 @@
   static CollectedHeap* heap() { return _collectedHeap; }
 
   // For UseCompressedOops
-  static address heap_base()       { return _heap_base; }
-  static address* heap_base_addr() { return &_heap_base; }
+  static address* narrow_oop_base_addr()              { return &_narrow_oop._base; }
+  static address  narrow_oop_base()                   { return  _narrow_oop._base; }
+  static int      narrow_oop_shift()                  { return  _narrow_oop._shift; }
+  static void     set_narrow_oop_base(address base)   { _narrow_oop._base  = base; }
+  static void     set_narrow_oop_shift(int shift)     { _narrow_oop._shift = shift; }
+  static bool     narrow_oop_use_implicit_null_checks()             { return  _narrow_oop._use_implicit_null_checks; }
+  static void     set_narrow_oop_use_implicit_null_checks(bool use) { _narrow_oop._use_implicit_null_checks = use; }
+  // Narrow Oop encoding mode:
+  // 0 - Use 32-bits oops without encoding when
+  //     NarrowOopHeapBaseMin + heap_size < 4Gb
+  // 1 - Use zero based compressed oops with encoding when
+  //     NarrowOopHeapBaseMin + heap_size < 32Gb
+  // 2 - Use compressed oops with heap base + encoding.
+  enum NARROW_OOP_MODE {
+    UnscaledNarrowOop  = 0,
+    ZeroBasedNarrowOop = 1,
+    HeapBasedNarrowOop = 2
+  };
+  static char* preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode);
 
   // Historic gc information
   static size_t get_heap_capacity_at_last_gc()         { return _heap_capacity_at_last_gc; }
--- a/src/share/vm/oops/oop.inline.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/oops/oop.inline.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -148,10 +148,11 @@
 
 inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) {
   assert(!is_null(v), "oop value can never be zero");
-  address heap_base = Universe::heap_base();
-  uint64_t pd = (uint64_t)(pointer_delta((void*)v, (void*)heap_base, 1));
+  address base = Universe::narrow_oop_base();
+  int    shift = Universe::narrow_oop_shift();
+  uint64_t  pd = (uint64_t)(pointer_delta((void*)v, (void*)base, 1));
   assert(OopEncodingHeapMax > pd, "change encoding max if new encoding");
-  uint64_t result = pd >> LogMinObjAlignmentInBytes;
+  uint64_t result = pd >> shift;
   assert((result & CONST64(0xffffffff00000000)) == 0, "narrow oop overflow");
   return (narrowOop)result;
 }
@@ -162,8 +163,9 @@
 
 inline oop oopDesc::decode_heap_oop_not_null(narrowOop v) {
   assert(!is_null(v), "narrow oop value can never be zero");
-  address heap_base = Universe::heap_base();
-  return (oop)(void*)((uintptr_t)heap_base + ((uintptr_t)v << LogMinObjAlignmentInBytes));
+  address base = Universe::narrow_oop_base();
+  int    shift = Universe::narrow_oop_shift();
+  return (oop)(void*)((uintptr_t)base + ((uintptr_t)v << shift));
 }
 
 inline oop oopDesc::decode_heap_oop(narrowOop v) {
--- a/src/share/vm/opto/addnode.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/opto/addnode.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -756,7 +756,13 @@
       if ( eti == NULL ) {
         // there must be one pointer among the operands
         guarantee(tptr == NULL, "must be only one pointer operand");
-        tptr = et->isa_oopptr();
+        if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
+          // 32-bits narrow oop can be the base of address expressions
+          tptr = et->make_ptr()->isa_oopptr();
+        } else {
+          // only regular oops are expected here
+          tptr = et->isa_oopptr();
+        }
         guarantee(tptr != NULL, "non-int operand must be pointer");
         if (tptr->higher_equal(tp->add_offset(tptr->offset())))
           tp = tptr; // Set more precise type for bailout
--- a/src/share/vm/opto/classes.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/opto/classes.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -184,6 +184,8 @@
 macro(Parm)
 macro(PartialSubtypeCheck)
 macro(Phi)
+macro(PopCountI)
+macro(PopCountL)
 macro(PowD)
 macro(PrefetchRead)
 macro(PrefetchWrite)
--- a/src/share/vm/opto/compile.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/opto/compile.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -2081,7 +2081,7 @@
 
 #ifdef _LP64
   case Op_CastPP:
-    if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) {
+    if (n->in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks()) {
       Compile* C = Compile::current();
       Node* in1 = n->in(1);
       const Type* t = n->bottom_type();
@@ -2136,7 +2136,7 @@
         new_in2 = in2->in(1);
       } else if (in2->Opcode() == Op_ConP) {
         const Type* t = in2->bottom_type();
-        if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) {
+        if (t == TypePtr::NULL_PTR && Universe::narrow_oop_use_implicit_null_checks()) {
           new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
           //
           // This transformation together with CastPP transformation above
--- a/src/share/vm/opto/connode.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/opto/connode.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -433,7 +433,7 @@
 // If not converting int->oop, throw away cast after constant propagation
 Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
   const Type *t = ccp->type(in(1));
-  if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) {
+  if (!t->isa_oop_ptr() || (in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks())) {
     return NULL; // do not transform raw pointers or narrow oops
   }
   return ConstraintCastNode::Ideal_DU_postCCP(ccp);
--- a/src/share/vm/opto/connode.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/opto/connode.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -635,3 +635,23 @@
   virtual uint ideal_reg() const { return Op_RegL; }
   virtual const Type* Value( PhaseTransform *phase ) const;
 };
+
+//---------- PopCountINode -----------------------------------------------------
+// Population count (bit count) of an integer.
+class PopCountINode : public Node {
+public:
+  PopCountINode(Node* in1) : Node(0, in1) {}
+  virtual int Opcode() const;
+  const Type* bottom_type() const { return TypeInt::INT; }
+  virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//---------- PopCountLNode -----------------------------------------------------
+// Population count (bit count) of a long.
+class PopCountLNode : public Node {
+public:
+  PopCountLNode(Node* in1) : Node(0, in1) {}
+  virtual int Opcode() const;
+  const Type* bottom_type() const { return TypeInt::INT; }
+  virtual uint ideal_reg() const { return Op_RegI; }
+};
--- a/src/share/vm/opto/graphKit.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/opto/graphKit.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -2277,7 +2277,7 @@
   r_not_subtype->init_req(1, _gvn.transform( new (C, 1) IfTrueNode (iff2) ) );
   set_control(                _gvn.transform( new (C, 1) IfFalseNode(iff2) ) );
 
-  // Check for self.  Very rare to get here, but its taken 1/3 the time.
+  // Check for self.  Very rare to get here, but it is taken 1/3 the time.
   // No performance impact (too rare) but allows sharing of secondary arrays
   // which has some footprint reduction.
   Node *cmp3 = _gvn.transform( new (C, 3) CmpPNode( subklass, superklass ) );
@@ -2286,11 +2286,27 @@
   r_ok_subtype->init_req(2, _gvn.transform( new (C, 1) IfTrueNode ( iff3 ) ) );
   set_control(               _gvn.transform( new (C, 1) IfFalseNode( iff3 ) ) );
 
+  // -- Roads not taken here: --
+  // We could also have chosen to perform the self-check at the beginning
+  // of this code sequence, as the assembler does.  This would not pay off
+  // the same way, since the optimizer, unlike the assembler, can perform
+  // static type analysis to fold away many successful self-checks.
+  // Non-foldable self checks work better here in second position, because
+  // the initial primary superclass check subsumes a self-check for most
+  // types.  An exception would be a secondary type like array-of-interface,
+  // which does not appear in its own primary supertype display.
+  // Finally, we could have chosen to move the self-check into the
+  // PartialSubtypeCheckNode, and from there out-of-line in a platform
+  // dependent manner.  But it is worthwhile to have the check here,
+  // where it can be perhaps be optimized.  The cost in code space is
+  // small (register compare, branch).
+
   // Now do a linear scan of the secondary super-klass array.  Again, no real
   // performance impact (too rare) but it's gotta be done.
-  // (The stub also contains the self-check of subklass == superklass.
   // Since the code is rarely used, there is no penalty for moving it
-  // out of line, and it can only improve I-cache density.)
+  // out of line, and it can only improve I-cache density.
+  // The decision to inline or out-of-line this final check is platform
+  // dependent, and is found in the AD file definition of PartialSubtypeCheck.
   Node* psc = _gvn.transform(
     new (C, 3) PartialSubtypeCheckNode(control(), subklass, superklass) );
 
--- a/src/share/vm/opto/lcm.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/opto/lcm.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -158,7 +158,14 @@
           continue;             // Give up if offset is beyond page size
         // cannot reason about it; is probably not implicit null exception
       } else {
-        const TypePtr* tptr = base->bottom_type()->is_ptr();
+        const TypePtr* tptr;
+        if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
+          // 32-bits narrow oop can be the base of address expressions
+          tptr = base->bottom_type()->make_ptr();
+        } else {
+          // only regular oops are expected here
+          tptr = base->bottom_type()->is_ptr();
+        }
         // Give up if offset is not a compile-time constant
         if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
           continue;
--- a/src/share/vm/opto/library_call.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/opto/library_call.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -221,6 +221,7 @@
   bool inline_unsafe_CAS(BasicType type);
   bool inline_unsafe_ordered_store(BasicType type);
   bool inline_fp_conversions(vmIntrinsics::ID id);
+  bool inline_bitCount(vmIntrinsics::ID id);
   bool inline_reverseBytes(vmIntrinsics::ID id);
 };
 
@@ -314,6 +315,11 @@
     if (!JDK_Version::is_gte_jdk14x_version())  return NULL;
     break;
 
+  case vmIntrinsics::_bitCount_i:
+  case vmIntrinsics::_bitCount_l:
+    if (!UsePopCountInstruction)  return NULL;
+    break;
+
  default:
     break;
   }
@@ -617,6 +623,10 @@
   case vmIntrinsics::_longBitsToDouble:
     return inline_fp_conversions(intrinsic_id());
 
+  case vmIntrinsics::_bitCount_i:
+  case vmIntrinsics::_bitCount_l:
+    return inline_bitCount(intrinsic_id());
+
   case vmIntrinsics::_reverseBytes_i:
   case vmIntrinsics::_reverseBytes_l:
     return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id());
@@ -1714,6 +1724,27 @@
   }
 }
 
+//----------------------------inline_bitCount_int/long-----------------------
+// inline int Integer.bitCount(int)
+// inline int Long.bitCount(long)
+bool LibraryCallKit::inline_bitCount(vmIntrinsics::ID id) {
+  assert(id == vmIntrinsics::_bitCount_i || id == vmIntrinsics::_bitCount_l, "not bitCount");
+  if (id == vmIntrinsics::_bitCount_i && !Matcher::has_match_rule(Op_PopCountI)) return false;
+  if (id == vmIntrinsics::_bitCount_l && !Matcher::has_match_rule(Op_PopCountL)) return false;
+  _sp += arg_size();  // restore stack pointer
+  switch (id) {
+  case vmIntrinsics::_bitCount_i:
+    push(_gvn.transform(new (C, 2) PopCountINode(pop())));
+    break;
+  case vmIntrinsics::_bitCount_l:
+    push(_gvn.transform(new (C, 2) PopCountLNode(pop_pair())));
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  return true;
+}
+
 //----------------------------inline_reverseBytes_int/long-------------------
 // inline Integer.reverseBytes(int)
 // inline Long.reverseBytes(long)
--- a/src/share/vm/opto/matcher.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/opto/matcher.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -1481,8 +1481,13 @@
       const Type* mach_at = mach->adr_type();
       // DecodeN node consumed by an address may have different type
       // then its input. Don't compare types for such case.
-      if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() &&
-          m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) {
+      if (m->adr_type() != mach_at &&
+          (m->in(MemNode::Address)->is_DecodeN() ||
+           m->in(MemNode::Address)->is_AddP() &&
+           m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN() ||
+           m->in(MemNode::Address)->is_AddP() &&
+           m->in(MemNode::Address)->in(AddPNode::Address)->is_AddP() &&
+           m->in(MemNode::Address)->in(AddPNode::Address)->in(AddPNode::Address)->is_DecodeN())) {
         mach_at = m->adr_type();
       }
       if (m->adr_type() != mach_at) {
--- a/src/share/vm/runtime/arguments.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/runtime/arguments.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -1211,7 +1211,9 @@
     if (UseLargePages && UseCompressedOops) {
       // Cannot allocate guard pages for implicit checks in indexed addressing
       // mode, when large pages are specified on windows.
-      FLAG_SET_DEFAULT(UseImplicitNullCheckForNarrowOop, false);
+      // This flag could be switched ON if narrow oop base address is set to 0,
+      // see code in Universe::initialize_heap().
+      Universe::set_narrow_oop_use_implicit_null_checks(false);
     }
 #endif //  _WIN64
   } else {
--- a/src/share/vm/runtime/globals.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/runtime/globals.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -303,11 +303,14 @@
             "Use 32-bit object references in 64-bit VM. "                   \
             "lp64_product means flag is always constant in 32 bit VM")      \
                                                                             \
-  lp64_product(bool, CheckCompressedOops, trueInDebug,                      \
-            "generate checks in encoding/decoding code")                    \
-                                                                            \
-  product(bool, UseImplicitNullCheckForNarrowOop, true,                     \
-            "generate implicit null check in indexed addressing mode.")     \
+  notproduct(bool, CheckCompressedOops, true,                               \
+            "generate checks in encoding/decoding code in debug VM")        \
+                                                                            \
+  product_pd(uintx, HeapBaseMinAddress,                                     \
+            "OS specific low limit for heap base address")                  \
+                                                                            \
+  diagnostic(bool, PrintCompressedOopsMode, false,                          \
+            "Print compressed oops base address and encoding mode")         \
                                                                             \
   /* UseMembar is theoretically a temp flag used for memory barrier         \
    * removal testing.  It was supposed to be removed before FCS but has     \
@@ -2169,6 +2172,9 @@
   diagnostic(bool, PrintIntrinsics, false,                                  \
           "prints attempted and successful inlining of intrinsics")         \
                                                                             \
+  product(bool, UsePopCountInstruction, false,                              \
+          "Use population count instruction")                               \
+                                                                            \
   diagnostic(ccstrlist, DisableIntrinsic, "",                               \
           "do not expand intrinsics whose (internal) names appear here")    \
                                                                             \
--- a/src/share/vm/runtime/os.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/runtime/os.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -243,7 +243,7 @@
 
   static char*  non_memory_address_word();
   // reserve, commit and pin the entire memory region
-  static char*  reserve_memory_special(size_t size);
+  static char*  reserve_memory_special(size_t size, char* addr = NULL);
   static bool   release_memory_special(char* addr, size_t bytes);
   static bool   large_page_init();
   static size_t large_page_size();
--- a/src/share/vm/runtime/virtualspace.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/runtime/virtualspace.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -109,6 +109,7 @@
                              const size_t prefix_align,
                              const size_t suffix_size,
                              const size_t suffix_align,
+                             char* requested_address,
                              const size_t noaccess_prefix)
 {
   assert(prefix_size != 0, "sanity");
@@ -131,7 +132,7 @@
   const bool try_reserve_special = UseLargePages &&
     prefix_align == os::large_page_size();
   if (!os::can_commit_large_page_memory() && try_reserve_special) {
-    initialize(size, prefix_align, true, NULL, noaccess_prefix);
+    initialize(size, prefix_align, true, requested_address, noaccess_prefix);
     return;
   }
 
@@ -146,7 +147,13 @@
          noaccess_prefix == prefix_align, "noaccess prefix wrong");
 
   // Optimistically try to reserve the exact size needed.
-  char* addr = os::reserve_memory(size, NULL, prefix_align);
+  char* addr;
+  if (requested_address != 0) {
+    addr = os::attempt_reserve_memory_at(size,
+                                         requested_address-noaccess_prefix);
+  } else {
+    addr = os::reserve_memory(size, NULL, prefix_align);
+  }
   if (addr == NULL) return;
 
   // Check whether the result has the needed alignment (unlikely unless
@@ -206,12 +213,8 @@
   char* base = NULL;
 
   if (special) {
-    // It's not hard to implement reserve_memory_special() such that it can
-    // allocate at fixed address, but there seems no use of this feature
-    // for now, so it's not implemented.
-    assert(requested_address == NULL, "not implemented");
 
-    base = os::reserve_memory_special(size);
+    base = os::reserve_memory_special(size, requested_address);
 
     if (base != NULL) {
       // Check alignment constraints
@@ -372,7 +375,8 @@
                                      bool large, char* requested_address) :
   ReservedSpace(size, alignment, large,
                 requested_address,
-                UseCompressedOops && UseImplicitNullCheckForNarrowOop ?
+                (UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
+                 Universe::narrow_oop_use_implicit_null_checks()) ?
                   lcm(os::vm_page_size(), alignment) : 0) {
   // Only reserved space for the java heap should have a noaccess_prefix
   // if using compressed oops.
@@ -382,9 +386,12 @@
 ReservedHeapSpace::ReservedHeapSpace(const size_t prefix_size,
                                      const size_t prefix_align,
                                      const size_t suffix_size,
-                                     const size_t suffix_align) :
+                                     const size_t suffix_align,
+                                     char* requested_address) :
   ReservedSpace(prefix_size, prefix_align, suffix_size, suffix_align,
-                UseCompressedOops && UseImplicitNullCheckForNarrowOop ?
+                requested_address,
+                (UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
+                 Universe::narrow_oop_use_implicit_null_checks()) ?
                   lcm(os::vm_page_size(), prefix_align) : 0) {
   protect_noaccess_prefix(prefix_size+suffix_size);
 }
--- a/src/share/vm/runtime/virtualspace.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/runtime/virtualspace.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -73,7 +73,8 @@
                 const size_t noaccess_prefix = 0);
   ReservedSpace(const size_t prefix_size, const size_t prefix_align,
                 const size_t suffix_size, const size_t suffix_align,
-                const size_t noaccess_prefix);
+                char* requested_address,
+                const size_t noaccess_prefix = 0);
 
   // Accessors
   char*  base()      const { return _base;      }
@@ -121,7 +122,8 @@
   ReservedHeapSpace(size_t size, size_t forced_base_alignment,
                     bool large, char* requested_address);
   ReservedHeapSpace(const size_t prefix_size, const size_t prefix_align,
-                    const size_t suffix_size, const size_t suffix_align);
+                    const size_t suffix_size, const size_t suffix_align,
+                    char* requested_address);
 };
 
 // VirtualSpace is data structure for committing a previously reserved address range in smaller chunks.
--- a/src/share/vm/runtime/vmStructs.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/runtime/vmStructs.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -263,7 +263,9 @@
      static_field(Universe,                    _bootstrapping,                                bool)                                  \
      static_field(Universe,                    _fully_initialized,                            bool)                                  \
      static_field(Universe,                    _verify_count,                                 int)                                   \
-     static_field(Universe,                    _heap_base,                                    address)                                   \
+     static_field(Universe,                    _narrow_oop._base,                             address)                               \
+     static_field(Universe,                    _narrow_oop._shift,                            int)                                   \
+     static_field(Universe,                    _narrow_oop._use_implicit_null_checks,         bool)                                  \
                                                                                                                                      \
   /**********************************************************************************/                                               \
   /* Generation and Space hierarchies                                               */                                               \
--- a/src/share/vm/runtime/vm_version.cpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/runtime/vm_version.cpp	Thu Mar 19 09:13:24 2009 -0700
@@ -163,9 +163,11 @@
       #elif _MSC_VER == 1200
         #define HOTSPOT_BUILD_COMPILER "MS VC++ 6.0"
       #elif _MSC_VER == 1310
-        #define HOTSPOT_BUILD_COMPILER "MS VC++ 7.1"
+        #define HOTSPOT_BUILD_COMPILER "MS VC++ 7.1 (VS2003)"
       #elif _MSC_VER == 1400
-        #define HOTSPOT_BUILD_COMPILER "MS VC++ 8.0"
+        #define HOTSPOT_BUILD_COMPILER "MS VC++ 8.0 (VS2005)"
+      #elif _MSC_VER == 1500
+        #define HOTSPOT_BUILD_COMPILER "MS VC++ 9.0 (VS2008)"
       #else
         #define HOTSPOT_BUILD_COMPILER "unknown MS VC++:" XSTR(_MSC_VER)
       #endif
--- a/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Thu Mar 19 09:13:24 2009 -0700
@@ -162,7 +162,7 @@
 }
 
 // Visual Studio 2005 deprecates POSIX names - use ISO C++ names instead
-#if _MSC_VER >= 1400 && !defined(_WIN64)
+#if _MSC_VER >= 1400
 #define open _open
 #define close _close
 #define read  _read
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6378821/Test6378821.java	Thu Mar 19 09:13:24 2009 -0700
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6378821
+ * @summary where available, bitCount() should use POPC on SPARC processors and AMD+10h
+ *
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test6378821.fcomp Test6378821
+ */
+
+public class Test6378821 {
+    static final int[]  ia = new int[]  { 0x12345678 };
+    static final long[] la = new long[] { 0x12345678abcdefL };
+
+    public static void main(String [] args) {
+        // Resolve the class and the method.
+        Integer.bitCount(1);
+        Long.bitCount(1);
+
+        sub(ia[0]);
+        sub(la[0]);
+        sub(ia);
+        sub(la);
+    }
+
+    static void check(int i, int expected, int result) {
+        if (result != expected) {
+            throw new InternalError("Wrong population count for " + i + ": " + result + " != " + expected);
+        }
+    }
+
+    static void check(long l, int expected, int result) {
+        if (result != expected) {
+            throw new InternalError("Wrong population count for " + l + ": " + result + " != " + expected);
+        }
+    }
+
+    static void sub(int i)     { check(i,     fint(i),  fcomp(i) ); }
+    static void sub(int[] ia)  { check(ia[0], fint(ia), fcomp(ia)); }
+    static void sub(long l)    { check(l,     fint(l),  fcomp(l) ); }
+    static void sub(long[] la) { check(la[0], fint(la), fcomp(la)); }
+
+    static int fint (int i)     { return Integer.bitCount(i); }
+    static int fcomp(int i)     { return Integer.bitCount(i); }
+
+    static int fint (int[] ia)  { return Integer.bitCount(ia[0]); }
+    static int fcomp(int[] ia)  { return Integer.bitCount(ia[0]); }
+
+    static int fint (long l)    { return Long.bitCount(l); }
+    static int fcomp(long l)    { return Long.bitCount(l); }
+
+    static int fint (long[] la) { return Long.bitCount(la[0]); }
+    static int fcomp(long[] la) { return Long.bitCount(la[0]); }
+}