changeset 642:c771b7f43bbf

6378821: bitCount() should use POPC on SPARC processors and AMD+10h Summary: bitCount() should use POPC on SPARC processors where POPC is implemented directly in hardware. Reviewed-by: kvn, never
author twisti
date Fri, 13 Mar 2009 11:35:17 -0700
parents 660978a2a31a
children c517646eef23
files src/cpu/sparc/vm/sparc.ad src/cpu/sparc/vm/vm_version_sparc.cpp src/cpu/sparc/vm/vm_version_sparc.hpp src/cpu/x86/vm/assembler_x86.cpp src/cpu/x86/vm/assembler_x86.hpp src/cpu/x86/vm/vm_version_x86.cpp src/cpu/x86/vm/vm_version_x86.hpp src/cpu/x86/vm/x86_32.ad src/cpu/x86/vm/x86_64.ad src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp src/share/vm/classfile/vmSymbols.hpp src/share/vm/opto/classes.hpp src/share/vm/opto/connode.hpp src/share/vm/opto/library_call.cpp src/share/vm/runtime/globals.hpp test/compiler/6378821/Test6378821.java
diffstat 16 files changed, 374 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/sparc.ad	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Fri Mar 13 11:35:17 2009 -0700
@@ -9015,6 +9015,33 @@
   ins_pipe(long_memory_op);
 %}
 
+
+//---------- Population Count Instructions -------------------------------------
+// POPC counts all 64 bits of its source, so the int operand is zero-extended first.
+instruct popCountI(iRegI dst, iRegI src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+  format %{ "SRL    $src, G0, $dst\t! clear upper word\n\tPOPC   $dst, $dst" %}
+  ins_encode %{
+    __ srl($src$$Register, G0, $dst$$Register);  // NOTE(review): on a 32-bit VM dst may need a 64-bit-safe register class - confirm
+    __ popc($dst$$Register, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int, hence the int dst paired with a long src.
+instruct popCountL(iRegI dst, iRegL src) %{
+  predicate(UsePopCountInstruction);  // rule is only eligible when the flag is on
+  match(Set dst (PopCountL src));
+
+  format %{ "POPC   $src, $dst" %}
+  ins_encode %{
+    __ popc($src$$Register, $dst$$Register);  // single POPC from the long source into dst
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+
 // ============================================================================
 //------------Bytes reverse--------------------------------------------------
 
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Mar 13 11:35:17 2009 -0700
@@ -92,10 +92,18 @@
 #endif
   }
 
+  // Use hardware population count instruction if available.
+  if (has_hardware_popc()) {
+    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+      UsePopCountInstruction = true;
+    }
+  }
+
   char buf[512];
-  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_v8() ? ", has_v8" : ""),
                (has_v9() ? ", has_v9" : ""),
+               (has_hardware_popc() ? ", popc" : ""),
                (has_vis1() ? ", has_vis1" : ""),
                (has_vis2() ? ", has_vis2" : ""),
                (is_ultra3() ? ", is_ultra3" : ""),
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Fri Mar 13 11:35:17 2009 -0700
@@ -29,10 +29,11 @@
     hardware_mul32     = 1,
     hardware_div32     = 2,
     hardware_fsmuld    = 3,
-    v9_instructions    = 4,
-    vis1_instructions  = 5,
-    vis2_instructions  = 6,
-    sun4v_instructions = 7
+    hardware_popc      = 4,
+    v9_instructions    = 5,
+    vis1_instructions  = 6,
+    vis2_instructions  = 7,
+    sun4v_instructions = 8
   };
 
   enum Feature_Flag_Set {
@@ -43,6 +44,7 @@
     hardware_mul32_m    = 1 << hardware_mul32,
     hardware_div32_m    = 1 << hardware_div32,
     hardware_fsmuld_m   = 1 << hardware_fsmuld,
+    hardware_popc_m     = 1 << hardware_popc,
     v9_instructions_m   = 1 << v9_instructions,
     vis1_instructions_m = 1 << vis1_instructions,
     vis2_instructions_m = 1 << vis2_instructions,
@@ -81,6 +83,7 @@
   static bool has_hardware_mul32()      { return (_features & hardware_mul32_m) != 0; }
   static bool has_hardware_div32()      { return (_features & hardware_div32_m) != 0; }
   static bool has_hardware_fsmuld()     { return (_features & hardware_fsmuld_m) != 0; }
+  static bool has_hardware_popc()       { return (_features & hardware_popc_m) != 0; }  // hardware_popc_m bit present in _features
   static bool has_vis1()                { return (_features & vis1_instructions_m) != 0; }
   static bool has_vis2()                { return (_features & vis2_instructions_m) != 0; }
 
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Mar 13 11:35:17 2009 -0700
@@ -2193,6 +2193,25 @@
   emit_byte(0x58 | encode);
 }
 
+void Assembler::popcntl(Register dst, Address src) {  // POPCNT with a memory source, non-wide (prefix) form
+  assert(VM_Version::supports_popcnt(), "must support");  // callers must only emit this on CPUs reporting POPCNT
+  InstructionMark im(this);
+  emit_byte(0xF3);         // F3 byte is emitted ahead of whatever prefix(src, dst) produces
+  prefix(src, dst);
+  emit_byte(0x0F);         // opcode bytes 0F B8
+  emit_byte(0xB8);
+  emit_operand(dst, src);  // operand encoding for dst and the memory source
+}
+
+void Assembler::popcntl(Register dst, Register src) {  // POPCNT register-to-register, non-wide form
+  assert(VM_Version::supports_popcnt(), "must support");  // callers must only emit this on CPUs reporting POPCNT
+  emit_byte(0xF3);  // F3 byte is emitted ahead of whatever prefix_and_encode() produces
+  int encode = prefix_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);  // opcode bytes 0F B8
+  emit_byte(0xB8);
+  emit_byte(0xC0 | encode);  // register-direct ModRM byte built from the combined dst/src encoding
+}
+
 void Assembler::popf() {
   emit_byte(0x9D);
 }
@@ -4080,6 +4099,25 @@
   addq(rsp, 16 * wordSize);
 }
 
+void Assembler::popcntq(Register dst, Address src) {  // POPCNT with a memory source, wide (prefixq) form
+  assert(VM_Version::supports_popcnt(), "must support");  // callers must only emit this on CPUs reporting POPCNT
+  InstructionMark im(this);
+  emit_byte(0xF3);         // F3 byte is emitted ahead of whatever prefixq(src, dst) produces
+  prefixq(src, dst);
+  emit_byte(0x0F);         // opcode bytes 0F B8
+  emit_byte(0xB8);
+  emit_operand(dst, src);  // operand encoding for dst and the memory source
+}
+
+void Assembler::popcntq(Register dst, Register src) {  // POPCNT register-to-register, wide (prefixq) form
+  assert(VM_Version::supports_popcnt(), "must support");  // callers must only emit this on CPUs reporting POPCNT
+  emit_byte(0xF3);  // F3 byte is emitted ahead of whatever prefixq_and_encode() produces
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);  // opcode bytes 0F B8
+  emit_byte(0xB8);
+  emit_byte(0xC0 | encode);  // register-direct ModRM byte built from the combined dst/src encoding
+}
+
 void Assembler::popq(Address dst) {
   InstructionMark im(this);
   prefixq(dst);
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Mar 13 11:35:17 2009 -0700
@@ -1224,6 +1224,14 @@
   void popq(Address dst);
 #endif
 
+  void popcntl(Register dst, Address src);   // population count, memory source
+  void popcntl(Register dst, Register src);  // population count, register source
+
+#ifdef _LP64
+  void popcntq(Register dst, Address src);   // wide variants are declared for LP64 builds only
+  void popcntq(Register dst, Register src);
+#endif
+
   // Prefetches (SSE, SSE2, 3DNOW only)
 
   void prefetchnta(Address src);
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Fri Mar 13 11:35:17 2009 -0700
@@ -284,7 +284,7 @@
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -297,6 +297,7 @@
                (supports_ssse3()? ", ssse3": ""),
                (supports_sse4_1() ? ", sse4.1" : ""),
                (supports_sse4_2() ? ", sse4.2" : ""),
+               (supports_popcnt() ? ", popcnt" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow()   ? ", 3dnow"  : ""),
                (supports_3dnow2()  ? ", 3dnowext" : ""),
@@ -410,6 +411,13 @@
     }
   }
 
+  // Use population count instruction if available.
+  if (supports_popcnt()) {
+    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+      UsePopCountInstruction = true;
+    }
+  }
+
   assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
   assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
 
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Fri Mar 13 11:35:17 2009 -0700
@@ -70,7 +70,9 @@
                dca      : 1,
                sse4_1   : 1,
                sse4_2   : 1,
-                        : 11;
+                        : 2,
+               popcnt   : 1,
+                        : 8;
     } bits;
   };
 
@@ -179,7 +181,8 @@
      CPU_SSSE3  = (1 << 9),
      CPU_SSE4A  = (1 << 10),
      CPU_SSE4_1 = (1 << 11),
-     CPU_SSE4_2 = (1 << 12)
+     CPU_SSE4_2 = (1 << 12),
+     CPU_POPCNT = (1 << 13)
    } cpuFeatureFlags;
 
   // cpuid information block.  All info derived from executing cpuid with
@@ -290,6 +293,8 @@
       result |= CPU_SSE4_1;
     if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
       result |= CPU_SSE4_2;
+    if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
+      result |= CPU_POPCNT;
     return result;
   }
 
@@ -379,6 +384,7 @@
   static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
   static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
   static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
+  static bool supports_popcnt()   { return (_cpuFeatures & CPU_POPCNT) != 0; }  // CPU_POPCNT bit present in _cpuFeatures
   //
   // AMD features
   //
--- a/src/cpu/x86/vm/x86_32.ad	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Fri Mar 13 11:35:17 2009 -0700
@@ -1483,16 +1483,20 @@
   // main source block for now.  In future, we can generalize this by
   // adding a syntax that specifies the sizes of fields in an order,
   // so that the adlc can build the emit functions automagically
-  enc_class OpcP %{             // Emit opcode
-    emit_opcode(cbuf,$primary);
-  %}
-
-  enc_class OpcS %{             // Emit opcode
-    emit_opcode(cbuf,$secondary);
-  %}
-
-  enc_class Opcode(immI d8 ) %{ // Emit opcode
-    emit_opcode(cbuf,$d8$$constant);
+
+  // Emit the instruction's primary opcode ($primary) into cbuf
+  enc_class OpcP %{
+    emit_opcode(cbuf, $primary);
+  %}
+
+  // Emit the instruction's secondary opcode ($secondary) into cbuf
+  enc_class OpcS %{
+    emit_opcode(cbuf, $secondary);
+  %}
+
+  // Emit the opcode passed in as the immediate operand d8
+  enc_class Opcode(immI d8) %{
+    emit_opcode(cbuf, $d8$$constant);
   %}
 
   enc_class SizePrefix %{
@@ -6387,6 +6391,67 @@
 %}
 
 
+//---------- Population Count Instructions -------------------------------------
+
+instruct popCountI(eRegI dst, eRegI src, eFlagsReg cr) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+  effect(KILL cr);  // POPCNT rewrites EFLAGS, so the flags register is clobbered
+  format %{ "POPCNT $dst, $src" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct popCountI_mem(eRegI dst, memory mem, eFlagsReg cr) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI (LoadI mem)));
+  effect(KILL cr);  // POPCNT rewrites EFLAGS, so the flags register is clobbered
+  format %{ "POPCNT $dst, $mem" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int. The long is counted as two 32-bit halves whose counts are added.
+instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL src));
+  effect(KILL cr, TEMP tmp, TEMP dst);  // dst is written before the high half of src is read
+
+  format %{ "POPCNT $dst, $src.lo\n\t"
+            "POPCNT $tmp, $src.hi\n\t"
+            "ADD    $dst, $tmp" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $src$$Register);                  // count of the low half
+    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));   // count of the high half
+    __ addl($dst$$Register, $tmp$$Register);                     // dst = low count + high count
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int. The long in memory is counted as two 32-bit words whose counts are added.
+instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL (LoadL mem)));
+  effect(KILL cr, TEMP tmp, TEMP dst);  // dst is written before the second address is formed from mem
+
+  format %{ "POPCNT $dst, $mem\n\t"
+            "POPCNT $tmp, $mem+4\n\t"
+            "ADD    $dst, $tmp" %}
+  ins_encode %{
+    // Both addresses are rebuilt from the operand's base/index/scale/disp;
+    // the second one uses disp + 4 to reach the other 32-bit word.
+    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
+    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
+    __ addl($dst$$Register, $tmp$$Register);  // dst = sum of the two word counts
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+
 //----------Load/Store/Move Instructions---------------------------------------
 //----------Load Instructions--------------------------------------------------
 // Load Byte (8bit signed)
--- a/src/cpu/x86/vm/x86_64.ad	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Fri Mar 13 11:35:17 2009 -0700
@@ -7429,6 +7429,56 @@
   ins_pipe( ialu_mem_reg );
 %}
 
+
+//---------- Population Count Instructions -------------------------------------
+
+instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+  effect(KILL cr);  // POPCNT rewrites RFLAGS, so the flags register is clobbered
+  format %{ "popcnt  $dst, $src" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI (LoadI mem)));
+  effect(KILL cr);  // POPCNT rewrites RFLAGS, so the flags register is clobbered
+  format %{ "popcnt  $dst, $mem" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int, hence the int dst paired with a long src.
+instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL src));
+  effect(KILL cr);  // POPCNT rewrites RFLAGS, so the flags register is clobbered
+  format %{ "popcnt  $dst, $src" %}
+  ins_encode %{
+    __ popcntq($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int, hence the int dst for a long loaded from memory.
+instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL (LoadL mem)));
+  effect(KILL cr);  // POPCNT rewrites RFLAGS, so the flags register is clobbered
+  format %{ "popcnt  $dst, $mem" %}
+  ins_encode %{
+    __ popcntq($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+
 //----------MemBar Instructions-----------------------------------------------
 // Memory barrier flavors
 
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Fri Mar 13 11:35:17 2009 -0700
@@ -85,6 +85,7 @@
     if (av & AV_SPARC_DIV32)  features |= hardware_div32_m;
     if (av & AV_SPARC_FSMULD) features |= hardware_fsmuld_m;
     if (av & AV_SPARC_V8PLUS) features |= v9_instructions_m;
+    if (av & AV_SPARC_POPC)   features |= hardware_popc_m;
     if (av & AV_SPARC_VIS)    features |= vis1_instructions_m;
     if (av & AV_SPARC_VIS2)   features |= vis2_instructions_m;
   } else {
--- a/src/share/vm/classfile/vmSymbols.hpp	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Fri Mar 13 11:35:17 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -284,6 +284,7 @@
   template(value_name,                                "value")                                    \
   template(frontCacheEnabled_name,                    "frontCacheEnabled")                        \
   template(stringCacheEnabled_name,                   "stringCacheEnabled")                       \
+  template(bitCount_name,                             "bitCount")                                 \
                                                                                                   \
   /* non-intrinsic name/signature pairs: */                                                       \
   template(register_method_name,                      "register")                                 \
@@ -304,6 +305,7 @@
   template(double_long_signature,                     "(D)J")                                     \
   template(double_double_signature,                   "(D)D")                                     \
   template(int_float_signature,                       "(I)F")                                     \
+  template(long_int_signature,                        "(J)I")                                     \
   template(long_long_signature,                       "(J)J")                                     \
   template(long_double_signature,                     "(J)D")                                     \
   template(byte_signature,                            "B")                                        \
@@ -507,6 +509,10 @@
    do_name(     doubleToLongBits_name,                           "doubleToLongBits")                                    \
   do_intrinsic(_longBitsToDouble,         java_lang_Double,       longBitsToDouble_name,    long_double_signature, F_S) \
    do_name(     longBitsToDouble_name,                           "longBitsToDouble")                                    \
+                                                                                                                        \
+  do_intrinsic(_bitCount_i,               java_lang_Integer,      bitCount_name,            int_int_signature,   F_S)   \
+  do_intrinsic(_bitCount_l,               java_lang_Long,         bitCount_name,            long_int_signature,  F_S)   \
+                                                                                                                        \
   do_intrinsic(_reverseBytes_i,           java_lang_Integer,      reverseBytes_name,        int_int_signature,   F_S)   \
    do_name(     reverseBytes_name,                               "reverseBytes")                                        \
   do_intrinsic(_reverseBytes_l,           java_lang_Long,         reverseBytes_name,        long_long_signature, F_S)   \
@@ -696,7 +702,6 @@
   do_signature(putShort_raw_signature,    "(JS)V")                                                                      \
   do_signature(getChar_raw_signature,     "(J)C")                                                                       \
   do_signature(putChar_raw_signature,     "(JC)V")                                                                      \
-  do_signature(getInt_raw_signature,      "(J)I")                                                                       \
   do_signature(putInt_raw_signature,      "(JI)V")                                                                      \
       do_alias(getLong_raw_signature,    /*(J)J*/ long_long_signature)                                                  \
       do_alias(putLong_raw_signature,    /*(JJ)V*/ long_long_void_signature)                                            \
@@ -713,7 +718,7 @@
   do_intrinsic(_getByte_raw,              sun_misc_Unsafe,        getByte_name, getByte_raw_signature,           F_RN)  \
   do_intrinsic(_getShort_raw,             sun_misc_Unsafe,        getShort_name, getShort_raw_signature,         F_RN)  \
   do_intrinsic(_getChar_raw,              sun_misc_Unsafe,        getChar_name, getChar_raw_signature,           F_RN)  \
-  do_intrinsic(_getInt_raw,               sun_misc_Unsafe,        getInt_name, getInt_raw_signature,             F_RN)  \
+  do_intrinsic(_getInt_raw,               sun_misc_Unsafe,        getInt_name, long_int_signature,               F_RN)  \
   do_intrinsic(_getLong_raw,              sun_misc_Unsafe,        getLong_name, getLong_raw_signature,           F_RN)  \
   do_intrinsic(_getFloat_raw,             sun_misc_Unsafe,        getFloat_name, getFloat_raw_signature,         F_RN)  \
   do_intrinsic(_getDouble_raw,            sun_misc_Unsafe,        getDouble_name, getDouble_raw_signature,       F_RN)  \
--- a/src/share/vm/opto/classes.hpp	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/share/vm/opto/classes.hpp	Fri Mar 13 11:35:17 2009 -0700
@@ -184,6 +184,8 @@
 macro(Parm)
 macro(PartialSubtypeCheck)
 macro(Phi)
+macro(PopCountI)
+macro(PopCountL)
 macro(PowD)
 macro(PrefetchRead)
 macro(PrefetchWrite)
--- a/src/share/vm/opto/connode.hpp	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/share/vm/opto/connode.hpp	Fri Mar 13 11:35:17 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -635,3 +635,23 @@
   virtual uint ideal_reg() const { return Op_RegL; }
   virtual const Type* Value( PhaseTransform *phase ) const;
 };
+
+//---------- PopCountINode -----------------------------------------------------
+// Population count (bit count) of an integer; the node's type is the full int range.
+class PopCountINode : public Node {
+public:
+  PopCountINode(Node* in1) : Node(0, in1) {}  // passes 0 as the first Node input and in1 as the second
+  virtual int Opcode() const;
+  const Type* bottom_type() const { return TypeInt::INT; }  // reported as a plain int, not narrowed further
+  virtual uint ideal_reg() const { return Op_RegI; }        // result lives in an int register
+};
+
+//---------- PopCountLNode -----------------------------------------------------
+// Population count (bit count) of a long; the result is still typed as an int.
+class PopCountLNode : public Node {
+public:
+  PopCountLNode(Node* in1) : Node(0, in1) {}  // passes 0 as the first Node input and in1 as the second
+  virtual int Opcode() const;
+  const Type* bottom_type() const { return TypeInt::INT; }  // int result even though the input is a long
+  virtual uint ideal_reg() const { return Op_RegI; }        // result lives in an int register
+};
--- a/src/share/vm/opto/library_call.cpp	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/share/vm/opto/library_call.cpp	Fri Mar 13 11:35:17 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -221,6 +221,7 @@
   bool inline_unsafe_CAS(BasicType type);
   bool inline_unsafe_ordered_store(BasicType type);
   bool inline_fp_conversions(vmIntrinsics::ID id);
+  bool inline_bitCount(vmIntrinsics::ID id);
   bool inline_reverseBytes(vmIntrinsics::ID id);
 };
 
@@ -314,6 +315,11 @@
     if (!JDK_Version::is_gte_jdk14x_version())  return NULL;
     break;
 
+  case vmIntrinsics::_bitCount_i:
+  case vmIntrinsics::_bitCount_l:
+    if (!UsePopCountInstruction)  return NULL;
+    break;
+
  default:
     break;
   }
@@ -617,6 +623,10 @@
   case vmIntrinsics::_longBitsToDouble:
     return inline_fp_conversions(intrinsic_id());
 
+  case vmIntrinsics::_bitCount_i:
+  case vmIntrinsics::_bitCount_l:
+    return inline_bitCount(intrinsic_id());
+
   case vmIntrinsics::_reverseBytes_i:
   case vmIntrinsics::_reverseBytes_l:
     return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id());
@@ -1714,6 +1724,27 @@
   }
 }
 
+//----------------------------inline_bitCount_int/long-----------------------
+// inline int Integer.bitCount(int)
+// inline int Long.bitCount(long)
+bool LibraryCallKit::inline_bitCount(vmIntrinsics::ID id) {
+  assert(id == vmIntrinsics::_bitCount_i || id == vmIntrinsics::_bitCount_l, "not bitCount");
+  if (id == vmIntrinsics::_bitCount_i && !Matcher::has_match_rule(Op_PopCountI)) return false;  // no matcher rule: give up before touching the stack
+  if (id == vmIntrinsics::_bitCount_l && !Matcher::has_match_rule(Op_PopCountL)) return false;  // same check for the long flavour
+  _sp += arg_size();  // restore stack pointer
+  switch (id) {
+  case vmIntrinsics::_bitCount_i:
+    push(_gvn.transform(new (C, 2) PopCountINode(pop())));       // int argument is popped with pop()
+    break;
+  case vmIntrinsics::_bitCount_l:
+    push(_gvn.transform(new (C, 2) PopCountLNode(pop_pair())));  // long argument is popped with pop_pair(); the result is pushed with push()
+    break;
+  default:
+    ShouldNotReachHere();  // the assert above admits only the two bitCount ids
+  }
+  return true;
+}
+
 //----------------------------inline_reverseBytes_int/long-------------------
 // inline Integer.reverseBytes(int)
 // inline Long.reverseBytes(long)
--- a/src/share/vm/runtime/globals.hpp	Thu Mar 12 10:37:46 2009 -0700
+++ b/src/share/vm/runtime/globals.hpp	Fri Mar 13 11:35:17 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2172,6 +2172,9 @@
   diagnostic(bool, PrintIntrinsics, false,                                  \
           "prints attempted and successful inlining of intrinsics")         \
                                                                             \
+  product(bool, UsePopCountInstruction, false,                              \
+          "Use population count instruction")                               \
+                                                                            \
   diagnostic(ccstrlist, DisableIntrinsic, "",                               \
           "do not expand intrinsics whose (internal) names appear here")    \
                                                                             \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6378821/Test6378821.java	Fri Mar 13 11:35:17 2009 -0700
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6378821
+ * @summary where available, bitCount() should use POPC on SPARC processors and AMD+10h
+ *
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test6378821.fcomp Test6378821
+ */
+
+public class Test6378821 {
+    static final int[]  ia = new int[]  { 0x12345678 };  // NOTE(review): sign bit is clear; a negative input is not covered here
+    static final long[] la = new long[] { 0x12345678abcdefL };  // NOTE(review): top bits are clear as well
+
+    public static void main(String [] args) {
+        // Resolve the class and the method.
+        Integer.bitCount(1);
+        Long.bitCount(1);
+
+        sub(ia[0]);  // value passed directly
+        sub(la[0]);
+        sub(ia);     // value read from the array inside fint/fcomp
+        sub(la);
+    }
+
+    static void check(int i, int expected, int result) {  // throws when the two counts for i disagree
+        if (result != expected) {
+            throw new InternalError("Wrong population count for " + i + ": " + result + " != " + expected);
+        }
+    }
+
+    static void check(long l, int expected, int result) {  // long-input twin of the check above
+        if (result != expected) {
+            throw new InternalError("Wrong population count for " + l + ": " + result + " != " + expected);
+        }
+    }
+
+    static void sub(int i)     { check(i,     fint(i),  fcomp(i) ); }  // fint result is the expected value, fcomp the one under test
+    static void sub(int[] ia)  { check(ia[0], fint(ia), fcomp(ia)); }
+    static void sub(long l)    { check(l,     fint(l),  fcomp(l) ); }
+    static void sub(long[] la) { check(la[0], fint(la), fcomp(la)); }
+
+    static int fint (int i)     { return Integer.bitCount(i); }  // not named in the @run CompileOnly list
+    static int fcomp(int i)     { return Integer.bitCount(i); }  // named in the @run CompileOnly list
+
+    static int fint (int[] ia)  { return Integer.bitCount(ia[0]); }
+    static int fcomp(int[] ia)  { return Integer.bitCount(ia[0]); }  // presumably targets the load-from-memory match rules - confirm
+
+    static int fint (long l)    { return Long.bitCount(l); }
+    static int fcomp(long l)    { return Long.bitCount(l); }
+
+    static int fint (long[] la) { return Long.bitCount(la[0]); }
+    static int fcomp(long[] la) { return Long.bitCount(la[0]); }  // presumably targets the load-from-memory match rules - confirm
+}