changeset 16:5a930f8c514b

Bug 1488: Performance improvement for SSE2 instructions. reviewed-by: ykubota
author Yasumasa Suenaga <suenaga.yasumasa@lab.ntt.co.jp>
date Wed, 26 Jun 2013 17:44:08 +0900
parents 6d475889590a
children 5b0c1a7c3bf9 7e87901b8ddd
files agent/ChangeLog agent/src/bitMapMarker.cpp agent/src/jvmInfo.cpp agent/src/jvmInfo.hpp agent/src/oopUtil.cpp agent/src/snapShotContainer.cpp agent/src/snapShotContainer.hpp agent/src/util.hpp
diffstat 8 files changed, 106 insertions(+), 46 deletions(-) [+]
line wrap: on
line diff
--- a/agent/ChangeLog	Tue Jun 25 19:53:25 2013 +0900
+++ b/agent/ChangeLog	Wed Jun 26 17:44:08 2013 +0900
@@ -1,3 +1,7 @@
+2013-06-26  Yasumasa Suenaga  <suenaga.yasumasa@lab.ntt.co.jp>
+
+	* Bug 1488: Performance improvement for SSE2 instructions.
+
 2013-06-25  Yasumasa Suenaga  <suenaga.yasumasa@lab.ntt.co.jp>
 
 	* Bug 1428: Refactoring to get GCCause string
--- a/agent/src/bitMapMarker.cpp	Tue Jun 25 19:53:25 2013 +0900
+++ b/agent/src/bitMapMarker.cpp	Wed Jun 26 17:44:08 2013 +0900
@@ -299,8 +299,20 @@
   }
   else
 #endif // AVX
+#ifndef __amd64__
 #ifdef SSE2
-  if(likely(usableSSE2)){
+  if(unlikely(!usableSSE2))
+#endif // SSE2
+  {
+    /* Pure C. */
+    memset(this->bitmapAddr, 0, this->bitmapSize);
+  }
+#ifdef SSE2
+  else
+#endif // SSE2
+#endif // __amd64__
+#ifdef SSE2
+  {
     asm volatile("pxor %%xmm0, %%xmm0;"
                  ".align 16;"
                  "SSE2_LOOP:" /* memset 128 bytes per LOOP. */
@@ -321,12 +333,7 @@
                  : : "r" (this->bitmapSize), "r" (this->bitmapAddr)
                  : "cc", "%xmm0");
   }
-  else
 #endif // SSE2
-  {
-    /* Pure C. */
-    memset(this->bitmapAddr, 0, this->bitmapSize);
-  }
   
   /* Reset advise. */
   madvise(this->bitmapAddr, this->bitmapSize, MADV_RANDOM);
--- a/agent/src/jvmInfo.cpp	Tue Jun 25 19:53:25 2013 +0900
+++ b/agent/src/jvmInfo.cpp	Wed Jun 26 17:44:08 2013 +0900
@@ -87,18 +87,26 @@
       MAXSIZE_GC_CAUSE) != 0)) {
       throw "Couldn't allocate gc-cause memory!";
     }
-  } else
+  }
+  else
 #endif // AVX
+#ifndef __amd64__
 #ifdef SSE2
-  if (likely(usableSSE2)) {
+  if(unlikely(!usableSSE2))
+#endif // SSE2
+  {
+    gcCause = (char *)calloc(1, MAXSIZE_GC_CAUSE);
+  }
+#ifdef SSE2
+  else
+#endif // SSE2
+#endif // __amd64__
+#ifdef SSE2
+  {
     if (unlikely(posix_memalign((void **)&gcCause, 16, /* for movdqa. */
       MAXSIZE_GC_CAUSE) != 0)) {
       throw "Couldn't allocate gc-cause memory!";
     }
-  } else {
-#endif // SSE2
-    gcCause = (char *)calloc(1, MAXSIZE_GC_CAUSE);
-#ifdef SSE2
   }
 #endif // SSE2
   
--- a/agent/src/jvmInfo.hpp	Tue Jun 25 19:53:25 2013 +0900
+++ b/agent/src/jvmInfo.hpp	Wed Jun 26 17:44:08 2013 +0900
@@ -220,8 +220,19 @@
         }
         else
       #endif // SSE3.
+      #ifndef __amd64__
       #ifdef SSE2
-        if(likely(usableSSE2)){
+        if(unlikely(!usableSSE2)) /* No SSE2 at runtime: use plain memcpy. */
+      #endif // SSE2
+        {
+          __builtin_memcpy(this->gcCause, this->_gcCause, MAXSIZE_GC_CAUSE);
+        }
+      #ifdef SSE2
+        else
+      #endif // SSE2
+      #endif // __amd64__
+      #ifdef SSE2
+        {
           asm volatile(
           /* Strcpy with SSE2 (80 bytes). */
             SSE_MEMCPY_GCCAUSE("movdqu", "(%0)", "(%1)")
@@ -231,11 +242,7 @@
             : "cc", "%xmm0", "%xmm1"
           );
         }
-        else
       #endif // SSE2.
-        {
-          __builtin_memcpy(this->gcCause, this->_gcCause, MAXSIZE_GC_CAUSE);
-        }
 
       }
 
@@ -489,17 +496,24 @@
       }
       else
     #endif // AVX.
+    #ifndef __amd64__
     #ifdef SSE2
-      if(likely(usableSSE2)){
+      if(unlikely(!usableSSE2))
+    #endif // SSE2
+      {
+        memcpy(this->gcCause, UNKNOWN_GC_CAUSE, 16);
+      }
+    #ifdef SSE2
+      else
+    #endif // SSE2
+    #endif // __amd64__
+    #ifdef SSE2
+      {
         asm volatile("movdqa   (%1), %%xmm0;"
                      "movdqa %%xmm0,   (%0);"
           : : "a" (this->gcCause), "c" (UNKNOWN_GC_CAUSE) : "%xmm0");
       }
-      else
-    #endif // SSE2.
-      {
-        memcpy(this->gcCause, UNKNOWN_GC_CAUSE, 16);
-      }
+    #endif // SSE2
 
     }
 
--- a/agent/src/oopUtil.cpp	Tue Jun 25 19:53:25 2013 +0900
+++ b/agent/src/oopUtil.cpp	Wed Jun 26 17:44:08 2013 +0900
@@ -1068,13 +1068,15 @@
       /* Copy class name if class is instance class. */
       /* As like "instanceKlass::signature_name()".  */
       str[0] = 'L';
-      memcpy(&str[1], name, len);
+      //memcpy(&str[1], name, len);
+      __builtin_memcpy(&str[1], name, len);
       str[len + 1] = ';';
       str[len + 2] = '\0';
     } else {
       
       /* Copy class name if class is other. */
-      memcpy(str, name, len);
+      //memcpy(str, name, len);
+      __builtin_memcpy(str, name, len);
       str[len] = '\0';
     }
   }
--- a/agent/src/snapShotContainer.cpp	Tue Jun 25 19:53:25 2013 +0900
+++ b/agent/src/snapShotContainer.cpp	Wed Jun 26 17:44:08 2013 +0900
@@ -232,18 +232,25 @@
   }
   else
 #endif
+#ifndef __amd64__
 #ifdef SSE2
-  if(likely(usableSSE2)){
+  if(unlikely(!usableSSE2))
+#endif // SSE2
+  {
+    cur->count = 0;
+    cur->total_size = 0;
+  }
+#ifdef SSE2
+  else
+#endif // SSE2
+#endif // __amd64__
+#ifdef SSE2
+  {
     asm volatile("pxor %%xmm0, %%xmm0;"
                  "movdqa %%xmm0, (%0);"
                  : : "r" (cur) : "%xmm0");
   }
-  else
-#endif
-  {
-    cur->count = 0;
-    cur->total_size = 0;
-  }
+#endif // SSE2
 
   /* Set counter map. */
   counterMap[objData] = cur;
@@ -308,18 +315,25 @@
     }
     else
 #endif
+#ifndef __amd64__
 #ifdef SSE2
-    if(likely(usableSSE2)){
+    if(unlikely(!usableSSE2))
+#endif // SSE2
+    {
+      it->second->count = 0;
+      it->second->total_size = 0;
+    }
+#ifdef SSE2
+    else
+#endif // SSE2
+#endif // __amd64__
+#ifdef SSE2
+    {
       asm volatile("pxor %%xmm0, %%xmm0;"
                    "movdqa %%xmm0, (%0);"
                    : : "r" (it->second) : "%xmm0");
     }
-    else
-#endif
-    {
-      it->second->count = 0;
-      it->second->total_size = 0;
-    }
+#endif // SSE2
 
   }
 
--- a/agent/src/snapShotContainer.hpp	Tue Jun 25 19:53:25 2013 +0900
+++ b/agent/src/snapShotContainer.hpp	Wed Jun 26 17:44:08 2013 +0900
@@ -246,20 +246,27 @@
       }
       else
     #endif
+    #ifndef __amd64__
     #ifdef SSE2
-      if(likely(usableSSE2)){
+      if(unlikely(!usableSSE2))
+    #endif // SSE2
+      {
+        counter->count += operand->count;
+        counter->total_size += operand->total_size;
+      }
+    #ifdef SSE2
+      else
+    #endif // SSE2
+    #endif // __amd64__
+    #ifdef SSE2
+      {
         asm volatile("movdqa (%1), %%xmm0;"
                      "paddq (%0), %%xmm0;"
                      "movdqa %%xmm0, (%0);"
                      : : "r" (counter), "r" (operand)
                      : "cc", "%xmm0");
       }
-      else
-    #endif
-      {
-        counter->count += operand->count;
-        counter->total_size += operand->total_size;
-      }
+    #endif // SSE2
 
     }
 
--- a/agent/src/util.hpp	Tue Jun 25 19:53:25 2013 +0900
+++ b/agent/src/util.hpp	Wed Jun 26 17:44:08 2013 +0900
@@ -310,6 +310,10 @@
 
 /* CPU instruction set flag. */
 
+#ifdef __amd64__
+#define SSE2 1
+#endif
+
 /*!
  * \brief SSE2 instruction usable flag.
  */