changeset 7145:917873d2983d

8040140: System.nanoTime() is slow and non-monotonic on OS X
Reviewed-by: sspitsyn, shade, dholmes, acorn
author sla
date Fri, 25 Apr 2014 07:40:33 +0200
parents a062c3691003
children 49961f279e24
files src/os/bsd/vm/os_bsd.cpp src/os/bsd/vm/os_bsd.hpp src/os/solaris/vm/os_solaris.cpp src/share/vm/runtime/os.hpp
diffstat 4 files changed, 76 insertions(+), 62 deletions(-) [+]
line wrap: on
line diff
--- a/src/os/bsd/vm/os_bsd.cpp	Mon May 05 15:10:43 2014 +0200
+++ b/src/os/bsd/vm/os_bsd.cpp	Fri Apr 25 07:40:33 2014 +0200
@@ -127,8 +127,12 @@
 // global variables
 julong os::Bsd::_physical_memory = 0;
 
-
+#ifdef __APPLE__
+mach_timebase_info_data_t os::Bsd::_timebase_info = {0, 0};
+volatile uint64_t         os::Bsd::_max_abstime   = 0;
+#else
 int (*os::Bsd::_clock_gettime)(clockid_t, struct timespec *) = NULL;
+#endif
 pthread_t os::Bsd::_main_thread;
 int os::Bsd::_page_size = -1;
 
@@ -986,13 +990,15 @@
   return jlong(time.tv_sec) * 1000  +  jlong(time.tv_usec / 1000);
 }
 
+#ifndef __APPLE__
 #ifndef CLOCK_MONOTONIC
 #define CLOCK_MONOTONIC (1)
 #endif
+#endif
 
 #ifdef __APPLE__
 void os::Bsd::clock_init() {
-        // XXXDARWIN: Investigate replacement monotonic clock
+  mach_timebase_info(&_timebase_info);
 }
 #else
 void os::Bsd::clock_init() {
@@ -1007,10 +1013,38 @@
 #endif
 
 
+#ifdef __APPLE__
+
+jlong os::javaTimeNanos() {
+    const uint64_t tm = mach_absolute_time();
+    const uint64_t now = (tm * Bsd::_timebase_info.numer) / Bsd::_timebase_info.denom;
+    const uint64_t prev = Bsd::_max_abstime;
+    if (now <= prev) {
+      return prev;   // same or retrograde time;
+    }
+    const uint64_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&Bsd::_max_abstime, prev);
+    assert(obsv >= prev, "invariant");   // Monotonicity
+    // If the CAS succeeded then we're done and return "now".
+    // If the CAS failed and the observed value "obsv" is >= now then
+    // we should return "obsv".  If the CAS failed and now > obsv > prev then
+    // some other thread raced this thread and installed a new value, in which case
+    // we could either (a) retry the entire operation, (b) retry trying to install now
+    // or (c) just return obsv.  We use (c).   No loop is required although in some cases
+    // we might discard a higher "now" value in deference to a slightly lower but freshly
+    // installed obsv value.   That's entirely benign -- it admits no new orderings compared
+    // to (a) or (b) -- and greatly reduces coherence traffic.
+    // We might also condition (c) on the magnitude of the delta between obsv and now.
+    // Avoiding excessive CAS operations to hot RW locations is critical.
+    // See https://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate
+    return (prev == obsv) ? now : obsv;
+}
+
+#else // __APPLE__
+
 jlong os::javaTimeNanos() {
   if (Bsd::supports_monotonic_clock()) {
     struct timespec tp;
-    int status = Bsd::clock_gettime(CLOCK_MONOTONIC, &tp);
+    int status = Bsd::_clock_gettime(CLOCK_MONOTONIC, &tp);
     assert(status == 0, "gettime error");
     jlong result = jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec);
     return result;
@@ -1023,6 +1057,8 @@
   }
 }
 
+#endif // __APPLE__
+
 void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) {
   if (Bsd::supports_monotonic_clock()) {
     info_ptr->max_value = ALL_64_BITS;
--- a/src/os/bsd/vm/os_bsd.hpp	Mon May 05 15:10:43 2014 +0200
+++ b/src/os/bsd/vm/os_bsd.hpp	Fri Apr 25 07:40:33 2014 +0200
@@ -58,7 +58,13 @@
   // For signal flags diagnostics
   static int sigflags[MAXSIGNUM];
 
+#ifdef __APPLE__
+  // mach_absolute_time() support: timebase conversion factors and the highest time published so far
+  static mach_timebase_info_data_t _timebase_info;
+  static volatile uint64_t         _max_abstime;
+#else
   static int (*_clock_gettime)(clockid_t, struct timespec *);
+#endif
 
   static GrowableArray<int>* _cpu_to_node;
 
@@ -135,11 +141,11 @@
   static void clock_init(void);
 
   static inline bool supports_monotonic_clock() {
+#ifdef __APPLE__
+    return true;
+#else
     return _clock_gettime != NULL;
-  }
-
-  static int clock_gettime(clockid_t clock_id, struct timespec *tp) {
-    return _clock_gettime ? _clock_gettime(clock_id, tp) : -1;
+#endif
   }
 
   // Stack repair handling
--- a/src/os/solaris/vm/os_solaris.cpp	Mon May 05 15:10:43 2014 +0200
+++ b/src/os/solaris/vm/os_solaris.cpp	Fri Apr 25 07:40:33 2014 +0200
@@ -415,11 +415,7 @@
 
 static hrtime_t first_hrtime = 0;
 static const hrtime_t hrtime_hz = 1000*1000*1000;
-const int LOCK_BUSY = 1;
-const int LOCK_FREE = 0;
-const int LOCK_INVALID = -1;
 static volatile hrtime_t max_hrtime = 0;
-static volatile int max_hrtime_lock = LOCK_FREE;     // Update counter with LSB as lock-in-progress
 
 
 void os::Solaris::initialize_system_info() {
@@ -1534,58 +1530,31 @@
 }
 
 
-// gethrtime can move backwards if read from one cpu and then a different cpu
-// getTimeNanos is guaranteed to not move backward on Solaris
-// local spinloop created as faster for a CAS on an int than
-// a CAS on a 64bit jlong. Also Atomic::cmpxchg for jlong is not
-// supported on sparc v8 or pre supports_cx8 intel boxes.
-// oldgetTimeNanos for systems which do not support CAS on 64bit jlong
-// i.e. sparc v8 and pre supports_cx8 (i486) intel boxes
-inline hrtime_t oldgetTimeNanos() {
-  int gotlock = LOCK_INVALID;
-  hrtime_t newtime = gethrtime();
-
-  for (;;) {
-// grab lock for max_hrtime
-    int curlock = max_hrtime_lock;
-    if (curlock & LOCK_BUSY)  continue;
-    if (gotlock = Atomic::cmpxchg(LOCK_BUSY, &max_hrtime_lock, LOCK_FREE) != LOCK_FREE) continue;
-    if (newtime > max_hrtime) {
-      max_hrtime = newtime;
-    } else {
-      newtime = max_hrtime;
-    }
-    // release lock
-    max_hrtime_lock = LOCK_FREE;
-    return newtime;
-  }
-}
-// gethrtime can move backwards if read from one cpu and then a different cpu
-// getTimeNanos is guaranteed to not move backward on Solaris
+// gethrtime() should be monotonic according to the documentation,
+// but some virtualized platforms are known to break this guarantee.
+// getTimeNanos() must be guaranteed not to move backwards, so we
+// are forced to add a check here.
 inline hrtime_t getTimeNanos() {
-  if (VM_Version::supports_cx8()) {
-    const hrtime_t now = gethrtime();
-    // Use atomic long load since 32-bit x86 uses 2 registers to keep long.
-    const hrtime_t prev = Atomic::load((volatile jlong*)&max_hrtime);
-    if (now <= prev)  return prev;   // same or retrograde time;
-    const hrtime_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&max_hrtime, prev);
-    assert(obsv >= prev, "invariant");   // Monotonicity
-    // If the CAS succeeded then we're done and return "now".
-    // If the CAS failed and the observed value "obs" is >= now then
-    // we should return "obs".  If the CAS failed and now > obs > prv then
-    // some other thread raced this thread and installed a new value, in which case
-    // we could either (a) retry the entire operation, (b) retry trying to install now
-    // or (c) just return obs.  We use (c).   No loop is required although in some cases
-    // we might discard a higher "now" value in deference to a slightly lower but freshly
-    // installed obs value.   That's entirely benign -- it admits no new orderings compared
-    // to (a) or (b) -- and greatly reduces coherence traffic.
-    // We might also condition (c) on the magnitude of the delta between obs and now.
-    // Avoiding excessive CAS operations to hot RW locations is critical.
-    // See http://blogs.sun.com/dave/entry/cas_and_cache_trivia_invalidate
-    return (prev == obsv) ? now : obsv ;
-  } else {
-    return oldgetTimeNanos();
-  }
+  const hrtime_t now = gethrtime();
+  const hrtime_t prev = max_hrtime;
+  if (now <= prev) {
+    return prev;   // same or retrograde time;
+  }
+  const hrtime_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&max_hrtime, prev);
+  assert(obsv >= prev, "invariant");   // Monotonicity
+  // If the CAS succeeded then we're done and return "now".
+  // If the CAS failed and the observed value "obsv" is >= now then
+  // we should return "obsv".  If the CAS failed and now > obsv > prev then
+  // some other thread raced this thread and installed a new value, in which case
+  // we could either (a) retry the entire operation, (b) retry trying to install now
+  // or (c) just return obsv.  We use (c).   No loop is required although in some cases
+  // we might discard a higher "now" value in deference to a slightly lower but freshly
+  // installed obsv value.   That's entirely benign -- it admits no new orderings compared
+  // to (a) or (b) -- and greatly reduces coherence traffic.
+  // We might also condition (c) on the magnitude of the delta between obsv and now.
+  // Avoiding excessive CAS operations to hot RW locations is critical.
+  // See https://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate
+  return (prev == obsv) ? now : obsv;
 }
 
 // Time since start-up in seconds to a fine granularity.
--- a/src/share/vm/runtime/os.hpp	Mon May 05 15:10:43 2014 +0200
+++ b/src/share/vm/runtime/os.hpp	Fri Apr 25 07:40:33 2014 +0200
@@ -48,6 +48,9 @@
 #ifdef TARGET_OS_FAMILY_bsd
 # include "jvm_bsd.h"
 # include <setjmp.h>
+# ifdef __APPLE__
+#  include <mach/mach_time.h>
+# endif
 #endif
 
 class AgentLibrary;