Bug 1243241 - Make RDTSC monotonic;r?jandem draft
authorDavid Rajchenbach-Teller <dteller@mozilla.com>
Thu, 28 Jan 2016 14:33:30 +0100
changeset 326882 3e8a6a1265b9a708698e6628f55d363971158e99
parent 326148 211a4c710fb6af2cad10102c4cabc7cb525998b8
child 513636 c1ce3609f273e3f3841c80837aa17bcca05be92f
push id10171
push userdteller@mozilla.com
push dateThu, 28 Jan 2016 13:38:08 +0000
reviewersjandem
bugs1243241
milestone47.0a1
Bug 1243241 - Make RDTSC monotonic;r?jandem We assume that the total number of cycles spent executing JS code during an event is equal to the number of cycles in the "top group", i.e. a group to which everything belongs. While this is true in theory, RDTSC is actually non-monotonic, so we can end up with fewer cycles reported for the top group than for some groups whose execution was actually shorter. When we end up in this situation, groups with more cycles than the top group will be reported as using more CPU than was actually used. This patch fixes the situation by proxying RDTSC behind a trivial API that ensures that values are monotonic during each tick.
js/src/vm/Stopwatch.cpp
js/src/vm/Stopwatch.h
--- a/js/src/vm/Stopwatch.cpp
+++ b/js/src/vm/Stopwatch.cpp
@@ -35,16 +35,24 @@ PerformanceMonitoring::addRecentGroup(Pe
 void
 PerformanceMonitoring::reset()
 {
     // All ongoing measures are dependent on the current iteration#.
     // By incrementing it, we mark all data as stale. Stale data will
     // be overwritten progressively during the execution.
     ++iteration_;
     recentGroups_.clear();
+
+    // Every so often, we will be rescheduled to another CPU. If this
+    // happens, we may end up with an entirely unsynchronized
+    // timestamp counter. If we do not reset
+    // `highestTimestampCounter_`, we could end up ignoring entirely
+    // valid sets of measures just because we are on a CPU that has a
+    // lower RDTSC.
+    highestTimestampCounter_ = 0;
 }
 
 void
 PerformanceMonitoring::start()
 {
     if (!isMonitoringJank_)
         return;
 
@@ -153,16 +161,29 @@ PerformanceMonitoring::commit()
 
     // Reset immediately, to make sure that we're not hit by the end
     // of a nested event loop (which would cause `commit` to be called
     // twice in succession).
     reset();
     return success;
 }
 
+uint64_t
+PerformanceMonitoring::monotonicReadTimestampCounter()
+{
+#if defined(MOZ_HAVE_RDTSC)
+    const uint64_t hardware = ReadTimestampCounter();
+    if (highestTimestampCounter_ < hardware)
+        highestTimestampCounter_ = hardware;
+    return highestTimestampCounter_;
+#else
+    return 0;
+#endif // defined(MOZ_HAVE_RDTSC)
+}
+
 void
 PerformanceMonitoring::dispose(JSRuntime* rt)
 {
     reset();
     for (CompartmentsIter c(rt, SkipAtoms); !c.done(); c.next()) {
         c->performanceMonitoring.unlink();
     }
 }
@@ -267,17 +288,17 @@ AutoStopwatch::enter()
     JSRuntime* runtime = cx_->runtime();
 
     if (runtime->performanceMonitoring.isMonitoringCPOW()) {
         CPOWTimeStart_ = runtime->performanceMonitoring.totalCPOWTime;
         isMonitoringCPOW_ = true;
     }
 
     if (runtime->performanceMonitoring.isMonitoringJank()) {
-        cyclesStart_ = this->getCycles();
+        cyclesStart_ = this->getCycles(runtime);
         cpuStart_ = this->getCPU();
         isMonitoringJank_ = true;
     }
 }
 
 bool
 AutoStopwatch::exit()
 {
@@ -290,18 +311,18 @@ AutoStopwatch::exit()
         // If possible, discard results when we don't end on the
         // same CPU as we started.  Note that we can be
         // rescheduled to another CPU beween `getCycles()` and
         // `getCPU()`.  We hope that this will happen rarely
         // enough that the impact on our statistics will remain
         // limited.
         const cpuid_t cpuEnd = this->getCPU();
         if (isSameCPU(cpuStart_, cpuEnd)) {
-            const uint64_t cyclesEnd = getCycles();
-            cyclesDelta = getDelta(cyclesEnd, cyclesStart_);
+            const uint64_t cyclesEnd = getCycles(runtime);
+            cyclesDelta = cyclesEnd - cyclesStart_; // Always >= 0 by definition of `getCycles`.
         }
 #if WINVER >= 0x600
         updateTelemetry(cpuStart_, cpuEnd);
 #elif defined(__linux__)
         updateTelemetry(cpuStart_, cpuEnd);
 #endif // WINVER >= 0x600 || _linux__
     }
 
@@ -377,23 +398,19 @@ uint64_t
 AutoStopwatch::getDelta(const uint64_t end, const uint64_t start) const
 {
     if (start >= end)
       return 0;
     return end - start;
 }
 
 uint64_t
-AutoStopwatch::getCycles() const
+AutoStopwatch::getCycles(JSRuntime* runtime) const
 {
-#if defined(MOZ_HAVE_RDTSC)
-    return ReadTimestampCounter();
-#else
-    return 0;
-#endif // defined(MOZ_HAVE_RDTSC)
+    return runtime->performanceMonitoring.monotonicReadTimestampCounter();
 }
 
 cpuid_t inline
 AutoStopwatch::getCPU() const
 {
 #if defined(XP_WIN) && WINVER >= _WIN32_WINNT_VISTA
     PROCESSOR_NUMBER proc;
     GetCurrentProcessorNumberEx(&proc);
--- a/js/src/vm/Stopwatch.h
+++ b/js/src/vm/Stopwatch.h
@@ -80,16 +80,17 @@ struct PerformanceMonitoring {
       , stopwatchCommitCallback(nullptr)
       , stopwatchCommitClosure(nullptr)
       , getGroupsCallback(nullptr)
       , getGroupsClosure(nullptr)
       , isMonitoringJank_(false)
       , isMonitoringCPOW_(false)
       , iteration_(0)
       , startedAtIteration_(0)
+      , highestTimestampCounter_(0)
     { }
 
     /**
      * Reset the stopwatch.
      *
      * This method is meant to be called whenever we start
      * processing an event, to ensure that we stop any ongoing
      * measurement that would otherwise provide irrelevant
@@ -207,16 +208,23 @@ struct PerformanceMonitoring {
 
     /**
      * The total amount of time spent waiting on CPOWs since the
      * start of the process, in microseconds.
      */
     uint64_t totalCPOWTime;
 
     /**
+     * A variant of RDTSC artificially made monotonic.
+     *
+     * Always return 0 on platforms that do not support RDTSC.
+     */
+    uint64_t monotonicReadTimestampCounter();
+
+    /**
      * Data extracted by the AutoStopwatch to determine how often
      * we reschedule the process to a different CPU during the
      * execution of JS.
      *
      * Warning: These values are incremented *only* on platforms
      * that offer a syscall/libcall to check on which CPU a
      * process is currently executed.
      */
@@ -280,16 +288,22 @@ struct PerformanceMonitoring {
      * stopwatch results.
      */
     uint64_t startedAtIteration_;
 
     /**
      * Groups used in the current iteration.
      */
     GroupVector recentGroups_;
+
+    /**
+     * The highest value of the timestamp counter encountered
+     * during this iteration.
+     */
+    uint64_t highestTimestampCounter_;
 };
 
 #if WINVER >= 0x0600
 struct cpuid_t {
     WORD group_;
     BYTE number_;
     cpuid_t(WORD group, BYTE number)
         : group_(group),
@@ -372,17 +386,17 @@ class AutoStopwatch final {
     // but is not guaranteed to be so.
     //
     // If `start <= end`, return `end - start`.
     // Otherwise, return `0`.
     uint64_t inline getDelta(const uint64_t end, const uint64_t start) const;
 
     // Return the value of the Timestamp Counter, as provided by the CPU.
     // 0 on platforms for which we do not have access to a Timestamp Counter.
-    uint64_t inline getCycles() const;
+    uint64_t inline getCycles(JSRuntime*) const;
 
 
     // Return the identifier of the current CPU, on platforms for which we have
     // access to the current CPU.
     cpuid_t inline getCPU() const;
 
     // Compare two CPU identifiers.
     bool inline isSameCPU(const cpuid_t& a, const cpuid_t& b) const;