Bug 1243241 - Make RDTSC monotonic;r?jandem
We assume that the total number of cycles spent executing JS code
during an event is equal to the number of cycles in the "top group",
i.e. a group to which everything belongs. While this is true in
theory, RDTSC is actually non-monotonic, so we can end up with fewer
cycles reported for the top group than for some groups whose execution
was actually shorter. When we end up in this situation, groups with
more cycles than the top group will be reported as using more CPU than
was actually used.
This patch fixes the situation by proxying RDTSC behind a trivial API
that ensures that values are monotonic during each tick.
--- a/js/src/vm/Stopwatch.cpp
+++ b/js/src/vm/Stopwatch.cpp
@@ -35,16 +35,24 @@ PerformanceMonitoring::addRecentGroup(Pe
void
PerformanceMonitoring::reset()
{
// All ongoing measures are dependent on the current iteration#.
// By incrementing it, we mark all data as stale. Stale data will
// be overwritten progressively during the execution.
++iteration_;
recentGroups_.clear();
+
+ // Every so often, we will be rescheduled to another CPU. If this
+ // happens, we may end up with an entirely unsynchronized
+ // timestamp counter. If we do not reset
+ // `highestTimestampCounter_`, we could end up ignoring entirely
+ // valid sets of measures just because we are on a CPU that has a
+ // lower RDTSC.
+ highestTimestampCounter_ = 0;
}
void
PerformanceMonitoring::start()
{
if (!isMonitoringJank_)
return;
@@ -153,16 +161,29 @@ PerformanceMonitoring::commit()
// Reset immediately, to make sure that we're not hit by the end
// of a nested event loop (which would cause `commit` to be called
// twice in succession).
reset();
return success;
}
+uint64_t
+PerformanceMonitoring::monotonicReadTimestampCounter()
+{
+#if defined(MOZ_HAVE_RDTSC)
+ const uint64_t hardware = ReadTimestampCounter();
+ if (highestTimestampCounter_ < hardware)
+ highestTimestampCounter_ = hardware;
+ return highestTimestampCounter_;
+#else
+ return 0;
+#endif // defined(MOZ_HAVE_RDTSC)
+}
+
void
PerformanceMonitoring::dispose(JSRuntime* rt)
{
reset();
for (CompartmentsIter c(rt, SkipAtoms); !c.done(); c.next()) {
c->performanceMonitoring.unlink();
}
}
@@ -267,17 +288,17 @@ AutoStopwatch::enter()
JSRuntime* runtime = cx_->runtime();
if (runtime->performanceMonitoring.isMonitoringCPOW()) {
CPOWTimeStart_ = runtime->performanceMonitoring.totalCPOWTime;
isMonitoringCPOW_ = true;
}
if (runtime->performanceMonitoring.isMonitoringJank()) {
- cyclesStart_ = this->getCycles();
+ cyclesStart_ = this->getCycles(runtime);
cpuStart_ = this->getCPU();
isMonitoringJank_ = true;
}
}
bool
AutoStopwatch::exit()
{
@@ -290,18 +311,18 @@ AutoStopwatch::exit()
// If possible, discard results when we don't end on the
// same CPU as we started. Note that we can be
// rescheduled to another CPU beween `getCycles()` and
// `getCPU()`. We hope that this will happen rarely
// enough that the impact on our statistics will remain
// limited.
const cpuid_t cpuEnd = this->getCPU();
if (isSameCPU(cpuStart_, cpuEnd)) {
- const uint64_t cyclesEnd = getCycles();
- cyclesDelta = getDelta(cyclesEnd, cyclesStart_);
+ const uint64_t cyclesEnd = getCycles(runtime);
+ cyclesDelta = cyclesEnd - cyclesStart_; // Always >= 0 by definition of `getCycles`.
}
#if WINVER >= 0x600
updateTelemetry(cpuStart_, cpuEnd);
#elif defined(__linux__)
updateTelemetry(cpuStart_, cpuEnd);
#endif // WINVER >= 0x600 || _linux__
}
@@ -377,23 +398,19 @@ uint64_t
AutoStopwatch::getDelta(const uint64_t end, const uint64_t start) const
{
if (start >= end)
return 0;
return end - start;
}
uint64_t
-AutoStopwatch::getCycles() const
+AutoStopwatch::getCycles(JSRuntime* runtime) const
{
-#if defined(MOZ_HAVE_RDTSC)
- return ReadTimestampCounter();
-#else
- return 0;
-#endif // defined(MOZ_HAVE_RDTSC)
+ return runtime->performanceMonitoring.monotonicReadTimestampCounter();
}
cpuid_t inline
AutoStopwatch::getCPU() const
{
#if defined(XP_WIN) && WINVER >= _WIN32_WINNT_VISTA
PROCESSOR_NUMBER proc;
GetCurrentProcessorNumberEx(&proc);
--- a/js/src/vm/Stopwatch.h
+++ b/js/src/vm/Stopwatch.h
@@ -80,16 +80,17 @@ struct PerformanceMonitoring {
, stopwatchCommitCallback(nullptr)
, stopwatchCommitClosure(nullptr)
, getGroupsCallback(nullptr)
, getGroupsClosure(nullptr)
, isMonitoringJank_(false)
, isMonitoringCPOW_(false)
, iteration_(0)
, startedAtIteration_(0)
+ , highestTimestampCounter_(0)
{ }
/**
* Reset the stopwatch.
*
* This method is meant to be called whenever we start
* processing an event, to ensure that we stop any ongoing
* measurement that would otherwise provide irrelevant
@@ -207,16 +208,23 @@ struct PerformanceMonitoring {
/**
* The total amount of time spent waiting on CPOWs since the
* start of the process, in microseconds.
*/
uint64_t totalCPOWTime;
/**
+ * A variant of RDTSC artificially made monotonic.
+ *
+ * Always return 0 on platforms that do not support RDTSC.
+ */
+ uint64_t monotonicReadTimestampCounter();
+
+ /**
* Data extracted by the AutoStopwatch to determine how often
* we reschedule the process to a different CPU during the
* execution of JS.
*
* Warning: These values are incremented *only* on platforms
* that offer a syscall/libcall to check on which CPU a
* process is currently executed.
*/
@@ -280,16 +288,22 @@ struct PerformanceMonitoring {
* stopwatch results.
*/
uint64_t startedAtIteration_;
/**
* Groups used in the current iteration.
*/
GroupVector recentGroups_;
+
+ /**
+ * The highest value of the timestamp counter encountered
+ * during this iteration.
+ */
+ uint64_t highestTimestampCounter_;
};
#if WINVER >= 0x0600
struct cpuid_t {
WORD group_;
BYTE number_;
cpuid_t(WORD group, BYTE number)
: group_(group),
@@ -372,17 +386,17 @@ class AutoStopwatch final {
// but is not guaranteed to be so.
//
// If `start <= end`, return `end - start`.
// Otherwise, return `0`.
uint64_t inline getDelta(const uint64_t end, const uint64_t start) const;
// Return the value of the Timestamp Counter, as provided by the CPU.
// 0 on platforms for which we do not have access to a Timestamp Counter.
- uint64_t inline getCycles() const;
+ uint64_t inline getCycles(JSRuntime*) const;
// Return the identifier of the current CPU, on platforms for which we have
// access to the current CPU.
cpuid_t inline getCPU() const;
// Compare two CPU identifiers.
bool inline isSameCPU(const cpuid_t& a, const cpuid_t& b) const;