Bug 1476757 - WIP: Grow and shrink the profiler buffer dynamically so as not to waste memory when a large buffer size limit is picked.
author      Markus Stange <mstange@themasta.com>
date        Tue, 22 May 2018 17:30:08 -0400
changeset   820081 4a7a78de31bed422154d906f29eca7a8537dc73d
parent      820080 78e0e68fff02686ac0b468753f434a85f2e8c893
child       820082 62fe3d356a3f5fcf3e42a4e3631110d09d217a12
push id     116712
push user   bmo:mstange@themasta.com
push date   Wed, 18 Jul 2018 20:59:38 +0000
bugs        1476757
milestone   63.0a1
phase       draft
Bug 1476757 - WIP: Grow and shrink the profiler buffer dynamically so as not to waste memory when a large buffer size limit is picked.

I haven't run tests on this yet, and I haven't checked what the implications of changing the meaning of "entry size" are for all consumers of this piece of data.

In the past, the buffer "entry size" (which is really more of a count than a size) referred to the actual current capacity of the buffer. Now it refers to the maximum capacity.

I think we especially need to audit the callers of profiler_get_buffer_info():
- the gtests, and
- the old performance devtool, which uses nsIProfiler.getBufferInfo to draw a progress bar during capturing and makes chunking decisions based on it.

MozReview-Commit-ID: 4c26L5MhN2e
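To make the audit concern concrete, here is a minimal sketch of a consumer of this data, assuming the out-parameter form of profiler_get_buffer_info() from this era and a hypothetical DrawProgressBar() helper; it illustrates the semantic shift and is not code from this patch:

#include <cstdint>

// Assumed-era prototype (see GeckoProfiler.h); the exact contemporary
// signature is an assumption, not a checked fact.
void profiler_get_buffer_info(uint32_t* aCurrentPosition, uint32_t* aEntries,
                              uint32_t* aGeneration);

// Hypothetical UI hook standing in for the devtool's progress bar.
void DrawProgressBar(double aFillRatio);

void UpdateCaptureProgress()
{
  uint32_t position = 0, entries = 0, generation = 0;
  profiler_get_buffer_info(&position, &entries, &generation);

  // Before this patch, `entries` was the buffer's actual capacity, so
  // position / entries approximated how full the buffer was. With this
  // patch, `entries` is the *maximum* capacity, so the same arithmetic
  // under-reports how close the smaller current allocation is to wrapping
  // around and evicting old samples.
  double fillRatio = double(position) / double(entries);
  DrawProgressBar(fillRatio);
}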
tools/profiler/core/platform.cpp
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -39,16 +39,17 @@
 #include "mozilla/UniquePtr.h"
 #include "mozilla/Vector.h"
 #include "GeckoProfiler.h"
 #include "VTuneProfiler.h"
 #include "GeckoProfilerReporter.h"
 #include "ProfilerIOInterposeObserver.h"
 #include "mozilla/AutoProfilerLabel.h"
 #include "mozilla/ExtensionPolicyService.h"
+#include "mozilla/MathAlgorithms.h"
 #include "mozilla/Scheduler.h"
 #include "mozilla/StackWalk.h"
 #include "mozilla/StaticPtr.h"
 #include "mozilla/SystemGroup.h"
 #include "mozilla/ThreadLocal.h"
 #include "mozilla/TimeStamp.h"
 #include "mozilla/Tuple.h"
 #include "mozilla/extensions/WebExtensionPolicy.h"
@@ -311,16 +312,18 @@ private:
 #ifdef USE_LUL_STACKWALK
   // LUL's state. Null prior to the first activation, non-null thereafter.
   UniquePtr<lul::LUL> mLul;
 #endif
 };
 
 CorePS* CorePS::sInstance = nullptr;
 
+static const uint32_t kInitialProfileBufferCapacity = 1024;
+
 class SamplerThread;
 
 static SamplerThread*
 NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, double aInterval);
 
 struct LiveProfiledThreadData
 {
   RegisteredThread* mRegisteredThread;
@@ -358,17 +361,17 @@ private:
   }
 
   ActivePS(PSLockRef aLock, uint32_t aEntries, double aInterval,
            uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount)
     : mGeneration(sNextGeneration++)
     , mEntries(aEntries)
     , mInterval(aInterval)
     , mFeatures(AdjustFeatures(aFeatures, aFilterCount))
-    , mBuffer(MakeUnique<ProfileBuffer>(aEntries))
+    , mBuffer(MakeUnique<ProfileBuffer>(std::min(kInitialProfileBufferCapacity, aEntries)))
       // The new sampler thread doesn't start sampling immediately because the
       // main loop within Run() is blocked until this function's caller unlocks
       // gPSMutex.
     , mSamplerThread(NewSamplerThread(aLock, mGeneration, aInterval))
     , mInterposeObserver(ProfilerFeature::HasMainThreadIO(aFeatures)
                          ? new ProfilerIOInterposeObserver()
                          : nullptr)
 #undef HAS_FEATURE
@@ -637,16 +640,45 @@ public:
       [bufferRangeStart](UniquePtr<ProfiledThreadData>& aProfiledThreadData) {
         Maybe<uint64_t> bufferPosition =
           aProfiledThreadData->BufferPositionWhenUnregistered();
         MOZ_RELEASE_ASSERT(bufferPosition, "should have unregistered this thread");
         return *bufferPosition < bufferRangeStart;
       });
   }
 
+  static void EnsureAdequateBufferCapacity(PSLockRef aLock)
+  {
+    ProfileBuffer& buffer = Buffer(aLock);
+    uint32_t maxCapacity = RoundUpPow2(Entries(aLock));
+    uint32_t minCapacity = std::min(kInitialProfileBufferCapacity, maxCapacity);
+    uint32_t usedSize = buffer.UsedSize();
+    uint32_t currentCapacity = buffer.mEntrySize;
+    // The usedSize should always be between 35% and 90% of the capacity.
+    // If the usedSize exceeds 90% of the capacity, enlarge the capacity.
+    // Enlarging the capacity will at least double it, so then the usedSize
+    // will be a bit above 45% of the new capacity.
+    // If the usedSize goes below 35% of the capacity, shrink the capacity.
+    // Shrinking the capacity will at least halve it, so then the usedSize will
+    // be at most 70% of the new capacity.
+    uint32_t minDesiredCapacity = usedSize * 100 / 90;
+    uint32_t maxDesiredCapacity = usedSize * 100 / 35;
+
+    uint32_t newCapacity = currentCapacity;
+    while (newCapacity < minDesiredCapacity && newCapacity < maxCapacity) {
+      newCapacity *= 2;
+    }
+    while (newCapacity > maxDesiredCapacity && newCapacity > minCapacity) {
+      newCapacity /= 2;
+    }
+    MOZ_RELEASE_ASSERT(newCapacity >= minCapacity);
+    MOZ_RELEASE_ASSERT(newCapacity <= maxCapacity);
+    buffer.SetCapacityPow2(newCapacity);
+  }
+
 private:
   // The singleton instance.
   static ActivePS* sInstance;
 
   // We need to track activity generations. If we didn't we could have the
   // following scenario.
   //
   // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
@@ -661,17 +693,17 @@ private:
   //   profiler_stop() is stuck, unable to finish.
   //
   // By checking ActivePS *and* the generation, we can avoid this scenario.
   // sNextGeneration is used to track the next generation number; it is static
   // because it must persist across different ActivePS instantiations.
   const uint32_t mGeneration;
   static uint32_t sNextGeneration;
 
-  // The number of entries in mBuffer.
+  // The maximum number of entries in mBuffer.
   const uint32_t mEntries;
 
   // The interval between samples, measured in milliseconds.
   const double mInterval;
 
   // The profile features that are enabled.
   const uint32_t mFeatures;
 
@@ -2187,16 +2219,18 @@ SamplerThread::Run()
         // The LUL unwind object accumulates frame statistics. Periodically we
         // should poke it to give it a chance to print those statistics.  This
         // involves doing I/O (fprintf, __android_log_print, etc.) and so
         // can't safely be done from the critical section inside
         // SuspendAndSampleAndResumeThread, which is why it is done here.
         CorePS::Lul(lock)->MaybeShowStats();
 #endif
       }
+
+      ActivePS::EnsureAdequateBufferCapacity(lock);
     }
     // gPSMutex is not held after this point.
 
     // Calculate how long a sleep to request.  After the sleep, measure how
     // long we actually slept and take the difference into account when
     // calculating the sleep interval for the next iteration.  This is an
     // attempt to keep "to schedule" in the presence of inaccuracy of the
     // actual sleep intervals.
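As a standalone illustration of the resizing policy added in EnsureAdequateBufferCapacity() above, here is the same hysteresis computation extracted into a pure function, with worked values; the function name and the example numbers are mine, not part of the patch:

#include <algorithm>
#include <cstdint>

// The resizing policy from EnsureAdequateBufferCapacity(): capacities are
// powers of two, and usedSize is kept between 35% and 90% of the chosen
// capacity whenever the [minCapacity, maxCapacity] bounds allow it.
uint32_t ChooseCapacity(uint32_t currentCapacity, uint32_t usedSize,
                        uint32_t minCapacity, uint32_t maxCapacity)
{
  // Grow while more than 90% full; doubling from >90% lands a bit above 45%.
  uint32_t minDesiredCapacity = usedSize * 100 / 90;
  // Shrink while less than 35% full; halving from <35% lands at most at 70%.
  uint32_t maxDesiredCapacity = usedSize * 100 / 35;

  uint32_t newCapacity = currentCapacity;
  while (newCapacity < minDesiredCapacity && newCapacity < maxCapacity) {
    newCapacity *= 2;
  }
  while (newCapacity > maxDesiredCapacity && newCapacity > minCapacity) {
    newCapacity /= 2;
  }
  return newCapacity;
}

// Worked examples with minCapacity = 1024 and maxCapacity = 1 << 20:
//   ChooseCapacity(1024, 950, 1024, 1 << 20)  == 2048  // 950 > 90% of 1024: grow
//   ChooseCapacity(2048, 950, 1024, 1 << 20)  == 2048  // 46% full: stable
//   ChooseCapacity(8192, 2500, 1024, 1 << 20) == 4096  // 2500 < 35% of 8192: shrink

The asymmetric 35%/90% band provides hysteresis: a buffer that has just grown sits near 45% full and one that has just shrunk sits at most around 70% full, so steady-state usage near either threshold does not make the capacity oscillate on every sampler tick.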