Bug 1475899: Part 4 - Add memory reporter for committed thread stack sizes on Linux. r?erahm draft
author: Kris Maglione <maglione.k@gmail.com>
Sat, 14 Jul 2018 02:21:30 -0700
changeset 819390 4a8cb980ca2dac89ac6198117af0d0b93b1f81cb
parent 819055 42c69cdbb25b62681d35a1bbf9658f0f675093f4
child 819391 42d8d4be7b52a66dc165a8dcfa824ea3d000c490
push id: 116548
push user: maglione.k@gmail.com
push date: Tue, 17 Jul 2018 20:10:48 +0000
reviewers: erahm
bugs: 1475899
milestone: 63.0a1
Bug 1475899: Part 4 - Add memory reporter for committed thread stack sizes on Linux. r?erahm

Later patches add support for Windows. OS-X will need a follow-up.

MozReview-Commit-ID: DDd6uir4KzM
xpcom/base/nsMemoryReporterManager.cpp
xpcom/threads/nsThread.cpp
xpcom/threads/nsThread.h
--- a/xpcom/base/nsMemoryReporterManager.cpp
+++ b/xpcom/base/nsMemoryReporterManager.cpp
@@ -24,16 +24,17 @@
 #if defined(XP_UNIX) || defined(MOZ_DMD)
 #include "nsMemoryInfoDumper.h"
 #endif
 #include "nsNetCID.h"
 #include "mozilla/Attributes.h"
 #include "mozilla/MemoryReportingProcess.h"
 #include "mozilla/PodOperations.h"
 #include "mozilla/Preferences.h"
+#include "mozilla/ResultExtensions.h"
 #include "mozilla/Services.h"
 #include "mozilla/Telemetry.h"
 #include "mozilla/UniquePtrExtensions.h"
 #include "mozilla/dom/MemoryReportTypes.h"
 #include "mozilla/dom/ContentParent.h"
 #include "mozilla/gfx/GPUProcessManager.h"
 #include "mozilla/ipc/FileDescriptorUtils.h"
 
@@ -51,16 +52,19 @@ using namespace dom;
 
 #if defined(MOZ_MEMORY)
 #  define HAVE_JEMALLOC_STATS 1
 #  include "mozmemory.h"
 #endif  // MOZ_MEMORY
 
 #if defined(XP_LINUX)
 
+#include "mozilla/MemoryMapping.h"
+#include "nsThread.h"
+
 #include <malloc.h>
 #include <string.h>
 #include <stdlib.h>
 
 static MOZ_MUST_USE nsresult
 GetProcSelfStatmField(int aField, int64_t* aN)
 {
   // There are more than two fields, but we're only interested in the first
@@ -1393,16 +1397,118 @@ public:
       "Memory used by dynamic atom objects and chars (which are stored "
       "at the end of each atom object).");
 
     return NS_OK;
   }
 };
 NS_IMPL_ISUPPORTS(AtomTablesReporter, nsIMemoryReporter)
 
+#ifdef XP_LINUX
+class ThreadStacksReporter final : public nsIMemoryReporter
+{ // Reports committed stack memory per nsThread (Linux only).
+  ~ThreadStacksReporter() = default;
+
+public:
+  NS_DECL_ISUPPORTS
+
+  NS_IMETHOD CollectReports(nsIHandleReportCallback* aHandleReport,
+                            nsISupports* aData, bool aAnonymize) override
+  {
+    nsTArray<MemoryMapping> mappings(1024);
+    MOZ_TRY(GetMemoryMappings(mappings));
+
+    // Enumerating over active threads requires holding a lock, so we collect
+    // info on all threads, and then call our reporter callbacks after releasing
+    // the lock.
+    struct ThreadData
+    {
+      nsCString mName;
+      uint32_t mThreadId;
+      size_t mPrivateSize;
+    };
+    AutoTArray<ThreadData, 32> threads;
+
+    for (auto* thread : nsThread::Enumerate()) {
+      if (!thread->StackBase()) {
+        continue;
+      }
+
+      int idx = mappings.BinaryIndexOf(thread->StackBase());
+      if (idx < 0) {
+        continue;
+      }
+      // Referenced() is the combined size of all pages in the region which have
+      // ever been touched, and are therefore consuming memory. For stack
+      // regions, these pages are guaranteed to be un-shared unless we fork
+      // after creating threads (which we don't).
+      size_t privateSize = mappings[idx].Referenced();
+
+      // On Linux, we have to be very careful matching memory regions to thread
+      // stacks.
+      //
+      // To begin with, the kernel only reports VM stats for regions of all
+      // adjacent pages with the same flags, protection, and backing file.
+      // There's no way to get finer-grained usage information for a subset of
+      // those pages.
+      //
+      // Stack segments always have a guard page at the bottom of the stack
+      // (assuming we only support stacks that grow down), so there's no danger
+      // of them being merged with other stack regions. At the top, there's no
+      // protection page, and no way to allocate one without using pthreads
+      // directly and allocating our own stacks. So we get around the problem by
+      // adding an extra VM flag (NOHUGEPAGES) to our stack region, which we
+      // don't expect to be set on any heap regions. But this is not fool-proof.
+      //
+      // A second kink is that different C libraries (and different versions
+      // thereof) report stack base locations and sizes differently with regard
+      // to the guard page. For the libraries that include the guard page in the
+      // stack size/base pointer, we need to adjust those values to compensate.
+      // But it's possible that our logic will get out of sync with library
+      // changes, or someone will compile with an unexpected library.
+      //
+      //
+      // The upshot of all of this is that there may be configurations that our
+      // special cases don't cover. And if there are, we want to know about it.
+      // So assert that the total size of the memory region we're reporting
+      // actually matches the allocated size of the thread stack.
+      MOZ_ASSERT(mappings[idx].Size() == thread->StackSize(),
+                 "Mapping region size doesn't match stack allocation size");
+
+      threads.AppendElement(ThreadData{
+        nsCString(PR_GetThreadName(thread->GetPRThread())),
+        thread->ThreadId(),
+        // On Linux, it's possible (but unlikely) that our stack region will
+        // have been merged with adjacent heap regions, in which case we'll get
+        // combined size information for both. So we take the minimum of the
+        // reported private size and the requested stack size to avoid the
+        // possibility of majorly over-reporting in that case.
+        std::min(privateSize, thread->StackSize()),
+      });
+    }
+
+    for (auto& thread : threads) {
+      nsPrintfCString path("explicit/thread-stacks/%s (tid=%u)",
+                           thread.mName.get(), thread.mThreadId);
+
+      aHandleReport->Callback(
+          EmptyCString(), path,
+          KIND_NONHEAP, UNITS_BYTES,
+          thread.mPrivateSize,
+          NS_LITERAL_CSTRING("The sizes of thread stacks which have been "
+                             "committed to memory."),
+          aData);
+    }
+
+    return NS_OK;
+  }
+};
+NS_IMPL_ISUPPORTS(ThreadStacksReporter, nsIMemoryReporter)
+#endif
+
 #ifdef DEBUG
 
 // Ideally, this would be implemented in BlockingResourceBase.cpp.
 // However, this ends up breaking the linking step of various unit tests due
 // to adding a new dependency to libdmd for a commonly used feature (mutexes)
 // in  DMD  builds. So instead we do it here.
 class DeadlockDetectorReporter final : public nsIMemoryReporter
 {
@@ -1554,16 +1660,20 @@ nsMemoryReporterManager::Init()
 #endif
 
 #ifdef HAVE_SYSTEM_HEAP_REPORTER
   RegisterStrongReporter(new SystemHeapReporter());
 #endif
 
   RegisterStrongReporter(new AtomTablesReporter());
 
+#ifdef XP_LINUX
+  RegisterStrongReporter(new ThreadStacksReporter());
+#endif
+
 #ifdef DEBUG
   RegisterStrongReporter(new DeadlockDetectorReporter());
 #endif
 
 #ifdef MOZ_GECKO_PROFILER
   // We have to register this here rather than in profiler_init() because
   // profiler_init() runs prior to nsMemoryReporterManager's creation.
   RegisterStrongReporter(new GeckoProfilerReporter());
--- a/xpcom/threads/nsThread.cpp
+++ b/xpcom/threads/nsThread.cpp
@@ -2,16 +2,17 @@
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "nsThread.h"
 
 #include "base/message_loop.h"
+#include "base/platform_thread.h"
 
 // Chromium's logging can sometimes leak through...
 #ifdef LOG
 #undef LOG
 #endif
 
 #include "mozilla/ReentrantMonitor.h"
 #include "nsMemoryPressure.h"
@@ -405,16 +406,17 @@ nsThread::Enumerate()
 nsThread::ThreadFunc(void* aArg)
 {
   using mozilla::ipc::BackgroundChild;
 
   ThreadInitData* initData = static_cast<ThreadInitData*>(aArg);
   nsThread* self = initData->thread;  // strong reference
 
   self->mThread = PR_GetCurrentThread();
+  self->mThreadId = uint32_t(PlatformThread::CurrentId());
   self->mVirtualThread = GetCurrentVirtualThread();
   self->mEventTarget->SetCurrentThread();
   SetupCurrentThreadForChaosMode();
 
   if (!initData->name.IsEmpty()) {
     NS_SetCurrentThreadName(initData->name.BeginReading());
   }
 
--- a/xpcom/threads/nsThread.h
+++ b/xpcom/threads/nsThread.h
@@ -69,16 +69,18 @@ public:
   PRThread* GetPRThread()
   {
     return mThread;
   }
 
   const void* StackBase() const { return mStackBase; }
   size_t StackSize() const { return mStackSize; }
 
+  uint32_t ThreadId() const { return mThreadId; }  // Set in ThreadFunc().
+
   // If this flag is true, then the nsThread was created using
   // nsIThreadManager::NewThread.
   bool ShutdownRequired()
   {
     return mShutdownRequired;
   }
 
   // Clear the observer list.
@@ -172,16 +174,17 @@ protected:
   RefPtr<mozilla::ThreadEventTarget> mEventTarget;
 
   mozilla::CycleCollectedJSContext* mScriptObserver;
 
   // Only accessed on the target thread.
   nsAutoTObserverArray<NotNull<nsCOMPtr<nsIThreadObserver>>, 2> mEventObservers;
 
   int32_t   mPriority;
+  uint32_t  mThreadId;
   PRThread* mThread;
   uint32_t  mNestedEventLoopDepth;
   uint32_t  mStackSize;
   void*     mStackBase = nullptr;
 
   // The shutdown context for ourselves.
   struct nsThreadShutdownContext* mShutdownContext;
   // The shutdown contexts for any other threads we've asked to shut down.