Bug 1417234 - Use SRWLock as Mutex for mozjemalloc on Windows. r?njn draft
authorMike Hommey <mh+mozilla@glandium.org>
Tue, 14 Nov 2017 12:58:33 +0900
changeset 697911 2a74780841777ba7adcb5a510eadd101c012b459
parent 697275 c616a6fd5e4b20cca139fcdd3957682afaa862b9
child 697995 9d691be276733483cd2346a07e129c38bb43bf03
push id89141
push userbmo:mh+mozilla@glandium.org
push dateTue, 14 Nov 2017 23:42:06 +0000
reviewersnjn
bugs1417234
milestone59.0a1
Bug 1417234 - Use SRWLock as Mutex for mozjemalloc on Windows. r?njn SRWLock is more lightweight than CriticalSection, but is only available on Windows Vista and more. So until we actually dropped support Windows XP, we had to use CriticalSection. Now that all supported Windows versions do have SRWLock, this is a switch we can make, and not only because SRWLock is more lightweight, but because it can be statically initialized like on other platforms, allowing to use the same initialization code as on other platforms, and removing the requirement for a DllMain, which in turn can allow to statically link mozjemalloc in some cases, instead of requiring a shared library (DllMain only works on shared libraries), or manually call the initialization function soon enough. There is a downside, though: SRWLock, as opposed to CriticalSection, is not fair, meaning it can have thread scheduling implications, and can theoretically increase latency on some threads. However, it is the default used by Rust Mutex, meaning it's at least good enough there. Let's see how things go with this.
memory/build/mozjemalloc.cpp
memory/replace/logalloc/replay/Replay.cpp
--- a/memory/build/mozjemalloc.cpp
+++ b/memory/build/mozjemalloc.cpp
@@ -537,17 +537,17 @@ static void*
 base_alloc(size_t aSize);
 
 // Mutexes based on spinlocks.  We can't use normal pthread spinlocks in all
 // places, because they require malloc()ed memory, which causes bootstrapping
 // issues in some cases.
 struct Mutex
 {
 #if defined(XP_WIN)
-  CRITICAL_SECTION mMutex;
+  SRWLOCK mMutex;
 #elif defined(XP_DARWIN)
   OSSpinLock mMutex;
 #else
   pthread_mutex_t mMutex;
 #endif
 
   inline bool Init();
 
@@ -571,17 +571,17 @@ private:
   MOZ_DECL_USE_GUARD_OBJECT_NOTIFIER;
   Mutex& mMutex;
 };
 
 // Set to true once the allocator has been initialized.
 static Atomic<bool> malloc_initialized(false);
 
 #if defined(XP_WIN)
-// No init lock for Windows.
+static Mutex gInitLock = { SRWLOCK_INIT };
 #elif defined(XP_DARWIN)
 static Mutex gInitLock = { OS_SPINLOCK_INIT };
 #elif defined(XP_LINUX) && !defined(ANDROID)
 static Mutex gInitLock = { PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP };
 #else
 static Mutex gInitLock = { PTHREAD_MUTEX_INITIALIZER };
 #endif
 
@@ -1253,23 +1253,18 @@ chunk_ensure_zero(void* aPtr, size_t aSi
 static void*
 huge_malloc(size_t size, bool zero, arena_t* aArena);
 static void*
 huge_palloc(size_t aSize, size_t aAlignment, bool aZero, arena_t* aArena);
 static void*
 huge_ralloc(void* aPtr, size_t aSize, size_t aOldSize, arena_t* aArena);
 static void
 huge_dalloc(void* aPtr, arena_t* aArena);
-#ifdef XP_WIN
-extern "C"
-#else
-static
-#endif
-  bool
-  malloc_init_hard();
+static bool
+malloc_init_hard();
 
 #ifdef XP_DARWIN
 #define FORK_HOOK extern "C"
 #else
 #define FORK_HOOK static
 #endif
 FORK_HOOK void
 _malloc_prefork(void);
@@ -1279,31 +1274,27 @@ FORK_HOOK void
 _malloc_postfork_child(void);
 
 // End forward declarations.
 // ***************************************************************************
 
 // FreeBSD's pthreads implementation calls malloc(3), so the malloc
 // implementation has to take pains to avoid infinite recursion during
 // initialization.
-#if defined(XP_WIN)
-#define malloc_init() true
-#else
 // Returns whether the allocator was successfully initialized.
 static inline bool
 malloc_init()
 {
 
   if (malloc_initialized == false) {
     return malloc_init_hard();
   }
 
   return true;
 }
-#endif
 
 static void
 _malloc_message(const char* p)
 {
 #if !defined(XP_WIN)
 #define _write write
 #endif
   // Pretend to check _write() errors to suppress gcc warnings about
@@ -1333,19 +1324,17 @@ pthread_atfork(void (*)(void), void (*)(
 // cases. We also can't use constructors, because for statics, they would fire
 // after the first use of malloc, resetting the locks.
 
 // Initializes a mutex. Returns whether initialization succeeded.
 bool
 Mutex::Init()
 {
 #if defined(XP_WIN)
-  if (!InitializeCriticalSectionAndSpinCount(&mMutex, 5000)) {
-    return false;
-  }
+  InitializeSRWLock(&mMutex);
 #elif defined(XP_DARWIN)
   mMutex = OS_SPINLOCK_INIT;
 #elif defined(XP_LINUX) && !defined(ANDROID)
   pthread_mutexattr_t attr;
   if (pthread_mutexattr_init(&attr) != 0) {
     return false;
   }
   pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
@@ -1361,29 +1350,29 @@ Mutex::Init()
 #endif
   return true;
 }
 
 void
 Mutex::Lock()
 {
 #if defined(XP_WIN)
-  EnterCriticalSection(&mMutex);
+  AcquireSRWLockExclusive(&mMutex);
 #elif defined(XP_DARWIN)
   OSSpinLockLock(&mMutex);
 #else
   pthread_mutex_lock(&mMutex);
 #endif
 }
 
 void
 Mutex::Unlock()
 {
 #if defined(XP_WIN)
-  LeaveCriticalSection(&mMutex);
+  ReleaseSRWLockExclusive(&mMutex);
 #elif defined(XP_DARWIN)
   OSSpinLockUnlock(&mMutex);
 #else
   pthread_mutex_unlock(&mMutex);
 #endif
 }
 
 // End mutex.
@@ -4066,29 +4055,24 @@ GetKernelPageSize()
     MOZ_ASSERT(result != -1);
     return result;
 #endif
   })();
   return kernel_page_size;
 }
 
 // Returns whether the allocator was successfully initialized.
-#if !defined(XP_WIN)
-static
-#endif
-  bool
-  malloc_init_hard()
+static bool
+malloc_init_hard()
 {
   unsigned i;
   const char* opts;
   long result;
 
-#ifndef XP_WIN
   MutexAutoLock lock(gInitLock);
-#endif
 
   if (malloc_initialized) {
     // Another thread initialized the allocator before this one
     // acquired gInitLock.
     return true;
   }
 
   if (!thread_arena.init()) {
@@ -5073,32 +5057,9 @@ void*
   return nullptr;
 }
 
 size_t
 _msize(void* aPtr)
 {
   return DefaultMalloc::malloc_usable_size(aPtr);
 }
-
-// In the new style jemalloc integration jemalloc is built as a separate
-// shared library.  Since we're no longer hooking into the CRT binary,
-// we need to initialize the heap at the first opportunity we get.
-// DLL_PROCESS_ATTACH in DllMain is that opportunity.
-BOOL APIENTRY
-DllMain(HINSTANCE hModule, DWORD reason, LPVOID lpReserved)
-{
-  switch (reason) {
-    case DLL_PROCESS_ATTACH:
-      // Don't force the system to page DllMain back in every time
-      // we create/destroy a thread
-      DisableThreadLibraryCalls(hModule);
-      // Initialize the heap
-      malloc_init_hard();
-      break;
-
-    case DLL_PROCESS_DETACH:
-      break;
-  }
-
-  return TRUE;
-}
 #endif
--- a/memory/replace/logalloc/replay/Replay.cpp
+++ b/memory/replace/logalloc/replay/Replay.cpp
@@ -279,22 +279,16 @@ MOZ_BEGIN_EXTERN_C
 #define MALLOC_FUNCS MALLOC_FUNCS_MALLOC
 #include "malloc_decls.h"
 
 #define MALLOC_DECL(name, return_type, ...) \
   return_type name(__VA_ARGS__);
 #define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC
 #include "malloc_decls.h"
 
-/* mozjemalloc relies on DllMain to initialize, but DllMain is not invoked
- * for executables, so manually invoke mozjemalloc initialization. */
-#if defined(_WIN32)
-void malloc_init_hard(void);
-#endif
-
 #ifdef ANDROID
 /* mozjemalloc uses MozTagAnonymousMemory, which doesn't have an inline
  * implementation on Android */
 void
 MozTagAnonymousMemory(const void* aPtr, size_t aLength, const char* aTag) {}
 
 /* mozjemalloc and jemalloc use pthread_atfork, which Android doesn't have.
  * While gecko has one in libmozglue, the replay program can't use that.
@@ -470,20 +464,16 @@ private:
 
 int
 main()
 {
   size_t first_pid = 0;
   FdReader reader(0);
   Replay replay;
 
-#if defined(_WIN32)
-  malloc_init_hard();
-#endif
-
   /* Read log from stdin and dispatch function calls to the Replay instance.
    * The log format is essentially:
    *   <pid> <function>([<args>])[=<result>]
    * <args> is a comma separated list of arguments.
    *
    * The logs are expected to be preprocessed so that allocations are
    * attributed a tracking slot. The input is trusted not to have crazy
    * values for these slot numbers.