Bug 1364358 - Remove mozjemalloc support for automatic multiple arenas. r?erahm draft
authorMike Hommey <mh+mozilla@glandium.org>
Thu, 11 May 2017 17:21:36 +0900
changeset 579095 48efe052f3cd2f279091a1118ca63fb41be36af1
parent 579094 dc8f2edeb6128f3c7ebb21b72c59bbf825d83453
child 579096 e42ff803a0a7c408970c9cc50532d0b52635586b
push id59145
push userbmo:mh+mozilla@glandium.org
push dateTue, 16 May 2017 23:08:32 +0000
reviewerserahm
bugs1364358
milestone55.0a1
Bug 1364358 - Remove mozjemalloc support for automatic multiple arenas. r?erahm As explained 2 commits earlier, we remove the support for multiple arenas. We however keep the `arenas` list and the `narenas` count to use them to track the opted-in per-thread arenas.
memory/mozjemalloc/jemalloc.c
--- a/memory/mozjemalloc/jemalloc.c
+++ b/memory/mozjemalloc/jemalloc.c
@@ -97,17 +97,16 @@
  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
  *          Metadata are stored in a separate red-black tree.
  *
  *******************************************************************************
  */
 
 #ifdef MOZ_MEMORY_ANDROID
 #define NO_TLS
-#define _pthread_self() pthread_self()
 #endif
 
 /*
  * On Linux, we use madvise(MADV_DONTNEED) to release memory back to the
  * operating system.  If we release 1MB of live pages with MADV_DONTNEED, our
  * RSS will decrease by 1MB (almost) immediately.
  *
  * On Mac, we use madvise(MADV_FREE).  Unlike MADV_DONTNEED on Linux, MADV_FREE
@@ -135,23 +134,16 @@
  * defaults the A and J runtime options to off.  These settings are appropriate
  * for production systems.
  */
 #ifndef MOZ_MEMORY_DEBUG
 #  define	MALLOC_PRODUCTION
 #endif
 
 /*
- * Use only one arena by default.  Mozilla does not currently make extensive
- * use of concurrent allocation, so the increased fragmentation associated with
- * multiple arenas is not warranted.
- */
-#define	MOZ_MEMORY_NARENAS_DEFAULT_ONE
-
-/*
  * Pass this set of options to jemalloc as its default. It does not override
  * the options passed via the MALLOC_OPTIONS environment variable but is
  * applied in addition to them.
  */
 #ifdef MOZ_WIDGET_GONK
     /* Reduce the amount of unused dirty pages to 1MiB on B2G */
 #   define MOZ_MALLOC_OPTIONS "ff"
 #else
@@ -228,18 +220,16 @@
 #define	STDERR_FILENO 2
 #define	PATH_MAX MAX_PATH
 #define	vsnprintf _vsnprintf
 
 #ifndef NO_TLS
 static unsigned long tlsIndex = 0xffffffff;
 #endif
 
-#define	_pthread_self() __threadid()
-
 /* use MSVC intrinsics */
 #pragma intrinsic(_BitScanForward)
 static __forceinline int
 ffs(int x)
 {
 	unsigned long i;
 
 	if (_BitScanForward(&i, x) != 0)
@@ -331,17 +321,16 @@ typedef long ssize_t;
 
 #include <errno.h>
 #include <limits.h>
 #ifndef SIZE_T_MAX
 #  define SIZE_T_MAX	SIZE_MAX
 #endif
 #include <pthread.h>
 #ifdef MOZ_MEMORY_DARWIN
-#define _pthread_self pthread_self
 #define _pthread_mutex_init pthread_mutex_init
 #define _pthread_mutex_trylock pthread_mutex_trylock
 #define _pthread_mutex_lock pthread_mutex_lock
 #define _pthread_mutex_unlock pthread_mutex_unlock
 #endif
 #include <sched.h>
 #include <stdarg.h>
 #include <stdio.h>
@@ -1012,21 +1001,16 @@ struct arena_s {
 	arena_bin_t		bins[1]; /* Dynamically sized. */
 };
 
 /******************************************************************************/
 /*
  * Data.
  */
 
-#ifndef MOZ_MEMORY_NARENAS_DEFAULT_ONE
-/* Number of CPUs. */
-static unsigned		ncpus;
-#endif
-
 #ifdef JEMALLOC_MUNMAP
 static const bool config_munmap = true;
 #else
 static const bool config_munmap = false;
 #endif
 
 #ifdef JEMALLOC_RECYCLE
 static const bool config_recycle = true;
@@ -1218,19 +1202,16 @@ static size_t		base_committed;
  */
 
 /*
  * Arenas that are used to service external requests.  Not all elements of the
  * arenas array are necessarily used; arenas are created lazily as needed.
  */
 static arena_t		**arenas;
 static unsigned		narenas;
-#ifndef NO_TLS
-static unsigned		next_arena;
-#endif
 #ifdef MOZ_MEMORY
 static malloc_spinlock_t arenas_lock; /* Protects arenas initialization. */
 #else
 static pthread_mutex_t arenas_lock; /* Protects arenas initialization. */
 #endif
 
 #ifndef NO_TLS
 /*
@@ -1280,17 +1261,16 @@ static size_t	opt_chunk_2pow = CHUNK_2PO
 static bool	opt_utrace = false;
 #endif
 #ifdef MALLOC_SYSV
 static bool	opt_sysv = false;
 #endif
 #ifdef MALLOC_XMALLOC
 static bool	opt_xmalloc = false;
 #endif
-static int	opt_narenas_lshift = 0;
 
 #ifdef MALLOC_UTRACE
 typedef struct {
 	void	*p;
 	size_t	s;
 	void	*r;
 } malloc_utrace_t;
 
@@ -1337,19 +1317,16 @@ static void	*chunk_alloc_mmap(size_t siz
 static void	*chunk_recycle(extent_tree_t *chunks_szad,
 	extent_tree_t *chunks_ad, size_t size,
 	size_t alignment, bool base, bool *zero);
 static void	*chunk_alloc(size_t size, size_t alignment, bool base, bool zero);
 static void	chunk_record(extent_tree_t *chunks_szad,
 	extent_tree_t *chunks_ad, void *chunk, size_t size);
 static bool	chunk_dalloc_mmap(void *chunk, size_t size);
 static void	chunk_dealloc(void *chunk, size_t size);
-#ifndef NO_TLS
-static arena_t	*choose_arena_hard(void);
-#endif
 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
     bool large, bool zero);
 static void arena_chunk_init(arena_t *arena, arena_chunk_t *chunk);
 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
 static arena_run_t *arena_run_alloc(arena_t *arena, arena_bin_t *bin,
     size_t size, bool large, bool zero);
 static void	arena_purge(arena_t *arena, bool all);
 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
@@ -2945,18 +2922,17 @@ jemalloc_thread_local_arena_impl(bool en
 #else
 	arenas_map = arena;
 #endif
 
 #endif
 }
 
 /*
- * Choose an arena based on a per-thread value (fast-path code, calls slow-path
- * code if necessary).
+ * Choose an arena based on a per-thread value.
  */
 static inline arena_t *
 choose_arena(void)
 {
 	arena_t *ret;
 
 	/*
 	 * We can only use TLS if this is a PIC library, since for the static
@@ -2968,99 +2944,25 @@ choose_arena(void)
 #  ifdef MOZ_MEMORY_WINDOWS
 	ret = (arena_t*)TlsGetValue(tlsIndex);
 #  elif defined(MOZ_MEMORY_DARWIN)
 	ret = (arena_t*)pthread_getspecific(tlsIndex);
 #  else
 	ret = arenas_map;
 #  endif
 
-	if (ret == NULL) {
-		ret = choose_arena_hard();
+	if (ret == NULL)
+#endif
+	{
+		ret = arenas[0];
 		RELEASE_ASSERT(ret != NULL);
 	}
-#else
-	if (narenas > 1) {
-		unsigned long ind;
-
-		/*
-		 * Hash _pthread_self() to one of the arenas.  There is a prime
-		 * number of arenas, so this has a reasonable chance of
-		 * working.  Even so, the hashing can be easily thwarted by
-		 * inconvenient _pthread_self() values.  Without specific
-		 * knowledge of how _pthread_self() calculates values, we can't
-		 * easily do much better than this.
-		 */
-		ind = (unsigned long) _pthread_self() % narenas;
-
-		/*
-		 * Optimistially assume that arenas[ind] has been initialized.
-		 * At worst, we find out that some other thread has already
-		 * done so, after acquiring the lock in preparation.  Note that
-		 * this lazy locking also has the effect of lazily forcing
-		 * cache coherency; without the lock acquisition, there's no
-		 * guarantee that modification of arenas[ind] by another thread
-		 * would be seen on this CPU for an arbitrary amount of time.
-		 *
-		 * In general, this approach to modifying a synchronized value
-		 * isn't a good idea, but in this case we only ever modify the
-		 * value once, so things work out well.
-		 */
-		ret = arenas[ind];
-		if (ret == NULL) {
-			/*
-			 * Avoid races with another thread that may have already
-			 * initialized arenas[ind].
-			 */
-			malloc_spin_lock(&arenas_lock);
-			if (arenas[ind] == NULL)
-				ret = arenas_extend((unsigned)ind);
-			else
-				ret = arenas[ind];
-			malloc_spin_unlock(&arenas_lock);
-		}
-	} else
-		ret = arenas[0];
-#endif
-
-	RELEASE_ASSERT(ret != NULL);
 	return (ret);
 }
 
-#ifndef NO_TLS
-/*
- * Choose an arena based on a per-thread value (slow-path code only, called
- * only by choose_arena()).
- */
-static arena_t *
-choose_arena_hard(void)
-{
-	arena_t *ret;
-
-	if (narenas > 1) {
-		malloc_spin_lock(&arenas_lock);
-		if ((ret = arenas[next_arena]) == NULL)
-			ret = arenas_extend(next_arena);
-		next_arena = (next_arena + 1) % narenas;
-		malloc_spin_unlock(&arenas_lock);
-	} else
-		ret = arenas[0];
-
-#ifdef MOZ_MEMORY_WINDOWS
-	TlsSetValue(tlsIndex, ret);
-#elif defined(MOZ_MEMORY_DARWIN)
-	pthread_setspecific(tlsIndex, ret);
-#else
-	arenas_map = ret;
-#endif
-
-	return (ret);
-}
-#endif
-
 static inline int
 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
 {
 	uintptr_t a_chunk = (uintptr_t)a;
 	uintptr_t b_chunk = (uintptr_t)b;
 
 	assert(a != NULL);
 	assert(b != NULL);
@@ -5043,128 +4945,16 @@ huge_dalloc(void *ptr)
 	malloc_mutex_unlock(&huge_mtx);
 
 	/* Unmap chunk. */
 	chunk_dealloc(node->addr, CHUNK_CEILING(node->size));
 
 	base_node_dealloc(node);
 }
 
-#ifndef MOZ_MEMORY_NARENAS_DEFAULT_ONE
-#ifdef MOZ_MEMORY_BSD
-static inline unsigned
-malloc_ncpus(void)
-{
-	unsigned ret;
-	int mib[2];
-	size_t len;
-
-	mib[0] = CTL_HW;
-	mib[1] = HW_NCPU;
-	len = sizeof(ret);
-	if (sysctl(mib, 2, &ret, &len, (void *) 0, 0) == -1) {
-		/* Error. */
-		return (1);
-	}
-
-	return (ret);
-}
-#elif (defined(MOZ_MEMORY_LINUX))
-#include <fcntl.h>
-
-static inline unsigned
-malloc_ncpus(void)
-{
-	unsigned ret;
-	int fd, nread, column;
-	char buf[1024];
-	static const char matchstr[] = "processor\t:";
-	int i;
-
-	/*
-	 * sysconf(3) would be the preferred method for determining the number
-	 * of CPUs, but it uses malloc internally, which causes untennable
-	 * recursion during malloc initialization.
-	 */
-	fd = open("/proc/cpuinfo", O_RDONLY);
-	if (fd == -1)
-		return (1); /* Error. */
-	/*
-	 * Count the number of occurrences of matchstr at the beginnings of
-	 * lines.  This treats hyperthreaded CPUs as multiple processors.
-	 */
-	column = 0;
-	ret = 0;
-	while (true) {
-		nread = read(fd, &buf, sizeof(buf));
-		if (nread <= 0)
-			break; /* EOF or error. */
-		for (i = 0;i < nread;i++) {
-			char c = buf[i];
-			if (c == '\n')
-				column = 0;
-			else if (column != -1) {
-				if (c == matchstr[column]) {
-					column++;
-					if (column == sizeof(matchstr) - 1) {
-						column = -1;
-						ret++;
-					}
-				} else
-					column = -1;
-			}
-		}
-	}
-
-	if (ret == 0)
-		ret = 1; /* Something went wrong in the parser. */
-	close(fd);
-
-	return (ret);
-}
-#elif (defined(MOZ_MEMORY_DARWIN))
-#include <mach/mach_init.h>
-#include <mach/mach_host.h>
-
-static inline unsigned
-malloc_ncpus(void)
-{
-	kern_return_t error;
-	natural_t n;
-	processor_info_array_t pinfo;
-	mach_msg_type_number_t pinfocnt;
-
-	error = host_processor_info(mach_host_self(), PROCESSOR_BASIC_INFO,
-				    &n, &pinfo, &pinfocnt);
-	if (error != KERN_SUCCESS)
-		return (1); /* Error. */
-	else
-		return (n);
-}
-#elif (defined(MOZ_MEMORY_SOLARIS))
-
-static inline unsigned
-malloc_ncpus(void)
-{
-	return sysconf(_SC_NPROCESSORS_ONLN);
-}
-#else
-static inline unsigned
-malloc_ncpus(void)
-{
-
-	/*
-	 * We lack a way to determine the number of CPUs on this platform, so
-	 * assume 1 CPU.
-	 */
-	return (1);
-}
-#endif
-#endif
-
 static void
 malloc_print_stats(void)
 {
 
 	if (opt_print_stats) {
 		char s[UMAX2S_BUFSIZE];
 		_malloc_message("___ Begin malloc statistics ___\n", "", "",
 		    "");
@@ -5191,19 +4981,16 @@ malloc_print_stats(void)
 #ifdef MALLOC_XMALLOC
 		_malloc_message(opt_xmalloc ? "X" : "x", "", "", "");
 #endif
 #ifdef MALLOC_FILL
 		_malloc_message(opt_zero ? "Z" : "z", "", "", "");
 #endif
 		_malloc_message("\n", "", "", "");
 
-#ifndef MOZ_MEMORY_NARENAS_DEFAULT_ONE
-		_malloc_message("CPUs: ", umax2s(ncpus, 10, s), "\n", "");
-#endif
 		_malloc_message("Max arenas: ", umax2s(narenas, 10, s), "\n",
 		    "");
 		_malloc_message("Pointer size: ", umax2s(sizeof(void *), 10, s),
 		    "\n", "");
 		_malloc_message("Quantum size: ", umax2s(quantum, 10, s), "\n",
 		    "");
 		_malloc_message("Max small size: ", umax2s(small_max, 10, s),
 		    "\n", "");
@@ -5343,25 +5130,18 @@ malloc_init_hard(void)
 	/* Get page size and number of CPUs */
 #ifdef MOZ_MEMORY_WINDOWS
 	{
 		SYSTEM_INFO info;
 
 		GetSystemInfo(&info);
 		result = info.dwPageSize;
 
-#ifndef MOZ_MEMORY_NARENAS_DEFAULT_ONE
-		ncpus = info.dwNumberOfProcessors;
-#endif
 	}
 #else
-#ifndef MOZ_MEMORY_NARENAS_DEFAULT_ONE
-	ncpus = malloc_ncpus();
-#endif
-
 	result = sysconf(_SC_PAGESIZE);
 	assert(result != -1);
 #endif
 
 	/* We assume that the page size is a power of 2. */
 	assert(((result - 1) & result) == 0);
 #ifdef MALLOC_STATIC_SIZES
 	if (pagesize % (size_t) result) {
@@ -5501,22 +5281,16 @@ MALLOC_OUT:
 						opt_chunk_2pow--;
 					break;
 				case 'K':
 					if (opt_chunk_2pow + 1 <
 					    (sizeof(size_t) << 3))
 						opt_chunk_2pow++;
 					break;
 #endif
-				case 'n':
-					opt_narenas_lshift--;
-					break;
-				case 'N':
-					opt_narenas_lshift++;
-					break;
 				case 'p':
 					opt_print_stats = false;
 					break;
 				case 'P':
 					opt_print_stats = true;
 					break;
 #ifndef MALLOC_STATIC_SIZES
 				case 'q':
@@ -5667,95 +5441,34 @@ MALLOC_OUT:
 	/* Initialize base allocation data structures. */
 #ifdef MALLOC_STATS
 	base_mapped = 0;
 	base_committed = 0;
 #endif
 	base_nodes = NULL;
 	malloc_mutex_init(&base_mtx);
 
-#ifdef MOZ_MEMORY_NARENAS_DEFAULT_ONE
 	narenas = 1;
-#else
-	if (ncpus > 1) {
-		/*
-		 * For SMP systems, create four times as many arenas as there
-		 * are CPUs by default.
-		 */
-		opt_narenas_lshift += 2;
-	}
-
-	/* Determine how many arenas to use. */
-	narenas = ncpus;
-#endif
-	if (opt_narenas_lshift > 0) {
-		if ((narenas << opt_narenas_lshift) > narenas)
-			narenas <<= opt_narenas_lshift;
-		/*
-		 * Make sure not to exceed the limits of what base_alloc() can
-		 * handle.
-		 */
-		if (narenas * sizeof(arena_t *) > chunksize)
-			narenas = chunksize / sizeof(arena_t *);
-	} else if (opt_narenas_lshift < 0) {
-		if ((narenas >> -opt_narenas_lshift) < narenas)
-			narenas >>= -opt_narenas_lshift;
-		/* Make sure there is at least one arena. */
-		if (narenas == 0)
-			narenas = 1;
-	}
-
-#ifdef NO_TLS
-	if (narenas > 1) {
-		static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
-		    23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
-		    89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
-		    151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
-		    223, 227, 229, 233, 239, 241, 251, 257, 263};
-		unsigned nprimes, parenas;
-
-		/*
-		 * Pick a prime number of hash arenas that is more than narenas
-		 * so that direct hashing of pthread_self() pointers tends to
-		 * spread allocations evenly among the arenas.
-		 */
-		assert((narenas & 1) == 0); /* narenas must be even. */
-		nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
-		parenas = primes[nprimes - 1]; /* In case not enough primes. */
-		for (i = 1; i < nprimes; i++) {
-			if (primes[i] > narenas) {
-				parenas = primes[i];
-				break;
-			}
-		}
-		narenas = parenas;
-	}
-#endif
-
-#ifndef NO_TLS
-	next_arena = 0;
-#endif
 
 	/* Allocate and initialize arenas. */
 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
 	if (arenas == NULL) {
 #ifndef MOZ_MEMORY_WINDOWS
 		malloc_mutex_unlock(&init_lock);
 #endif
 		return (true);
 	}
 	/*
 	 * Zero the array.  In practice, this should always be pre-zeroed,
 	 * since it was just mmap()ed, but let's be sure.
 	 */
 	memset(arenas, 0, sizeof(arena_t *) * narenas);
 
 	/*
-	 * Initialize one arena here.  The rest are lazily created in
-	 * choose_arena_hard().
+	 * Initialize one arena here.
 	 */
 	arenas_extend(0);
 	if (arenas[0] == NULL) {
 #ifndef MOZ_MEMORY_WINDOWS
 		malloc_mutex_unlock(&init_lock);
 #endif
 		return (true);
 	}