Bug 1364358 - Keep track of mozjemalloc thread-local arenas. r?erahm draft
author Mike Hommey <mh+mozilla@glandium.org>
Fri, 12 May 2017 21:21:11 +0900
changeset 579096 e42ff803a0a7c408970c9cc50532d0b52635586b
parent 579095 48efe052f3cd2f279091a1118ca63fb41be36af1
child 579100 b8f7f776e1198671171b598b554d8777740f9b43
push id 59145
push user bmo:mh+mozilla@glandium.org
push date Tue, 16 May 2017 23:08:32 +0000
reviewers erahm
bugs 1364358, 1361258
milestone 55.0a1
Bug 1364358 - Keep track of mozjemalloc thread-local arenas. r?erahm

jemalloc_stats, as well as the pre/post-fork hooks, use the `arenas` list along with the `narenas` count to iterate over all arenas set up by mozjemalloc. Up until the previous commit, that was used for the automatic multiple-arenas support, which is now removed. But mozjemalloc still supports running with multiple arenas, in the form of opted-in, per-thread arenas.

After bug 1361258, those arenas were no longer tracked. Now that `arenas` only contains the default arena, we can fill it with the thread-local arenas as well. Keeping the automatic multiple-arenas support, which we don't use and don't really plan to, would have meant using a separate list for them.
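For reference, here is a minimal, self-contained sketch of the scheme this patch implements. It is not mozjemalloc code: it substitutes a pthread mutex for malloc_spin_lock, GCC's __thread for the per-platform TLS accessors (TlsSetValue / pthread_setspecific / arenas_map), plain calloc for base_alloc, and an invented my_arena_t type; the ARENAS_GROWTH constant stands in for the local arenas_growth. The registry logic mirrors the new thread_local_arena() / arenas_extend(): append under arenas_lock, grow the array in steps of 16 while leaking the old one, and fall back to arenas[0] on OOM.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for mozjemalloc's arena_t; only the tracking logic matters here. */
typedef struct { int id; } my_arena_t;

/* Grow the registry in steps of 16, like arenas_growth in the patch. */
#define ARENAS_GROWTH 16

static pthread_mutex_t arenas_lock = PTHREAD_MUTEX_INITIALIZER;
static my_arena_t **arenas = NULL;  /* arenas[0] is the default arena */
static size_t narenas = 0;

/* Per-thread "current arena", standing in for arenas_map / TLS. */
static __thread my_arena_t *arenas_map = NULL;

/* Create a new arena and append it to the registry (cf. arenas_extend). */
static my_arena_t *
arenas_extend(void)
{
	my_arena_t *ret = calloc(1, sizeof(*ret));
	if (ret == NULL)
		return arenas ? arenas[0] : NULL;  /* cf. arenas_fallback() */

	pthread_mutex_lock(&arenas_lock);
	if (narenas % ARENAS_GROWTH == 0) {
		/* The array is full; allocate one with room for 16 more
		 * entries.  The old array is leaked on purpose, mirroring the
		 * patch, because the base allocator cannot free. */
		my_arena_t **grown = calloc(narenas + ARENAS_GROWTH, sizeof(*grown));
		if (grown == NULL) {
			pthread_mutex_unlock(&arenas_lock);
			free(ret);
			return arenas ? arenas[0] : NULL;
		}
		if (narenas)
			memcpy(grown, arenas, narenas * sizeof(*grown));
		arenas = grown;
	}
	ret->id = (int)narenas;
	arenas[narenas++] = ret;
	pthread_mutex_unlock(&arenas_lock);
	return ret;
}

/* Opt the calling thread in or out of a private arena
 * (cf. thread_local_arena / jemalloc_thread_local_arena_impl). */
static my_arena_t *
thread_local_arena(int enabled)
{
	if (enabled) {
		arenas_map = arenas_extend();
	} else {
		pthread_mutex_lock(&arenas_lock);
		arenas_map = arenas[0];
		pthread_mutex_unlock(&arenas_lock);
	}
	return arenas_map;
}

int
main(void)
{
	arenas_extend();        /* creates arenas[0], as malloc_init does */
	thread_local_arena(1);  /* this thread now allocates from arenas[1] */

	/* Consumers now see every arena, including the thread-local ones,
	 * by walking the list under arenas_lock. */
	pthread_mutex_lock(&arenas_lock);
	for (size_t i = 0; i < narenas; i++)
		printf("arenas[%zu] = arena #%d\n", i, arenas[i]->id);
	pthread_mutex_unlock(&arenas_lock);

	printf("current thread uses arena #%d\n", arenas_map->id);
	return 0;
}

With every arena tracked in a single list, consumers such as jemalloc_stats_impl and jemalloc_free_dirty_pages_impl only need to take arenas_lock and walk arenas[0..narenas), which is what the hunks below do.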
memory/mozjemalloc/jemalloc.c
--- a/memory/mozjemalloc/jemalloc.c
+++ b/memory/mozjemalloc/jemalloc.c
@@ -1345,18 +1345,17 @@ static void	arena_dalloc_large(arena_t *
     void *ptr);
 static void	arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
     void *ptr, size_t size, size_t oldsize);
 static bool	arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
     void *ptr, size_t size, size_t oldsize);
 static bool	arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
 static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
 static bool	arena_new(arena_t *arena);
-static arena_t	*arenas_extend(unsigned ind);
-#define NO_INDEX ((unsigned) -1)
+static arena_t	*arenas_extend();
 static void	*huge_malloc(size_t size, bool zero);
 static void	*huge_palloc(size_t size, size_t alignment, bool zero);
 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
 static void	huge_dalloc(void *ptr);
 static void	malloc_print_stats(void);
 #ifndef MOZ_MEMORY_WINDOWS
 static
 #endif
@@ -2895,40 +2894,51 @@ chunk_dealloc(void *chunk, size_t size)
 /*
  * End chunk management functions.
  */
 /******************************************************************************/
 /*
  * Begin arena.
  */
 
-MOZ_JEMALLOC_API void
-jemalloc_thread_local_arena_impl(bool enabled)
+static inline arena_t *
+thread_local_arena(bool enabled)
 {
 #ifndef NO_TLS
 	arena_t *arena;
 
 	if (enabled) {
 		/* The arena will essentially be leaked if this function is
 		 * called with `false`, but it doesn't matter at the moment,
 		 * because in practice nothing actually calls this function
 		 * with `false`, except maybe at shutdown. */
-		arena = arenas_extend(NO_INDEX);
+		arena = arenas_extend();
 	} else {
+		malloc_spin_lock(&arenas_lock);
 		arena = arenas[0];
+		malloc_spin_unlock(&arenas_lock);
 	}
 #ifdef MOZ_MEMORY_WINDOWS
 	TlsSetValue(tlsIndex, arena);
 #elif defined(MOZ_MEMORY_DARWIN)
 	pthread_setspecific(tlsIndex, arena);
 #else
 	arenas_map = arena;
 #endif
 
-#endif
+	return arena;
+#else
+	return arenas[0];
+#endif
+}
+
+MOZ_JEMALLOC_API void
+jemalloc_thread_local_arena_impl(bool enabled)
+{
+	thread_local_arena(enabled);
 }
 
 /*
  * Choose an arena based on a per-thread value.
  */
 static inline arena_t *
 choose_arena(void)
 {
@@ -2944,22 +2954,23 @@ choose_arena(void)
 #  ifdef MOZ_MEMORY_WINDOWS
 	ret = (arena_t*)TlsGetValue(tlsIndex);
 #  elif defined(MOZ_MEMORY_DARWIN)
 	ret = (arena_t*)pthread_getspecific(tlsIndex);
 #  else
 	ret = arenas_map;
 #  endif
 
-	if (ret == NULL)
-#endif
-	{
-		ret = arenas[0];
-		RELEASE_ASSERT(ret != NULL);
+	if (ret == NULL) {
+                ret = thread_local_arena(false);
 	}
+#else
+	ret = arenas[0];
+#endif
+	RELEASE_ASSERT(ret != NULL);
 	return (ret);
 }
 
 static inline int
 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
 {
 	uintptr_t a_chunk = (uintptr_t)a;
 	uintptr_t b_chunk = (uintptr_t)b;
@@ -4692,45 +4703,82 @@ arena_new(arena_t *arena)
 
 #if defined(MALLOC_DEBUG) || defined(MOZ_JEMALLOC_HARD_ASSERTS)
 	arena->magic = ARENA_MAGIC;
 #endif
 
 	return (false);
 }
 
-/* Create a new arena and insert it into the arenas array at index ind. */
-static arena_t *
-arenas_extend(unsigned ind)
+static inline arena_t *
+arenas_fallback()
 {
-	arena_t *ret;
-
-	/* Allocate enough space for trailing bins. */
-	ret = (arena_t *)base_alloc(sizeof(arena_t)
-	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
-	if (ret != NULL && arena_new(ret) == false) {
-		if (ind != NO_INDEX) {
-			arenas[ind] = ret;
-		}
-		return (ret);
-	}
 	/* Only reached if there is an OOM error. */
 
 	/*
 	 * OOM here is quite inconvenient to propagate, since dealing with it
 	 * would require a check for failure in the fast path.  Instead, punt
-	 * by using arenas[0].  In practice, this is an extremely unlikely
-	 * failure.
+	 * by using arenas[0].
+	 * In practice, this is an extremely unlikely failure.
 	 */
 	_malloc_message(_getprogname(),
 	    ": (malloc) Error initializing arena\n", "", "");
 	if (opt_abort)
 		abort();
 
-	return (arenas[0]);
+	return arenas[0];
+}
+
+/* Create a new arena and return it. */
+static arena_t *
+arenas_extend()
+{
+	/*
+	 * The list of arenas is first allocated to contain at most 16 elements,
+	 * and when the limit is reached, the list is grown such that it can
+	 * contain 16 more elements.
+	 */
+	const size_t arenas_growth = 16;
+	arena_t *ret;
+
+
+	/* Allocate enough space for trailing bins. */
+	ret = (arena_t *)base_alloc(sizeof(arena_t)
+	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
+	if (ret == NULL || arena_new(ret)) {
+		return arenas_fallback();
+	}
+
+	malloc_spin_lock(&arenas_lock);
+
+	/* Grow the arenas list if it is full. */
+	if (narenas % arenas_growth == 0) {
+		size_t max_arenas = ((narenas + arenas_growth) / arenas_growth) * arenas_growth;
+		/*
+		 * We're unfortunately leaking the previous allocation;
+		 * the base allocator doesn't know how to free things.
+		 */
+		arena_t** new_arenas = (arena_t **)base_alloc(sizeof(arena_t *) * max_arenas);
+		if (new_arenas == NULL) {
+			ret = arenas ? arenas_fallback() : NULL;
+			malloc_spin_unlock(&arenas_lock);
+			return (ret);
+		}
+		memcpy(new_arenas, arenas, narenas * sizeof(arena_t *));
+		/*
+		 * Zero the array.  In practice, this should always be pre-zeroed,
+		 * since it was just mmap()ed, but let's be sure.
+		 */
+		memset(new_arenas + narenas, 0, sizeof(arena_t *) * (max_arenas - narenas));
+		arenas = new_arenas;
+	}
+	arenas[narenas++] = ret;
+
+	malloc_spin_unlock(&arenas_lock);
+	return (ret);
 }
 
 /*
  * End arena.
  */
 /******************************************************************************/
 /*
  * Begin general internal functions.
@@ -5006,27 +5054,29 @@ malloc_print_stats(void)
 		{
 			size_t allocated, mapped = 0;
 			unsigned i;
 			arena_t *arena;
 
 			/* Calculate and print allocated/mapped stats. */
 
 			/* arenas. */
+			malloc_spin_lock(&arenas_lock);
 			for (i = 0, allocated = 0; i < narenas; i++) {
 				if (arenas[i] != NULL) {
 					malloc_spin_lock(&arenas[i]->lock);
 					allocated +=
 					    arenas[i]->stats.allocated_small;
 					allocated +=
 					    arenas[i]->stats.allocated_large;
 					mapped += arenas[i]->stats.mapped;
 					malloc_spin_unlock(&arenas[i]->lock);
 				}
 			}
+			malloc_spin_unlock(&arenas_lock);
 
 			/* huge/base. */
 			malloc_mutex_lock(&huge_mtx);
 			allocated += huge_allocated;
 			mapped += huge_mapped;
 			malloc_mutex_unlock(&huge_mtx);
 
 			malloc_mutex_lock(&base_mtx);
@@ -5046,27 +5096,29 @@ malloc_print_stats(void)
 			    "huge: nmalloc      ndalloc    allocated\n");
 #ifdef MOZ_MEMORY_WINDOWS
 			malloc_printf(" %12llu %12llu %12lu\n",
 			    huge_nmalloc, huge_ndalloc, huge_allocated);
 #else
 			malloc_printf(" %12llu %12llu %12zu\n",
 			    huge_nmalloc, huge_ndalloc, huge_allocated);
 #endif
+			malloc_spin_lock(&arenas_lock);
 			/* Print stats for each arena. */
 			for (i = 0; i < narenas; i++) {
 				arena = arenas[i];
 				if (arena != NULL) {
 					malloc_printf(
 					    "\narenas[%u]:\n", i);
 					malloc_spin_lock(&arena->lock);
 					stats_print(arena);
 					malloc_spin_unlock(&arena->lock);
 				}
 			}
+			malloc_spin_unlock(&arenas_lock);
 		}
 #endif /* #ifdef MALLOC_STATS */
 		_malloc_message("--- End malloc statistics ---\n", "", "", "");
 	}
 }
 
 /*
  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
@@ -5441,37 +5493,23 @@ MALLOC_OUT:
 	/* Initialize base allocation data structures. */
 #ifdef MALLOC_STATS
 	base_mapped = 0;
 	base_committed = 0;
 #endif
 	base_nodes = NULL;
 	malloc_mutex_init(&base_mtx);
 
-	narenas = 1;
-
-	/* Allocate and initialize arenas. */
-	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
-	if (arenas == NULL) {
-#ifndef MOZ_MEMORY_WINDOWS
-		malloc_mutex_unlock(&init_lock);
-#endif
-		return (true);
-	}
-	/*
-	 * Zero the array.  In practice, this should always be pre-zeroed,
-	 * since it was just mmap()ed, but let's be sure.
-	 */
-	memset(arenas, 0, sizeof(arena_t *) * narenas);
+	malloc_spin_init(&arenas_lock);
 
 	/*
 	 * Initialize one arena here.
 	 */
-	arenas_extend(0);
-	if (arenas[0] == NULL) {
+	arenas_extend();
+	if (arenas == NULL || arenas[0] == NULL) {
 #ifndef MOZ_MEMORY_WINDOWS
 		malloc_mutex_unlock(&init_lock);
 #endif
 		return (true);
 	}
 #ifndef NO_TLS
 	/*
 	 * Assign the initial arena to the initial thread, in order to avoid
@@ -5482,18 +5520,16 @@ MALLOC_OUT:
 	TlsSetValue(tlsIndex, arenas[0]);
 #elif defined(MOZ_MEMORY_DARWIN)
 	pthread_setspecific(tlsIndex, arenas[0]);
 #else
 	arenas_map = arenas[0];
 #endif
 #endif
 
-	malloc_spin_init(&arenas_lock);
-
 #ifdef MALLOC_VALIDATE
 	chunk_rtree = malloc_rtree_new((SIZEOF_PTR << 3) - opt_chunk_2pow);
 	if (chunk_rtree == NULL)
 		return (true);
 #endif
 
 	malloc_initialized = true;
 
@@ -5980,16 +6016,17 @@ jemalloc_stats_impl(jemalloc_stats_t *st
 
 	/* Get base mapped/allocated. */
 	malloc_mutex_lock(&base_mtx);
 	non_arena_mapped += base_mapped;
 	stats->bookkeeping += base_committed;
 	assert(base_mapped >= base_committed);
 	malloc_mutex_unlock(&base_mtx);
 
+	malloc_spin_lock(&arenas_lock);
 	/* Iterate over arenas. */
 	for (i = 0; i < narenas; i++) {
 		arena_t *arena = arenas[i];
 		size_t arena_mapped, arena_allocated, arena_committed, arena_dirty, j,
 		    arena_unused, arena_headers;
 		arena_run_t* run;
 		arena_chunk_map_t* mapelm;
 
@@ -6039,16 +6076,17 @@ jemalloc_stats_impl(jemalloc_stats_t *st
 		stats->mapped += arena_mapped;
 		stats->allocated += arena_allocated;
 		stats->page_cache += arena_dirty;
 		stats->waste += arena_committed -
 		    arena_allocated - arena_dirty - arena_unused - arena_headers;
 		stats->bin_unused += arena_unused;
 		stats->bookkeeping += arena_headers;
 	}
+	malloc_spin_unlock(&arenas_lock);
 
 	/* Account for arena chunk headers in bookkeeping rather than waste. */
 	chunk_header_size =
 	    ((stats->mapped / stats->chunksize) * arena_chunk_header_npages) <<
 	    pagesize_2pow;
 
 	stats->mapped += non_arena_mapped;
 	stats->bookkeeping += chunk_header_size;
@@ -6106,21 +6144,23 @@ hard_purge_arena(arena_t *arena)
 
 	malloc_spin_unlock(&arena->lock);
 }
 
 MOZ_JEMALLOC_API void
 jemalloc_purge_freed_pages_impl()
 {
 	size_t i;
+	malloc_spin_lock(&arenas_lock);
 	for (i = 0; i < narenas; i++) {
 		arena_t *arena = arenas[i];
 		if (arena != NULL)
 			hard_purge_arena(arena);
 	}
+	malloc_spin_unlock(&arenas_lock);
 	if (!config_munmap || config_recycle) {
 		malloc_mutex_lock(&chunks_mtx);
 		extent_node_t *node = extent_tree_szad_first(&chunks_szad_mmap);
 		while (node) {
 			pages_decommit(node->addr, node->size);
 			pages_commit(node->addr, node->size);
 			node->zeroed = true;
 			node = extent_tree_szad_next(&chunks_szad_mmap, node);
@@ -6185,25 +6225,27 @@ size_t
 	return malloc_usable_size_impl(ptr);
 }
 #endif
 
 MOZ_JEMALLOC_API void
 jemalloc_free_dirty_pages_impl(void)
 {
 	size_t i;
+	malloc_spin_lock(&arenas_lock);
 	for (i = 0; i < narenas; i++) {
 		arena_t *arena = arenas[i];
 
 		if (arena != NULL) {
 			malloc_spin_lock(&arena->lock);
 			arena_purge(arena, true);
 			malloc_spin_unlock(&arena->lock);
 		}
 	}
+	malloc_spin_unlock(&arenas_lock);
 }
 
 /*
  * End non-standard functions.
  */
 /******************************************************************************/
 /*
  * Begin library-private functions, used by threading libraries for protection