Bug 1397101 - Reduce the number of dirty pages we allow to be kept in thread local arenas. r?njn draft
author Mike Hommey <mh+mozilla@glandium.org>
Thu, 14 Sep 2017 07:26:30 +0900
changeset 664554 27313c255db72920dcf0cd87f32648c09e9a6604
parent 664545 3b18e792cc3eb0bf381a422511c5bdde66f10af8
child 664555 7fbd7a84416c8e129665ecce4f111c0346d33f10
push id 79720
push user bmo:mh+mozilla@glandium.org
push date Thu, 14 Sep 2017 02:03:42 +0000
reviewers njn
bugs 1397101, 1361258
milestone 57.0a1
Bug 1397101 - Reduce the number of dirty pages we allow to be kept in thread local arenas. r?njn

Until bug 1361258, there was only ever one mozjemalloc arena, so the limit on dirty pages we allow to keep around, fixed at 1MB per arena, was in fact 1MB for the entire process. With stylo using thread local arenas, we can now have multiple arenas per process, multiplying that number of dirty pages.

Those dirty pages may be reused when later allocations fill them, but a relatively large number of them is kept around for each stylo thread (in proportion to the amount of memory ever allocated by stylo), and how much memory stylo uses depends on the workload generated by the pages being visited, so those dirty pages may well go unused for a long time. This is less of a problem for the main arena, which is used for most everything else.

So, for each arena except the main one, we decrease the number of dirty pages we allow to be kept around to 1/8 of the current value. We do this by introducing a per-arena configuration of that maximum number.
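To make the mechanism concrete before the diff, here is a minimal standalone sketch of a per-arena dirty page cap. It borrows the names from the patch (arena_t, ndirty, dirty_max, opt_dirty_max, arena_purge, arena_run_dalloc, arena_new), but the bodies are deliberately simplified stand-ins rather than the real mozjemalloc code: a freed run adds dirty pages, the per-arena cap triggers a purge down to half the cap, thread local arenas get 1/8 of the default, and the main arena is reset to the full default after creation.

// Illustration only; not the real mozjemalloc implementation.
#include <cstddef>
#include <cstdio>

static const size_t opt_dirty_max = 256;  // default cap, in pages (~1MB with 4KiB pages)

struct arena_t {
  size_t ndirty;     // current count of potentially dirty, unused pages
  size_t dirty_max;  // per-arena cap on ndirty (what the patch introduces)
};

// Stand-in for arena_purge(): release dirty pages until the arena is at or
// below half of its cap (the real code madvise()s runs chunk by chunk).
static void arena_purge(arena_t* arena, bool all) {
  size_t dirty_max = all ? 1 : arena->dirty_max;
  while (arena->ndirty > (dirty_max >> 1)) {
    arena->ndirty--;  // pretend we purged one page
  }
}

// Stand-in for the tail of arena_run_dalloc(): a freed run adds dirty pages,
// and the per-arena cap, not the global opt_dirty_max, decides when to purge.
static void arena_run_dalloc(arena_t* arena, size_t npages) {
  arena->ndirty += npages;
  if (arena->ndirty > arena->dirty_max) {
    arena_purge(arena, false);
  }
}

// Stand-in for arena_new(): every new arena starts with 1/8 of the default cap.
static void arena_new(arena_t* arena) {
  arena->ndirty = 0;
  arena->dirty_max = opt_dirty_max >> 3;
}

int main() {
  arena_t main_arena, stylo_arena;
  arena_new(&main_arena);
  arena_new(&stylo_arena);
  main_arena.dirty_max = opt_dirty_max;  // main arena keeps the old behavior

  arena_run_dalloc(&stylo_arena, 100);   // exceeds the 32-page cap, purges down to 16
  arena_run_dalloc(&main_arena, 100);    // under the 256-page cap, no purge
  printf("stylo ndirty=%zu main ndirty=%zu\n", stylo_arena.ndirty, main_arena.ndirty);
}

Running the sketch, the thread local arena purges down to 16 dirty pages while the main arena keeps all 100; that asymmetry is the behavior difference the patch is after.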
memory/build/mozjemalloc.cpp
--- a/memory/build/mozjemalloc.cpp
+++ b/memory/build/mozjemalloc.cpp
@@ -728,16 +728,20 @@ struct arena_t {
 
 	/*
 	 * Current count of pages within unused runs that are potentially
 	 * dirty, and for which madvise(... MADV_FREE) has not been called.  By
 	 * tracking this, we can institute a limit on how much dirty unused
 	 * memory is mapped for each arena.
 	 */
 	size_t			ndirty;
+	/*
+	 * Maximum value allowed for ndirty.
+	 */
+	size_t			dirty_max;
 
 	/*
 	 * Size/address-ordered tree of this arena's available runs.  This tree
 	 * is used for first-best-fit run allocation.
 	 */
 	arena_avail_tree_t	runs_avail;
 
 	/*
@@ -2792,26 +2796,26 @@ arena_run_alloc(arena_t *arena, arena_bi
 }
 
 static void
 arena_purge(arena_t *arena, bool all)
 {
 	arena_chunk_t *chunk;
 	size_t i, npages;
 	/* If all is set purge all dirty pages. */
-	size_t dirty_max = all ? 1 : opt_dirty_max;
+	size_t dirty_max = all ? 1 : arena->dirty_max;
 #ifdef MOZ_DEBUG
 	size_t ndirty = 0;
 	rb_foreach_begin(arena_chunk_t, link_dirty, &arena->chunks_dirty,
 	    chunk) {
 		ndirty += chunk->ndirty;
 	} rb_foreach_end(arena_chunk_t, link_dirty, &arena->chunks_dirty, chunk)
 	MOZ_ASSERT(ndirty == arena->ndirty);
 #endif
-	MOZ_DIAGNOSTIC_ASSERT(all || (arena->ndirty > opt_dirty_max));
+	MOZ_DIAGNOSTIC_ASSERT(all || (arena->ndirty > arena->dirty_max));
 
 	/*
 	 * Iterate downward through chunks until enough dirty memory has been
 	 * purged.  Terminate as soon as possible in order to minimize the
 	 * number of system calls, even if a chunk has only been partially
 	 * purged.
 	 */
 	while (arena->ndirty > (dirty_max >> 1)) {
@@ -2981,18 +2985,18 @@ arena_run_dalloc(arena_t *arena, arena_r
 	/* Insert into runs_avail, now that coalescing is complete. */
 	arena_avail_tree_insert(&arena->runs_avail, &chunk->map[run_ind]);
 
 	/* Deallocate chunk if it is now completely unused. */
 	if ((chunk->map[arena_chunk_header_npages].bits & (~pagesize_mask |
 	    CHUNK_MAP_ALLOCATED)) == arena_maxclass)
 		arena_chunk_dealloc(arena, chunk);
 
-	/* Enforce opt_dirty_max. */
-	if (arena->ndirty > opt_dirty_max)
+	/* Enforce dirty_max. */
+	if (arena->ndirty > arena->dirty_max)
 		arena_purge(arena, false);
 }
 
 static void
 arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
     size_t oldsize, size_t newsize)
 {
 	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> pagesize_2pow;
@@ -4021,16 +4025,19 @@ arena_new(arena_t *arena)
 	/* Initialize chunks. */
 	arena_chunk_tree_dirty_new(&arena->chunks_dirty);
 #ifdef MALLOC_DOUBLE_PURGE
 	new (&arena->chunks_madvised) mozilla::DoublyLinkedList<arena_chunk_t>();
 #endif
 	arena->spare = nullptr;
 
 	arena->ndirty = 0;
+	// Reduce the maximum number of dirty pages we allow to be kept on
+	// thread local arenas. TODO: make this more flexible.
+	arena->dirty_max = opt_dirty_max >> 3;
 
 	arena_avail_tree_new(&arena->runs_avail);
 
 	/* Initialize bins. */
 	prev_run_size = pagesize;
 
 	/* (2^n)-spaced tiny bins. */
 	for (i = 0; i < ntbins; i++) {
@@ -4615,16 +4622,20 @@ MALLOC_OUT:
    */
   arenas_extend();
   if (!arenas || !arenas[0]) {
 #ifndef XP_WIN
     malloc_mutex_unlock(&init_lock);
 #endif
     return true;
   }
+  /* arena_new() sets this to a lower value for thread local arenas;
+   * reset it to the default value for the main arena. */
+  arenas[0]->dirty_max = opt_dirty_max;
+
 #ifndef NO_TLS
   /*
    * Assign the initial arena to the initial thread.
    */
   thread_arena.set(arenas[0]);
 #endif
 
   chunk_rtree = malloc_rtree_new((SIZEOF_PTR << 3) - opt_chunk_2pow);
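For scale, a back-of-envelope check of the numbers implied by the commit message; the 4KiB page size is an assumption here, while the 1MB default and the 1/8 factor come from the message and the patch:

#include <cstddef>

constexpr size_t kPageSize = 4096;                              // assumed page size
constexpr size_t kDefaultDirtyMax = (1 << 20) / kPageSize;      // 256 pages ~= 1MB
constexpr size_t kThreadLocalDirtyMax = kDefaultDirtyMax >> 3;  // 32 pages
static_assert(kThreadLocalDirtyMax * kPageSize == 128 * 1024,
              "each thread local arena now keeps at most ~128KB of dirty pages");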