Bug 1397101 - Reduce the number of dirty pages we allow to be kept in thread local arenas. r?njn
Until
bug 1361258, there was only ever one mozjemalloc arena, so the
maximum amount of dirty pages we allow to be kept around, fixed to 1MB
per arena, was, in fact, 1MB for the entire process.
With stylo using thread local arenas, we now can have multiple arenas
per process, multiplying that number of dirty pages.
While those dirty pages may be reused when other allocations
end up filling them later on, the fact that a relatively large number of
them is kept around for each stylo thread (in proportion to the amount of
memory ever allocated by stylo), combined with the fact that stylo's
memory use depends on the workload generated by the pages being
visited, means those dirty pages may very well not be used for a rather
long time. This is less of a problem with the main arena, which is used
for almost everything else.
So, for each arena except the main one, we decrease the number of dirty
pages we allow to be kept around to 1/8 of the current value. We do this
by introducing a per-arena configuration of that maximum number.
--- a/memory/build/mozjemalloc.cpp
+++ b/memory/build/mozjemalloc.cpp
@@ -728,16 +728,20 @@ struct arena_t {
/*
* Current count of pages within unused runs that are potentially
* dirty, and for which madvise(... MADV_FREE) has not been called. By
* tracking this, we can institute a limit on how much dirty unused
* memory is mapped for each arena.
*/
size_t ndirty;
+ /*
+ * Maximum value allowed for ndirty.
+ */
+ size_t dirty_max;
/*
* Size/address-ordered tree of this arena's available runs. This tree
* is used for first-best-fit run allocation.
*/
arena_avail_tree_t runs_avail;
/*
@@ -2792,26 +2796,26 @@ arena_run_alloc(arena_t *arena, arena_bi
}
static void
arena_purge(arena_t *arena, bool all)
{
arena_chunk_t *chunk;
size_t i, npages;
/* If all is set purge all dirty pages. */
- size_t dirty_max = all ? 1 : opt_dirty_max;
+ size_t dirty_max = all ? 1 : arena->dirty_max;
#ifdef MOZ_DEBUG
size_t ndirty = 0;
rb_foreach_begin(arena_chunk_t, link_dirty, &arena->chunks_dirty,
chunk) {
ndirty += chunk->ndirty;
} rb_foreach_end(arena_chunk_t, link_dirty, &arena->chunks_dirty, chunk)
MOZ_ASSERT(ndirty == arena->ndirty);
#endif
- MOZ_DIAGNOSTIC_ASSERT(all || (arena->ndirty > opt_dirty_max));
+ MOZ_DIAGNOSTIC_ASSERT(all || (arena->ndirty > arena->dirty_max));
/*
* Iterate downward through chunks until enough dirty memory has been
* purged. Terminate as soon as possible in order to minimize the
* number of system calls, even if a chunk has only been partially
* purged.
*/
while (arena->ndirty > (dirty_max >> 1)) {
@@ -2981,18 +2985,18 @@ arena_run_dalloc(arena_t *arena, arena_r
/* Insert into runs_avail, now that coalescing is complete. */
arena_avail_tree_insert(&arena->runs_avail, &chunk->map[run_ind]);
/* Deallocate chunk if it is now completely unused. */
if ((chunk->map[arena_chunk_header_npages].bits & (~pagesize_mask |
CHUNK_MAP_ALLOCATED)) == arena_maxclass)
arena_chunk_dealloc(arena, chunk);
- /* Enforce opt_dirty_max. */
- if (arena->ndirty > opt_dirty_max)
+ /* Enforce dirty_max. */
+ if (arena->ndirty > arena->dirty_max)
arena_purge(arena, false);
}
static void
arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
size_t oldsize, size_t newsize)
{
size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> pagesize_2pow;
@@ -4021,16 +4025,19 @@ arena_new(arena_t *arena)
/* Initialize chunks. */
arena_chunk_tree_dirty_new(&arena->chunks_dirty);
#ifdef MALLOC_DOUBLE_PURGE
new (&arena->chunks_madvised) mozilla::DoublyLinkedList<arena_chunk_t>();
#endif
arena->spare = nullptr;
arena->ndirty = 0;
+ // Reduce the maximum amount of dirty pages we allow to be kept on
+ // thread local arenas. TODO: make this more flexible.
+ arena->dirty_max = opt_dirty_max >> 3;
arena_avail_tree_new(&arena->runs_avail);
/* Initialize bins. */
prev_run_size = pagesize;
/* (2^n)-spaced tiny bins. */
for (i = 0; i < ntbins; i++) {
@@ -4615,16 +4622,20 @@ MALLOC_OUT:
*/
arenas_extend();
if (!arenas || !arenas[0]) {
#ifndef XP_WIN
malloc_mutex_unlock(&init_lock);
#endif
return true;
}
+ /* arena_new() sets this to a lower value for thread local arenas;
+  * reset to the default value for the main arena. */
+ arenas[0]->dirty_max = opt_dirty_max;
+
#ifndef NO_TLS
/*
* Assign the initial arena to the initial thread.
*/
thread_arena.set(arenas[0]);
#endif
chunk_rtree = malloc_rtree_new((SIZEOF_PTR << 3) - opt_chunk_2pow);