--- a/memory/mozjemalloc/jemalloc.c
+++ b/memory/mozjemalloc/jemalloc.c
@@ -188,23 +188,16 @@
/*
* MALLOC_VALIDATE causes malloc_usable_size() to perform some pointer
* validation. There are many possible errors that validation does not even
* attempt to detect.
*/
#define MALLOC_VALIDATE
-/*
- * MALLOC_BALANCE enables monitoring of arena lock contention and dynamically
- * re-balances arena load if exponentially averaged contention exceeds a
- * certain threshold.
- */
-/* #define MALLOC_BALANCE */
-
#if defined(MOZ_MEMORY_LINUX) && !defined(MOZ_MEMORY_ANDROID)
#define _GNU_SOURCE /* For mremap(2). */
#if 0 /* Enable in order to test decommit code on Linux. */
# define MALLOC_DECOMMIT
#endif
#endif
#include <sys/types.h>
@@ -235,17 +228,16 @@
#define STDERR_FILENO 2
#define PATH_MAX MAX_PATH
#define vsnprintf _vsnprintf
#ifndef NO_TLS
static unsigned long tlsIndex = 0xffffffff;
#endif
-#define __thread
#define _pthread_self() __threadid()
/* use MSVC intrinsics */
#pragma intrinsic(_BitScanForward)
static __forceinline int
ffs(int x)
{
unsigned long i;
@@ -514,23 +506,16 @@ static pthread_key_t tlsIndex;
# define SIZEOF_INT_2POW 2
#endif
/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
#if (!defined(PIC) && !defined(NO_TLS))
# define NO_TLS
#endif
-#ifdef NO_TLS
- /* MALLOC_BALANCE requires TLS. */
-# ifdef MALLOC_BALANCE
-# undef MALLOC_BALANCE
-# endif
-#endif
-
/*
* Size and alignment of memory chunks that are allocated by the OS's virtual
* memory system.
*/
#define CHUNK_2POW_DEFAULT 20
/* Maximum number of dirty pages per arena. */
#define DIRTY_MAX_DEFAULT (1U << 8)
@@ -600,34 +585,16 @@ static pthread_key_t tlsIndex;
/*
* Conversion from spinning to blocking is expensive; we use (1U <<
* BLOCK_COST_2POW) to estimate how many more times costly blocking is than
* worst-case spinning.
*/
#define BLOCK_COST_2POW 4
-#ifdef MALLOC_BALANCE
- /*
- * We use an exponential moving average to track recent lock contention,
- * where the size of the history window is N, and alpha=2/(N+1).
- *
- * Due to integer math rounding, very small values here can cause
- * substantial degradation in accuracy, thus making the moving average decay
- * faster than it would with precise calculation.
- */
-# define BALANCE_ALPHA_INV_2POW 9
-
- /*
- * Threshold value for the exponential moving contention average at which to
- * re-assign a thread.
- */
-# define BALANCE_THRESHOLD_DEFAULT (1U << (SPIN_LIMIT_2POW-4))
-#endif
-
/******************************************************************************/
/* MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive. */
#if defined(MALLOC_DECOMMIT) && defined(MALLOC_DOUBLE_PURGE)
#error MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive.
#endif
/*
@@ -731,21 +698,16 @@ struct arena_stats_s {
/* Per-size-category statistics. */
size_t allocated_small;
uint64_t nmalloc_small;
uint64_t ndalloc_small;
size_t allocated_large;
uint64_t nmalloc_large;
uint64_t ndalloc_large;
-
-#ifdef MALLOC_BALANCE
- /* Number of times this arena reassigned a thread due to contention. */
- uint64_t nbalance;
-#endif
};
#endif /* #ifdef MALLOC_STATS */
/******************************************************************************/
/*
* Extent data structures.
*/
@@ -1020,24 +982,16 @@ struct arena_s {
size_t ndirty;
/*
* Size/address-ordered tree of this arena's available runs. This tree
* is used for first-best-fit run allocation.
*/
arena_avail_tree_t runs_avail;
-#ifdef MALLOC_BALANCE
- /*
- * The arena load balancing machinery needs to keep track of how much
- * lock contention there is. This value is exponentially averaged.
- */
- uint32_t contention;
-#endif
-
/*
* bins is used to store rings of free regions of the following sizes,
* assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
*
* bins[i] | size |
* --------+------+
* 0 | 2 |
* 1 | 4 |
@@ -1266,21 +1220,17 @@ static size_t base_committed;
/*
* Arenas that are used to service external requests. Not all elements of the
* arenas array are necessarily used; arenas are created lazily as needed.
*/
static arena_t **arenas;
static unsigned narenas;
#ifndef NO_TLS
-# ifdef MALLOC_BALANCE
-static unsigned narenas_2pow;
-# else
static unsigned next_arena;
-# endif
#endif
#ifdef MOZ_MEMORY
static malloc_spinlock_t arenas_lock; /* Protects arenas initialization. */
#else
static pthread_mutex_t arenas_lock; /* Protects arenas initialization. */
#endif
#ifndef NO_TLS
@@ -1312,19 +1262,16 @@ static bool opt_abort = false;
#ifdef MALLOC_FILL
static const bool opt_junk = false;
static const bool opt_poison = true;
static const bool opt_zero = false;
#endif
#endif
static size_t opt_dirty_max = DIRTY_MAX_DEFAULT;
-#ifdef MALLOC_BALANCE
-static uint64_t opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT;
-#endif
static bool opt_print_stats = false;
#ifdef MALLOC_STATIC_SIZES
#define opt_quantum_2pow QUANTUM_2POW_MIN
#define opt_small_max_2pow SMALL_MAX_2POW_DEFAULT
#define opt_chunk_2pow CHUNK_2POW_DEFAULT
#else
static size_t opt_quantum_2pow = QUANTUM_2POW_MIN;
static size_t opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
@@ -1409,19 +1356,16 @@ static void arena_purge(arena_t *arena,
static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
arena_run_t *run, size_t oldsize, size_t newsize);
static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
-#ifdef MALLOC_BALANCE
-static void arena_lock_balance_hard(arena_t *arena);
-#endif
static void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
static void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
size_t alloc_size);
static size_t arena_salloc(const void *ptr);
static void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
void *ptr);
static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
void *ptr, size_t size, size_t oldsize);
@@ -1831,66 +1775,16 @@ pow2_ceil(size_t x)
x |= x >> 16;
#if (SIZEOF_PTR == 8)
x |= x >> 32;
#endif
x++;
return (x);
}
-#ifdef MALLOC_BALANCE
-/*
- * Use a simple linear congruential pseudo-random number generator:
- *
- * prn(y) = (a*x + c) % m
- *
- * where the following constants ensure maximal period:
- *
- * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
- * c == Odd number (relatively prime to 2^n).
- * m == 2^32
- *
- * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
- *
- * This choice of m has the disadvantage that the quality of the bits is
- * proportional to bit position. For example. the lowest bit has a cycle of 2,
- * the next has a cycle of 4, etc. For this reason, we prefer to use the upper
- * bits.
- */
-# define PRN_DEFINE(suffix, var, a, c) \
-static inline void \
-sprn_##suffix(uint32_t seed) \
-{ \
- var = seed; \
-} \
- \
-static inline uint32_t \
-prn_##suffix(uint32_t lg_range) \
-{ \
- uint32_t ret, x; \
- \
- assert(lg_range > 0); \
- assert(lg_range <= 32); \
- \
- x = (var * (a)) + (c); \
- var = x; \
- ret = x >> (32 - lg_range); \
- \
- return (ret); \
-}
-# define SPRN(suffix, seed) sprn_##suffix(seed)
-# define PRN(suffix, lg_range) prn_##suffix(lg_range)
-#endif
-
-#ifdef MALLOC_BALANCE
-/* Define the PRNG used for arena assignment. */
-static __thread uint32_t balance_x;
-PRN_DEFINE(balance, balance_x, 1297, 1301)
-#endif
-
#ifdef MALLOC_UTRACE
static int
utrace(const void *addr, size_t len)
{
malloc_utrace_t *ut = (malloc_utrace_t *)addr;
char buf_a[UMAX2S_BUFSIZE];
char buf_b[UMAX2S_BUFSIZE];
@@ -3152,39 +3046,22 @@ choose_arena(void)
*/
static arena_t *
choose_arena_hard(void)
{
arena_t *ret;
assert(isthreaded);
-#ifdef MALLOC_BALANCE
- /* Seed the PRNG used for arena load balancing. */
- SPRN(balance, (uint32_t)(uintptr_t)(_pthread_self()));
-#endif
-
if (narenas > 1) {
-#ifdef MALLOC_BALANCE
- unsigned ind;
-
- ind = PRN(balance, narenas_2pow);
- if ((ret = arenas[ind]) == NULL) {
- malloc_spin_lock(&arenas_lock);
- if ((ret = arenas[ind]) == NULL)
- ret = arenas_extend(ind);
- malloc_spin_unlock(&arenas_lock);
- }
-#else
malloc_spin_lock(&arenas_lock);
if ((ret = arenas[next_arena]) == NULL)
ret = arenas_extend(next_arena);
next_arena = (next_arena + 1) % narenas;
malloc_spin_unlock(&arenas_lock);
-#endif
} else
ret = arenas[0];
#ifdef MOZ_MEMORY_WINDOWS
TlsSetValue(tlsIndex, ret);
#elif defined(MOZ_MEMORY_DARWIN)
pthread_setspecific(tlsIndex, ret);
#else
@@ -4110,79 +3987,16 @@ arena_bin_run_size_calc(arena_bin_t *bin
bin->run_size = good_run_size;
bin->nregs = good_nregs;
bin->regs_mask_nelms = good_mask_nelms;
bin->reg0_offset = good_reg0_offset;
return (good_run_size);
}
-#ifdef MALLOC_BALANCE
-static inline void
-arena_lock_balance(arena_t *arena)
-{
- unsigned contention;
-
- contention = malloc_spin_lock(&arena->lock);
- if (narenas > 1) {
- /*
- * Calculate the exponentially averaged contention for this
- * arena. Due to integer math always rounding down, this value
- * decays somewhat faster then normal.
- */
- arena->contention = (((uint64_t)arena->contention
- * (uint64_t)((1U << BALANCE_ALPHA_INV_2POW)-1))
- + (uint64_t)contention) >> BALANCE_ALPHA_INV_2POW;
- if (arena->contention >= opt_balance_threshold)
- arena_lock_balance_hard(arena);
- }
-}
-
-static void
-arena_lock_balance_hard(arena_t *arena)
-{
- uint32_t ind;
-
- arena->contention = 0;
-#ifdef MALLOC_STATS
- arena->stats.nbalance++;
-#endif
- ind = PRN(balance, narenas_2pow);
- if (arenas[ind] != NULL) {
-#ifdef MOZ_MEMORY_WINDOWS
- TlsSetValue(tlsIndex, arenas[ind]);
-#elif defined(MOZ_MEMORY_DARWIN)
- pthread_setspecific(tlsIndex, arenas[ind]);
-#else
- arenas_map = arenas[ind];
-#endif
- } else {
- malloc_spin_lock(&arenas_lock);
- if (arenas[ind] != NULL) {
-#ifdef MOZ_MEMORY_WINDOWS
- TlsSetValue(tlsIndex, arenas[ind]);
-#elif defined(MOZ_MEMORY_DARWIN)
- pthread_setspecific(tlsIndex, arenas[ind]);
-#else
- arenas_map = arenas[ind];
-#endif
- } else {
-#ifdef MOZ_MEMORY_WINDOWS
- TlsSetValue(tlsIndex, arenas_extend(ind));
-#elif defined(MOZ_MEMORY_DARWIN)
- pthread_setspecific(tlsIndex, arenas_extend(ind));
-#else
- arenas_map = arenas_extend(ind);
-#endif
- }
- malloc_spin_unlock(&arenas_lock);
- }
-}
-#endif
-
static inline void *
arena_malloc_small(arena_t *arena, size_t size, bool zero)
{
void *ret;
arena_bin_t *bin;
arena_run_t *run;
if (size < small_min) {
@@ -4207,21 +4021,17 @@ arena_malloc_small(arena_t *arena, size_
} else {
/* Sub-page. */
size = pow2_ceil(size);
bin = &arena->bins[ntbins + nqbins
+ (ffs((int)(size >> opt_small_max_2pow)) - 2)];
}
RELEASE_ASSERT(size == bin->reg_size);
-#ifdef MALLOC_BALANCE
- arena_lock_balance(arena);
-#else
malloc_spin_lock(&arena->lock);
-#endif
if ((run = bin->runcur) != NULL && run->nfree > 0)
ret = arena_bin_malloc_easy(arena, bin, run);
else
ret = arena_bin_malloc_hard(arena, bin);
if (ret == NULL) {
malloc_spin_unlock(&arena->lock);
return (NULL);
@@ -4249,21 +4059,17 @@ arena_malloc_small(arena_t *arena, size_
static void *
arena_malloc_large(arena_t *arena, size_t size, bool zero)
{
void *ret;
/* Large allocation. */
size = PAGE_CEILING(size);
-#ifdef MALLOC_BALANCE
- arena_lock_balance(arena);
-#else
malloc_spin_lock(&arena->lock);
-#endif
ret = (void *)arena_run_alloc(arena, NULL, size, true, zero);
if (ret == NULL) {
malloc_spin_unlock(&arena->lock);
return (NULL);
}
#ifdef MALLOC_STATS
arena->stats.nmalloc_large++;
arena->stats.allocated_large += size;
@@ -4325,21 +4131,17 @@ arena_palloc(arena_t *arena, size_t alig
{
void *ret;
size_t offset;
arena_chunk_t *chunk;
assert((size & pagesize_mask) == 0);
assert((alignment & pagesize_mask) == 0);
-#ifdef MALLOC_BALANCE
- arena_lock_balance(arena);
-#else
malloc_spin_lock(&arena->lock);
-#endif
ret = (void *)arena_run_alloc(arena, NULL, alloc_size, true, false);
if (ret == NULL) {
malloc_spin_unlock(&arena->lock);
return (NULL);
}
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
@@ -4750,41 +4552,33 @@ arena_ralloc_large_shrink(arena_t *arena
{
assert(size < oldsize);
/*
* Shrink the run, and make trailing pages available for other
* allocations.
*/
-#ifdef MALLOC_BALANCE
- arena_lock_balance(arena);
-#else
malloc_spin_lock(&arena->lock);
-#endif
arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
true);
#ifdef MALLOC_STATS
arena->stats.allocated_large -= oldsize - size;
#endif
malloc_spin_unlock(&arena->lock);
}
static bool
arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
size_t size, size_t oldsize)
{
size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow;
size_t npages = oldsize >> pagesize_2pow;
-#ifdef MALLOC_BALANCE
- arena_lock_balance(arena);
-#else
malloc_spin_lock(&arena->lock);
-#endif
RELEASE_ASSERT(oldsize == (chunk->map[pageind].bits & ~pagesize_mask));
/* Try to extend the run. */
assert(size > oldsize);
if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits
& CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits &
~pagesize_mask) >= size - oldsize) {
/*
@@ -4956,20 +4750,16 @@ arena_new(arena_t *arena)
LinkedList_Init(&arena->chunks_madvised);
#endif
arena->spare = NULL;
arena->ndirty = 0;
arena_avail_tree_new(&arena->runs_avail);
-#ifdef MALLOC_BALANCE
- arena->contention = 0;
-#endif
-
/* Initialize bins. */
prev_run_size = pagesize;
/* (2^n)-spaced tiny bins. */
for (i = 0; i < ntbins; i++) {
bin = &arena->bins[i];
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
@@ -5422,20 +5212,16 @@ malloc_print_stats(void)
#endif
_malloc_message("\n", "", "", "");
#ifndef MOZ_MEMORY_NARENAS_DEFAULT_ONE
_malloc_message("CPUs: ", umax2s(ncpus, 10, s), "\n", "");
#endif
_malloc_message("Max arenas: ", umax2s(narenas, 10, s), "\n",
"");
-#ifdef MALLOC_BALANCE
- _malloc_message("Arena balance threshold: ",
- umax2s(opt_balance_threshold, 10, s), "\n", "");
-#endif
_malloc_message("Pointer size: ", umax2s(sizeof(void *), 10, s),
"\n", "");
_malloc_message("Quantum size: ", umax2s(quantum, 10, s), "\n",
"");
_malloc_message("Max small size: ", umax2s(small_max, 10, s),
"\n", "");
_malloc_message("Max dirty pages per arena: ",
umax2s(opt_dirty_max, 10, s), "\n", "");
@@ -5443,36 +5229,30 @@ malloc_print_stats(void)
_malloc_message("Chunk size: ", umax2s(chunksize, 10, s), "",
"");
_malloc_message(" (2^", umax2s(opt_chunk_2pow, 10, s), ")\n",
"");
#ifdef MALLOC_STATS
{
size_t allocated, mapped = 0;
-#ifdef MALLOC_BALANCE
- uint64_t nbalance = 0;
-#endif
unsigned i;
arena_t *arena;
/* Calculate and print allocated/mapped stats. */
/* arenas. */
for (i = 0, allocated = 0; i < narenas; i++) {
if (arenas[i] != NULL) {
malloc_spin_lock(&arenas[i]->lock);
allocated +=
arenas[i]->stats.allocated_small;
allocated +=
arenas[i]->stats.allocated_large;
mapped += arenas[i]->stats.mapped;
-#ifdef MALLOC_BALANCE
- nbalance += arenas[i]->stats.nbalance;
-#endif
malloc_spin_unlock(&arenas[i]->lock);
}
}
/* huge/base. */
malloc_mutex_lock(&huge_mtx);
allocated += huge_allocated;
mapped += huge_mapped;
@@ -5485,21 +5265,16 @@ malloc_print_stats(void)
#ifdef MOZ_MEMORY_WINDOWS
malloc_printf("Allocated: %lu, mapped: %lu\n",
allocated, mapped);
#else
malloc_printf("Allocated: %zu, mapped: %zu\n",
allocated, mapped);
#endif
-#ifdef MALLOC_BALANCE
- malloc_printf("Arena balance reassignments: %llu\n",
- nbalance);
-#endif
-
/* Print chunk stats. */
malloc_printf(
"huge: nmalloc ndalloc allocated\n");
#ifdef MOZ_MEMORY_WINDOWS
malloc_printf(" %12llu %12llu %12lu\n",
huge_nmalloc, huge_ndalloc, huge_allocated);
#else
malloc_printf(" %12llu %12llu %12zu\n",
@@ -5697,30 +5472,16 @@ MALLOC_OUT:
for (k = 0; k < nreps; k++) {
switch (opts[j]) {
case 'a':
opt_abort = false;
break;
case 'A':
opt_abort = true;
break;
- case 'b':
-#ifdef MALLOC_BALANCE
- opt_balance_threshold >>= 1;
-#endif
- break;
- case 'B':
-#ifdef MALLOC_BALANCE
- if (opt_balance_threshold == 0)
- opt_balance_threshold = 1;
- else if ((opt_balance_threshold << 1)
- > opt_balance_threshold)
- opt_balance_threshold <<= 1;
-#endif
- break;
#ifdef MALLOC_FILL
#ifndef MALLOC_PRODUCTION
case 'c':
opt_poison = false;
break;
case 'C':
opt_poison = true;
break;
@@ -5952,22 +5713,16 @@ MALLOC_OUT:
narenas = chunksize / sizeof(arena_t *);
} else if (opt_narenas_lshift < 0) {
if ((narenas >> -opt_narenas_lshift) < narenas)
narenas >>= -opt_narenas_lshift;
/* Make sure there is at least one arena. */
if (narenas == 0)
narenas = 1;
}
-#ifdef MALLOC_BALANCE
- assert(narenas != 0);
- for (narenas_2pow = 0;
- (narenas >> (narenas_2pow + 1)) != 0;
- narenas_2pow++);
-#endif
#ifdef NO_TLS
if (narenas > 1) {
static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
223, 227, 229, 233, 239, 241, 251, 257, 263};
@@ -5987,19 +5742,17 @@ MALLOC_OUT:
break;
}
}
narenas = parenas;
}
#endif
#ifndef NO_TLS
-# ifndef MALLOC_BALANCE
next_arena = 0;
-# endif
#endif
/* Allocate and initialize arenas. */
arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
if (arenas == NULL) {
#ifndef MOZ_MEMORY_WINDOWS
malloc_mutex_unlock(&init_lock);
#endif
@@ -6032,24 +5785,16 @@ MALLOC_OUT:
TlsSetValue(tlsIndex, arenas[0]);
#elif defined(MOZ_MEMORY_DARWIN)
pthread_setspecific(tlsIndex, arenas[0]);
#else
arenas_map = arenas[0];
#endif
#endif
- /*
- * Seed here for the initial thread, since choose_arena_hard() is only
- * called for other threads. The seed value doesn't really matter.
- */
-#ifdef MALLOC_BALANCE
- SPRN(balance, 42);
-#endif
-
malloc_spin_init(&arenas_lock);
#ifdef MALLOC_VALIDATE
chunk_rtree = malloc_rtree_new((SIZEOF_PTR << 3) - opt_chunk_2pow);
if (chunk_rtree == NULL)
return (true);
#endif
@@ -6506,23 +6251,16 @@ jemalloc_stats_impl(jemalloc_stats_t *st
#endif
false;
stats->opt_zero =
#ifdef MALLOC_FILL
opt_zero ? true :
#endif
false;
stats->narenas = narenas;
- stats->balance_threshold =
-#ifdef MALLOC_BALANCE
- opt_balance_threshold
-#else
- SIZE_T_MAX
-#endif
- ;
stats->quantum = quantum;
stats->small_max = small_max;
stats->large_max = arena_maxclass;
stats->chunksize = chunksize;
stats->dirty_max = opt_dirty_max;
/*
* Gather current memory usage statistics.