Bug 1414168 - Demystify the last test in the main arena_bin_run_size_calc loop. r?njn draft
authorMike Hommey <mh+mozilla@glandium.org>
Tue, 07 Nov 2017 08:55:37 +0900
changeset 695333 bb4072ba1df5b081fb2d1442c5b0eac34e532057
parent 695332 2976915982866a134f88d071a895c1b6a78eb7fe
child 695334 ea74190f3f52016a5a1e2b2481ee8ef2dac8e267
push id88395
push userbmo:mh+mozilla@glandium.org
push dateThu, 09 Nov 2017 03:01:34 +0000
reviewersnjn
bugs1414168
milestone58.0a1
Bug 1414168 - Demystify the last test in the main arena_bin_run_size_calc loop. r?njn The description above the RUN_* constant definitions talks about binary fixed point math, which is one way to look at the problem, but a clearer one is to look at it as comparing ratios in a way that doesn't use divisions. So, starting from the current expression: (try_reg0_offset << RUN_BFP) <= RUN_MAX_OVRHD * try_run_size This can be rewritten as try_reg0_offset * (1 << RUN_BFP) <= RUN_MAX_OVRHD * try_run_size Dividing both sides with ((1 << RUN_BFP) * try_run_size), and simplifying, gives us: try_reg0_offset / try_run_size <= RUN_MAX_OVRHD / (1 << RUN_BFP) Replacing the constants: try_reg0_offset / try_run_size <= 0x3d / (1 << 12) or try_reg0_offset / try_run_size <= 61 / 4096 61 / 4096 is roughly 1.5%. So what the check really intends to do is check that the overhead is below 1.5%. So we introduce a helper class and a user-defined literal that makes the test more self-descriptive, while producing identical machine code. This is a lot of code to add, but I think it's one of those cases where abstraction can help make the code clearer.
memory/build/Utils.h
memory/build/mozjemalloc.cpp
--- a/memory/build/Utils.h
+++ b/memory/build/Utils.h
@@ -2,16 +2,17 @@
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef Utils_h
 #define Utils_h
 
+#include "mozilla/CheckedInt.h"
 #include "mozilla/TemplateLib.h"
 
 // Helper for log2 of powers of 2 at compile time.
 template<size_t N>
 struct Log2 : mozilla::tl::CeilingLog2<N>
 {
   using mozilla::tl::CeilingLog2<N>::value;
   static_assert(1ULL << value == N, "Number is not a power of 2");
@@ -36,9 +37,91 @@ constexpr unsigned long long int operato
   return aNum * 1024;
 }
 
 constexpr unsigned long long int operator"" _MiB(unsigned long long int aNum)
 {
   return aNum * 1024_KiB;
 }
 
+constexpr long double operator""_percent(long double aPercent)
+{
+  return aPercent / 100;
+}
+
+// Helper for (fast) comparison of fractions without involving divisions or
+// floats.
+class Fraction
+{
+public:
+  explicit constexpr Fraction(size_t aNumerator, size_t aDenominator)
+    : mNumerator(aNumerator)
+    , mDenominator(aDenominator)
+  {
+  }
+
+  MOZ_IMPLICIT constexpr Fraction(long double aValue)
+    // We use an arbitrary power of two as denominator that provides enough
+    // precision for our use case.
+    : mNumerator(aValue * 4096)
+    , mDenominator(4096)
+  {
+  }
+
+  inline bool operator<(const Fraction& aOther) const
+  {
+#ifndef MOZ_DEBUG
+    // We are comparing A / B < C / D, with all A, B, C and D being positive
+    // numbers. Multiplying both sides with B * D, we have:
+    // (A * B * D) / B < (C * B * D) / D, which can then be simplified as
+    // A * D < C * B. When can thus compare our fractions without actually
+    // doing any division.
+    // This however assumes the multiplied quantities are small enough not
+    // to overflow the multiplication. We use CheckedInt on debug builds
+    // to enforce the assumption.
+    return mNumerator * aOther.mDenominator < aOther.mNumerator * mDenominator;
+#else
+    mozilla::CheckedInt<size_t> numerator(mNumerator);
+    mozilla::CheckedInt<size_t> denominator(mDenominator);
+    // value() asserts when the multiplication overflowed.
+    size_t lhs = (numerator * aOther.mDenominator).value();
+    size_t rhs = (aOther.mNumerator * denominator).value();
+    return lhs < rhs;
 #endif
+  }
+
+  inline bool operator>(const Fraction& aOther) const { return aOther < *this; }
+
+  inline bool operator>=(const Fraction& aOther) const
+  {
+    return !(*this < aOther);
+  }
+
+  inline bool operator<=(const Fraction& aOther) const
+  {
+    return !(*this > aOther);
+  }
+
+  inline bool operator==(const Fraction& aOther) const
+  {
+#ifndef MOZ_DEBUG
+    // Same logic as operator<
+    return mNumerator * aOther.mDenominator == aOther.mNumerator * mDenominator;
+#else
+    mozilla::CheckedInt<size_t> numerator(mNumerator);
+    mozilla::CheckedInt<size_t> denominator(mDenominator);
+    size_t lhs = (numerator * aOther.mDenominator).value();
+    size_t rhs = (aOther.mNumerator * denominator).value();
+    return lhs == rhs;
+#endif
+  }
+
+  inline bool operator!=(const Fraction& aOther) const
+  {
+    return !(*this == aOther);
+  }
+
+private:
+  size_t mNumerator;
+  size_t mDenominator;
+};
+
+#endif
--- a/memory/build/mozjemalloc.cpp
+++ b/memory/build/mozjemalloc.cpp
@@ -898,16 +898,18 @@ struct arena_bin_t
   // Number of elements in a run's regs_mask for this bin's size class.
   uint32_t mRunNumRegionsMask;
 
   // Offset of first region in a run for this bin's size class.
   uint32_t mRunFirstRegionOffset;
 
   // Current number of runs in this bin, full or otherwise.
   unsigned long mNumRuns;
+
+  static constexpr long double kRunOverhead = 1.5_percent;
 };
 
 struct arena_t
 {
 #if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
   uint32_t mMagic;
 #define ARENA_MAGIC 0x947d3d24
 #endif
@@ -2952,17 +2954,17 @@ arena_t::MallocBinHard(arena_bin_t* aBin
   return MallocBinEasy(aBin, aBin->mCurrentRun);
 }
 
 // Calculate bin->mRunSize such that it meets the following constraints:
 //
 //   *) bin->mRunSize >= min_run_size
 //   *) bin->mRunSize <= gMaxLargeClass
 //   *) bin->mRunSize <= gMaxBinClass
-//   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
+//   *) run header overhead <= kRunOverhead
 //
 // bin->mRunNumRegions, bin->mRunNumRegionsMask, and bin->mRunFirstRegionOffset are
 // also calculated here, since these settings are all interdependent.
 static size_t
 arena_bin_run_size_calc(arena_bin_t* bin, size_t min_run_size)
 {
   size_t try_run_size, good_run_size;
   unsigned good_nregs, good_mask_nelms, good_reg0_offset;
@@ -3017,18 +3019,18 @@ arena_bin_run_size_calc(arena_bin_t* bin
       break;
     }
 
     // This doesn't match the comment above RUN_MAX_OVRHD_RELAX.
     if (RUN_MAX_OVRHD * (bin->mSizeClass << 3) <= RUN_MAX_OVRHD_RELAX) {
       break;
     }
 
-    // Try to keep the run overhead below RUN_MAX_OVRHD.
-    if ((try_reg0_offset << RUN_BFP) <= RUN_MAX_OVRHD * try_run_size) {
+    // Try to keep the run overhead below kRunOverhead.
+    if (Fraction(try_reg0_offset, try_run_size) <= arena_bin_t::kRunOverhead) {
       break;
     }
   }
 
   MOZ_ASSERT(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1)) <=
              good_reg0_offset);
   MOZ_ASSERT((good_mask_nelms << (LOG2(sizeof(int)) + 3)) >= good_nregs);