Bug 1336027 - wasm baseline, refactor registers and register allocation, r?bbouvier draft
author      Lars T Hansen <lhansen@mozilla.com>
date        Thu, 12 Oct 2017 15:44:46 +0200
changeset   697052  e46622ad704cc9d5f7582ec0b8faa2058908f3b6
parent      697051  bf43da2af9495c72690013a7129df723b0057a2e
child       739999  71477936f50abc1891c15f8d9d4d9b595a20bdd6
push id     88872
push user   bmo:lhansen@mozilla.com
push date   Mon, 13 Nov 2017 09:11:14 +0000
reviewers   bbouvier
bugs        1336027
milestone   59.0a1
Bug 1336027 - wasm baseline, refactor registers and register allocation, r?bbouvier MozReview-Commit-ID: J7NawzsOJ1x
js/src/jit/RegisterAllocator.h
js/src/wasm/WasmBaselineCompile.cpp
--- a/js/src/jit/RegisterAllocator.h
+++ b/js/src/jit/RegisterAllocator.h
@@ -276,24 +276,17 @@ class RegisterAllocator
 
     RegisterAllocator(MIRGenerator* mir, LIRGenerator* lir, LIRGraph& graph)
       : mir(mir),
         lir(lir),
         graph(graph),
         allRegisters_(RegisterSet::All())
     {
         if (mir->compilingWasm()) {
-#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \
-    defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
-            allRegisters_.take(AnyRegister(HeapReg));
-#elif defined(JS_CODEGEN_ARM64)
-            allRegisters_.take(AnyRegister(HeapReg));
-            allRegisters_.take(AnyRegister(HeapLenReg));
-#endif
-            allRegisters_.take(FramePointer);
+            takeWasmRegisters(allRegisters_);
         } else {
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64)
             if (mir->instrumentedProfiling())
                 allRegisters_.take(AnyRegister(FramePointer));
 #endif
         }
     }
 
@@ -354,16 +347,29 @@ class RegisterAllocator
                 break;
             ins = next;
         }
 
         return outputOf(ins);
     }
 
     void dumpInstructions();
+
+  public:
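+    // Remove the registers that wasm code reserves globally (HeapReg and
+    // friends, plus FramePointer) from 'regs'.  This is a template so it can
+    // be applied both to the register allocator's own RegisterSet (above) and
+    // to the wasm baseline compiler's allocatable register sets.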
+    template<typename TakeableSet>
+    static void takeWasmRegisters(TakeableSet& regs) {
+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \
+    defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+        regs.take(HeapReg);
+#elif defined(JS_CODEGEN_ARM64)
+        regs.take(HeapReg);
+        regs.take(HeapLenReg);
+#endif
+        regs.take(FramePointer);
+    }
 };
 
 static inline AnyRegister
 GetFixedRegister(const LDefinition* def, const LUse* use)
 {
     return def->isFloatReg()
            ? AnyRegister(FloatRegister::FromCode(use->registerCode()))
            : AnyRegister(Register::FromCode(use->registerCode()));
--- a/js/src/wasm/WasmBaselineCompile.cpp
+++ b/js/src/wasm/WasmBaselineCompile.cpp
@@ -70,23 +70,25 @@
  *   that should be assigned to registers.  Or something like that.  Wasm makes
  *   this simple.  Static assignments are desirable because they are not flushed
  *   to memory by the pre-block sync() call.)
  */
 
 #include "wasm/WasmBaselineCompile.h"
 
 #include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
 
 #include "jit/AtomicOp.h"
 #include "jit/IonTypes.h"
 #include "jit/JitAllocPolicy.h"
 #include "jit/Label.h"
 #include "jit/MacroAssembler.h"
 #include "jit/MIR.h"
+#include "jit/RegisterAllocator.h"
 #include "jit/Registers.h"
 #include "jit/RegisterSets.h"
 #if defined(JS_CODEGEN_ARM)
 # include "jit/arm/Assembler-arm.h"
 #endif
 #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
 # include "jit/x86-shared/Architecture-x86-shared.h"
 # include "jit/x86-shared/Assembler-x86-shared.h"
@@ -98,16 +100,17 @@
 #include "wasm/WasmValidate.h"
 
 #include "jit/MacroAssembler-inl.h"
 
 using mozilla::DebugOnly;
 using mozilla::FloatingPoint;
 using mozilla::FloorLog2;
 using mozilla::IsPowerOfTwo;
+using mozilla::Maybe;
 using mozilla::SpecificNaN;
 
 namespace js {
 namespace wasm {
 
 using namespace js::jit;
 using JS::GenericNaN;
 
@@ -293,180 +296,522 @@ BaseLocalIter::operator++(int)
 {
     MOZ_ASSERT(!done_);
     index_++;
     if (!argsIter_.done())
         argsIter_++;
     settle();
 }
 
-class BaseCompiler
-{
-    // We define our own ScratchRegister abstractions, deferring to
-    // the platform's when possible.
+// The strongly typed register wrappers are especially useful to distinguish
+// float registers from double registers.
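+//
+// For example, since the wrappers are distinct types with explicit
+// constructors, passing a RegF32 where a RegF64 is expected is a
+// compile-time error rather than a silently mis-sized access.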
+
+struct RegI32 : public Register
+{
+    RegI32() : Register(Register::Invalid()) {}
+    explicit RegI32(Register reg) : Register(reg) {}
+};
+
+struct RegI64 : public Register64
+{
+    RegI64() : Register64(Register64::Invalid()) {}
+    explicit RegI64(Register64 reg) : Register64(reg) {}
+};
+
+struct RegF32 : public FloatRegister
+{
+    RegF32() : FloatRegister() {}
+    explicit RegF32(FloatRegister reg) : FloatRegister(reg) {}
+};
+
+struct RegF64 : public FloatRegister
+{
+    RegF64() : FloatRegister() {}
+    explicit RegF64(FloatRegister reg) : FloatRegister(reg) {}
+};
+
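+// AnyReg is a tagged union of the four wrappers, for type-generic paths such
+// as the load() and store() helpers defined later.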
+struct AnyReg
+{
+    explicit AnyReg(RegI32 r) { tag = I32; i32_ = r; }
+    explicit AnyReg(RegI64 r) { tag = I64; i64_ = r; }
+    explicit AnyReg(RegF32 r) { tag = F32; f32_ = r; }
+    explicit AnyReg(RegF64 r) { tag = F64; f64_ = r; }
+
+    RegI32 i32() const {
+        MOZ_ASSERT(tag == I32);
+        return i32_;
+    }
+    RegI64 i64() const {
+        MOZ_ASSERT(tag == I64);
+        return i64_;
+    }
+    RegF32 f32() const {
+        MOZ_ASSERT(tag == F32);
+        return f32_;
+    }
+    RegF64 f64() const {
+        MOZ_ASSERT(tag == F64);
+        return f64_;
+    }
+    AnyRegister any() const {
+        switch (tag) {
+          case F32: return AnyRegister(f32_);
+          case F64: return AnyRegister(f64_);
+          case I32: return AnyRegister(i32_);
+          case I64:
+#ifdef JS_PUNBOX64
+            return AnyRegister(i64_.reg);
+#else
+            // The compiler is written so that this is never needed: any() is
+            // called on arbitrary registers for asm.js but asm.js does not have
+            // 64-bit ints.  For wasm, any() is called on arbitrary registers
+            // only on 64-bit platforms.
+            MOZ_CRASH("AnyReg::any() on 32-bit platform");
+#endif
+          default:
+            MOZ_CRASH();
+        }
+        // Work around GCC 5 analysis/warning bug.
+        MOZ_CRASH("AnyReg::any(): impossible case");
+    }
+
+    union {
+        RegI32 i32_;
+        RegI64 i64_;
+        RegF32 f32_;
+        RegF64 f64_;
+    };
+    enum { I32, I64, F32, F64 } tag;
+};
+
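+// The baseline compiler proper is defined at the end of this file; this
+// interface lets the register allocator call back into it (to sync the value
+// stack) without a circular class dependency.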
+class BaseCompilerInterface
+{
+  public:
+    // Spill all spillable registers.
+    //
+    // TODO / OPTIMIZE (Bug 1316802): It's possible to do better here by
+    // spilling only enough registers to satisfy current needs.
+    virtual void sync() = 0;
+};
+
+// Register allocator.
+
+class BaseRegAlloc
+{
+    // Notes on float register allocation.
+    //
+    // The general rule in SpiderMonkey is that float registers can alias double
+    // registers, but there are predicates to handle exceptions to that rule:
+    // hasUnaliasedDouble() and hasMultiAlias().  The way aliasing actually
+    // works is platform dependent and exposed through the aliased(n, &r)
+    // predicate, etc.
+    //
+    //  - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double registers that
+    //    cannot be treated as float.
+    //  - hasMultiAlias(): on ARM and MIPS a double register aliases two float
+    //    registers.
+    //
+    // On some platforms (x86, x64, ARM64) but not all (ARM)
+    // ScratchFloat32Register is the same as ScratchDoubleRegister.
+    //
+    // It's a basic invariant of the AllocatableRegisterSet that it deals
+    // properly with aliasing of registers: if s0 or s1 are allocated then d0 is
+    // not allocatable; if s0 and s1 are freed individually then d0 becomes
+    // allocatable.
+
+    BaseCompilerInterface&        bc;
+    AllocatableGeneralRegisterSet availGPR;
+    AllocatableFloatRegisterSet   availFPU;
+#ifdef DEBUG
+    AllocatableGeneralRegisterSet allGPR;       // The registers available to the compiler
+    AllocatableFloatRegisterSet   allFPU;       //   after removing ScratchReg, HeapReg, etc
+    AllocatableGeneralRegisterSet knownGPR;     // Union of free registers and registers on
+    AllocatableFloatRegisterSet   knownFPU;     //   the value stack
+    bool                          scratchTaken;
+#endif
+#ifdef JS_CODEGEN_X86
+    AllocatableGeneralRegisterSet singleByteRegs;
+#endif
+
+    bool hasGPR() {
+        return !availGPR.empty();
+    }
+
+    bool hasGPR64() {
+#ifdef JS_PUNBOX64
+        return !availGPR.empty();
+#else
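+        // On 32-bit targets an int64 needs a pair of GPRs, so probe for two
+        // free registers by allocating one and checking that another remains.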
+        if (availGPR.empty())
+            return false;
+        Register r = allocGPR();
+        bool available = !availGPR.empty();
+        freeGPR(r);
+        return available;
+#endif
+    }
+
+    template<MIRType t>
+    bool hasFPU() {
+        return availFPU.hasAny<RegTypeOf<t>::value>();
+    }
+
+    bool isAvailableGPR(Register r) {
+        return availGPR.has(r);
+    }
+
+    bool isAvailableFPU(FloatRegister r) {
+        return availFPU.has(r);
+    }
+
+    void allocGPR(Register r) {
+        MOZ_ASSERT(isAvailableGPR(r));
+        availGPR.take(r);
+    }
+
+    Register allocGPR() {
+        MOZ_ASSERT(hasGPR());
+        return availGPR.takeAny();
+    }
+
+    void allocInt64(Register64 r) {
+#ifdef JS_PUNBOX64
+        MOZ_ASSERT(isAvailableGPR(r.reg));
+        availGPR.take(r.reg);
+#else
+        MOZ_ASSERT(isAvailableGPR(r.low));
+        availGPR.take(r.low);
+        MOZ_ASSERT(isAvailableGPR(r.high));
+        availGPR.take(r.high);
+#endif
+    }
+
+    Register64 allocInt64() {
+        MOZ_ASSERT(hasGPR64());
+#ifdef JS_PUNBOX64
+        return Register64(availGPR.takeAny());
+#else
+        Register high = availGPR.takeAny();
+        Register low = availGPR.takeAny();
+        return Register64(high, low);
+#endif
+    }
+
+#ifdef JS_CODEGEN_ARM
+    // r12 is normally the ScratchRegister and r13 is always the stack pointer,
+    // so the highest possible pair has r10 as the even-numbered register.
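+    // Candidate pairs are therefore (r0,r1), (r2,r3), ..., (r10,r11).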
+
+    static const uint32_t pairLimit = 10;
+
+    bool hasGPRPair() {
+        for (uint32_t i = 0; i <= pairLimit; i += 2) {
+            if (isAvailableGPR(Register::FromCode(i)) && isAvailableGPR(Register::FromCode(i + 1)))
+                return true;
+        }
+        return false;
+    }
+
+    void allocGPRPair(Register* low, Register* high) {
+        for (uint32_t i = 0; i <= pairLimit; i += 2) {
+            if (isAvailableGPR(Register::FromCode(i)) &&
+                isAvailableGPR(Register::FromCode(i + 1)))
+            {
+                *low = Register::FromCode(i);
+                *high = Register::FromCode(i + 1);
+                allocGPR(*low);
+                allocGPR(*high);
+                return;
+            }
+        }
+        MOZ_CRASH("No pair");
+    }
+#endif
+
+    void allocFPU(FloatRegister r) {
+        MOZ_ASSERT(isAvailableFPU(r));
+        availFPU.take(r);
+    }
+
+    template<MIRType t>
+    FloatRegister allocFPU() {
+        return availFPU.takeAny<RegTypeOf<t>::value>();
+    }
+
+    void freeGPR(Register r) {
+        availGPR.add(r);
+    }
+
+    void freeInt64(Register64 r) {
+#ifdef JS_PUNBOX64
+        availGPR.add(r.reg);
+#else
+        availGPR.add(r.low);
+        availGPR.add(r.high);
+#endif
+    }
+
+    void freeFPU(FloatRegister r) {
+        availFPU.add(r);
+    }
+
+  public:
+    explicit BaseRegAlloc(BaseCompilerInterface& bc)
+      : bc(bc)
+      , availGPR(GeneralRegisterSet::All())
+      , availFPU(FloatRegisterSet::All())
+#ifdef DEBUG
+      , scratchTaken(false)
+#endif
+#ifdef JS_CODEGEN_X86
+      , singleByteRegs(GeneralRegisterSet(Registers::SingleByteRegs))
+#endif
+    {
+        RegisterAllocator::takeWasmRegisters(availGPR);
+
+#if defined(JS_CODEGEN_ARM)
+        availGPR.take(ScratchRegARM);
+#elif defined(JS_CODEGEN_X86)
+        availGPR.take(ScratchRegX86);
+#endif
+
+#ifdef DEBUG
+        allGPR = availGPR;
+        allFPU = availFPU;
+#endif
+    }
+
+#ifdef DEBUG
+    bool scratchRegisterTaken() const {
+        return scratchTaken;
+    }
+
+    void setScratchRegisterTaken(bool state) {
+        scratchTaken = state;
+    }
+#endif
+
+#ifdef JS_CODEGEN_X86
+    bool isSingleByteI32(Register r) {
+        return singleByteRegs.has(r);
+    }
+#endif
+
+    bool isAvailableI32(RegI32 r) {
+        return isAvailableGPR(r);
+    }
+
+    bool isAvailableI64(RegI64 r) {
+#ifdef JS_PUNBOX64
+        return isAvailableGPR(r.reg);
+#else
+        return isAvailableGPR(r.low) && isAvailableGPR(r.high);
+#endif
+    }
+
+    bool isAvailableF32(RegF32 r) {
+        return isAvailableFPU(r);
+    }
+
+    bool isAvailableF64(RegF64 r) {
+        return isAvailableFPU(r);
+    }
+
+    // TODO / OPTIMIZE (Bug 1316802): Do not sync everything on allocation
+    // failure, only as much as we need.
+
+    MOZ_MUST_USE RegI32 needI32() {
+        if (!hasGPR())
+            bc.sync();
+        return RegI32(allocGPR());
+    }
+
+    void needI32(RegI32 specific) {
+        if (!isAvailableI32(specific))
+            bc.sync();
+        allocGPR(specific);
+    }
+
+    MOZ_MUST_USE RegI64 needI64() {
+        if (!hasGPR64())
+            bc.sync();
+        return RegI64(allocInt64());
+    }
+
+    void needI64(RegI64 specific) {
+        if (!isAvailableI64(specific))
+            bc.sync();
+        allocInt64(specific);
+    }
+
+    MOZ_MUST_USE RegF32 needF32() {
+        if (!hasFPU<MIRType::Float32>())
+            bc.sync();
+        return RegF32(allocFPU<MIRType::Float32>());
+    }
+
+    void needF32(RegF32 specific) {
+        if (!isAvailableF32(specific))
+            bc.sync();
+        allocFPU(specific);
+    }
+
+    MOZ_MUST_USE RegF64 needF64() {
+        if (!hasFPU<MIRType::Double>())
+            bc.sync();
+        return RegF64(allocFPU<MIRType::Double>());
+    }
+
+    void needF64(RegF64 specific) {
+        if (!isAvailableF64(specific))
+            bc.sync();
+        allocFPU(specific);
+    }
+
+    void freeI32(RegI32 r) {
+        freeGPR(r);
+    }
+
+    void freeI64(RegI64 r) {
+        freeInt64(r);
+    }
+
+    void freeF64(RegF64 r) {
+        freeFPU(r);
+    }
+
+    void freeF32(RegF32 r) {
+        freeFPU(r);
+    }
+
+#ifdef JS_CODEGEN_ARM
+    MOZ_MUST_USE RegI64 needI64Pair() {
+        if (!hasGPRPair())
+            bc.sync();
+        Register low, high;
+        allocGPRPair(&low, &high);
+        return RegI64(Register64(high, low));
+    }
+#endif
+
+#ifdef DEBUG
+    void startLeakCheck() {
+        knownGPR = availGPR;
+        knownFPU = availFPU;
+    }
+
+    void addKnownI32(RegI32 r) {
+        knownGPR.add(r);
+    }
+
+    void addKnownI64(RegI64 r) {
+# ifdef JS_PUNBOX64
+        knownGPR.add(r.reg);
+# else
+        knownGPR.add(r.high);
+        knownGPR.add(r.low);
+# endif
+    }
+
+    void addKnownF32(RegF32 r) {
+        knownFPU.add(r);
+    }
+
+    void addKnownF64(RegF64 r) {
+        knownFPU.add(r);
+    }
+
+    void endLeakCheck() {
+        MOZ_ASSERT(knownGPR.bits() == allGPR.bits());
+        MOZ_ASSERT(knownFPU.bits() == allFPU.bits());
+    }
+#endif
+};
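+
+// A sketch of typical use, with only the methods defined above: a need*()
+// call may sync() the value stack (freeing registers), and every allocation
+// is eventually released by a matching free*() call:
+//
+//   RegI32 r = ra.needI32();   // may spill via bc.sync()
+//   /* ... emit code that uses r ... */
+//   ra.freeI32(r);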
+
+// ScratchRegister abstractions.  We define our own, deferring to the platform's
+// when possible.
 
 #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_ARM)
-    typedef ScratchDoubleScope ScratchF64;
+typedef ScratchDoubleScope ScratchF64;
 #else
-    class ScratchF64
-    {
-      public:
-        ScratchF64(BaseCompiler& b) {}
-        operator FloatRegister() const {
-            MOZ_CRASH("BaseCompiler platform hook - ScratchF64");
-        }
-    };
+class ScratchF64
+{
+  public:
+    ScratchF64(BaseRegAlloc&) {}
+    operator FloatRegister() const {
+        MOZ_CRASH("BaseCompiler platform hook - ScratchF64");
+    }
+};
 #endif
 
 #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_ARM)
-    typedef ScratchFloat32Scope ScratchF32;
+typedef ScratchFloat32Scope ScratchF32;
 #else
-    class ScratchF32
-    {
-      public:
-        ScratchF32(BaseCompiler& b) {}
-        operator FloatRegister() const {
-            MOZ_CRASH("BaseCompiler platform hook - ScratchF32");
-        }
-    };
+class ScratchF32
+{
+  public:
+    ScratchF32(BaseRegAlloc&) {}
+    operator FloatRegister() const {
+        MOZ_CRASH("BaseCompiler platform hook - ScratchF32");
+    }
+};
 #endif
 
 #if defined(JS_CODEGEN_X64)
-    typedef ScratchRegisterScope ScratchI32;
+typedef ScratchRegisterScope ScratchI32;
 #elif defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_ARM)
-    class ScratchI32
-    {
+class ScratchI32
+{
 # ifdef DEBUG
-        BaseCompiler& bc;
-      public:
-        explicit ScratchI32(BaseCompiler& bc) : bc(bc) {
-            MOZ_ASSERT(!bc.scratchRegisterTaken());
-            bc.setScratchRegisterTaken(true);
-        }
-        ~ScratchI32() {
-            MOZ_ASSERT(bc.scratchRegisterTaken());
-            bc.setScratchRegisterTaken(false);
-        }
+    BaseRegAlloc& ra;
+  public:
+    explicit ScratchI32(BaseRegAlloc& ra) : ra(ra) {
+        MOZ_ASSERT(!ra.scratchRegisterTaken());
+        ra.setScratchRegisterTaken(true);
+    }
+    ~ScratchI32() {
+        MOZ_ASSERT(ra.scratchRegisterTaken());
+        ra.setScratchRegisterTaken(false);
+    }
 # else
-      public:
-        explicit ScratchI32(BaseCompiler& bc) {}
+  public:
+    explicit ScratchI32(BaseRegAlloc&) {}
 # endif
-        operator Register() const {
+    operator Register() const {
 # ifdef JS_CODEGEN_X86
-            return ScratchRegX86;
+        return ScratchRegX86;
 # else
-            return ScratchRegARM;
+        return ScratchRegARM;
 # endif
-        }
-    };
+    }
+};
 #else
-    class ScratchI32
-    {
-      public:
-        ScratchI32(BaseCompiler& bc) {}
-        operator Register() const {
-            MOZ_CRASH("BaseCompiler platform hook - ScratchI32");
-        }
-    };
+class ScratchI32
+{
+  public:
+    ScratchI32(BaseRegAlloc&) {}
+    operator Register() const {
+        MOZ_CRASH("BaseCompiler platform hook - ScratchI32");
+    }
+};
 #endif
 
 #if defined(JS_CODEGEN_X86)
-    // ScratchEBX is a mnemonic device: For some atomic ops we really need EBX,
-    // no other register will do.  And we would normally have to allocate that
-    // register using ScratchI32 since normally the scratch register is EBX.
-    // But the whole point of ScratchI32 is to hide that relationship.  By using
-    // the ScratchEBX alias, we document that at that point we require the
-    // scratch register to be EBX.
-    typedef ScratchI32 ScratchEBX;
-#endif
-
+// ScratchEBX is a mnemonic device: For some atomic ops we really need EBX,
+// no other register will do.  And we would normally have to allocate that
+// register using ScratchI32 since normally the scratch register is EBX.
+// But the whole point of ScratchI32 is to hide that relationship.  By using
+// the ScratchEBX alias, we document that at that point we require the
+// scratch register to be EBX.
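+//
+// (See, e.g., the Uint8 case of atomicCmpXchg below, which creates a
+// ScratchEBX and moves the replacement value into it.)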
+typedef ScratchI32 ScratchEBX;
+#endif
+
+class BaseCompiler final : public BaseCompilerInterface
+{
     typedef Vector<NonAssertingLabel, 8, SystemAllocPolicy> LabelVector;
     typedef Vector<MIRType, 8, SystemAllocPolicy> MIRTypeVector;
 
-    // The strongly typed register wrappers have saved my bacon a few
-    // times; though they are largely redundant they stay, for now.
-
-    struct RegI32 : public Register
-    {
-        RegI32() : Register(Register::Invalid()) {}
-        explicit RegI32(Register reg) : Register(reg) {}
-    };
-
-    struct RegI64 : public Register64
-    {
-        RegI64() : Register64(Register64::Invalid()) {}
-        explicit RegI64(Register64 reg) : Register64(reg) {}
-    };
-
-    struct RegF32 : public FloatRegister
-    {
-        RegF32() : FloatRegister() {}
-        explicit RegF32(FloatRegister reg) : FloatRegister(reg) {}
-    };
-
-    struct RegF64 : public FloatRegister
-    {
-        RegF64() : FloatRegister() {}
-        explicit RegF64(FloatRegister reg) : FloatRegister(reg) {}
-    };
-
-    struct AnyReg
-    {
-        AnyReg() { tag = NONE; }
-        explicit AnyReg(RegI32 r) { tag = I32; i32_ = r; }
-        explicit AnyReg(RegI64 r) { tag = I64; i64_ = r; }
-        explicit AnyReg(RegF32 r) { tag = F32; f32_ = r; }
-        explicit AnyReg(RegF64 r) { tag = F64; f64_ = r; }
-
-        RegI32 i32() {
-            MOZ_ASSERT(tag == I32);
-            return i32_;
-        }
-        RegI64 i64() {
-            MOZ_ASSERT(tag == I64);
-            return i64_;
-        }
-        RegF32 f32() {
-            MOZ_ASSERT(tag == F32);
-            return f32_;
-        }
-        RegF64 f64() {
-            MOZ_ASSERT(tag == F64);
-            return f64_;
-        }
-        AnyRegister any() {
-            switch (tag) {
-              case F32: return AnyRegister(f32_);
-              case F64: return AnyRegister(f64_);
-              case I32: return AnyRegister(i32_);
-              case I64:
-#ifdef JS_PUNBOX64
-                return AnyRegister(i64_.reg);
-#else
-                // The compiler is written so that this is never needed: any() is called
-                // on arbitrary registers for asm.js but asm.js does not have 64-bit ints.
-                // For wasm, any() is called on arbitrary registers only on 64-bit platforms.
-                MOZ_CRASH("AnyReg::any() on 32-bit platform");
-#endif
-              case NONE:
-                MOZ_CRASH("AnyReg::any() on NONE");
-            }
-            // Work around GCC 5 analysis/warning bug.
-            MOZ_CRASH("AnyReg::any(): impossible case");
-        }
-
-        union {
-            RegI32 i32_;
-            RegI64 i64_;
-            RegF32 f32_;
-            RegF64 f64_;
-        };
-        enum { NONE, I32, I64, F32, F64 } tag;
-    };
-
     struct Local
     {
         Local() : type_(MIRType::None), offs_(UINT32_MAX) {}
         Local(MIRType type, uint32_t offs) : type_(type), offs_(offs) {}
 
         void init(MIRType type_, uint32_t offs_) {
             this->type_ = type_;
             this->offs_ = offs_;
@@ -519,20 +864,16 @@ class BaseCompiler
 
         // The baseline compiler uses the iterator's control stack, attaching
         // its own control information.
         typedef Control ControlItem;
     };
 
     typedef OpIter<BaseCompilePolicy> BaseOpIter;
 
-    // Volatile registers except ReturnReg.
-
-    static LiveRegisterSet VolatileReturnGPR;
-
     // The baseline compiler will use OOL code more sparingly than
     // Baldr since our code is not high performance and frills like
     // code density and branch prediction friendliness will be less
     // important.
 
     class OutOfLineCode : public TempObject
     {
       private:
@@ -627,47 +968,40 @@ class BaseCompiler
 
     LatentOp                    latentOp_;       // Latent operation for branch (seen next)
     ValType                     latentType_;     // Operand type, if latentOp_ is true
     Assembler::Condition        latentIntCmp_;   // Comparison operator, if latentOp_ == Compare, int types
     Assembler::DoubleCondition  latentDoubleCmp_;// Comparison operator, if latentOp_ == Compare, float types
 
     FuncOffsets                 offsets_;
     MacroAssembler&             masm;            // No '_' suffix - too tedious...
-
-    AllocatableGeneralRegisterSet availGPR_;
-    AllocatableFloatRegisterSet   availFPU_;
-#ifdef DEBUG
-    bool                          scratchRegisterTaken_;
-    AllocatableGeneralRegisterSet allGPR_;       // The registers available to the compiler
-    AllocatableFloatRegisterSet   allFPU_;       //   after removing ScratchReg, HeapReg, etc
-#endif
+    BaseRegAlloc                ra;              // Ditto
 
     Vector<Local, 8, SystemAllocPolicy> localInfo_;
     Vector<OutOfLineCode*, 8, SystemAllocPolicy> outOfLine_;
 
     // On specific platforms we sometimes need to use specific registers.
 
 #ifdef JS_CODEGEN_X64
     RegI64 specific_rax;
     RegI64 specific_rcx;
     RegI64 specific_rdx;
 #endif
 
 #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
     RegI32 specific_eax;
     RegI32 specific_ecx;
     RegI32 specific_edx;
+    RegI32 specific_edi;
+    RegI32 specific_esi;
 #endif
 
 #if defined(JS_CODEGEN_X86)
     RegI64 specific_ecx_ebx;
     RegI64 specific_edx_eax;
-
-    AllocatableGeneralRegisterSet singleByteRegs_;
 #endif
 
 #if defined(JS_NUNBOX32)
     RegI64 abiReturnRegI64;
 #endif
 
     // The join registers are used to carry values out of blocks.
     // JoinRegI32 and joinRegI64 must overlap: emitBrIf and
@@ -696,25 +1030,17 @@ class BaseCompiler
 
     MOZ_MUST_USE bool emitFunction();
     void emitInitStackLocals();
 
     const SigWithId& sig() const { return *env_.funcSigs[func_.index]; }
 
     // Used by some of the ScratchRegister implementations.
     operator MacroAssembler&() const { return masm; }
-
-#ifdef DEBUG
-    bool scratchRegisterTaken() const {
-        return scratchRegisterTaken_;
-    }
-    void setScratchRegisterTaken(bool state) {
-        scratchRegisterTaken_ = state;
-    }
-#endif
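+    // Likewise used by the ScratchRegister implementations, which take a
+    // BaseRegAlloc&.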
+    operator BaseRegAlloc&() { return ra; }
 
   private:
 
     ////////////////////////////////////////////////////////////
     //
     // Out of line code management.
 
     MOZ_MUST_USE OutOfLineCode* addOutOfLineCode(OutOfLineCode* ool) {
@@ -786,176 +1112,16 @@ class BaseCompiler
 
     int32_t frameOffsetFromSlot(uint32_t slot, MIRType type) {
         MOZ_ASSERT(localInfo_[slot].type() == type);
         return localInfo_[slot].offs();
     }
 
     ////////////////////////////////////////////////////////////
     //
-    // Low-level register allocation.
-
-    bool isAvailable(Register r) {
-        return availGPR_.has(r);
-    }
-
-    bool hasGPR() {
-        return !availGPR_.empty();
-    }
-
-    void allocGPR(Register r) {
-        MOZ_ASSERT(isAvailable(r));
-        availGPR_.take(r);
-    }
-
-    Register allocGPR() {
-        MOZ_ASSERT(hasGPR());
-        return availGPR_.takeAny();
-    }
-
-    void freeGPR(Register r) {
-        availGPR_.add(r);
-    }
-
-    bool isAvailable(Register64 r) {
-#ifdef JS_PUNBOX64
-        return isAvailable(r.reg);
-#else
-        return isAvailable(r.low) && isAvailable(r.high);
-#endif
-    }
-
-    bool hasInt64() {
-#ifdef JS_PUNBOX64
-        return !availGPR_.empty();
-#else
-        if (availGPR_.empty())
-            return false;
-        Register r = allocGPR();
-        bool available = !availGPR_.empty();
-        freeGPR(r);
-        return available;
-#endif
-    }
-
-    void allocInt64(Register64 r) {
-        MOZ_ASSERT(isAvailable(r));
-#ifdef JS_PUNBOX64
-        availGPR_.take(r.reg);
-#else
-        availGPR_.take(r.low);
-        availGPR_.take(r.high);
-#endif
-    }
-
-    Register64 allocInt64() {
-        MOZ_ASSERT(hasInt64());
-#ifdef JS_PUNBOX64
-        return Register64(availGPR_.takeAny());
-#else
-        Register high = availGPR_.takeAny();
-        Register low = availGPR_.takeAny();
-        return Register64(high, low);
-#endif
-    }
-
-    void freeInt64(Register64 r) {
-#ifdef JS_PUNBOX64
-        availGPR_.add(r.reg);
-#else
-        availGPR_.add(r.low);
-        availGPR_.add(r.high);
-#endif
-    }
-
-#ifdef JS_CODEGEN_ARM
-    // r12 is normally the ScratchRegister and r13 is always the stack pointer,
-    // so the highest possible pair has r10 as the even-numbered register.
-
-    static const uint32_t pairLimit = 10;
-
-    bool hasGPRPair() {
-        for (uint32_t i = 0; i <= pairLimit; i += 2) {
-            if (isAvailable(Register::FromCode(i)) && isAvailable(Register::FromCode(i + 1)))
-                return true;
-        }
-        return false;
-    }
-
-    void allocGPRPair(Register* low, Register* high) {
-        for (uint32_t i = 0; i <= pairLimit; i += 2) {
-            if (isAvailable(Register::FromCode(i)) && isAvailable(Register::FromCode(i + 1))) {
-                *low = Register::FromCode(i);
-                *high = Register::FromCode(i + 1);
-                allocGPR(*low);
-                allocGPR(*high);
-                return;
-            }
-        }
-        MOZ_CRASH("No pair");
-    }
-#endif
-
-    // Notes on float register allocation.
-    //
-    // The general rule in SpiderMonkey is that float registers can
-    // alias double registers, but there are predicates to handle
-    // exceptions to that rule: hasUnaliasedDouble() and
-    // hasMultiAlias().  The way aliasing actually works is platform
-    // dependent and exposed through the aliased(n, &r) predicate,
-    // etc.
-    //
-    //  - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double
-    //    registers that cannot be treated as float.
-    //  - hasMultiAlias(): on ARM and MIPS a double register aliases
-    //    two float registers.
-    //  - notes in Architecture-arm.h indicate that when we use a
-    //    float register that aliases a double register we only use
-    //    the low float register, never the high float register.  I
-    //    think those notes lie, or at least are confusing.
-    //  - notes in Architecture-mips32.h suggest that the MIPS port
-    //    will use both low and high float registers except on the
-    //    Longsoon, which may be the only MIPS that's being tested, so
-    //    who knows what's working.
-    //  - SIMD is not yet implemented on ARM or MIPS so constraints
-    //    may change there.
-    //
-    // On some platforms (x86, x64, ARM64) but not all (ARM)
-    // ScratchFloat32Register is the same as ScratchDoubleRegister.
-    //
-    // It's a basic invariant of the AllocatableRegisterSet that it
-    // deals properly with aliasing of registers: if s0 or s1 are
-    // allocated then d0 is not allocatable; if s0 and s1 are freed
-    // individually then d0 becomes allocatable.
-
-    template<MIRType t>
-    bool hasFPU() {
-        return availFPU_.hasAny<RegTypeOf<t>::value>();
-    }
-
-    bool isAvailable(FloatRegister r) {
-        return availFPU_.has(r);
-    }
-
-    void allocFPU(FloatRegister r) {
-        MOZ_ASSERT(isAvailable(r));
-        availFPU_.take(r);
-    }
-
-    template<MIRType t>
-    FloatRegister allocFPU() {
-        return availFPU_.takeAny<RegTypeOf<t>::value>();
-    }
-
-    void freeFPU(FloatRegister r) {
-        availFPU_.add(r);
-    }
-
-    ////////////////////////////////////////////////////////////
-    //
     // Value stack and high-level register allocation.
     //
     // The value stack facilitates some on-the-fly register allocation
     // and immediate-constant use.  It tracks constants, latent
     // references to locals, register contents, and values on the CPU
     // stack.
     //
     // The stack can be flushed to memory using sync().  This is handy
@@ -1044,38 +1210,34 @@ class BaseCompiler
 
     Vector<Stk, 8, SystemAllocPolicy> stk_;
 
     Stk& push() {
         stk_.infallibleEmplaceBack(Stk());
         return stk_.back();
     }
 
-    Register64 invalidRegister64() {
-        return Register64::Invalid();
-    }
-
     RegI32 invalidI32() {
         return RegI32(Register::Invalid());
     }
 
     RegI64 invalidI64() {
-        return RegI64(invalidRegister64());
+        return RegI64(Register64::Invalid());
     }
 
     RegF64 invalidF64() {
         return RegF64(InvalidFloatReg);
     }
 
     RegI32 fromI64(RegI64 r) {
         return RegI32(lowPart(r));
     }
 
     RegI64 widenI32(RegI32 r) {
-        MOZ_ASSERT(!isAvailable(r));
+        MOZ_ASSERT(!isAvailableI32(r));
 #ifdef JS_PUNBOX64
         return RegI64(Register64(r));
 #else
         RegI32 high = needI32();
         return RegI64(Register64(high, r));
 #endif
     }
 
@@ -1105,113 +1267,78 @@ class BaseCompiler
     }
 
     void maybeClearHighPart(RegI64 r) {
 #ifdef JS_NUNBOX32
         masm.move32(Imm32(0), r.high);
 #endif
     }
 
-    void freeI32(RegI32 r) {
-        freeGPR(r);
-    }
-
-    void freeI64(RegI64 r) {
-        freeInt64(r);
-    }
+    bool isAvailableI32(RegI32 r) { return ra.isAvailableI32(r); }
+    bool isAvailableI64(RegI64 r) { return ra.isAvailableI64(r); }
+    bool isAvailableF32(RegF32 r) { return ra.isAvailableF32(r); }
+    bool isAvailableF64(RegF64 r) { return ra.isAvailableF64(r); }
+
+    MOZ_MUST_USE RegI32 needI32() { return ra.needI32(); }
+    MOZ_MUST_USE RegI64 needI64() { return ra.needI64(); }
+    MOZ_MUST_USE RegF32 needF32() { return ra.needF32(); }
+    MOZ_MUST_USE RegF64 needF64() { return ra.needF64(); }
+
+    void needI32(RegI32 specific) { ra.needI32(specific); }
+    void needI64(RegI64 specific) { ra.needI64(specific); }
+    void needF32(RegF32 specific) { ra.needF32(specific); }
+    void needF64(RegF64 specific) { ra.needF64(specific); }
+
+#if defined(JS_CODEGEN_ARM)
+    MOZ_MUST_USE RegI64 needI64Pair() { return ra.needI64Pair(); }
+#endif
+
+    void freeI32(RegI32 r) { ra.freeI32(r); }
+    void freeI64(RegI64 r) { ra.freeI64(r); }
+    void freeF32(RegF32 r) { ra.freeF32(r); }
+    void freeF64(RegF64 r) { ra.freeF64(r); }
 
     void freeI64Except(RegI64 r, RegI32 except) {
 #ifdef JS_PUNBOX64
         MOZ_ASSERT(r.reg == except);
 #else
         MOZ_ASSERT(r.high == except || r.low == except);
         freeI64(r);
         needI32(except);
 #endif
     }
 
-    void freeF64(RegF64 r) {
-        freeFPU(r);
-    }
-
-    void freeF32(RegF32 r) {
-        freeFPU(r);
-    }
-
-    MOZ_MUST_USE RegI32 needI32() {
-        if (!hasGPR())
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        return RegI32(allocGPR());
-    }
-
-    void needI32(RegI32 specific) {
-        if (!isAvailable(specific))
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        allocGPR(specific);
+    void maybeFreeI32(RegI32 r) {
+        if (r != invalidI32())
+            freeI32(r);
+    }
+
+    void maybeFreeI64(RegI64 r) {
+        if (r != invalidI64())
+            freeI64(r);
+    }
+
+    void needI32NoSync(RegI32 r) {
+        MOZ_ASSERT(isAvailableI32(r));
+        needI32(r);
     }
 
     // TODO / OPTIMIZE: need2xI32() can be optimized along with needI32()
     // to avoid sync(). (Bug 1316802)
 
     void need2xI32(RegI32 r0, RegI32 r1) {
         needI32(r0);
         needI32(r1);
     }
 
-    MOZ_MUST_USE RegI64 needI64() {
-        if (!hasInt64())
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        return RegI64(allocInt64());
-    }
-
-    void needI64(RegI64 specific) {
-        if (!isAvailable(specific))
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        allocInt64(specific);
-    }
-
     void need2xI64(RegI64 r0, RegI64 r1) {
         needI64(r0);
         needI64(r1);
     }
 
-#ifdef JS_CODEGEN_ARM
-    MOZ_MUST_USE RegI64 needI64Pair() {
-        if (!hasGPRPair())
-            sync();
-        Register low, high;
-        allocGPRPair(&low, &high);
-        return RegI64(Register64(high, low));
-    }
-#endif
-
-    MOZ_MUST_USE RegF32 needF32() {
-        if (!hasFPU<MIRType::Float32>())
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        return RegF32(allocFPU<MIRType::Float32>());
-    }
-
-    void needF32(RegF32 specific) {
-        if (!isAvailable(specific))
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        allocFPU(specific);
-    }
-
-    MOZ_MUST_USE RegF64 needF64() {
-        if (!hasFPU<MIRType::Double>())
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        return RegF64(allocFPU<MIRType::Double>());
-    }
-
-    void needF64(RegF64 specific) {
-        if (!isAvailable(specific))
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        allocFPU(specific);
-    }
-
     void moveI32(RegI32 src, RegI32 dest) {
         if (src != dest)
             masm.move32(src, dest);
     }
 
     void moveI64(RegI64 src, RegI64 dest) {
         if (src != dest)
             masm.move64(src, dest);
@@ -1448,17 +1575,17 @@ class BaseCompiler
     //  - Operations that need specific registers: multiply, quotient,
     //    remainder, will tend to sync because the registers we need
     //    will tend to be allocated.  We may be able to avoid that by
     //    prioritizing registers differently (takeLast instead of
     //    takeFirst) but we may also be able to allocate an unused
     //    register on demand to free up one we need, thus avoiding the
     //    sync.  That type of fix would go into needI32().
 
-    void sync() {
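+    // Implements BaseCompilerInterface::sync(); called both directly by the
+    // compiler and by BaseRegAlloc when no registers are free.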
+    void sync() final {
         size_t start = 0;
         size_t lim = stk_.length();
 
         for (size_t i = lim; i > 0; i--) {
             // Memory opcodes are first in the enum, single check against MemLast is fine.
             if (stk_[i - 1].kind() <= Stk::MemLast) {
                 start = i;
                 break;
@@ -1565,35 +1692,35 @@ class BaseCompiler
     void syncLocal(uint32_t slot) {
         if (hasLocal(slot))
             sync();            // TODO / OPTIMIZE: Improve this?  (Bug 1316817)
     }
 
     // Push the register r onto the stack.
 
     void pushI32(RegI32 r) {
-        MOZ_ASSERT(!isAvailable(r));
+        MOZ_ASSERT(!isAvailableI32(r));
         Stk& x = push();
         x.setI32Reg(r);
     }
 
     void pushI64(RegI64 r) {
-        MOZ_ASSERT(!isAvailable(r));
+        MOZ_ASSERT(!isAvailableI64(r));
         Stk& x = push();
         x.setI64Reg(r);
     }
 
     void pushF64(RegF64 r) {
-        MOZ_ASSERT(!isAvailable(r));
+        MOZ_ASSERT(!isAvailableF64(r));
         Stk& x = push();
         x.setF64Reg(r);
     }
 
     void pushF32(RegF32 r) {
-        MOZ_ASSERT(!isAvailable(r));
+        MOZ_ASSERT(!isAvailableF32(r));
         Stk& x = push();
         x.setF32Reg(r);
     }
 
     // Push the value onto the stack.
 
     void pushI32(int32_t v) {
         Stk& x = push();
@@ -1927,112 +2054,116 @@ class BaseCompiler
     // On the other hand, we sync() before every block and only the
     // JoinReg is live out of the block.  But on the way out, we
     // currently pop the JoinReg before freeing regs to be discarded,
     // so there is a real risk of some pointless shuffling there.  If
     // we instead integrate the popping of the join reg into the
     // popping of the stack we can just use the JoinReg as it will
     // become available in that process.
 
-    MOZ_MUST_USE AnyReg popJoinRegUnlessVoid(ExprType type) {
+    MOZ_MUST_USE Maybe<AnyReg> popJoinRegUnlessVoid(ExprType type) {
         switch (type) {
           case ExprType::Void: {
-            return AnyReg();
+            return Nothing();
           }
           case ExprType::I32: {
             DebugOnly<Stk::Kind> k(stk_.back().kind());
             MOZ_ASSERT(k == Stk::RegisterI32 || k == Stk::ConstI32 || k == Stk::MemI32 ||
                        k == Stk::LocalI32);
-            return AnyReg(popI32(joinRegI32));
+            return Some(AnyReg(popI32(joinRegI32)));
           }
           case ExprType::I64: {
             DebugOnly<Stk::Kind> k(stk_.back().kind());
             MOZ_ASSERT(k == Stk::RegisterI64 || k == Stk::ConstI64 || k == Stk::MemI64 ||
                        k == Stk::LocalI64);
-            return AnyReg(popI64(joinRegI64));
+            return Some(AnyReg(popI64(joinRegI64)));
           }
           case ExprType::F64: {
             DebugOnly<Stk::Kind> k(stk_.back().kind());
             MOZ_ASSERT(k == Stk::RegisterF64 || k == Stk::ConstF64 || k == Stk::MemF64 ||
                        k == Stk::LocalF64);
-            return AnyReg(popF64(joinRegF64));
+            return Some(AnyReg(popF64(joinRegF64)));
           }
           case ExprType::F32: {
             DebugOnly<Stk::Kind> k(stk_.back().kind());
             MOZ_ASSERT(k == Stk::RegisterF32 || k == Stk::ConstF32 || k == Stk::MemF32 ||
                        k == Stk::LocalF32);
-            return AnyReg(popF32(joinRegF32));
+            return Some(AnyReg(popF32(joinRegF32)));
           }
           default: {
             MOZ_CRASH("Compiler bug: unexpected expression type");
           }
         }
     }
 
     // If we ever start not sync-ing on entry to Block (but instead try to sync
     // lazily) then this may start asserting because it does not spill the
     // joinreg if the joinreg is already allocated.  Note, it *can't* spill the
     // joinreg in the contexts it's being used, so some other solution will need
     // to be found.
 
-    MOZ_MUST_USE AnyReg captureJoinRegUnlessVoid(ExprType type) {
+    MOZ_MUST_USE Maybe<AnyReg> captureJoinRegUnlessVoid(ExprType type) {
         switch (type) {
           case ExprType::I32:
-            allocGPR(joinRegI32);
-            return AnyReg(joinRegI32);
+            MOZ_ASSERT(isAvailableI32(joinRegI32));
+            needI32(joinRegI32);
+            return Some(AnyReg(joinRegI32));
           case ExprType::I64:
-            allocInt64(joinRegI64);
-            return AnyReg(joinRegI64);
+            MOZ_ASSERT(isAvailableI64(joinRegI64));
+            needI64(joinRegI64);
+            return Some(AnyReg(joinRegI64));
           case ExprType::F32:
-            allocFPU(joinRegF32);
-            return AnyReg(joinRegF32);
+            MOZ_ASSERT(isAvailableF32(joinRegF32));
+            needF32(joinRegF32);
+            return Some(AnyReg(joinRegF32));
           case ExprType::F64:
-            allocFPU(joinRegF64);
-            return AnyReg(joinRegF64);
+            MOZ_ASSERT(isAvailableF64(joinRegF64));
+            needF64(joinRegF64);
+            return Some(AnyReg(joinRegF64));
           case ExprType::Void:
-            return AnyReg();
+            return Nothing();
           default:
             MOZ_CRASH("Compiler bug: unexpected type");
         }
     }
 
-    void pushJoinRegUnlessVoid(AnyReg r) {
-        switch (r.tag) {
-          case AnyReg::NONE:
-            break;
+    void pushJoinRegUnlessVoid(const Maybe<AnyReg>& r) {
+        if (!r)
+            return;
+        switch (r->tag) {
           case AnyReg::I32:
-            pushI32(r.i32());
+            pushI32(r->i32());
             break;
           case AnyReg::I64:
-            pushI64(r.i64());
+            pushI64(r->i64());
             break;
           case AnyReg::F64:
-            pushF64(r.f64());
+            pushF64(r->f64());
             break;
           case AnyReg::F32:
-            pushF32(r.f32());
+            pushF32(r->f32());
             break;
         }
     }
 
-    void freeJoinRegUnlessVoid(AnyReg r) {
-        switch (r.tag) {
-          case AnyReg::NONE:
-            break;
+    void freeJoinRegUnlessVoid(const Maybe<AnyReg>& r) {
+        if (!r)
+            return;
+        switch (r->tag) {
           case AnyReg::I32:
-            freeI32(r.i32());
+            freeI32(r->i32());
             break;
           case AnyReg::I64:
-            freeI64(r.i64());
+            freeI64(r->i64());
             break;
           case AnyReg::F64:
-            freeF64(r.f64());
+            freeF64(r->f64());
             break;
           case AnyReg::F32:
-            freeF32(r.f32());
+            freeF32(r->f32());
             break;
         }
     }
 
     void maybeReserveJoinRegI(ExprType type) {
         if (type == ExprType::I32)
             needI32(joinRegI32);
         else if (type == ExprType::I64)
@@ -2203,52 +2334,39 @@ class BaseCompiler
         return stk_[stk_.length()-1-relativeDepth];
     }
 
 #ifdef DEBUG
     // Check that we're not leaking registers by comparing the
     // state of the stack + available registers with the set of
     // all available registers.
 
-    // Call this before compiling any code.
-    void setupRegisterLeakCheck() {
-        allGPR_ = availGPR_;
-        allFPU_ = availFPU_;
-    }
-
     // Call this between opcodes.
     void performRegisterLeakCheck() {
-        AllocatableGeneralRegisterSet knownGPR_ = availGPR_;
-        AllocatableFloatRegisterSet knownFPU_ = availFPU_;
+        ra.startLeakCheck();
         for (size_t i = 0 ; i < stk_.length() ; i++) {
             Stk& item = stk_[i];
             switch (item.kind_) {
               case Stk::RegisterI32:
-                knownGPR_.add(item.i32reg());
+                ra.addKnownI32(item.i32reg());
                 break;
               case Stk::RegisterI64:
-#ifdef JS_PUNBOX64
-                knownGPR_.add(item.i64reg().reg);
-#else
-                knownGPR_.add(item.i64reg().high);
-                knownGPR_.add(item.i64reg().low);
-#endif
+                ra.addKnownI64(item.i64reg());
                 break;
               case Stk::RegisterF32:
-                knownFPU_.add(item.f32reg());
+                ra.addKnownF32(item.f32reg());
                 break;
               case Stk::RegisterF64:
-                knownFPU_.add(item.f64reg());
+                ra.addKnownF64(item.f64reg());
                 break;
               default:
                 break;
             }
         }
-        MOZ_ASSERT(knownGPR_.bits() == allGPR_.bits());
-        MOZ_ASSERT(knownFPU_.bits() == allFPU_.bits());
+        ra.endLeakCheck();
     }
 #endif
 
     ////////////////////////////////////////////////////////////
     //
     // Control stack
 
     void initControl(Control& item)
@@ -2802,42 +2920,42 @@ class BaseCompiler
                     Assembler::Always);
 #else
         MOZ_CRASH("BaseCompiler platform hook: tableSwitch");
 #endif
     }
 
     RegI32 captureReturnedI32() {
         RegI32 rv = RegI32(ReturnReg);
-        MOZ_ASSERT(isAvailable(rv));
+        MOZ_ASSERT(isAvailableI32(rv));
         needI32(rv);
         return rv;
     }
 
     RegI64 captureReturnedI64() {
         RegI64 rv = RegI64(ReturnReg64);
-        MOZ_ASSERT(isAvailable(rv));
+        MOZ_ASSERT(isAvailableI64(rv));
         needI64(rv);
         return rv;
     }
 
     RegF32 captureReturnedF32(const FunctionCall& call) {
         RegF32 rv = RegF32(ReturnFloat32Reg);
-        MOZ_ASSERT(isAvailable(rv));
+        MOZ_ASSERT(isAvailableF32(rv));
         needF32(rv);
 #if defined(JS_CODEGEN_ARM)
         if (call.usesSystemAbi && !call.hardFP)
             masm.ma_vxfer(r0, rv);
 #endif
         return rv;
     }
 
     RegF64 captureReturnedF64(const FunctionCall& call) {
         RegF64 rv = RegF64(ReturnDoubleReg);
-        MOZ_ASSERT(isAvailable(rv));
+        MOZ_ASSERT(isAvailableF64(rv));
         needF64(rv);
 #if defined(JS_CODEGEN_ARM)
         if (call.usesSystemAbi && !call.hardFP)
             masm.ma_vxfer(r0, r1, rv);
 #endif
         return rv;
     }
 
@@ -2916,17 +3034,17 @@ class BaseCompiler
             checkDivideByZeroI64(rhs);
 
         if (!isUnsigned && (!isConst || c == -1))
             checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(false));
 
 # if defined(JS_CODEGEN_X64)
         // The caller must set up the following situation.
         MOZ_ASSERT(srcDest.reg == rax);
-        MOZ_ASSERT(isAvailable(rdx));
+        MOZ_ASSERT(isAvailableI64(specific_rdx));
         if (isUnsigned) {
             masm.xorq(rdx, rdx);
             masm.udivq(rhs.reg);
         } else {
             masm.cqo();
             masm.idivq(rhs.reg);
         }
 # else
@@ -2944,17 +3062,17 @@ class BaseCompiler
             checkDivideByZeroI64(rhs);
 
         if (!isUnsigned && (!isConst || c == -1))
             checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(true));
 
 # if defined(JS_CODEGEN_X64)
         // The caller must set up the following situation.
         MOZ_ASSERT(srcDest.reg == rax);
-        MOZ_ASSERT(isAvailable(rdx));
+        MOZ_ASSERT(isAvailableI64(specific_rdx));
 
         if (isUnsigned) {
             masm.xorq(rdx, rdx);
             masm.udivq(rhs.reg);
         } else {
             masm.cqo();
             masm.idivq(rhs.reg);
         }
@@ -3435,32 +3553,31 @@ class BaseCompiler
         if (!check->omitBoundsCheck) {
             masm.wasmBoundsCheck(Assembler::AboveOrEqual, ptr,
                                  Address(tls, offsetof(TlsData, boundsCheckLimit)),
                                  trap(Trap::OutOfBounds));
         }
 #endif
     }
 
-    // This is the temp register passed as the last argument to load()
-    MOZ_MUST_USE size_t loadTemps(const MemoryAccessDesc& access) {
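+    // Allocate any temp registers that a subsequent load() of this access
+    // will need; on most platforms none are required.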
+    void needLoadTemps(const MemoryAccessDesc& access, RegI32* tmp1, RegI32* tmp2, RegI32* tmp3) {
 #if defined(JS_CODEGEN_ARM)
         if (IsUnaligned(access)) {
             switch (access.type()) {
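+              // Deliberate fall-through: unaligned Float64 needs three
+              // temps, Float32 two, and every other type one.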
+              case Scalar::Float64:
+                *tmp3 = needI32();
+                MOZ_FALLTHROUGH;
               case Scalar::Float32:
-                return 2;
-              case Scalar::Float64:
-                return 3;
+                *tmp2 = needI32();
+                MOZ_FALLTHROUGH;
               default:
-                return 1;
+                *tmp1 = needI32();
+                break;
             }
         }
-        return 0;
-#else
-        return 0;
 #endif
     }
 
     MOZ_MUST_USE bool needTlsForAccess(const AccessCheck& check) {
 #if defined(JS_CODEGEN_ARM)
         return !check.omitBoundsCheck;
 #elif defined(JS_CODEGEN_X86)
         return true;
@@ -3486,17 +3603,17 @@ class BaseCompiler
 #elif defined(JS_CODEGEN_X86)
         masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr);
         Operand srcAddr(ptr, access->offset());
 
         if (dest.tag == AnyReg::I64) {
             MOZ_ASSERT(dest.i64() == abiReturnRegI64);
             masm.wasmLoadI64(*access, srcAddr, dest.i64());
         } else {
-            bool byteRegConflict = access->byteSize() == 1 && !singleByteRegs_.has(dest.i32());
+            bool byteRegConflict = access->byteSize() == 1 && !ra.isSingleByteI32(dest.i32());
             AnyRegister out = byteRegConflict ? AnyRegister(ScratchRegX86) : dest.any();
 
             masm.wasmLoad(*access, srcAddr, out);
 
             if (byteRegConflict)
                 masm.mov(ScratchRegX86, dest.i32());
         }
 #elif defined(JS_CODEGEN_ARM)
@@ -3524,54 +3641,53 @@ class BaseCompiler
         }
 #else
         MOZ_CRASH("BaseCompiler platform hook: load");
 #endif
 
         return true;
     }
 
-    MOZ_MUST_USE size_t storeTemps(const MemoryAccessDesc& access, ValType srcType) {
+    void needStoreTemps(const MemoryAccessDesc& access, ValType srcType, RegI32* tmp) {
 #if defined(JS_CODEGEN_ARM)
         if (IsUnaligned(access) && srcType != ValType::I32)
-            return 1;
-#endif
-        return 0;
+            *tmp = needI32();
+#endif
     }
 
     // ptr and src must not be the same register.
     // This may destroy ptr and src.
     MOZ_MUST_USE bool store(MemoryAccessDesc* access, AccessCheck* check, RegI32 tls, RegI32 ptr,
                             AnyReg src, RegI32 tmp)
     {
         prepareMemoryAccess(access, check, tls, ptr);
 
         // Emit the store
 #if defined(JS_CODEGEN_X64)
-        MOZ_ASSERT(tmp == Register::Invalid());
+        MOZ_ASSERT(tmp == invalidI32());
         Operand dstAddr(HeapReg, ptr, TimesOne, access->offset());
 
         masm.wasmStore(*access, src.any(), dstAddr);
 #elif defined(JS_CODEGEN_X86)
-        MOZ_ASSERT(tmp == Register::Invalid());
+        MOZ_ASSERT(tmp == invalidI32());
         masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr);
         Operand dstAddr(ptr, access->offset());
 
         if (access->type() == Scalar::Int64) {
             masm.wasmStoreI64(*access, src.i64(), dstAddr);
         } else {
             AnyRegister value;
             if (src.tag == AnyReg::I64) {
-                if (access->byteSize() == 1 && !singleByteRegs_.has(src.i64().low)) {
+                if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i64().low)) {
                     masm.mov(src.i64().low, ScratchRegX86);
                     value = AnyRegister(ScratchRegX86);
                 } else {
                     value = AnyRegister(src.i64().low);
                 }
-            } else if (access->byteSize() == 1 && !singleByteRegs_.has(src.i32())) {
+            } else if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i32())) {
                 masm.mov(src.i32(), ScratchRegX86);
                 value = AnyRegister(ScratchRegX86);
             } else {
                 value = src.any();
             }
 
             masm.wasmStore(*access, value, dstAddr);
         }
@@ -3583,22 +3699,22 @@ class BaseCompiler
                 break;
               case AnyReg::F32:
                 masm.wasmUnalignedStoreFP(*access, src.f32(), HeapReg, ptr, ptr, tmp);
                 break;
               case AnyReg::F64:
                 masm.wasmUnalignedStoreFP(*access, src.f64(), HeapReg, ptr, ptr, tmp);
                 break;
               default:
-                MOZ_ASSERT(tmp == Register::Invalid());
+                MOZ_ASSERT(tmp == invalidI32());
                 masm.wasmUnalignedStore(*access, src.i32(), HeapReg, ptr, ptr);
                 break;
             }
         } else {
-            MOZ_ASSERT(tmp == Register::Invalid());
+            MOZ_ASSERT(tmp == invalidI32());
             if (access->type() == Scalar::Int64)
                 masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr, ptr);
             else if (src.tag == AnyReg::I64)
                 masm.wasmStore(*access, AnyRegister(src.i64().low), HeapReg, ptr, ptr);
             else
                 masm.wasmStore(*access, src.any(), HeapReg, ptr, ptr);
         }
 #else
@@ -3656,40 +3772,39 @@ class BaseCompiler
 
         masm.atomicExchange64(srcAddr, rv, rd);
 
         if (wantResult)
             pushI64(rd);
         else
             freeI64(rd);
 
-        if (tls != invalidI32())
-            freeI32(tls);
+        maybeFreeI32(tls);
         freeI32(rp);
 
 #if defined(JS_CODEGEN_X86)
         freeI32(specific_ecx);
 #elif defined(JS_CODEGEN_ARM)
         freeI64(rv);
 #else
         MOZ_CRASH("BaseCompiler porting interface: xchg64");
 #endif
     }
 
-    MOZ_MUST_USE uint32_t
-    atomicRMWTemps(AtomicOp op, MemoryAccessDesc* access) {
+    void needAtomicRMWTemps(AtomicOp op, MemoryAccessDesc* access, RegI32* tmp) {
 #if defined(JS_CODEGEN_X86)
         // Handled specially in atomicRMW
         if (access->byteSize() == 1)
-            return 0;
+            return;
 #endif
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
-        return op == AtomicFetchAddOp || op == AtomicFetchSubOp ? 0 : 1;
+        if (op != AtomicFetchAddOp && op != AtomicFetchSubOp)
+            *tmp = needI32();
 #elif defined(JS_CODEGEN_ARM)
-        return 1;
+        *tmp = needI32();
 #else
         MOZ_CRASH("BaseCompiler platform hook: atomicRMWTemps");
 #endif
     }
 
     void
     atomicRMW(AtomicOp op, MemoryAccessDesc* access, AccessCheck* check, RegI32 tls, RegI32 ptr,
               RegI32 rv, RegI32 rd, RegI32 tmp)
@@ -3742,24 +3857,24 @@ class BaseCompiler
             break;
           }
           default: {
             MOZ_CRASH("Bad type for atomic operation");
           }
         }
     }
 
-    MOZ_MUST_USE uint32_t
-    atomicRMW64Temps(AtomicOp op) {
+    void needAtomicRMW64Temps(AtomicOp op, RegI64* tmp) {
 #if defined(JS_CODEGEN_X86)
         MOZ_CRASH("Do not call on x86");
 #elif defined(JS_CODEGEN_X64)
-        return (op == AtomicFetchAddOp || op == AtomicFetchSubOp) ? 0 : 1;
+        if (op != AtomicFetchAddOp && op != AtomicFetchSubOp)
+            *tmp = needI64();
 #elif defined(JS_CODEGEN_ARM)
-        return 1;
+        *tmp = needI64Pair();
 #else
         MOZ_CRASH("BaseCompiler platform hook: atomicRMW64Temps");
 #endif
     }
 
     // On x86, T is Address.  On other platforms, it is Register64.
     // U is BaseIndex or Address.
     template <typename T, typename U>
@@ -3782,17 +3897,17 @@ class BaseCompiler
         prepareMemoryAccess(access, check, tls, ptr);
         ATOMIC_PTR(srcAddr, access, tls, ptr);
 
         switch (access->type()) {
           case Scalar::Uint8: {
 #if defined(JS_CODEGEN_X86)
             ScratchEBX scratch(*this);
             MOZ_ASSERT(rd == specific_eax);
-            if (!singleByteRegs_.has(rnew)) {
+            if (!ra.isSingleByteI32(rnew)) {
                 // The replacement value must have a byte persona.
                 masm.movl(rnew, scratch);
                 rnew = RegI32(scratch);
             }
 #endif
             masm.compareExchange8ZeroExtend(srcAddr, rexpect, rnew, rd);
             break;
           }
@@ -3813,17 +3928,17 @@ class BaseCompiler
                    RegI32 rv, RegI32 rd)
     {
         prepareMemoryAccess(access, check, tls, ptr);
         ATOMIC_PTR(srcAddr, access, tls, ptr);
 
         switch (access->type()) {
           case Scalar::Uint8: {
 #if defined(JS_CODEGEN_X86)
-            if (!singleByteRegs_.has(rd)) {
+            if (!ra.isSingleByteI32(rd)) {
                 ScratchEBX scratch(*this);
                 // The output register must have a byte persona.
                 masm.atomicExchange8ZeroExtend(srcAddr, rv, scratch);
                 masm.movl(scratch, rd);
             } else {
                 masm.atomicExchange8ZeroExtend(srcAddr, rv, rd);
             }
 #else
@@ -4035,17 +4150,17 @@ class BaseCompiler
     // Lhs is Register, Register64, or FloatRegister.
     //
     // Rhs is either the same as Lhs, or an immediate expression compatible with
     // Lhs "when applicable".
 
     template<typename Cond, typename Lhs, typename Rhs>
     void jumpConditionalWithJoinReg(BranchState* b, Cond cond, Lhs lhs, Rhs rhs)
     {
-        AnyReg r = popJoinRegUnlessVoid(b->resultType);
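+        // r is Nothing when the result type is void.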
+        Maybe<AnyReg> r = popJoinRegUnlessVoid(b->resultType);
 
         if (b->framePushed != BranchState::NoPop && willPopStackBeforeBranch(b->framePushed)) {
             Label notTaken;
             branchTo(b->invertBranch ? cond : Assembler::InvertCondition(cond), lhs, rhs, &notTaken);
             popStackBeforeBranch(b->framePushed);
             masm.jump(b->label);
             masm.bind(&notTaken);
         } else {
@@ -4360,18 +4475,17 @@ BaseCompiler::emitMultiplyI64()
     r1 = popI64();
     r0 = popI64ToSpecific(RegI64(Register64(specific_edx, specific_eax)));
     temp = needI32();
 #else
     pop2xI64(&r0, &r1);
     temp = needI32();
 #endif
     masm.mul64(r1, r0, temp);
-    if (temp != Register::Invalid())
-        freeI32(temp);
+    maybeFreeI32(temp);
     freeI64(r1);
     pushI64(r0);
 }
 
 void
 BaseCompiler::emitMultiplyF32()
 {
     RegF32 r0, r1;
@@ -4989,18 +5103,17 @@ BaseCompiler::emitRotrI64()
 {
     int64_t c;
     if (popConstI64(&c)) {
         RegI64 r = popI64();
         RegI32 temp;
         if (rotate64NeedsTemp())
             temp = needI32();
         masm.rotateRight64(Imm32(c & 63), r, r, temp);
-        if (temp != Register::Invalid())
-            freeI32(temp);
+        maybeFreeI32(temp);
         pushI64(r);
     } else {
         RegI64 r0, r1;
         pop2xI64ForShiftOrRotate(&r0, &r1);
         masm.rotateRight64(lowPart(r1), r0, r0, maybeHighPart(r1));
         freeI64(r1);
         pushI64(r0);
     }
@@ -5028,18 +5141,17 @@ BaseCompiler::emitRotlI64()
 {
     int64_t c;
     if (popConstI64(&c)) {
         RegI64 r = popI64();
         RegI32 temp;
         if (rotate64NeedsTemp())
             temp = needI32();
         masm.rotateLeft64(Imm32(c & 63), r, r, temp);
-        if (temp != Register::Invalid())
-            freeI32(temp);
+        maybeFreeI32(temp);
         pushI64(r);
     } else {
         RegI64 r0, r1;
         pop2xI64ForShiftOrRotate(&r0, &r1);
         masm.rotateLeft64(lowPart(r1), r0, r0, maybeHighPart(r1));
         freeI64(r1);
         pushI64(r0);
     }
@@ -5379,18 +5491,17 @@ void
 BaseCompiler::emitConvertU64ToF32()
 {
     RegI64 r0 = popI64();
     RegF32 f0 = needF32();
     RegI32 temp;
     if (convertI64ToFloatNeedsTemp(ValType::F32, IsUnsigned(true)))
         temp = needI32();
     convertI64ToF32(r0, IsUnsigned(true), f0, temp);
-    if (temp != Register::Invalid())
-        freeI32(temp);
+    maybeFreeI32(temp);
     freeI64(r0);
     pushF32(f0);
 }
 #endif
 
 void
 BaseCompiler::emitConvertF32ToF64()
 {
@@ -5436,18 +5547,17 @@ void
 BaseCompiler::emitConvertU64ToF64()
 {
     RegI64 r0 = popI64();
     RegF64 d0 = needF64();
     RegI32 temp;
     if (convertI64ToFloatNeedsTemp(ValType::F64, IsUnsigned(true)))
         temp = needI32();
     convertI64ToF64(r0, IsUnsigned(true), d0, temp);
-    if (temp != Register::Invalid())
-        freeI32(temp);
+    maybeFreeI32(temp);
     freeI64(r0);
     pushF64(d0);
 }
 #endif // I64_TO_FLOAT_CALLOUT
 
 void
 BaseCompiler::emitReinterpretI32AsF32()
 {
@@ -5658,17 +5768,17 @@ BaseCompiler::emitBlock()
 }
 
 void
 BaseCompiler::endBlock(ExprType type)
 {
     Control& block = controlItem();
 
     // Save the value.
-    AnyReg r;
+    Maybe<AnyReg> r;
     if (!deadCode_) {
         r = popJoinRegUnlessVoid(type);
         block.bceSafeOnExit &= bceSafe_;
     }
 
     // Leave the block.
     popStackOnBlockExit(block.framePushed);
     popValueStackTo(block.stackSize);
@@ -5711,17 +5821,17 @@ BaseCompiler::emitLoop()
     return true;
 }
 
 void
 BaseCompiler::endLoop(ExprType type)
 {
     Control& block = controlItem();
 
-    AnyReg r;
+    Maybe<AnyReg> r;
     if (!deadCode_) {
         r = popJoinRegUnlessVoid(type);
         // block.bceSafeOnExit need not be updated because it won't be used for
         // the fallthrough path.
     }
 
     popStackOnBlockExit(block.framePushed);
     popValueStackTo(block.stackSize);
@@ -5805,17 +5915,17 @@ BaseCompiler::emitElse()
     Control& ifThenElse = controlItem(0);
 
     // See comment in endIfThenElse, below.
 
     // Exit the "then" branch.
 
     ifThenElse.deadThenBranch = deadCode_;
 
-    AnyReg r;
+    Maybe<AnyReg> r;
     if (!deadCode_)
         r = popJoinRegUnlessVoid(thenType);
 
     popStackOnBlockExit(ifThenElse.framePushed);
     popValueStackTo(ifThenElse.stackSize);
 
     if (!deadCode_)
         masm.jump(&ifThenElse.label);
@@ -5842,18 +5952,17 @@ BaseCompiler::endIfThenElse(ExprType typ
     Control& ifThenElse = controlItem();
 
     // The expression type is not a reliable guide to what we'll find
     // on the stack; we could have (if E (i32.const 1) (unreachable))
     // in which case the "else" arm is AnyType but the type of the
     // full expression is I32.  So restore whatever's there, not what
     // we want to find there.  The "then" arm has the same constraint.
 
-    AnyReg r;
-
+    Maybe<AnyReg> r;
     if (!deadCode_) {
         r = popJoinRegUnlessVoid(type);
         ifThenElse.bceSafeOnExit &= bceSafe_;
     }
 
     popStackOnBlockExit(ifThenElse.framePushed);
     popValueStackTo(ifThenElse.stackSize);
 
@@ -5911,17 +6020,17 @@ BaseCompiler::emitBr()
         return true;
 
     Control& target = controlItem(relativeDepth);
     target.bceSafeOnExit &= bceSafe_;
 
     // Save any value in the designated join register, where the
     // normal block exit code will also leave it.
 
-    AnyReg r = popJoinRegUnlessVoid(type);
+    Maybe<AnyReg> r = popJoinRegUnlessVoid(type);
 
     popStackBeforeBranch(target.framePushed);
     masm.jump(&target.label);
 
     // The register holding the join value is free for the remainder
     // of this block.
 
     freeJoinRegUnlessVoid(r);
@@ -5971,17 +6080,17 @@ BaseCompiler::emitBrTable()
     // Don't use joinReg for rc
     maybeReserveJoinRegI(branchValueType);
 
     // Table switch value always on top.
     RegI32 rc = popI32();
 
     maybeUnreserveJoinRegI(branchValueType);
 
-    AnyReg r = popJoinRegUnlessVoid(branchValueType);
+    Maybe<AnyReg> r = popJoinRegUnlessVoid(branchValueType);
 
     Label dispatchCode;
     masm.branch32(Assembler::Below, rc, Imm32(depths.length()), &dispatchCode);
 
     // This is the out-of-range stub.  rc is dead here but we don't need it.
 
     popStackBeforeBranch(controlItem(defaultDepth).framePushed);
     controlItem(defaultDepth).bceSafeOnExit &= bceSafe_;
@@ -6706,17 +6815,17 @@ BaseCompiler::emitSetGlobal()
 // is aligned.
 //
 // (In addition, alignment checking of the pointer can be omitted if the pointer
 // has been checked in dominating code, but we don't do that yet.)
 
 // TODO / OPTIMIZE (bug 1329576): There are opportunities to generate better
 // code by not moving a constant address with a zero offset into a register.
 
-BaseCompiler::RegI32
+RegI32
 BaseCompiler::popMemoryAccess(MemoryAccessDesc* access, AccessCheck* check)
 {
     check->onlyPointerAlignment = (access->offset() & (access->byteSize() - 1)) == 0;
 
     int32_t addrTmp;
     if (popConstI32(&addrTmp)) {
         uint32_t addr = addrTmp;
 
@@ -6741,38 +6850,34 @@ BaseCompiler::popMemoryAccess(MemoryAcce
 
     uint32_t local;
     if (peekLocalI32(&local))
         bceCheckLocal(access, check, local);
 
     return popI32();
 }
 
-BaseCompiler::RegI32
+RegI32
 BaseCompiler::maybeLoadTlsForAccess(const AccessCheck& check)
 {
-    RegI32 tls = invalidI32();
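+    // A default-constructed RegI32 is the invalid register; tls remains
+    // invalid unless the access needs it.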
+    RegI32 tls;
     if (needTlsForAccess(check)) {
         tls = needI32();
         masm.loadWasmTlsRegFromFrame(tls);
     }
     return tls;
 }
 
 bool
 BaseCompiler::loadCommon(MemoryAccessDesc* access, ValType type)
 {
     AccessCheck check;
 
-    size_t temps = loadTemps(*access);
-    MOZ_ASSERT(temps <= 3);
-    RegI32 tmp1 = temps >= 1 ? needI32() : invalidI32();
-    RegI32 tmp2 = temps >= 2 ? needI32() : invalidI32();
-    RegI32 tmp3 = temps >= 3 ? needI32() : invalidI32();
-    RegI32 tls = invalidI32();
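+    // Default-constructed registers are invalid.  needLoadTemps() sets only
+    // the temps this platform requires for the access; maybeFreeI32() below
+    // ignores any that remain invalid.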
+    RegI32 tls, tmp1, tmp2, tmp3;
+    needLoadTemps(*access, &tmp1, &tmp2, &tmp3);
 
     switch (type) {
       case ValType::I32: {
         RegI32 rp = popMemoryAccess(access, &check);
 #ifdef JS_CODEGEN_ARM
         RegI32 rv = IsUnaligned(*access) ? needI32() : rp;
 #else
         RegI32 rv = rp;
@@ -6823,26 +6928,20 @@ BaseCompiler::loadCommon(MemoryAccessDes
         freeI32(rp);
         break;
       }
       default:
         MOZ_CRASH("load type");
         break;
     }
 
-    if (tls != invalidI32())
-        freeI32(tls);
-
-    MOZ_ASSERT(temps <= 3);
-    if (temps >= 1)
-        freeI32(tmp1);
-    if (temps >= 2)
-        freeI32(tmp2);
-    if (temps >= 3)
-        freeI32(tmp3);
+    maybeFreeI32(tls);
+    maybeFreeI32(tmp1);
+    maybeFreeI32(tmp2);
+    maybeFreeI32(tmp3);
 
     return true;
 }
 
 bool
 BaseCompiler::emitLoad(ValType type, Scalar::Type viewType)
 {
     LinearMemoryAddress<Nothing> addr;
@@ -6855,21 +6954,19 @@ BaseCompiler::emitLoad(ValType type, Sca
     MemoryAccessDesc access(viewType, addr.align, addr.offset, Some(bytecodeOffset()));
     return loadCommon(&access, type);
 }
 
 bool
 BaseCompiler::storeCommon(MemoryAccessDesc* access, ValType resultType)
 {
     AccessCheck check;
-    size_t temps = storeTemps(*access, resultType);
-
-    MOZ_ASSERT(temps <= 1);
-    RegI32 tmp = temps >= 1 ? needI32() : invalidI32();
-    RegI32 tls = invalidI32();
+
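+    // As in loadCommon, any temp left invalid is ignored by maybeFreeI32().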
+    RegI32 tls, tmp;
+    needStoreTemps(*access, resultType, &tmp);
 
     switch (resultType) {
       case ValType::I32: {
         RegI32 rv = popI32();
         RegI32 rp = popMemoryAccess(access, &check);
         tls = maybeLoadTlsForAccess(check);
         if (!store(access, &check, tls, rp, AnyReg(rv), tmp))
             return false;
@@ -6907,22 +7004,18 @@ BaseCompiler::storeCommon(MemoryAccessDe
         freeF64(rv);
         break;
       }
       default:
         MOZ_CRASH("store type");
         break;
     }
 
-    if (tls != invalidI32())
-        freeI32(tls);
-
-    MOZ_ASSERT(temps <= 1);
-    if (temps >= 1)
-        freeI32(tmp);
+    maybeFreeI32(tls);
+    maybeFreeI32(tmp);
 
     return true;
 }
 
 bool
 BaseCompiler::emitStore(ValType resultType, Scalar::Type viewType)
 {
     LinearMemoryAddress<Nothing> addr;
@@ -7212,18 +7305,17 @@ BaseCompiler::emitAtomicCmpXchg(ValType 
         MOZ_CRASH("BaseCompiler porting interface: compareExchange");
 #endif
         AccessCheck check;
         RegI32 rp = popMemoryAccess(&access, &check);
         RegI32 tls = maybeLoadTlsForAccess(check);
 
         atomicCompareExchange(&access, &check, tls, rp, rexpect, rnew, rd);
 
-        if (tls != invalidI32())
-            freeI32(tls);
+        maybeFreeI32(tls);
         freeI32(rp);
         freeI32(rnew);
         if (rexpect != rd)
             freeI32(rexpect);
 
         if (narrowing)
             pushU32AsI64(rd);
         else
@@ -7260,18 +7352,17 @@ BaseCompiler::emitAtomicCmpXchg(ValType 
     RegI32 rp = popMemoryAccess(&access, &check);
     RegI32 tls = maybeLoadTlsForAccess(check);
     prepareMemoryAccess(&access, &check, tls, rp);
     ATOMIC_PTR(srcAddr, &access, tls, rp);
     masm.compareExchange64(srcAddr, rexpect, rreplace, rd);
 
     pushI64(rd);
 
-    if (tls != invalidI32())
-        freeI32(tls);
+    maybeFreeI32(tls);
     freeI32(rp);
 #if defined(JS_CODEGEN_X64)
     freeI64(rreplace);
 #elif defined(JS_CODEGEN_X86)
     freeI32(specific_ecx);
 #elif defined(JS_CODEGEN_ARM)
     freeI64(rexpect);
     freeI64(rreplace);
@@ -7307,35 +7398,34 @@ BaseCompiler::emitAtomicLoad(ValType typ
 # if defined(JS_CODEGEN_X86)
     needI32(specific_ecx);
     needI64(specific_edx_eax);
     // Claim scratch after the need() calls because they may need it to sync.
     ScratchEBX scratch(*this);
     RegI64 tmp = specific_ecx_ebx;
     RegI64 output = specific_edx_eax;
 # elif defined(JS_CODEGEN_ARM)
-    RegI64 tmp = invalidI64();
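+    // ARM needs no temp here; tmp remains the invalid register.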
+    RegI64 tmp;
     RegI64 output = needI64Pair();
 # else
     RegI64 tmp, output;
     MOZ_CRASH("BaseCompiler porting interface: atomic load 64-bit");
 # endif
 
     AccessCheck check;
     RegI32 rp = popMemoryAccess(&access, &check);
     RegI32 tls = maybeLoadTlsForAccess(check);
     prepareMemoryAccess(&access, &check, tls, rp);
     ATOMIC_PTR(srcAddr, &access, tls, rp);
 
     masm.atomicLoad64(srcAddr, tmp, output);
     pushI64(output);
 
     freeI32(rp);
-    if (tls != invalidI32())
-        freeI32(tls);
+    maybeFreeI32(tls);
 # if defined(JS_CODEGEN_X86)
     freeI32(specific_ecx);
 # elif defined(JS_CODEGEN_ARM)
     // Nothing
 # else
     MOZ_CRASH("BaseCompiler porting interface: atomic load 64-bit");
 # endif
 
@@ -7373,61 +7463,58 @@ BaseCompiler::emitAtomicRMW(ValType type
         RegI32 rv = narrowing ? popI64ToI32() : popI32();
         RegI32 rp = popMemoryAccess(&access, &check);
         RegI32 output = needI32();
 #else
         RegI32 rv, rp, output;
         MOZ_CRASH("BaseCompiler porting interface: atomic rmw");
 #endif
         RegI32 tls = maybeLoadTlsForAccess(check);
-        size_t temps = atomicRMWTemps(op, &access);
-        MOZ_ASSERT(temps <= 1);
-        RegI32 tmp = temps >= 1 ? needI32() : invalidI32();
+        RegI32 tmp;
+        needAtomicRMWTemps(op, &access, &tmp);
 
         atomicRMW(op, &access, &check, tls, rp, rv, output, tmp);
 
-        if (tls != invalidI32())
-            freeI32(tls);
+        maybeFreeI32(tls);
+        maybeFreeI32(tmp);
         freeI32(rp);
         if (rv != output)
             freeI32(rv);
-        if (temps >= 1)
-            freeI32(tmp);
 
         if (narrowing)
             pushU32AsI64(output);
         else
             pushI32(output);
         return true;
     }
 
     MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8);
 
 #if defined(JS_CODEGEN_X86)
 
     sync();
 
-    allocGPR(eax);
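+    // The sync() above flushed the value stack, so these specific registers
+    // are free and can be claimed without further spilling.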
+    needI32NoSync(specific_eax);
     ScratchEBX scratch(*this);           // Already allocated
-    allocGPR(ecx);
-    allocGPR(edx);
-    allocGPR(edi);
-    allocGPR(esi);
+    needI32NoSync(specific_ecx);
+    needI32NoSync(specific_edx);
+    needI32NoSync(specific_edi);
+    needI32NoSync(specific_esi);
 
     AccessCheck check;
     MOZ_ASSERT(needTlsForAccess(check));
 
     RegI64 tmp = specific_ecx_ebx;
     popI64ToSpecific(tmp);
 
-    RegI32 ptr = RegI32(esi);
+    RegI32 ptr = specific_esi;
     popI32ToSpecific(ptr);
 
-    RegI32 tls = RegI32(edi);
-    RegI32 memoryBase = RegI32(edi);     // Yes, same
+    RegI32 tls = specific_edi;
+    RegI32 memoryBase = specific_edi;     // Yes, same
     masm.loadWasmTlsRegFromFrame(tls);
 
     prepareMemoryAccess(&access, &check, tls, ptr);
     masm.movl(Operand(Address(tls, offsetof(TlsData, memoryBase))), memoryBase);
 
     masm.Push(ecx);
     masm.Push(ebx);
 
@@ -7435,19 +7522,19 @@ BaseCompiler::emitAtomicRMW(ValType type
 
     BaseIndex srcAddr(memoryBase, ptr, TimesOne, access.offset());
     Address value(esp, 0);
     atomicRMW64(op, value, srcAddr, tmp, rd);
 
     masm.freeStack(8);
 
     pushI64(rd);
-    freeGPR(ecx);
-    freeGPR(edi);
-    freeGPR(esi);
+    freeI32(specific_ecx);
+    freeI32(specific_edi);
+    freeI32(specific_esi);
 
 #else // !JS_CODEGEN_X86
 
     AccessCheck check;
 # if defined(JS_CODEGEN_X64)
     bool isAddSub = op == AtomicFetchAddOp || op == AtomicFetchSubOp;
     needI64(specific_rax);
     RegI64 rv = isAddSub ? popI64ToSpecific(specific_rax) : popI64();
@@ -7459,39 +7546,31 @@ BaseCompiler::emitAtomicRMW(ValType type
     RegI64 rd = needI64Pair();
 #  else
     RegI64 rv, rd;
     RegI32 rp;
     MOZ_CRASH("BaseCompiler porting interface: 64-bit atomic RMW");
 # endif
 
     RegI32 tls = maybeLoadTlsForAccess(check);
-    size_t temps = atomicRMW64Temps(op);
-    MOZ_ASSERT(temps <= 1);
-    RegI64 tmp = invalidI64();
-# ifdef JS_CODEGEN_ARM
-    if (temps >= 1) tmp = needI64Pair();
-# else
-    if (temps >= 1) tmp = needI64();
-# endif
+    RegI64 tmp;
+    needAtomicRMW64Temps(op, &tmp);
 
     prepareMemoryAccess(&access, &check, tls, rp);
     ATOMIC_PTR(srcAddr, &access, tls, rp);
 
     atomicRMW64(op, rv, srcAddr, tmp, rd);
 
     pushI64(rd);
 
-    if (tls != invalidI32())
-        freeI32(tls);
+    maybeFreeI32(tls);
     freeI32(rp);
     if (rv != rd)
         freeI64(rv);
-    if (temps >= 1)
-        freeI64(tmp);
+    maybeFreeI64(tmp);
 
 #endif // !JS_CODEGEN_X86
 
     return true;
 }
 
 bool
 BaseCompiler::emitAtomicStore(ValType type, Scalar::Type viewType)
@@ -7544,18 +7623,17 @@ BaseCompiler::emitAtomicXchg(ValType typ
         RegI32 rd = rv;
 #else
         RegI32 rd = needI32();
 #endif
         RegI32 tls = maybeLoadTlsForAccess(check);
 
         atomicExchange(&access, &check, tls, rp, rv, rd);
 
-        if (tls != invalidI32())
-            freeI32(tls);
+        maybeFreeI32(tls);
         freeI32(rp);
         if (rv != rd)
             freeI32(rv);
 
         if (narrowing)
             pushU32AsI64(rd);
         else
             pushI32(rd);
@@ -7575,18 +7653,17 @@ BaseCompiler::emitAtomicXchg(ValType typ
     RegI32 tls = maybeLoadTlsForAccess(check);
 
     prepareMemoryAccess(&access, &check, tls, rp);
     ATOMIC_PTR(srcAddr, &access, tls, rp);
 
     masm.atomicExchange64(srcAddr, rv, rd);
     pushI64(rd);
 
-    if (tls != invalidI32())
-        freeI32(tls);
+    maybeFreeI32(tls);
     freeI32(rp);
     if (rv != rd)
         freeI64(rv);
 #else
     xchg64(&access, type, WantResult(true));
 #endif
 
     return true;
@@ -8452,17 +8529,17 @@ BaseCompiler::emitInitStackLocals()
     // this case we'll end up using 32-bit offsets on x64 for up to half of the
     // stores, though.)
 
     // Fully-unrolled case.
 
     if (initWords < 2 * unrollLimit)  {
         for (uint32_t i = low; i < high; i += wordSize)
             masm.storePtr(zero, Address(StackPointer, localOffsetToSPOffset(i + wordSize)));
-        freeGPR(zero);
+        freeI32(zero);
         return;
     }
 
     // Unrolled loop with a tail. Stores will use negative offsets. That's OK
     // for x86 and ARM, at least.
 
     // Compute pointer to the highest-addressed slot on the frame.
     RegI32 p = needI32();
@@ -8483,19 +8560,19 @@ BaseCompiler::emitInitStackLocals()
         masm.storePtr(zero, Address(p, -(wordSize * i)));
     masm.subPtr(Imm32(unrollLimit * wordSize), p);
     masm.branchPtr(Assembler::LessThan, lim, p, &again);
 
     // The tail.
     for (uint32_t i = 0; i < tailWords; ++i)
         masm.storePtr(zero, Address(p, -(wordSize * i)));
 
-    freeGPR(p);
-    freeGPR(lim);
-    freeGPR(zero);
+    freeI32(p);
+    freeI32(lim);
+    freeI32(zero);
 }
 
 BaseCompiler::BaseCompiler(const ModuleEnvironment& env,
                            Decoder& decoder,
                            const FuncCompileInput& func,
                            const ValTypeVector& locals,
                            bool debugEnabled,
                            TempAllocator* alloc,
@@ -8516,65 +8593,42 @@ BaseCompiler::BaseCompiler(const ModuleE
       bceSafe_(0),
       stackAddOffset_(0),
       mode_(mode),
       latentOp_(LatentOp::None),
       latentType_(ValType::I32),
       latentIntCmp_(Assembler::Equal),
       latentDoubleCmp_(Assembler::DoubleEqual),
       masm(*masm),
-      availGPR_(GeneralRegisterSet::All()),
-      availFPU_(FloatRegisterSet::All()),
-#ifdef DEBUG
-      scratchRegisterTaken_(false),
-#endif
+      ra(*this),
 #ifdef JS_CODEGEN_X64
       specific_rax(RegI64(Register64(rax))),
       specific_rcx(RegI64(Register64(rcx))),
       specific_rdx(RegI64(Register64(rdx))),
 #endif
 #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
       specific_eax(RegI32(eax)),
       specific_ecx(RegI32(ecx)),
       specific_edx(RegI32(edx)),
+      specific_edi(RegI32(edi)),
+      specific_esi(RegI32(esi)),
 #endif
 #ifdef JS_CODEGEN_X86
       specific_ecx_ebx(RegI64(Register64(ecx, ebx))),
       specific_edx_eax(RegI64(Register64(edx, eax))),
-      singleByteRegs_(GeneralRegisterSet(Registers::SingleByteRegs)),
       abiReturnRegI64(RegI64(Register64(edx, eax))),
 #endif
 #ifdef JS_CODEGEN_ARM
       abiReturnRegI64(ReturnReg64),
 #endif
       joinRegI32(RegI32(ReturnReg)),
       joinRegI64(RegI64(ReturnReg64)),
       joinRegF32(RegF32(ReturnFloat32Reg)),
       joinRegF64(RegF64(ReturnDoubleReg))
 {
-    // jit/RegisterAllocator.h: RegisterAllocator::RegisterAllocator()
-
-#if defined(JS_CODEGEN_X64)
-    availGPR_.take(HeapReg);
-#elif defined(JS_CODEGEN_ARM)
-    availGPR_.take(HeapReg);
-    availGPR_.take(ScratchRegARM);
-#elif defined(JS_CODEGEN_ARM64)
-    availGPR_.take(HeapReg);
-    availGPR_.take(HeapLenReg);
-#elif defined(JS_CODEGEN_X86)
-    availGPR_.take(ScratchRegX86);
-#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
-    availGPR_.take(HeapReg);
-#endif
-    availGPR_.take(FramePointer);
-
-#ifdef DEBUG
-    setupRegisterLeakCheck();
-#endif
 }
 
 bool
 BaseCompiler::init()
 {
     if (!SigD_.append(ValType::F64))
         return false;
     if (!SigF_.append(ValType::F32))