Bug 1275442 - Optimize imul by small constants. r=sunfish draft
authorMichael Bebenita <mbebenita@gmail.com>
Fri, 27 May 2016 12:41:03 -0700
changeset 373494 2b6eaab34cd90129be9626372319a82be6d030e7
parent 373486 e04f158343d2d3709977f46131a7752d81a7babe
child 522409 7eedabb3b40f76c91f39a3393373ab07126790fe
push id19768
push userbmo:mbebenita@gmail.com
push dateTue, 31 May 2016 20:10:51 +0000
reviewerssunfish
bugs1275442
milestone49.0a1
Bug 1275442 - Optimize imul by small constants. r=sunfish MozReview-Commit-ID: LeBiqgsFFwI
js/src/jit-test/tests/wasm/basic-integer.js
js/src/jit/x64/CodeGenerator-x64.cpp
js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
js/src/jit/x86-shared/Lowering-x86-shared.cpp
--- a/js/src/jit-test/tests/wasm/basic-integer.js
+++ b/js/src/jit-test/tests/wasm/basic-integer.js
@@ -101,16 +101,19 @@ testUnary('i32', 'popcnt', 0xFFFFFFFF, 3
 
 testUnary('i32', 'eqz', 0, 1);
 testUnary('i32', 'eqz', 1, 0);
 testUnary('i32', 'eqz', 0xFFFFFFFF, 0);
 
 testBinary32('add', 40, 2, 42);
 testBinary32('sub', 40, 2, 38);
 testBinary32('mul', 40, 2, 80);
+// Test optimizations for small multiplier constants.
+for (var i = 0; i < 64; i++) testBinary32('mul', 1, i, i);
+testBinary32('mul', 1, 0x7fffffff, 0x7fffffff | 0);
 testBinary32('div_s', -40, 2, -20);
 testBinary32('div_u', -40, 2, 2147483628);
 testBinary32('rem_s', 40, -3, 1);
 testBinary32('rem_u', 40, -3, 40);
 testBinary32('and', 42, 6, 2);
 testBinary32('or', 42, 6, 46);
 testBinary32('xor', 42, 2, 40);
 testBinary32('shl', 40, 2, 160);
@@ -147,16 +150,18 @@ if (hasI64()) {
     testBinary64('add', "0x1234567887654321", -1, "0x1234567887654320");
     testBinary64('add', "0xffffffffffffffff", 1, 0);
     testBinary64('sub', 40, 2, 38);
     testBinary64('sub', "0x1234567887654321", "0x123456789", "0x12345677641fdb98");
     testBinary64('sub', 3, 5, -2);
     testBinary64('mul', 40, 2, 80);
     testBinary64('mul', -1, 2, -2);
     testBinary64('mul', 0x123456, "0x9876543210", "0xad77d2c5f941160");
+    // Test optimizations for small multiplier constants.
+    for (var i = 0; i < 64; i++) testBinary64('mul', 1, i, i);
     testBinary64('div_s', -40, 2, -20);
     testBinary64('div_s', "0x1234567887654321", 2, "0x91a2b3c43b2a190");
     testBinary64('div_s', "0x1234567887654321", "0x1000000000", "0x1234567");
     testBinary64('div_u', -40, 2, "0x7fffffffffffffec");
     testBinary64('div_u', "0x1234567887654321", 9, "0x205d0b80f0b4059");
     testBinary64('rem_s', 40, -3, 1);
     testBinary64('rem_s', "0x1234567887654321", "0x1000000000", "0x887654321");
     testBinary64('rem_s', "0x7fffffffffffffff", -1, 0);
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -15,16 +15,17 @@
 
 #include "jit/MacroAssembler-inl.h"
 #include "jit/shared/CodeGenerator-shared-inl.h"
 
 using namespace js;
 using namespace js::jit;
 
 using mozilla::DebugOnly;
+using mozilla::FloorLog2;
 
 static inline Register64
 ToRegister64(const LAllocation* a)
 {
     return Register64(ToRegister(a));
 }
 
 static inline Register64
@@ -408,16 +409,41 @@ CodeGeneratorX64::visitMulI64(LMulI64* l
             return;
           case 1:
             // nop
             return;
           case 2:
             masm.addq(lhs, lhs);
             return;
           default:
+            // Attempt to emit shl/lea pairs whenever the constant can be factored into two
+            // products that can be computed cheaply using shl and/or lea.
+            int64_t a = 0;
+            int64_t b = 0;
+            if ((constant % 9) == 0) {
+                a = 9;
+                b = constant / 9;
+            } else if ((constant % 5) == 0) {
+                a = 5;
+                b = constant / 5;
+            } else if ((constant % 3) == 0) {
+                a = 3;
+                b = constant / 3;
+            }
+            if (IsPowerOfTwo(b) || b == 3 || b == 5 || b == 9) {
+                Scale scale = ScaleFromElemWidth(a - 1);
+                masm.lea(Operand(lhs, lhs, scale, 0), lhs);
+                if (IsPowerOfTwo(b)) {
+                    masm.shlq(Imm32(FloorLog2(b)), lhs);
+                } else {
+                    Scale scale = ScaleFromElemWidth(b - 1);
+                    masm.lea(Operand(lhs, lhs, scale, 0), lhs);
+                }
+                return;
+            }
             if (constant > 0) {
                 // Use shift if constant is power of 2.
                 int32_t shift = mozilla::FloorLog2(constant);
                 if (int64_t(1) << shift == constant) {
                     masm.shlq(Imm32(shift), lhs);
                     return;
                 }
             }
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -965,16 +965,17 @@ class MulNegativeZeroCheck : public OutO
     }
 };
 
 void
 CodeGeneratorX86Shared::visitMulI(LMulI* ins)
 {
     const LAllocation* lhs = ins->lhs();
     const LAllocation* rhs = ins->rhs();
+    const LAllocation* lhsCopy = ins->lhsCopy();
     MMul* mul = ins->mir();
     MOZ_ASSERT_IF(mul->mode() == MMul::Integer, !mul->canBeNegativeZero() && !mul->canOverflow());
 
     if (rhs->isConstant()) {
         // Bailout on -0.0
         int32_t constant = ToInt32(rhs);
         if (mul->canBeNegativeZero() && constant <= 0) {
             Assembler::Condition bailoutCond = (constant == 0) ? Assembler::Signed : Assembler::Equal;
@@ -992,16 +993,51 @@ CodeGeneratorX86Shared::visitMulI(LMulI*
           case 1:
             // nop
             return; // escape overflow check;
           case 2:
             masm.addl(ToOperand(lhs), ToRegister(lhs));
             break;
           default:
             if (!mul->canOverflow() && constant > 0) {
+                if (IsPowerOfTwo(constant + 1)) {
+                    masm.shll(Imm32(FloorLog2(constant + 1)), ToRegister(lhs));
+                    masm.subl(ToOperand(lhsCopy), ToRegister(lhs));
+                    return;
+                } else if (IsPowerOfTwo(constant - 1)) {
+                    masm.shll(Imm32(FloorLog2(constant - 1)), ToRegister(lhs));
+                    masm.addl(ToOperand(lhsCopy), ToRegister(lhs));
+                    return;
+                } else {
+                    // Attempt to emit shl/lea pairs whenever the constant can be factored into two
+                    // products that can be computed cheaply using shl and/or lea.
+                    int32_t a = 0;
+                    int32_t b = 0;
+                    if ((constant % 9) == 0) {
+                        a = 9;
+                        b = constant / 9;
+                    } else if ((constant % 5) == 0) {
+                        a = 5;
+                        b = constant / 5;
+                    } else if ((constant % 3) == 0) {
+                        a = 3;
+                        b = constant / 3;
+                    }
+                    if (IsPowerOfTwo(b) || b == 3 || b == 5 || b == 9) {
+                        Scale scale = ScaleFromElemWidth(a - 1);
+                        masm.leal(Operand(ToRegister(lhs), ToRegister(lhs), scale, 0), ToRegister(lhs));
+                        if (IsPowerOfTwo(b)) {
+                            masm.shll(Imm32(FloorLog2(b)), ToRegister(lhs));
+                        } else {
+                            Scale scale = ScaleFromElemWidth(b - 1);
+                            masm.leal(Operand(ToRegister(lhs), ToRegister(lhs), scale, 0), ToRegister(lhs));
+                        }
+                        return;
+                    }
+                }
                 // Use shift if cannot overflow and constant is power of 2
                 int32_t shift = FloorLog2(constant);
                 if ((1 << shift) == constant) {
                     masm.shll(Imm32(shift), ToRegister(lhs));
                     return;
                 }
             }
             masm.imull(Imm32(ToInt32(rhs)), ToRegister(lhs));
--- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -185,18 +185,18 @@ LIRGeneratorX86Shared::lowerForBitAndAnd
     baab->setOperand(0, useRegisterAtStart(lhs));
     baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
     add(baab, mir);
 }
 
 void
 LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs)
 {
-    // Note: If we need a negative zero check, lhs is used twice.
-    LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
+    // Note: If we need a negative zero check or rhs is constant, lhs is used twice.
+    LAllocation lhsCopy = (rhs->isConstant() || mul->canBeNegativeZero()) ? use(lhs) : LAllocation();
     LMulI* lir = new(alloc()) LMulI(useRegisterAtStart(lhs), useOrConstant(rhs), lhsCopy);
     if (mul->fallible())
         assignSnapshot(lir, Bailout_DoubleOutput);
     defineReuseInput(lir, mul, 0);
 }
 
 void
 LIRGeneratorX86Shared::lowerDivI(MDiv* div)