Bug 1275442 - Optimize imul by small constants. r=sunfish
MozReview-Commit-ID: LeBiqgsFFwI
--- a/js/src/jit-test/tests/wasm/basic-integer.js
+++ b/js/src/jit-test/tests/wasm/basic-integer.js
@@ -101,16 +101,19 @@ testUnary('i32', 'popcnt', 0xFFFFFFFF, 3
testUnary('i32', 'eqz', 0, 1);
testUnary('i32', 'eqz', 1, 0);
testUnary('i32', 'eqz', 0xFFFFFFFF, 0);
testBinary32('add', 40, 2, 42);
testBinary32('sub', 40, 2, 38);
testBinary32('mul', 40, 2, 80);
+// Test optimizations for small multiplier constants.
+for (var i = 0; i < 64; i++) testBinary32('mul', 1, i, i);
+testBinary32('mul', 1, 0x7fffffff, 0x7fffffff | 0);
testBinary32('div_s', -40, 2, -20);
testBinary32('div_u', -40, 2, 2147483628);
testBinary32('rem_s', 40, -3, 1);
testBinary32('rem_u', 40, -3, 40);
testBinary32('and', 42, 6, 2);
testBinary32('or', 42, 6, 46);
testBinary32('xor', 42, 2, 40);
testBinary32('shl', 40, 2, 160);
@@ -147,16 +150,18 @@ if (hasI64()) {
testBinary64('add', "0x1234567887654321", -1, "0x1234567887654320");
testBinary64('add', "0xffffffffffffffff", 1, 0);
testBinary64('sub', 40, 2, 38);
testBinary64('sub', "0x1234567887654321", "0x123456789", "0x12345677641fdb98");
testBinary64('sub', 3, 5, -2);
testBinary64('mul', 40, 2, 80);
testBinary64('mul', -1, 2, -2);
testBinary64('mul', 0x123456, "0x9876543210", "0xad77d2c5f941160");
+ // Test optimizations for small multiplier constants.
+ for (var i = 0; i < 64; i++) testBinary64('mul', 1, i, i);
testBinary64('div_s', -40, 2, -20);
testBinary64('div_s', "0x1234567887654321", 2, "0x91a2b3c43b2a190");
testBinary64('div_s', "0x1234567887654321", "0x1000000000", "0x1234567");
testBinary64('div_u', -40, 2, "0x7fffffffffffffec");
testBinary64('div_u', "0x1234567887654321", 9, "0x205d0b80f0b4059");
testBinary64('rem_s', 40, -3, 1);
testBinary64('rem_s', "0x1234567887654321", "0x1000000000", "0x887654321");
testBinary64('rem_s', "0x7fffffffffffffff", -1, 0);
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -15,16 +15,17 @@
#include "jit/MacroAssembler-inl.h"
#include "jit/shared/CodeGenerator-shared-inl.h"
using namespace js;
using namespace js::jit;
using mozilla::DebugOnly;
+using mozilla::FloorLog2;
static inline Register64
ToRegister64(const LAllocation* a)
{
return Register64(ToRegister(a));
}
static inline Register64
@@ -408,16 +409,41 @@ CodeGeneratorX64::visitMulI64(LMulI64* l
return;
case 1:
// nop
return;
case 2:
masm.addq(lhs, lhs);
return;
default:
+ // Attempt to emit shl/lea pairs whenever the constant can be factored into two
+ // products that can be computed cheaply using shl and/or lea.
+ int64_t a = 0;
+ int64_t b = 0;
+ if ((constant % 9) == 0) {
+ a = 9;
+ b = constant / 9;
+ } else if ((constant % 5) == 0) {
+ a = 5;
+ b = constant / 5;
+ } else if ((constant % 3) == 0) {
+ a = 3;
+ b = constant / 3;
+ }
+ if (IsPowerOfTwo(b) || b == 3 || b == 5 || b == 9) {
+ Scale scale = ScaleFromElemWidth(a - 1);
+ masm.lea(Operand(lhs, lhs, scale, 0), lhs);
+ if (IsPowerOfTwo(b)) {
+ masm.shlq(Imm32(FloorLog2(b)), lhs);
+ } else {
+ Scale scale = ScaleFromElemWidth(b - 1);
+ masm.lea(Operand(lhs, lhs, scale, 0), lhs);
+ }
+ return;
+ }
if (constant > 0) {
// Use shift if constant is power of 2.
int32_t shift = mozilla::FloorLog2(constant);
if (int64_t(1) << shift == constant) {
masm.shlq(Imm32(shift), lhs);
return;
}
}
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -965,16 +965,17 @@ class MulNegativeZeroCheck : public OutO
}
};
void
CodeGeneratorX86Shared::visitMulI(LMulI* ins)
{
const LAllocation* lhs = ins->lhs();
const LAllocation* rhs = ins->rhs();
+ const LAllocation* lhsCopy = ins->lhsCopy();
MMul* mul = ins->mir();
MOZ_ASSERT_IF(mul->mode() == MMul::Integer, !mul->canBeNegativeZero() && !mul->canOverflow());
if (rhs->isConstant()) {
// Bailout on -0.0
int32_t constant = ToInt32(rhs);
if (mul->canBeNegativeZero() && constant <= 0) {
Assembler::Condition bailoutCond = (constant == 0) ? Assembler::Signed : Assembler::Equal;
@@ -992,16 +993,51 @@ CodeGeneratorX86Shared::visitMulI(LMulI*
case 1:
// nop
return; // escape overflow check;
case 2:
masm.addl(ToOperand(lhs), ToRegister(lhs));
break;
default:
if (!mul->canOverflow() && constant > 0) {
+ if (IsPowerOfTwo(constant + 1)) {
+ masm.shll(Imm32(FloorLog2(constant + 1)), ToRegister(lhs));
+ masm.subl(ToOperand(lhsCopy), ToRegister(lhs));
+ return;
+ } else if (IsPowerOfTwo(constant - 1)) {
+ masm.shll(Imm32(FloorLog2(constant - 1)), ToRegister(lhs));
+ masm.addl(ToOperand(lhsCopy), ToRegister(lhs));
+ return;
+ } else {
+ // Attempt to emit shl/lea pairs whenever the constant can be factored into two
+ // products that can be computed cheaply using shl and/or lea.
+ int32_t a = 0;
+ int32_t b = 0;
+ if ((constant % 9) == 0) {
+ a = 9;
+ b = constant / 9;
+ } else if ((constant % 5) == 0) {
+ a = 5;
+ b = constant / 5;
+ } else if ((constant % 3) == 0) {
+ a = 3;
+ b = constant / 3;
+ }
+ if (IsPowerOfTwo(b) || b == 3 || b == 5 || b == 9) {
+ Scale scale = ScaleFromElemWidth(a - 1);
+ masm.leal(Operand(ToRegister(lhs), ToRegister(lhs), scale, 0), ToRegister(lhs));
+ if (IsPowerOfTwo(b)) {
+ masm.shll(Imm32(FloorLog2(b)), ToRegister(lhs));
+ } else {
+ Scale scale = ScaleFromElemWidth(b - 1);
+ masm.leal(Operand(ToRegister(lhs), ToRegister(lhs), scale, 0), ToRegister(lhs));
+ }
+ return;
+ }
+ }
// Use shift if cannot overflow and constant is power of 2
int32_t shift = FloorLog2(constant);
if ((1 << shift) == constant) {
masm.shll(Imm32(shift), ToRegister(lhs));
return;
}
}
masm.imull(Imm32(ToInt32(rhs)), ToRegister(lhs));
--- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -185,18 +185,18 @@ LIRGeneratorX86Shared::lowerForBitAndAnd
baab->setOperand(0, useRegisterAtStart(lhs));
baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
add(baab, mir);
}
void
LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs)
{
- // Note: If we need a negative zero check, lhs is used twice.
- LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
+ // Note: If we need a negative zero check or rhs is constant, lhs is used twice.
+ LAllocation lhsCopy = (rhs->isConstant() || mul->canBeNegativeZero()) ? use(lhs) : LAllocation();
LMulI* lir = new(alloc()) LMulI(useRegisterAtStart(lhs), useOrConstant(rhs), lhsCopy);
if (mul->fallible())
assignSnapshot(lir, Bailout_DoubleOutput);
defineReuseInput(lir, mul, 0);
}
void
LIRGeneratorX86Shared::lowerDivI(MDiv* div)