Bug 1330012: When caching power table for filters, avoid and optimize out the degenerate cases. r?bas.schouten
MozReview-Commit-ID: GlJYE8SLwJC
--- a/gfx/2d/FilterNodeSoftware.cpp
+++ b/gfx/2d/FilterNodeSoftware.cpp
@@ -23,75 +23,89 @@
namespace mozilla {
namespace gfx {
namespace {
/**
* This class provides a way to get a pow() results in constant-time. It works
- * by caching 256 values for bases between 0 and 1 and a fixed exponent.
+ * by caching 129 ((1 << sCacheIndexPrecisionBits) + 1) values for bases between
+ * 0 and 1 and a fixed exponent.
**/
class PowCache
{
public:
PowCache()
+ : mNumPowTablePreSquares(-1)
{
- CacheForExponent(0.0f);
}
void CacheForExponent(Float aExponent)
{
- mExponent = aExponent;
+ // Since we are in the world where we only care about
+ // input and results in [0,1], there is no point in
+ // dealing with non-positive exponents.
+ if (aExponent <= 0) {
+ mNumPowTablePreSquares = -1;
+ return;
+ }
int numPreSquares = 0;
- while (numPreSquares < 5 && mExponent > (1 << (numPreSquares + 2))) {
+ while (numPreSquares < 5 && aExponent > (1 << (numPreSquares + 2))) {
numPreSquares++;
}
mNumPowTablePreSquares = numPreSquares;
for (size_t i = 0; i < sCacheSize; i++) {
// sCacheSize is chosen in such a way that a takes values
// from 0.0 to 1.0 inclusive.
Float a = i / Float(1 << sCacheIndexPrecisionBits);
MOZ_ASSERT(0.0f <= a && a <= 1.0f, "We only want to cache for bases between 0 and 1.");
for (int j = 0; j < mNumPowTablePreSquares; j++) {
a = sqrt(a);
}
- uint32_t cachedInt = pow(a, mExponent) * (1 << sOutputIntPrecisionBits);
+ uint32_t cachedInt = pow(a, aExponent) * (1 << sOutputIntPrecisionBits);
MOZ_ASSERT(cachedInt < (1 << (sizeof(mPowTable[i]) * 8)), "mPowCache integer type too small");
mPowTable[i] = cachedInt;
}
}
+ // Only call Pow() if HasPowerTable() would return true, to avoid complicating
+ // this code and having it just return (1 << sOutputIntPrecisionBits))
uint16_t Pow(uint16_t aBase)
{
+ MOZ_ASSERT(HasPowerTable());
// Results should be similar to what the following code would produce:
// Float x = Float(aBase) / (1 << sInputIntPrecisionBits);
- // return uint16_t(pow(x, mExponent) * (1 << sOutputIntPrecisionBits));
+ // return uint16_t(pow(x, aExponent) * (1 << sOutputIntPrecisionBits));
MOZ_ASSERT(aBase <= (1 << sInputIntPrecisionBits), "aBase needs to be between 0 and 1!");
uint32_t a = aBase;
for (int j = 0; j < mNumPowTablePreSquares; j++) {
a = a * a >> sInputIntPrecisionBits;
}
uint32_t i = a >> (sInputIntPrecisionBits - sCacheIndexPrecisionBits);
MOZ_ASSERT(i < sCacheSize, "out-of-bounds mPowTable access");
return mPowTable[i];
}
static const int sInputIntPrecisionBits = 15;
static const int sOutputIntPrecisionBits = 15;
static const int sCacheIndexPrecisionBits = 7;
+ inline bool HasPowerTable() const
+ {
+ return mNumPowTablePreSquares >= 0;
+ }
+
private:
static const size_t sCacheSize = (1 << sCacheIndexPrecisionBits) + 1;
- Float mExponent;
int mNumPowTablePreSquares;
uint16_t mPowTable[sCacheSize];
};
class PointLightSoftware
{
public:
bool SetAttribute(uint32_t aIndex, Float) { return false; }
@@ -3382,24 +3396,33 @@ SpotLightSoftware::GetVectorToLight(cons
uint32_t
SpotLightSoftware::GetColor(uint32_t aLightColor, const Point3D &aVectorToLight)
{
union {
uint32_t color;
uint8_t colorC[4];
};
- color = aLightColor;
+
Float dot = -aVectorToLight.DotProduct(mVectorFromFocusPointToLight);
- uint16_t doti = dot * (dot >= 0) * (1 << PowCache::sInputIntPrecisionBits);
- uint32_t tmp = mPowCache.Pow(doti) * (dot >= mLimitingConeCos);
- MOZ_ASSERT(tmp <= (1 << PowCache::sOutputIntPrecisionBits), "pow() result must not exceed 1.0");
- colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] * tmp) >> PowCache::sOutputIntPrecisionBits);
- colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] * tmp) >> PowCache::sOutputIntPrecisionBits);
- colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] * tmp) >> PowCache::sOutputIntPrecisionBits);
+ if (!mPowCache.HasPowerTable()) {
+ dot *= (dot >= mLimitingConeCos);
+ color = aLightColor;
+ colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] *= dot;
+ colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] *= dot;
+ colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] *= dot;
+ } else {
+ color = aLightColor;
+ uint16_t doti = dot * (dot >= 0) * (1 << PowCache::sInputIntPrecisionBits);
+ uint32_t tmp = mPowCache.Pow(doti) * (dot >= mLimitingConeCos);
+ MOZ_ASSERT(tmp <= (1 << PowCache::sOutputIntPrecisionBits), "pow() result must not exceed 1.0");
+ colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] * tmp) >> PowCache::sOutputIntPrecisionBits);
+ colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] * tmp) >> PowCache::sOutputIntPrecisionBits);
+ colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] * tmp) >> PowCache::sOutputIntPrecisionBits);
+ }
colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_A] = 255;
return color;
}
void
DistantLightSoftware::Prepare()
{
const double radPerDeg = M_PI / 180.0;
@@ -3628,16 +3651,19 @@ uint32_t
SpecularLightingSoftware::LightPixel(const Point3D &aNormal,
const Point3D &aVectorToLight,
uint32_t aColor)
{
Point3D vectorToEye(0, 0, 1);
Point3D halfwayVector = Normalized(aVectorToLight + vectorToEye);
Float dotNH = aNormal.DotProduct(halfwayVector);
uint16_t dotNHi = uint16_t(dotNH * (dotNH >= 0) * (1 << PowCache::sInputIntPrecisionBits));
+ // The exponent for specular is in [1,128] range, so we don't need to check and
+ // optimize for the "default power table" scenario here.
+ MOZ_ASSERT(mPowCache.HasPowerTable());
uint32_t specularNHi = uint32_t(mSpecularConstantInt) * mPowCache.Pow(dotNHi) >> 8;
union {
uint32_t bgra;
uint8_t components[4];
} color = { aColor };
color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_B] =
umin(