Bug 1408461 - Update webrender to commit 7892f5364bc4d35c7a9b42949f0ace4cc54f8b3c. r?jrmuizel draft
author Kartikaya Gupta <kgupta@mozilla.com>
Wed, 18 Oct 2017 12:37:51 -0400
changeset 682710 903528f45bbf6313d7a39ec96963be10ce9984dc
parent 682709 2fe4b5ce14d5c5f7d85ed4312b2312f3fa13adc0
child 682711 59eb71e038adb407e23a59338a31d40b9dcdd206
push id 85109
push user kgupta@mozilla.com
push date Wed, 18 Oct 2017 16:42:10 +0000
Bug 1408461 - Update webrender to commit 7892f5364bc4d35c7a9b42949f0ace4cc54f8b3c. r?jrmuizel MozReview-Commit-ID: 1n7EzemevQX
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 The version of WebRender currently in the tree is:
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -6,31 +6,32 @@ license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 build = "build.rs"
 default = ["freetype-lib"]
 freetype-lib = ["freetype/servo-freetype-sys"]
 profiler = ["thread_profiler/thread_profiler"]
 debugger = ["ws", "serde_json", "serde", "serde_derive"]
+query = []
 app_units = "0.5.6"
 bincode = "0.9"
 byteorder = "1.0"
 euclid = "0.15.2"
 fxhash = "0.2.1"
 gleam = "0.4.8"
 lazy_static = "0.2"
 log = "0.3"
 num-traits = "0.1.32"
 time = "0.1"
 rayon = "0.8"
 webrender_api = {path = "../webrender_api"}
-bitflags = "0.9"
+bitflags = "1.0"
 thread_profiler = "0.1.1"
 plane-split = "0.6"
 ws = { optional = true, version = "0.7.3" }
 serde_json = { optional = true, version = "1.0" }
 serde = { optional = true, version = "1.0" }
 serde_derive = { optional = true, version = "1.0" }
--- a/gfx/webrender/examples/common/boilerplate.rs
+++ b/gfx/webrender/examples/common/boilerplate.rs
@@ -182,44 +182,44 @@ pub fn main_wrapper(example: &mut Exampl
                 glutin::Event::KeyboardInput(_, _, Some(glutin::VirtualKeyCode::Q)) => break 'outer,
                 ) => {
                     let mut flags = renderer.get_debug_flags();
-                    flags.toggle(webrender::PROFILER_DBG);
+                    flags.toggle(webrender::DebugFlags::PROFILER_DBG);
                 ) => {
                     let mut flags = renderer.get_debug_flags();
-                    flags.toggle(webrender::RENDER_TARGET_DBG);
+                    flags.toggle(webrender::DebugFlags::RENDER_TARGET_DBG);
                 ) => {
                     let mut flags = renderer.get_debug_flags();
-                    flags.toggle(webrender::TEXTURE_CACHE_DBG);
+                    flags.toggle(webrender::DebugFlags::TEXTURE_CACHE_DBG);
                 ) => {
                     let mut flags = renderer.get_debug_flags();
-                    flags.toggle(webrender::ALPHA_PRIM_DBG);
+                    flags.toggle(webrender::DebugFlags::ALPHA_PRIM_DBG);
                 ) => {
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/brush.glsl
@@ -0,0 +1,81 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+varying vec2 vLocalPos;
+flat varying vec4 vLocalRect;
+struct BrushInstance {
+    int picture_address;
+    int prim_address;
+BrushInstance load_brush() {
+	BrushInstance bi;
+    bi.picture_address = aData0.x;
+    bi.prim_address = aData0.y;
+    return bi;
+ The dynamic picture that this brush exists on. Right now, it
+ contains minimal information. In the future, it will describe
+ the transform mode of primitives on this picture, among other things.
+ */
+struct PictureTask {
+    RectWithSize target_rect;
+PictureTask fetch_picture_task(int index) {
+    ivec2 uv = get_fetch_uv(index, VECS_PER_RENDER_TASK);
+    vec4 target_rect = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(0, 0));
+    PictureTask task = PictureTask(RectWithSize(target_rect.xy, target_rect.zw));
+    return task;
+void main(void) {
+    // Load the brush instance from vertex attributes.
+    BrushInstance brush = load_brush();
+    // Fetch the dynamic picture that we are drawing on.
+    PictureTask pic_task = fetch_picture_task(brush.picture_address);
+    // Load the geometry for this brush. For now, this is simply the
+    // local rect of the primitive. In the future, this will support
+    // loading segment rects, and other rect formats (glyphs).
+    PrimitiveGeometry geom = fetch_primitive_geometry(brush.prim_address);
+    // Write the (p0,p1) form of the primitive rect and the local position
+    // of this vertex. Specific brush shaders can use this information to
+    // interpolate texture coordinates etc.
+    vLocalRect = vec4(geom.local_rect.p0, geom.local_rect.p0 + geom.local_rect.size);
+    // Right now - pictures only support local positions. In the future, this
+    // will be expanded to support transform picture types (the common kind).
+    vec2 pos = pic_task.target_rect.p0 + aPosition.xy * pic_task.target_rect.size;
+    vLocalPos = aPosition.xy * pic_task.target_rect.size / uDevicePixelRatio;
+    // Run the specific brush VS code to write interpolators.
+    brush_vs(brush.prim_address, vLocalRect);
+    // Write the final position transformed by the orthographic device-pixel projection.
+    gl_Position = uTransform * vec4(pos, 0.0, 1.0);
+void main(void) {
+    // Run the specific brush FS code to output the color.
+    vec4 color = brush_fs(vLocalPos, vLocalRect);
+    // TODO(gw): Handle pre-multiply common code here as required.
+    oFragColor = color;
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/brush_mask.glsl
@@ -0,0 +1,61 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include shared,prim_shared,ellipse
+flat varying float vClipMode;
+flat varying vec4 vClipCenter_Radius_TL;
+flat varying vec4 vClipCenter_Radius_TR;
+flat varying vec4 vClipCenter_Radius_BR;
+flat varying vec4 vClipCenter_Radius_BL;
+struct BrushPrimitive {
+    float clip_mode;
+    float radius;
+BrushPrimitive fetch_brush_primitive(int address) {
+    vec4 data = fetch_from_resource_cache_1(address);
+    return BrushPrimitive(data.x, data.y);
+void brush_vs(int prim_address, vec4 prim_rect) {
+    // Load the specific primitive.
+    BrushPrimitive prim = fetch_brush_primitive(prim_address + 2);
+    // Write clip parameters
+    vClipMode = prim.clip_mode;
+    vec2 r = vec2(prim.radius);
+    vClipCenter_Radius_TL = vec4(prim_rect.xy + vec2(r.x, r.y), r);
+    vClipCenter_Radius_TR = vec4(prim_rect.zy + vec2(-r.x, r.y), r);
+    vClipCenter_Radius_BR = vec4(prim_rect.zw + vec2(-r.x, -r.y), r);
+    vClipCenter_Radius_BL = vec4(prim_rect.xw + vec2(r.x, -r.y), r);
+vec4 brush_fs(vec2 local_pos, vec4 local_rect) {
+    // TODO(gw): The mask code below is super-inefficient. Once we
+    // start using primitive segments in brush shaders, this can
+    // be made much faster.
+    float d = 0.0;
+    // Check if in valid clip region.
+    if (local_pos.x >= local_rect.x && local_pos.x < local_rect.z &&
+        local_pos.y >= local_rect.y && local_pos.y < local_rect.w) {
+        // Apply ellipse clip on each corner.
+        d = rounded_rect(local_pos,
+                         vClipCenter_Radius_TL,
+                         vClipCenter_Radius_TR,
+                         vClipCenter_Radius_BR,
+                         vClipCenter_Radius_BL);
+    }
+    return vec4(mix(d, 1.0 - d, vClipMode));
+#include brush
--- a/gfx/webrender/res/cs_blur.glsl
+++ b/gfx/webrender/res/cs_blur.glsl
@@ -26,17 +26,21 @@ void main(void) {
     RenderTaskData src_task = fetch_render_task(aBlurSourceTaskAddress);
     vec4 local_rect = task.data0;
     vec2 pos = mix(local_rect.xy,
                    local_rect.xy + local_rect.zw,
+#if defined WR_FEATURE_COLOR
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0).xy);
+    vec2 texture_size = vec2(textureSize(sCacheA8, 0).xy);
     vUv.z = src_task.data1.x;
     vBlurRadius = 3 * int(task.data1.y);
     vSigma = task.data1.y;
     switch (aBlurDirection) {
         case DIR_HORIZONTAL:
             vOffsetScale = vec2(1.0 / texture_size.x, 0.0);
@@ -53,53 +57,62 @@ void main(void) {
     vec2 uv1 = (src_task.data0.xy + src_task.data0.zw) / texture_size;
     vUv.xy = mix(uv0, uv1, aPosition.xy);
     gl_Position = uTransform * vec4(pos, 0.0, 1.0);
+#if defined WR_FEATURE_COLOR
+#define SAMPLE_TYPE vec4
+#define SAMPLE_TEXTURE(uv)  texture(sCacheRGBA8, uv)
+#define SAMPLE_TYPE float
+#define SAMPLE_TEXTURE(uv)  texture(sCacheA8, uv).r
 // TODO(gw): Write a fast path blur that handles smaller blur radii
 //           with a offset / weight uniform table and a constant
 //           loop iteration count!
 // TODO(gw): Make use of the bilinear sampling trick to reduce
 //           the number of texture fetches needed for a gaussian blur.
 void main(void) {
-    vec4 original_color = texture(sCacheRGBA8, vUv);
+    SAMPLE_TYPE original_color = SAMPLE_TEXTURE(vUv);
     // TODO(gw): The gauss function gets NaNs when blur radius
     //           is zero. In the future, detect this earlier
     //           and skip the blur passes completely.
     if (vBlurRadius == 0) {
-        oFragColor = original_color;
+        oFragColor = vec4(original_color);
     // Incremental Gaussian Coefficent Calculation (See GPU Gems 3 pp. 877 - 889)
     vec3 gauss_coefficient;
     gauss_coefficient.x = 1.0 / (sqrt(2.0 * 3.14159265) * vSigma);
     gauss_coefficient.y = exp(-0.5 / (vSigma * vSigma));
     gauss_coefficient.z = gauss_coefficient.y * gauss_coefficient.y;
     float gauss_coefficient_sum = 0.0;
-    vec4 avg_color = original_color * gauss_coefficient.x;
+    SAMPLE_TYPE avg_color = original_color * gauss_coefficient.x;
     gauss_coefficient_sum += gauss_coefficient.x;
     gauss_coefficient.xy *= gauss_coefficient.yz;
     for (int i=1 ; i <= vBlurRadius/2 ; ++i) {
         vec2 offset = vOffsetScale * float(i);
         vec2 st0 = clamp(vUv.xy - offset, vUvRect.xy, vUvRect.zw);
-        avg_color += texture(sCacheRGBA8, vec3(st0, vUv.z)) * gauss_coefficient.x;
+        avg_color += SAMPLE_TEXTURE(vec3(st0, vUv.z)) * gauss_coefficient.x;
         vec2 st1 = clamp(vUv.xy + offset, vUvRect.xy, vUvRect.zw);
-        avg_color += texture(sCacheRGBA8, vec3(st1, vUv.z)) * gauss_coefficient.x;
+        avg_color += SAMPLE_TEXTURE(vec3(st1, vUv.z)) * gauss_coefficient.x;
         gauss_coefficient_sum += 2.0 * gauss_coefficient.x;
         gauss_coefficient.xy *= gauss_coefficient.yz;
-    oFragColor = avg_color / gauss_coefficient_sum;
+    oFragColor = vec4(avg_color) / gauss_coefficient_sum;
deleted file mode 100644
--- a/gfx/webrender/res/cs_box_shadow.glsl
+++ /dev/null
@@ -1,188 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-#include shared,prim_shared
-varying vec2 vPos;
-flat varying vec2 vBorderRadii;
-flat varying float vBlurRadius;
-flat varying vec4 vBoxShadowRect;
-flat varying float vInverted;
-in ivec2 aPrimAddress;
-in int aTaskIndex;
-void main(void) {
-    RenderTaskData task = fetch_render_task(aTaskIndex);
-    BoxShadow bs = fetch_boxshadow_direct(ivec2(aPrimAddress.x + VECS_PER_PRIM_HEADER, aPrimAddress.y));
-    vec2 p0 = task.data0.xy;
-    vec2 p1 = p0 + task.data0.zw;
-    vec2 pos = mix(p0, p1, aPosition.xy);
-    vBorderRadii = bs.border_radius_edge_size_blur_radius_inverted.xx;
-    vBlurRadius = bs.border_radius_edge_size_blur_radius_inverted.z;
-    vInverted = bs.border_radius_edge_size_blur_radius_inverted.w;
-    vBoxShadowRect = vec4(bs.bs_rect.xy, bs.bs_rect.xy + bs.bs_rect.zw);
-    // The fragment shader expects logical units, beginning at where the
-    // blur radius begins.
-    // The first path of the equation gets the virtual position in
-    // logical pixels within the patch rectangle (accounting for
-    // bilinear offset). Then we add the start position of the
-    // box shadow rect and subtract the blur radius to get the
-    // virtual coordinates that the FS expects.
-    vPos = (pos - 1.0 - p0) / uDevicePixelRatio + bs.bs_rect.xy - vec2(2.0 * vBlurRadius);
-    gl_Position = uTransform * vec4(pos, 0.0, 1.0);
-// See http://asciimath.org to render the equations here.
-// The Gaussian function used for blurring:
-//     G_sigma(x) = 1/sqrt(2 pi sigma^2) e^(-x^2/(2 sigma^2))
-float gauss(float x, float sigma) {
-    float sigmaPow2 = sigma * sigma;
-    return 1.0 / sqrt(6.283185307179586 * sigmaPow2) * exp(-(x * x) / (2.0 * sigmaPow2));
-// An approximation of the error function, which is related to the integral of the Gaussian
-// function:
-//     "erf"(x) = 2/sqrt(pi) int_0^x e^(-t^2) dt
-//              ~~ 1 - 1 / (1 + a_1 x + a_2 x^2 + a_3 x^3 + a_4 x^4)^4
-// where:
-//     a_1 = 0.278393, a_2 = 0.230389, a_3 = 0.000972, a_4 = 0.078108
-// This approximation is accurate to `5 xx 10^-4`, more than accurate enough for our purposes.
-// See: https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions
-float erf(float x) {
-    bool negative = x < 0.0;
-    if (negative)
-        x = -x;
-    float x2 = x * x;
-    float x3 = x2 * x;
-    float x4 = x2 * x2;
-    float denom = 1.0 + 0.278393 * x + 0.230389 * x2 + 0.000972 * x3 + 0.078108 * x4;
-    float result = 1.0 - 1.0 / (denom * denom * denom * denom);
-    return negative ? -result : result;
-// A useful helper for calculating integrals of the Gaussian function via the error function:
-//      "erf"_sigma(x) = 2 int 1/sqrt(2 pi sigma^2) e^(-x^2/(2 sigma^2)) dx
-//                     = "erf"(x/(sigma sqrt(2)))
-float erfSigma(float x, float sigma) {
-    return erf(x / (sigma * 1.4142135623730951));
-// Returns the blurred color value from the box itself (not counting any rounded corners). `p_0` is
-// the vector distance to the top left corner of the box; `p_1` is the vector distance to its
-// bottom right corner.
-//      "colorFromRect"_sigma(p_0, p_1)
-//          = int_{p_{0_y}}^{p_{1_y}} int_{p_{1_x}}^{p_{0_x}} G_sigma(y) G_sigma(x) dx dy
-//          = 1/4 ("erf"_sigma(p_{1_x}) - "erf"_sigma(p_{0_x}))
-//              ("erf"_sigma(p_{1_y}) - "erf"_sigma(p_{0_y}))
-float colorFromRect(vec2 p0, vec2 p1, float sigma) {
-    return (erfSigma(p1.x, sigma) - erfSigma(p0.x, sigma)) *
-        (erfSigma(p1.y, sigma) - erfSigma(p0.y, sigma)) / 4.0;
-// Returns the `x` coordinate on the ellipse with the given radii for the given `y` coordinate:
-//      "ellipsePoint"(y, y_0, a, b) = a sqrt(1 - ((y - y_0) / b)^2)
-float ellipsePoint(float y, float y0, vec2 radii) {
-    float bStep = (y - y0) / radii.y;
-    return radii.x * sqrt(1.0 - bStep * bStep);
-// A helper function to compute the value that needs to be subtracted to accommodate the border
-// corners.
-//     "colorCutout"_sigma(x_{0_l}, x_{0_r}, y_0, y_{min}, y_{max}, a, b)
-//          = int_{y_{min}}^{y_{max}}
-//              int_{x_{0_r} + "ellipsePoint"(y, y_0, a, b)}^{x_{0_r} + a} G_sigma(y) G_sigma(x) dx
-//              + int_{x_{0_l} - a}^{x_{0_l} - "ellipsePoint"(y, y_0, a, b)} G_sigma(y) G_sigma(x)
-//                  dx dy
-//          = int_{y_{min}}^{y_{max}} 1/2 G_sigma(y)
-//              ("erf"_sigma(x_{0_r} + a) - "erf"_sigma(x_{0_r} + "ellipsePoint"(y, y_0, a, b)) +
-//               "erf"_sigma(x_{0_l} - "ellipsePoint"(y, y_0, a, b)) - "erf"_sigma(x_{0_l} - a))
-// with the outer integral evaluated numerically.
-float colorCutoutGeneral(float x0l,
-                         float x0r,
-                         float y0,
-                         float yMin,
-                         float yMax,
-                         vec2 radii,
-                         float sigma) {
-    float sum = 0.0;
-    for (float y = yMin; y <= yMax; y += 1.0) {
-        float xEllipsePoint = ellipsePoint(y, y0, radii);
-        sum += gauss(y, sigma) *
-            (erfSigma(x0r + radii.x, sigma) - erfSigma(x0r + xEllipsePoint, sigma) +
-             erfSigma(x0l - xEllipsePoint, sigma) - erfSigma(x0l - radii.x, sigma));
-    }
-    return sum / 2.0;
-// The value that needs to be subtracted to accommodate the top border corners.
-float colorCutoutTop(float x0l, float x0r, float y0, vec2 radii, float sigma) {
-    return colorCutoutGeneral(x0l, x0r, y0, y0, y0 + radii.y, radii, sigma);
-// The value that needs to be subtracted to accommodate the bottom border corners.
-float colorCutoutBottom(float x0l, float x0r, float y0, vec2 radii, float sigma) {
-    return colorCutoutGeneral(x0l, x0r, y0, y0 - radii.y, y0, radii, sigma);
-// The blurred color value for the point at `pos` with the top left corner of the box at
-// `p_{0_"rect"}` and the bottom right corner of the box at `p_{1_"rect"}`.
-float color(vec2 pos, vec2 p0Rect, vec2 p1Rect, vec2 radii, float sigma) {
-    // Compute the vector distances `p_0` and `p_1`.
-    vec2 p0 = p0Rect - pos, p1 = p1Rect - pos;
-    // Compute the basic color `"colorFromRect"_sigma(p_0, p_1)`. This is all we have to do if
-    // the box is unrounded.
-    float cRect = colorFromRect(p0, p1, sigma);
-    if (radii.x == 0.0 || radii.y == 0.0)
-        return cRect;
-    // Compute the inner corners of the box, taking border radii into account: `x_{0_l}`,
-    // `y_{0_t}`, `x_{0_r}`, and `y_{0_b}`.
-    float x0l = p0.x + radii.x;
-    float y0t = p1.y - radii.y;
-    float x0r = p1.x - radii.x;
-    float y0b = p0.y + radii.y;
-    // Compute the final color:
-    //
-    //     "colorFromRect"_sigma(p_0, p_1) -
-    //          ("colorCutoutTop"_sigma(x_{0_l}, x_{0_r}, y_{0_t}, a, b) +
-    //           "colorCutoutBottom"_sigma(x_{0_l}, x_{0_r}, y_{0_b}, a, b))
-    float cCutoutTop = colorCutoutTop(x0l, x0r, y0t, radii, sigma);
-    float cCutoutBottom = colorCutoutBottom(x0l, x0r, y0b, radii, sigma);
-    return cRect - (cCutoutTop + cCutoutBottom);
-void main(void) {
-    vec2 pos = vPos.xy;
-    vec2 p0Rect = vBoxShadowRect.xy, p1Rect = vBoxShadowRect.zw;
-    vec2 radii = vBorderRadii.xy;
-    float sigma = vBlurRadius / 2.0;
-    float value = color(pos, p0Rect, p1Rect, radii, sigma);
-    value = max(value, 0.0);
-    oFragColor = dither(vec4(vInverted == 1.0 ? 1.0 - value : value));
--- a/gfx/webrender/res/cs_clip_border.glsl
+++ b/gfx/webrender/res/cs_clip_border.glsl
@@ -150,29 +150,28 @@ void main(void) {
     float d0 = distance_to_line(vPoint_Tangent0.xy,
     float d1 = distance_to_line(vPoint_Tangent1.xy,
     // Get AA widths based on zoom / scale etc.
-    vec2 fw = fwidth(local_pos);
-    float afwidth = length(fw);
+    float aa_range = compute_aa_range(local_pos);
     // SDF subtract edges for dash clip
     float dash_distance = max(d0, -d1);
     // Get distance from dot.
     float dot_distance = distance(clip_relative_pos, vDotParams.xy) - vDotParams.z;
     // Select between dot/dash clip based on mode.
     float d = mix(dash_distance, dot_distance, vAlphaMask.x);
-    // Apply AA over half a device pixel for the clip.
-    d = 1.0 - smoothstep(0.0, 0.5 * afwidth, d);
+    // Apply AA.
+    d = distance_aa(aa_range, d);
     // Completely mask out clip if zero'ing out the rect.
     d = d * vAlphaMask.y;
     oFragColor = vec4(d, 0.0, 0.0, 1.0);
--- a/gfx/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.glsl
@@ -76,86 +76,45 @@ void main(void) {
     vPos = vi.local_pos;
     vClipMode = clip.rect.mode.x;
     RectWithEndpoint clip_rect = to_rect_with_endpoint(local_rect);
-    vClipCenter_Radius_TL = vec4(clip_rect.p0 + clip.top_left.outer_inner_radius.xy,
-                                 clip.top_left.outer_inner_radius.xy);
+    vec2 r_tl = clip.top_left.outer_inner_radius.xy;
+    vec2 r_tr = clip.top_right.outer_inner_radius.xy;
+    vec2 r_br = clip.bottom_right.outer_inner_radius.xy;
+    vec2 r_bl = clip.bottom_left.outer_inner_radius.xy;
-    vClipCenter_Radius_TR = vec4(clip_rect.p1.x - clip.top_right.outer_inner_radius.x,
-                                 clip_rect.p0.y + clip.top_right.outer_inner_radius.y,
-                                 clip.top_right.outer_inner_radius.xy);
+    vClipCenter_Radius_TL = vec4(clip_rect.p0 + r_tl, r_tl);
-    vClipCenter_Radius_BR = vec4(clip_rect.p1 - clip.bottom_right.outer_inner_radius.xy,
-                                 clip.bottom_right.outer_inner_radius.xy);
+    vClipCenter_Radius_TR = vec4(clip_rect.p1.x - r_tr.x,
+                                 clip_rect.p0.y + r_tr.y,
+                                 r_tr);
-    vClipCenter_Radius_BL = vec4(clip_rect.p0.x + clip.bottom_left.outer_inner_radius.x,
-                                 clip_rect.p1.y - clip.bottom_left.outer_inner_radius.y,
-                                 clip.bottom_left.outer_inner_radius.xy);
+    vClipCenter_Radius_BR = vec4(clip_rect.p1 - r_br, r_br);
+    vClipCenter_Radius_BL = vec4(clip_rect.p0.x + r_bl.x,
+                                 clip_rect.p1.y - r_bl.y,
+                                 r_bl);
-float clip_against_ellipse_if_needed(vec2 pos,
-                                     float current_distance,
-                                     vec4 ellipse_center_radius,
-                                     vec2 sign_modifier,
-                                     float afwidth) {
-    float ellipse_distance = distance_to_ellipse(pos - ellipse_center_radius.xy,
-                                                 ellipse_center_radius.zw);
-    return mix(current_distance,
-               ellipse_distance + afwidth,
-               all(lessThan(sign_modifier * pos, sign_modifier * ellipse_center_radius.xy)));
-float rounded_rect(vec2 pos) {
-    float current_distance = 0.0;
-    // Apply AA
-    float afwidth = 0.5 * length(fwidth(pos));
-    // Clip against each ellipse.
-    current_distance = clip_against_ellipse_if_needed(pos,
-                                                      current_distance,
-                                                      vClipCenter_Radius_TL,
-                                                      vec2(1.0),
-                                                      afwidth);
-    current_distance = clip_against_ellipse_if_needed(pos,
-                                                      current_distance,
-                                                      vClipCenter_Radius_TR,
-                                                      vec2(-1.0, 1.0),
-                                                      afwidth);
-    current_distance = clip_against_ellipse_if_needed(pos,
-                                                      current_distance,
-                                                      vClipCenter_Radius_BR,
-                                                      vec2(-1.0),
-                                                      afwidth);
-    current_distance = clip_against_ellipse_if_needed(pos,
-                                                      current_distance,
-                                                      vClipCenter_Radius_BL,
-                                                      vec2(1.0, -1.0),
-                                                      afwidth);
-    return smoothstep(0.0, afwidth, 1.0 - current_distance);
 void main(void) {
     float alpha = 1.f;
     vec2 local_pos = init_transform_fs(vPos, alpha);
-    float clip_alpha = rounded_rect(local_pos);
+    float clip_alpha = rounded_rect(local_pos,
+                                    vClipCenter_Radius_TL,
+                                    vClipCenter_Radius_TR,
+                                    vClipCenter_Radius_BR,
+                                    vClipCenter_Radius_BL);
     float combined_alpha = min(alpha, clip_alpha);
     // Select alpha or inverse alpha depending on clip in/out.
     float final_alpha = mix(combined_alpha, 1.0 - combined_alpha, vClipMode);
     oFragColor = vec4(final_alpha, 0.0, 0.0, 1.0);
--- a/gfx/webrender/res/cs_text_run.glsl
+++ b/gfx/webrender/res/cs_text_run.glsl
@@ -13,52 +13,52 @@ flat varying vec4 vColor;
 // as text-shadow.
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.specific_prim_address);
     int glyph_index = prim.user_data0;
     int resource_address = prim.user_data1;
-    int text_shadow_address = prim.user_data2;
+    int picture_address = prim.user_data2;
-    // Fetch the parent text-shadow for this primitive. This allows the code
+    // Fetch the owning picture for this primitive. This allows the code
     // below to normalize the glyph offsets relative to the original text
     // shadow rect, which is the union of all elements that make up this
     // text shadow. This allows the text shadow to be rendered at an
     // arbitrary location in a render target (provided by the render
     // task render_target_origin field).
-    PrimitiveGeometry shadow_geom = fetch_primitive_geometry(text_shadow_address);
-    TextShadow shadow = fetch_text_shadow(text_shadow_address + VECS_PER_PRIM_HEADER);
+    PrimitiveGeometry shadow_geom = fetch_primitive_geometry(picture_address);
+    Picture pic = fetch_picture(picture_address + VECS_PER_PRIM_HEADER);
     Glyph glyph = fetch_glyph(prim.specific_prim_address,
     GlyphResource res = fetch_glyph_resource(resource_address);
     // Glyphs size is already in device-pixels.
     // The render task origin is in device-pixels. Offset that by
     // the glyph offset, relative to its primitive bounding rect.
     vec2 size = (res.uv_rect.zw - res.uv_rect.xy) * res.scale;
     vec2 local_pos = glyph.offset + vec2(res.offset.x, -res.offset.y) / uDevicePixelRatio;
     vec2 origin = prim.task.render_target_origin +
-                  uDevicePixelRatio * (local_pos + shadow.offset - shadow_geom.local_rect.p0);
+                  uDevicePixelRatio * (local_pos + pic.offset - shadow_geom.local_rect.p0);
     vec4 local_rect = vec4(origin, size);
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
     vec2 pos = mix(local_rect.xy,
                    local_rect.xy + local_rect.zw,
     vUv = vec3(mix(st0, st1, aPosition.xy), res.layer);
-    vColor = shadow.color;
+    vColor = pic.color;
     gl_Position = uTransform * vec4(pos, 0.0, 1.0);
 void main(void) {
     float a = texture(sColor0, vUv).a;
--- a/gfx/webrender/res/ellipse.glsl
+++ b/gfx/webrender/res/ellipse.glsl
@@ -61,9 +61,60 @@ float distance_to_ellipse(vec2 p, vec2 r
     // a performance win for the circle case too.
     if (radii.x == radii.y) {
         return length(p) - radii.x;
     } else {
         return sdEllipse(p, radii);
+float clip_against_ellipse_if_needed(
+    vec2 pos,
+    float current_distance,
+    vec4 ellipse_center_radius,
+    vec2 sign_modifier
+) {
+    float ellipse_distance = distance_to_ellipse(pos - ellipse_center_radius.xy,
+                                                 ellipse_center_radius.zw);
+    return mix(current_distance,
+               ellipse_distance,
+               all(lessThan(sign_modifier * pos, sign_modifier * ellipse_center_radius.xy)));
+float rounded_rect(vec2 pos,
+                   vec4 clip_center_radius_tl,
+                   vec4 clip_center_radius_tr,
+                   vec4 clip_center_radius_br,
+                   vec4 clip_center_radius_bl) {
+    // Start with a negative value (means "inside") for all fragments that are not
+    // in a corner. If the fragment is in a corner, one of the clip_against_ellipse_if_needed
+    // calls below will update it.
+    float current_distance = -1.0;
+    // Clip against each ellipse.
+    current_distance = clip_against_ellipse_if_needed(pos,
+                                                      current_distance,
+                                                      clip_center_radius_tl,
+                                                      vec2(1.0));
+    current_distance = clip_against_ellipse_if_needed(pos,
+                                                      current_distance,
+                                                      clip_center_radius_tr,
+                                                      vec2(-1.0, 1.0));
+    current_distance = clip_against_ellipse_if_needed(pos,
+                                                      current_distance,
+                                                      clip_center_radius_br,
+                                                      vec2(-1.0));
+    current_distance = clip_against_ellipse_if_needed(pos,
+                                                      current_distance,
+                                                      clip_center_radius_bl,
+                                                      vec2(1.0, -1.0));
+    // Apply AA
+    // See the comment in compute_aa_range (prim_shared) about the choice of constants.
+    float aa_range = compute_aa_range(pos);
+    return distance_aa(aa_range, current_distance);
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -654,25 +654,25 @@ struct Rectangle {
     vec4 color;
 Rectangle fetch_rectangle(int address) {
     vec4 data = fetch_from_resource_cache_1(address);
     return Rectangle(data);
-struct TextShadow {
+struct Picture {
     vec4 color;
     vec2 offset;
     float blur_radius;
-TextShadow fetch_text_shadow(int address) {
+Picture fetch_picture(int address) {
     vec4 data[2] = fetch_from_resource_cache_2(address);
-    return TextShadow(data[0], data[1].xy, data[1].z);
+    return Picture(data[0], data[1].xy, data[1].z);
 struct TextRun {
     vec4 color;
     vec2 offset;
     int subpx_dir;
@@ -687,61 +687,73 @@ struct Image {
     vec4 sub_rect;                          // If negative, ignored.
 Image fetch_image(int address) {
     vec4 data[2] = fetch_from_resource_cache_2(address);
     return Image(data[0], data[1]);
-struct BoxShadow {
-    vec4 src_rect;
-    vec4 bs_rect;
-    vec4 color;
-    vec4 border_radius_edge_size_blur_radius_inverted;
-BoxShadow fetch_boxshadow(int address) {
-    vec4 data[4] = fetch_from_resource_cache_4(address);
-    return BoxShadow(data[0], data[1], data[2], data[3]);
-BoxShadow fetch_boxshadow_direct(ivec2 address) {
-    vec4 data[4] = fetch_from_resource_cache_4_direct(address);
-    return BoxShadow(data[0], data[1], data[2], data[3]);
 void write_clip(vec2 global_pos, ClipArea area) {
     vec2 texture_size = vec2(textureSize(sSharedCacheA8, 0).xy);
     vec2 uv = global_pos + area.task_bounds.xy - area.screen_origin_target_index.xy;
     vClipMaskUvBounds = area.task_bounds / texture_size.xyxy;
     vClipMaskUv = vec3(uv / texture_size, area.screen_origin_target_index.z);
+/// Find the appropriate half range to apply the AA smoothstep over.
+/// This range represents a coefficient to go from one CSS pixel to half a device pixel.
+float compute_aa_range(vec2 position) {
+    // The constant factor is chosen to compensate for the fact that length(fw) is equal
+    // to sqrt(2) times the device pixel ratio in the typical case. 0.5/sqrt(2) = 0.35355.
+    //
+    // This coefficient is chosen to ensure that any sample 0.5 pixels or more inside of
+    // the shape has no anti-aliasing applied to it (since pixels are sampled at their center,
+    // such a pixel (axis aligned) is fully inside the border). We need this so that antialiased
+    // curves properly connect with non-antialiased vertical or horizontal lines, among other things.
+    //
+    // Using larger aa steps is quite common when rendering shapes with distance fields.
+    // It gives a smoother (although blurrier) look by extending the range that is smoothed
+    // to produce the anti aliasing. In our case, however, extending the range inside of
+    // the shape causes noticeable artifacts at the junction between an antialiased corner
+    // and a straight edge.
+    // We may want to adjust this constant in specific scenarios (for example keep the principled
+    // value for straight edges where we want pixel-perfect equivalence with non antialiased lines
+    // when axis aligned, while selecting a larger and smoother aa range on curves).
+    return 0.35355 * length(fwidth(position));
+/// Return the blending coefficient for distance antialiasing.
+/// 1.0 means fully inside the shape (negative distance), 0.0 means fully outside.
+float distance_aa(float aa_range, float signed_distance) {
+    return 1.0 - smoothstep(-aa_range, aa_range, signed_distance);
 float signed_distance_rect(vec2 pos, vec2 p0, vec2 p1) {
     vec2 d = max(p0 - pos, pos - p1);
     return length(max(vec2(0.0), d)) + min(0.0, max(d.x, d.y));
 vec2 init_transform_fs(vec3 local_pos, out float fragment_alpha) {
     fragment_alpha = 1.0;
     vec2 pos = local_pos.xy / local_pos.z;
     // Now get the actual signed distance.
     float d = signed_distance_rect(pos, vLocalBounds.xy, vLocalBounds.zw);
     // Find the appropriate distance to apply the AA smoothstep over.
-    float afwidth = 0.5 * length(fwidth(pos.xy));
+    float aa_range = compute_aa_range(pos.xy);
     // Only apply AA to fragments outside the signed distance field.
-    fragment_alpha = 1.0 - smoothstep(0.0, afwidth, d);
+    fragment_alpha = distance_aa(aa_range, d);
     return pos;
 float do_clip() {
     // anything outside of the mask is considered transparent
     bvec4 inside = lessThanEqual(
--- a/gfx/webrender/res/ps_border_corner.glsl
+++ b/gfx/webrender/res/ps_border_corner.glsl
@@ -319,21 +319,17 @@ void main(void) {
     alpha = 0.0;
     vec2 local_pos = init_transform_fs(vLocalPos, alpha);
     vec2 local_pos = vLocalPos;
     alpha = min(alpha, do_clip());
-    // Find the appropriate distance to apply the AA smoothstep over.
-    // Using 0.7 instead of 0.5 for the step compensates for the fact that smoothstep
-    // is smooth at its endpoints and has a steeper maximum slope than a linear ramp.
-    vec2 fw = fwidth(local_pos);
-    float aa_step = 0.7 * length(fw);
+    float aa_range = compute_aa_range(local_pos);
     float distance_for_color;
     float color_mix_factor;
     // Only apply the clip AA if inside the clip region. This is
     // necessary for correctness when the border width is greater
     // than the border radius.
     if (all(lessThan(local_pos * vClipSign, vClipCenter * vClipSign))) {
@@ -344,39 +340,36 @@ void main(void) {
         // error of half a pixel towards the exterior of the curve (See issue #1750).
         // This error is corrected by offsetting the distance by half a device pixel.
         // This is not entirely correct: it leaves an error that varies between
         // 0 and (sqrt(2) - 1)/2 = 0.2 pixels but it is hardly noticeable and is better
         // than the constant sqrt(2)/2 px error without the correction.
         // To correct this exactly we would need to offset p by half a pixel in the
         // direction of the center of the ellipse (a different offset for each corner).
-        // A half device pixel in css pixels (using the average of width and height in case
-        // there is any kind of transform applied).
-        float half_px = 0.25 * (fw.x + fw.y);
         // Get signed distance from the inner/outer clips.
-        float d0 = distance_to_ellipse(p, vRadii0.xy) + half_px;
-        float d1 = distance_to_ellipse(p, vRadii0.zw) + half_px;
-        float d2 = distance_to_ellipse(p, vRadii1.xy) + half_px;
-        float d3 = distance_to_ellipse(p, vRadii1.zw) + half_px;
+        float d0 = distance_to_ellipse(p, vRadii0.xy);
+        float d1 = distance_to_ellipse(p, vRadii0.zw);
+        float d2 = distance_to_ellipse(p, vRadii1.xy);
+        float d3 = distance_to_ellipse(p, vRadii1.zw);
         // SDF subtract main radii
-        float d_main = max(d0, aa_step - d1);
+        float d_main = max(d0, -d1);
         // SDF subtract inner radii (double style borders)
-        float d_inner = max(d2 - aa_step, -d3);
+        float d_inner = max(d2, -d3);
         // Select how to combine the SDF based on border style.
         float d = mix(max(d_main, -d_inner), d_main, vSDFSelect);
         // Only apply AA to fragments outside the signed distance field.
-        alpha = min(alpha, 1.0 - smoothstep(0.0, aa_step, d));
+        alpha = min(alpha, distance_aa(aa_range, d));
         // Get the groove/ridge mix factor.
-        color_mix_factor = smoothstep(-aa_step, aa_step, -d2);
+        color_mix_factor = distance_aa(aa_range, d2);
     } else {
         // Handle the case where the fragment is outside the clip
         // region in a corner. This occurs when border width is
         // greater than border radius.
         // Get linear distances along horizontal and vertical edges.
         vec2 d0 = vClipSign.xx * (local_pos.xx - vEdgeDistance.xz);
         vec2 d1 = vClipSign.yy * (local_pos.yy - vEdgeDistance.yw);
@@ -398,14 +391,14 @@ void main(void) {
     // Mix inner/outer color.
     vec4 color0 = mix(vColor00, vColor01, color_mix_factor);
     vec4 color1 = mix(vColor10, vColor11, color_mix_factor);
     // Select color based on side of line. Get distance from the
     // reference line, and then apply AA along the edge.
     float ld = distance_to_line(vColorEdgeLine.xy, vColorEdgeLine.zw, local_pos);
-    float m = smoothstep(-aa_step, aa_step, ld);
+    float m = distance_aa(aa_range, -ld);
     vec4 color = mix(color0, color1, m);
     oFragColor = color * vec4(1.0, 1.0, 1.0, alpha);
--- a/gfx/webrender/res/ps_border_edge.glsl
+++ b/gfx/webrender/res/ps_border_edge.glsl
@@ -248,18 +248,17 @@ void main(void) {
     vec2 local_pos = init_transform_fs(vLocalPos, alpha);
     vec2 local_pos = vLocalPos;
     alpha = min(alpha, do_clip());
     // Find the appropriate distance to apply the step over.
-    vec2 fw = fwidth(local_pos);
-    float afwidth = length(fw);
+    float aa_range = compute_aa_range(local_pos);
     // Applies the math necessary to draw a style: double
     // border. In the case of a solid border, the vertex
     // shader sets interpolator values that make this have
     // no effect.
     // Select the x/y coord, depending on which axis this edge is.
     vec2 pos = mix(local_pos.xy, local_pos.yx, vAxisSelect);
@@ -286,18 +285,16 @@ void main(void) {
     float x = mod(pos.y - vClipParams.x, vClipParams.y);
     // Calculate dash alpha (on/off) based on dash length
     float dash_alpha = step(x, vClipParams.z);
     // Get the dot alpha
     vec2 dot_relative_pos = vec2(x, pos.x) - vClipParams.zw;
     float dot_distance = length(dot_relative_pos) - vClipParams.z;
-    float dot_alpha = 1.0 - smoothstep(-0.5 * afwidth,
-                                        0.5 * afwidth,
-                                        dot_distance);
+    float dot_alpha = distance_aa(aa_range, dot_distance);
     // Select between dot/dash alpha based on clip mode.
     alpha = min(alpha, mix(dash_alpha, dot_alpha, vClipSelect));
     oFragColor = color * vec4(1.0, 1.0, 1.0, alpha);
deleted file mode 100644
--- a/gfx/webrender/res/ps_box_shadow.glsl
+++ /dev/null
@@ -1,73 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-#include shared,prim_shared
-flat varying vec4 vColor;
-varying vec3 vUv;
-flat varying vec2 vMirrorPoint;
-flat varying vec4 vCacheUvRectCoords;
-#define BS_HEADER_VECS 4
-RectWithSize fetch_instance_geometry(int address) {
-    vec4 data = fetch_from_resource_cache_1(address);
-    return RectWithSize(data.xy, data.zw);
-void main(void) {
-    Primitive prim = load_primitive();
-    BoxShadow bs = fetch_boxshadow(prim.specific_prim_address);
-    RectWithSize segment_rect = fetch_instance_geometry(prim.specific_prim_address + BS_HEADER_VECS + prim.user_data0);
-    VertexInfo vi = write_vertex(segment_rect,
-                                 prim.local_clip_rect,
-                                 prim.z,
-                                 prim.layer,
-                                 prim.task,
-                                 prim.local_rect);
-    RenderTaskData child_task = fetch_render_task(prim.user_data1);
-    vUv.z = child_task.data1.x;
-    // Constant offsets to inset from bilinear filtering border.
-    vec2 patch_origin = child_task.data0.xy + vec2(1.0);
-    vec2 patch_size_device_pixels = child_task.data0.zw - vec2(2.0);
-    vec2 patch_size = patch_size_device_pixels / uDevicePixelRatio;
-    vUv.xy = (vi.local_pos - prim.local_rect.p0) / patch_size;
-    vMirrorPoint = 0.5 * prim.local_rect.size / patch_size;
-    vec2 texture_size = vec2(textureSize(sSharedCacheA8, 0));
-    vCacheUvRectCoords = vec4(patch_origin, patch_origin + patch_size_device_pixels) / texture_size.xyxy;
-    vColor = bs.color;
-    write_clip(vi.screen_pos, prim.clip_area);
-void main(void) {
-    vec4 clip_scale = vec4(1.0, 1.0, 1.0, do_clip());
-    // Mirror and stretch the box shadow corner over the entire
-    // primitives.
-    vec2 uv = vMirrorPoint - abs(vUv.xy - vMirrorPoint);
-    // Ensure that we don't fetch texels outside the box
-    // shadow corner. This can happen, for example, when
-    // drawing the outer parts of an inset box shadow.
-    uv = clamp(uv, vec2(0.0), vec2(1.0));
-    // Map the unit UV to the actual UV rect in the cache.
-    uv = mix(vCacheUvRectCoords.xy, vCacheUvRectCoords.zw, uv);
-    // Modulate the box shadow by the color.
-    float mask = texture(sSharedCacheA8, vec3(uv, vUv.z)).r;
-    oFragColor = clip_scale * dither(vColor * vec4(1.0, 1.0, 1.0, mask));
--- a/gfx/webrender/res/ps_cache_image.glsl
+++ b/gfx/webrender/res/ps_cache_image.glsl
@@ -2,16 +2,20 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #include shared,prim_shared
 varying vec3 vUv;
 flat varying vec4 vUvBounds;
+#if defined WR_FEATURE_ALPHA
+flat varying vec4 vColor;
 // Draw a cached primitive (e.g. a blurred text run) from the
 // target cache to the framebuffer, applying tile clip boundaries.
 void main(void) {
     Primitive prim = load_primitive();
     VertexInfo vi = write_vertex(prim.local_rect,
@@ -19,27 +23,54 @@ void main(void) {
     RenderTaskData child_task = fetch_render_task(prim.user_data1);
     vUv.z = child_task.data1.x;
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
+#if defined WR_FEATURE_COLOR
+    vec2 texture_size = vec2(textureSize(sColor0, 0).xy);
+    Picture pic = fetch_picture(prim.specific_prim_address);
+    vec2 texture_size = vec2(textureSize(sColor1, 0).xy);
+    vColor = pic.color;
     vec2 uv0 = child_task.data0.xy;
     vec2 uv1 = (child_task.data0.xy + child_task.data0.zw);
     vec2 f = (vi.local_pos - prim.local_rect.p0) / prim.local_rect.size;
     vUv.xy = mix(uv0 / texture_size,
                  uv1 / texture_size,
     vUvBounds = vec4(uv0 + vec2(0.5), uv1 - vec2(0.5)) / texture_size.xyxy;
+    write_clip(vi.screen_pos, prim.clip_area);
 void main(void) {
     vec2 uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
-    oFragColor = texture(sColor0, vec3(uv, vUv.z));
+#if defined WR_FEATURE_COLOR
+    vec4 color = texture(sColor0, vec3(uv, vUv.z));
+    vec4 color = vColor * texture(sColor1, vec3(uv, vUv.z)).r;
+    // Un-premultiply the color from sampling the gradient.
+    if (color.a > 0.0) {
+        color.rgb /= color.a;
+        // Apply the clip mask
+        color.a = min(color.a, do_clip());
+        // Pre-multiply the result.
+        color.rgb *= color.a;
+    }
+    oFragColor = color;
--- a/gfx/webrender/res/ps_line.glsl
+++ b/gfx/webrender/res/ps_line.glsl
@@ -93,29 +93,29 @@ void main(void) {
                            size.y * 0.5,
                            size.y * 0.75,
                            size.y * 0.5);
-    int text_shadow_address = prim.user_data0;
-    PrimitiveGeometry shadow_geom = fetch_primitive_geometry(text_shadow_address);
-    TextShadow shadow = fetch_text_shadow(text_shadow_address + VECS_PER_PRIM_HEADER);
+    int picture_address = prim.user_data0;
+    PrimitiveGeometry picture_geom = fetch_primitive_geometry(picture_address);
+    Picture pic = fetch_picture(picture_address + VECS_PER_PRIM_HEADER);
     vec2 device_origin = prim.task.render_target_origin +
-                         uDevicePixelRatio * (prim.local_rect.p0 + shadow.offset - shadow_geom.local_rect.p0);
+                         uDevicePixelRatio * (prim.local_rect.p0 + pic.offset - picture_geom.local_rect.p0);
     vec2 device_size = uDevicePixelRatio * prim.local_rect.size;
     vec2 device_pos = mix(device_origin,
                           device_origin + device_size,
-    vColor = shadow.color;
+    vColor = pic.color;
     vLocalPos = mix(prim.local_rect.p0,
                     prim.local_rect.p0 + prim.local_rect.size,
     gl_Position = uTransform * vec4(device_pos, 0.0, 1.0);
     vColor = line.color;
@@ -185,18 +185,17 @@ void main(void) {
         vec2 local_pos = vLocalPos;
         alpha = min(alpha, do_clip());
     // Find the appropriate distance to apply the step over.
-    vec2 fw = fwidth(local_pos);
-    float afwidth = length(fw);
+    float aa_range = compute_aa_range(local_pos);
     // Select the x/y coord, depending on which axis this edge is.
     vec2 pos = mix(local_pos.xy, local_pos.yx, vAxisSelect);
     switch (vStyle) {
         case LINE_STYLE_SOLID: {
@@ -210,19 +209,17 @@ void main(void) {
         case LINE_STYLE_DOTTED: {
             // Get the main-axis position relative to closest dot or dash.
             float x = mod(pos.x - vLocalOrigin.x, vParams.x);
             // Get the dot alpha
             vec2 dot_relative_pos = vec2(x, pos.y) - vParams.yz;
             float dot_distance = length(dot_relative_pos) - vParams.y;
-            alpha = min(alpha, 1.0 - smoothstep(-0.5 * afwidth,
-                                                0.5 * afwidth,
-                                                dot_distance));
+            alpha = min(alpha, distance_aa(aa_range, dot_distance));
         case LINE_STYLE_WAVY: {
             vec2 normalized_local_pos = pos - vLocalOrigin.xy;
             float y0 = vParams.y;
             float dy = vParams.z;
             float dx = vParams.w;
@@ -246,18 +243,16 @@ void main(void) {
             vec2 b2_1 = vec2(4.0 * dx,  y0);
             float d2 = approx_distance(normalized_local_pos, b0_1, b1_1, b2_1);
             // SDF union - this is needed to avoid artifacts where the
             // bezier curves join.
             float d = min(d1, d2);
             // Apply AA based on the thickness of the wave.
-            alpha = 1.0 - smoothstep(vParams.x - 0.5 * afwidth,
-                                     vParams.x + 0.5 * afwidth,
-                                     d);
+            alpha = distance_aa(aa_range, d - vParams.x);
     oFragColor = vColor * vec4(1.0, 1.0, 1.0, alpha);
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -84,18 +84,18 @@ void main(void) {
     //           a combination of mix() etc. Branching on
     //           a uniform is probably fast in most GPUs now though?
     vec4 modulate_color = vec4(0.0);
     switch (uMode) {
         case MODE_ALPHA:
             modulate_color = alpha * vColor;
         case MODE_SUBPX_PASS0:
-            modulate_color = vec4(alpha);
+            modulate_color = vec4(alpha) * vColor.a;
         case MODE_SUBPX_PASS1:
-            modulate_color = vColor;
+            modulate_color = alpha * vColor;
     oFragColor = color * modulate_color;
--- a/gfx/webrender/src/clip_scroll_node.rs
+++ b/gfx/webrender/src/clip_scroll_node.rs
@@ -1,48 +1,59 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{ClipId, LayerPixel, LayerPoint, LayerRect, LayerSize};
+use api::{ClipId, DeviceIntRect, LayerPixel, LayerPoint, LayerRect, LayerSize};
 use api::{LayerToScrollTransform, LayerToWorldTransform, LayerVector2D, PipelineId};
 use api::{ScrollClamping, ScrollEventPhase, ScrollLocation, ScrollSensitivity, StickyFrameInfo};
 use api::WorldPoint;
 use clip::{ClipRegion, ClipSources, ClipSourcesHandle, ClipStore};
-use clip_scroll_tree::TransformUpdateState;
+use clip_scroll_tree::{CoordinateSystemId, TransformUpdateState};
 use geometry::ray_intersects_rect;
-use spring::{Spring, DAMPING, STIFFNESS};
-use tiling::PackedLayerIndex;
-use util::MatrixHelpers;
+use gpu_cache::GpuCache;
+use render_task::{ClipChain, ClipChainNode, ClipWorkItem};
+use resource_cache::ResourceCache;
+use spring::{DAMPING, STIFFNESS, Spring};
+use std::rc::Rc;
+use tiling::{PackedLayer, PackedLayerIndex};
+use util::{MatrixHelpers, MaxRect};
 #[cfg(target_os = "macos")]
 const CAN_OVERSCROLL: bool = true;
 #[cfg(not(target_os = "macos"))]
 const CAN_OVERSCROLL: bool = false;
 pub struct ClipInfo {
     /// The clips for this node.
     pub clip_sources: ClipSourcesHandle,
     /// The packed layer index for this node, which is used to render a clip mask
     /// for it, if necessary.
     pub packed_layer_index: PackedLayerIndex,
+    /// Whether or not this clip node automatically creates a mask.
+    pub is_masking: bool,
 impl ClipInfo {
     pub fn new(
         clip_region: ClipRegion,
         packed_layer_index: PackedLayerIndex,
         clip_store: &mut ClipStore,
     ) -> ClipInfo {
+        let clip_sources = ClipSources::from(clip_region);
+        let is_masking = clip_sources.is_masking();
         ClipInfo {
-            clip_sources: clip_store.insert(ClipSources::from(clip_region)),
+            clip_sources: clip_store.insert(clip_sources),
+            is_masking,
 pub enum NodeType {
     /// A reference frame establishes a new coordinate space in the tree.
@@ -97,110 +108,102 @@ pub struct ClipScrollNode {
     /// Parent layer. If this is None, we are the root node.
     pub parent: Option<ClipId>,
     /// Child layers
     pub children: Vec<ClipId>,
     /// Whether or not this node is a reference frame.
     pub node_type: NodeType,
+    /// The node in the chain of clips that are necessary to clip display items
+    /// that have this ClipScrollNode as their clip parent. This will be used to
+    /// generate clip tasks.
+    pub clip_chain_node: ClipChain,
+    /// The intersected outer bounds of the clips for this node.
+    pub combined_clip_outer_bounds: DeviceIntRect,
+    /// The axis-aligned coordinate system id of this node.
+    pub coordinate_system_id: CoordinateSystemId,
 impl ClipScrollNode {
+    fn new(
+        pipeline_id: PipelineId,
+        parent_id: Option<ClipId>,
+        rect: &LayerRect,
+        node_type: NodeType
+    ) -> ClipScrollNode {
+        ClipScrollNode {
+            local_viewport_rect: *rect,
+            local_clip_rect: *rect,
+            combined_local_viewport_rect: LayerRect::zero(),
+            world_viewport_transform: LayerToWorldTransform::identity(),
+            world_content_transform: LayerToWorldTransform::identity(),
+            reference_frame_relative_scroll_offset: LayerVector2D::zero(),
+            parent: parent_id,
+            children: Vec::new(),
+            pipeline_id,
+            node_type: node_type,
+            clip_chain_node: None,
+            combined_clip_outer_bounds: DeviceIntRect::max_rect(),
+            coordinate_system_id: CoordinateSystemId(0),
+        }
+    }
     pub fn new_scroll_frame(
         pipeline_id: PipelineId,
         parent_id: ClipId,
         frame_rect: &LayerRect,
         content_size: &LayerSize,
         scroll_sensitivity: ScrollSensitivity,
     ) -> ClipScrollNode {
-        ClipScrollNode {
-            local_viewport_rect: *frame_rect,
-            local_clip_rect: *frame_rect,
-            combined_local_viewport_rect: LayerRect::zero(),
-            world_viewport_transform: LayerToWorldTransform::identity(),
-            world_content_transform: LayerToWorldTransform::identity(),
-            reference_frame_relative_scroll_offset: LayerVector2D::zero(),
-            parent: Some(parent_id),
-            children: Vec::new(),
-            pipeline_id,
-            node_type: NodeType::ScrollFrame(ScrollingState::new(
-                scroll_sensitivity,
-                LayerSize::new(
-                    (content_size.width - frame_rect.size.width).max(0.0),
-                    (content_size.height - frame_rect.size.height).max(0.0)
-                )
-            )),
-        }
+        let node_type = NodeType::ScrollFrame(ScrollingState::new(
+            scroll_sensitivity,
+            LayerSize::new(
+                (content_size.width - frame_rect.size.width).max(0.0),
+                (content_size.height - frame_rect.size.height).max(0.0)
+            )
+        ));
+        Self::new(pipeline_id, Some(parent_id), frame_rect, node_type)
     pub fn new_clip_node(
         pipeline_id: PipelineId,
         parent_id: ClipId,
         clip_info: ClipInfo,
         clip_rect: LayerRect,
     ) -> ClipScrollNode {
-        ClipScrollNode {
-            local_viewport_rect: clip_rect,
-            local_clip_rect: clip_rect,
-            combined_local_viewport_rect: LayerRect::zero(),
-            world_viewport_transform: LayerToWorldTransform::identity(),
-            world_content_transform: LayerToWorldTransform::identity(),
-            reference_frame_relative_scroll_offset: LayerVector2D::zero(),
-            parent: Some(parent_id),
-            children: Vec::new(),
-            pipeline_id,
-            node_type: NodeType::Clip(clip_info),
-        }
+        Self::new(pipeline_id, Some(parent_id), &clip_rect, NodeType::Clip(clip_info))
     pub fn new_reference_frame(
         parent_id: Option<ClipId>,
-        local_viewport_rect: &LayerRect,
+        frame_rect: &LayerRect,
         transform: &LayerToScrollTransform,
         origin_in_parent_reference_frame: LayerVector2D,
         pipeline_id: PipelineId,
     ) -> ClipScrollNode {
         let info = ReferenceFrameInfo {
             transform: *transform,
-        ClipScrollNode {
-            local_viewport_rect: *local_viewport_rect,
-            local_clip_rect: *local_viewport_rect,
-            combined_local_viewport_rect: LayerRect::zero(),
-            world_viewport_transform: LayerToWorldTransform::identity(),
-            world_content_transform: LayerToWorldTransform::identity(),
-            reference_frame_relative_scroll_offset: LayerVector2D::zero(),
-            parent: parent_id,
-            children: Vec::new(),
-            pipeline_id,
-            node_type: NodeType::ReferenceFrame(info),
-        }
+        Self::new(pipeline_id, parent_id, frame_rect, NodeType::ReferenceFrame(info))
     pub fn new_sticky_frame(
         parent_id: ClipId,
         frame_rect: LayerRect,
         sticky_frame_info: StickyFrameInfo,
         pipeline_id: PipelineId,
     ) -> ClipScrollNode {
-        ClipScrollNode {
-            local_viewport_rect: frame_rect,
-            local_clip_rect: frame_rect,
-            combined_local_viewport_rect: LayerRect::zero(),
-            world_viewport_transform: LayerToWorldTransform::identity(),
-            world_content_transform: LayerToWorldTransform::identity(),
-            reference_frame_relative_scroll_offset: LayerVector2D::zero(),
-            parent: Some(parent_id),
-            children: Vec::new(),
-            pipeline_id,
-            node_type: NodeType::StickyFrame(sticky_frame_info, LayerVector2D::zero()),
-        }
+        let node_type = NodeType::StickyFrame(sticky_frame_info, LayerVector2D::zero());
+        Self::new(pipeline_id, Some(parent_id), &frame_rect, node_type)
     pub fn add_child(&mut self, child: ClipId) {
     pub fn apply_old_scrolling_state(&mut self, new_scrolling: &ScrollingState) {
@@ -250,17 +253,89 @@ impl ClipScrollNode {
         scrolling.offset = new_offset;
         scrolling.bouncing_back = false;
         scrolling.started_bouncing_back = false;
-    pub fn update_transform(&mut self, state: &TransformUpdateState) {
+    pub fn update_clip_work_item(
+        &mut self,
+        state: &mut TransformUpdateState,
+        screen_rect: &DeviceIntRect,
+        device_pixel_ratio: f32,
+        packed_layers: &mut Vec<PackedLayer>,
+        clip_store: &mut ClipStore,
+        resource_cache: &mut ResourceCache,
+        gpu_cache: &mut GpuCache,
+    ) {
+        self.coordinate_system_id = state.current_coordinate_system_id;
+        let current_clip_chain = state.parent_clip_chain.clone();
+        let clip_info = match self.node_type {
+            NodeType::Clip(ref mut info) if info.is_masking => info,
+            _ => {
+                self.clip_chain_node = current_clip_chain;
+                self.combined_clip_outer_bounds = state.combined_outer_clip_bounds;
+                return;
+            }
+        };
+        // The coordinates of the mask are relative to the origin of the node itself,
+        // so we need to account for that origin in the transformation we assign to
+        // the packed layer.
+        let transform = self.world_viewport_transform
+            .pre_translate(self.local_viewport_rect.origin.to_vector().to_3d());
+        let packed_layer = &mut packed_layers[clip_info.packed_layer_index.0];
+        if packed_layer.set_transform(transform) {
+            // Meanwhile, the combined viewport rect is relative to the reference frame, so
+            // we move it into the local coordinate system of the node.
+            let local_viewport_rect = self.combined_local_viewport_rect
+                .translate(&-self.local_viewport_rect.origin.to_vector());
+            packed_layer.set_rect(
+                &local_viewport_rect,
+                screen_rect,
+                device_pixel_ratio,
+            );
+        }
+        let clip_sources = clip_store.get_mut(&clip_info.clip_sources);
+        clip_sources.update(
+            &transform,
+            gpu_cache,
+            resource_cache,
+            device_pixel_ratio,
+        );
+        let outer_bounds = clip_sources.bounds.outer.as_ref().map_or_else(
+            DeviceIntRect::zero,
+            |rect| rect.device_rect
+        );
+        self.combined_clip_outer_bounds = outer_bounds.intersection(
+            &state.combined_outer_clip_bounds).unwrap_or_else(DeviceIntRect::zero);
+        // TODO: Combine rectangles in the same axis-aligned clip space here?
+        self.clip_chain_node = Some(Rc::new(ClipChainNode {
+            work_item: ClipWorkItem {
+                layer_index: clip_info.packed_layer_index,
+                clip_sources: clip_info.clip_sources.weak(),
+                coordinate_system_id: state.current_coordinate_system_id,
+            },
+            prev: current_clip_chain,
+        }));
+        state.combined_outer_clip_bounds = self.combined_clip_outer_bounds;
+        state.parent_clip_chain = self.clip_chain_node.clone();
+    }
+    pub fn update_transform(&mut self, state: &mut TransformUpdateState) {
         // We calculate this here to avoid a double-borrow later.
         let sticky_offset = self.calculate_sticky_offset(
         let (local_transform, accumulated_scroll_offset) = match self.node_type {
             NodeType::ReferenceFrame(ref info) => {
@@ -310,16 +385,55 @@ impl ClipScrollNode {
         // The transformation for any content inside of us is the viewport transformation, plus
         // whatever scrolling offset we supply as well.
         let scroll_offset = self.scroll_offset();
         self.world_content_transform = self.world_viewport_transform
+        // The transformation we are passing is the transformation of the parent
+        // reference frame and the offset is the accumulated offset of all the nodes
+        // between us and the parent reference frame. If we are a reference frame,
+        // we need to reset both these values.
+        match self.node_type {
+            NodeType::ReferenceFrame(ref info) => {
+                state.parent_reference_frame_transform = self.world_viewport_transform;
+                state.parent_combined_viewport_rect = self.combined_local_viewport_rect;
+                state.parent_accumulated_scroll_offset = LayerVector2D::zero();
+                state.nearest_scrolling_ancestor_viewport =
+                    state.nearest_scrolling_ancestor_viewport
+                       .translate(&info.origin_in_parent_reference_frame);
+                if !info.transform.preserves_2d_axis_alignment() {
+                    state.current_coordinate_system_id = state.next_coordinate_system_id;
+                    state.next_coordinate_system_id = state.next_coordinate_system_id.next();
+                }
+            },
+            NodeType::Clip(..) => {
+                state.parent_combined_viewport_rect = self.combined_local_viewport_rect;
+            },
+            NodeType::ScrollFrame(ref scrolling) => {
+                state.parent_combined_viewport_rect =
+                        self.combined_local_viewport_rect.translate(&-scrolling.offset);
+                state.parent_accumulated_scroll_offset =
+                    scrolling.offset + state.parent_accumulated_scroll_offset;
+                state.nearest_scrolling_ancestor_offset = scrolling.offset;
+                state.nearest_scrolling_ancestor_viewport = self.local_viewport_rect;
+            }
+            NodeType::StickyFrame(_, sticky_offset) => {
+                // We don't translate the combined rect by the sticky offset, because sticky
+                // offsets actually adjust the node position itself, whereas scroll offsets
+                // only apply to contents inside the node.
+                state.parent_combined_viewport_rect = self.combined_local_viewport_rect;
+                state.parent_accumulated_scroll_offset =
+                    sticky_offset + state.parent_accumulated_scroll_offset;
+            }
+        }
     fn calculate_sticky_offset(
         viewport_scroll_offset: &LayerVector2D,
         viewport_rect: &LayerRect,
     ) -> LayerVector2D {
         let sticky_frame_info = match self.node_type {
--- a/gfx/webrender/src/clip_scroll_tree.rs
+++ b/gfx/webrender/src/clip_scroll_tree.rs
@@ -1,22 +1,41 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{ClipId, LayerPoint, LayerRect, LayerToScrollTransform, LayerToWorldTransform};
-use api::{LayerVector2D, PipelineId, ScrollClamping, ScrollEventPhase, ScrollLayerState};
-use api::{ScrollLocation, StickyFrameInfo, WorldPoint};
+use api::{ClipId, DeviceIntRect, LayerPoint, LayerRect};
+use api::{LayerToScrollTransform, LayerToWorldTransform, LayerVector2D, PipelineId};
+use api::{ScrollClamping, ScrollEventPhase, ScrollLayerState, ScrollLocation, StickyFrameInfo};
+use api::WorldPoint;
 use clip::ClipStore;
 use clip_scroll_node::{ClipScrollNode, NodeType, ScrollingState};
+use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet};
 use print_tree::{PrintTree, PrintTreePrinter};
+use render_task::ClipChain;
+use resource_cache::ResourceCache;
+use tiling::PackedLayer;
 pub type ScrollStates = FastHashMap<ClipId, ScrollingState>;
+/// An id that identifies coordinate systems in the ClipScrollTree. Each
+/// coordinate system has an id and those ids will be shared when the coordinate
+/// systems are the same or are in the same axis-aligned space. This allows
+/// for optimizing mask generation.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct CoordinateSystemId(pub u32);
+impl CoordinateSystemId {
+    pub fn next(&self) -> CoordinateSystemId {
+        let CoordinateSystemId(id) = *self;
+        CoordinateSystemId(id + 1)
+    }
 pub struct ClipScrollTree {
     pub nodes: FastHashMap<ClipId, ClipScrollNode>,
     pub pending_scroll_offsets: FastHashMap<ClipId, (LayerPoint, ScrollClamping)>,
     /// The ClipId of the currently scrolling node. Used to allow the same
     /// node to scroll even if a touch operation leaves the boundaries of that node.
     pub currently_scrolling_node_id: Option<ClipId>,
@@ -33,22 +52,32 @@ pub struct ClipScrollTree {
     /// Initially this ID is not valid, which is indicated by ```nodes``` being empty.
     pub topmost_scrolling_node_id: ClipId,
     /// A set of pipelines which should be discarded the next time this
     /// tree is drained.
     pub pipelines_to_discard: FastHashSet<PipelineId>,
 pub struct TransformUpdateState {
     pub parent_reference_frame_transform: LayerToWorldTransform,
     pub parent_combined_viewport_rect: LayerRect,
     pub parent_accumulated_scroll_offset: LayerVector2D,
     pub nearest_scrolling_ancestor_offset: LayerVector2D,
     pub nearest_scrolling_ancestor_viewport: LayerRect,
+    pub parent_clip_chain: ClipChain,
+    pub combined_outer_clip_bounds: DeviceIntRect,
+    /// An id for keeping track of the axis-aligned space of this node. This is used in
+    /// order to track what kinds of clip optimizations can be done for a particular
+    /// display list item, since optimizations can usually only be done among
+    /// coordinate systems which are relatively axis aligned.
+    pub current_coordinate_system_id: CoordinateSystemId,
+    pub next_coordinate_system_id: CoordinateSystemId,
 impl ClipScrollTree {
     pub fn new() -> ClipScrollTree {
         let dummy_pipeline = PipelineId::dummy();
         ClipScrollTree {
             nodes: FastHashMap::default(),
             pending_scroll_offsets: FastHashMap::default(),
@@ -292,90 +321,105 @@ impl ClipScrollTree {
             .scroll(scroll_location, phase)
-    pub fn update_all_node_transforms(&mut self, pan: LayerPoint) {
+    pub fn update_all_node_transforms(
+        &mut self,
+        screen_rect: &DeviceIntRect,
+        device_pixel_ratio: f32,
+        packed_layers: &mut Vec<PackedLayer>,
+        clip_store: &mut ClipStore,
+        resource_cache: &mut ResourceCache,
+        gpu_cache: &mut GpuCache,
+        pan: LayerPoint,
+    ) {
         if self.nodes.is_empty() {
         let root_reference_frame_id = self.root_reference_frame_id();
         let root_viewport = self.nodes[&root_reference_frame_id].local_clip_rect;
-        let state = TransformUpdateState {
+        let mut state = TransformUpdateState {
             parent_reference_frame_transform: LayerToWorldTransform::create_translation(
             parent_combined_viewport_rect: root_viewport,
             parent_accumulated_scroll_offset: LayerVector2D::zero(),
             nearest_scrolling_ancestor_offset: LayerVector2D::zero(),
             nearest_scrolling_ancestor_viewport: LayerRect::zero(),
+            parent_clip_chain: None,
+            combined_outer_clip_bounds: *screen_rect,
+            current_coordinate_system_id: CoordinateSystemId(0),
+            next_coordinate_system_id: CoordinateSystemId(0).next(),
-        self.update_node_transform(root_reference_frame_id, &state);
+        self.update_node_transform(
+            root_reference_frame_id,
+            &mut state,
+            &screen_rect,
+            device_pixel_ratio,
+            packed_layers,
+            clip_store,
+            resource_cache,
+            gpu_cache,
+        );
-    fn update_node_transform(&mut self, layer_id: ClipId, state: &TransformUpdateState) {
+    fn update_node_transform(
+        &mut self,
+        layer_id: ClipId,
+        state: &mut TransformUpdateState,
+        screen_rect: &DeviceIntRect,
+        device_pixel_ratio: f32,
+        packed_layers: &mut Vec<PackedLayer>,
+        clip_store: &mut ClipStore,
+        resource_cache: &mut ResourceCache,
+        gpu_cache: &mut GpuCache,
+    ) {
         // TODO(gw): This is an ugly borrow check workaround to clone these.
         //           Restructure this to avoid the clones!
-        let (state, node_children) = {
+        let mut state = state.clone();
+        let node_children = {
             let node = match self.nodes.get_mut(&layer_id) {
                 Some(node) => node,
                 None => return,
-            node.update_transform(&state);
-            // The transformation we are passing is the transformation of the parent
-            // reference frame and the offset is the accumulated offset of all the nodes
-            // between us and the parent reference frame. If we are a reference frame,
-            // we need to reset both these values.
-            let state = match node.node_type {
-                NodeType::ReferenceFrame(ref info) => TransformUpdateState {
-                    parent_reference_frame_transform: node.world_viewport_transform,
-                    parent_combined_viewport_rect: node.combined_local_viewport_rect,
-                    parent_accumulated_scroll_offset: LayerVector2D::zero(),
-                    nearest_scrolling_ancestor_viewport: state
-                        .nearest_scrolling_ancestor_viewport
-                        .translate(&info.origin_in_parent_reference_frame),
-                    ..*state
-                },
-                NodeType::Clip(..) => TransformUpdateState {
-                    parent_combined_viewport_rect: node.combined_local_viewport_rect,
-                    ..*state
-                },
-                NodeType::ScrollFrame(ref scrolling) => TransformUpdateState {
-                    parent_combined_viewport_rect:
-                        node.combined_local_viewport_rect.translate(&-scrolling.offset),
-                    parent_accumulated_scroll_offset: scrolling.offset +
-                        state.parent_accumulated_scroll_offset,
-                    nearest_scrolling_ancestor_offset: scrolling.offset,
-                    nearest_scrolling_ancestor_viewport: node.local_viewport_rect,
-                    ..*state
-                },
-                NodeType::StickyFrame(_, sticky_offset) => TransformUpdateState {
-                    // We don't translate the combined rect by the sticky offset, because sticky
-                    // offsets actually adjust the node position itself, whereas scroll offsets
-                    // only apply to contents inside the node.
-                    parent_combined_viewport_rect: node.combined_local_viewport_rect,
-                    parent_accumulated_scroll_offset:
-                        sticky_offset + state.parent_accumulated_scroll_offset,
-                    ..*state
-                }
-            };
+            node.update_transform(&mut state);
+            node.update_clip_work_item(
+                &mut state,
+                screen_rect,
+                device_pixel_ratio,
+                packed_layers,
+                clip_store,
+                resource_cache,
+                gpu_cache,
+            );
-            (state, node.children.clone())
+            node.children.clone()
         for child_layer_id in node_children {
-            self.update_node_transform(child_layer_id, &state);
+            self.update_node_transform(
+                child_layer_id,
+                &mut state,
+                screen_rect,
+                device_pixel_ratio,
+                packed_layers,
+                clip_store,
+                resource_cache,
+                gpu_cache,
+            );
     pub fn tick_scrolling_bounce_animations(&mut self) {
         for (_, node) in &mut self.nodes {
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -480,18 +480,17 @@ pub struct FBOId(gl::GLuint);
 pub struct RBOId(gl::GLuint);
 #[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
 pub struct VBOId(gl::GLuint);
 #[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
 struct IBOId(gl::GLuint);
-const MAX_TIMERS_PER_FRAME: usize = 256;
-const MAX_SAMPLERS_PER_FRAME: usize = 16;
+#[cfg(feature = "query")]
 const MAX_PROFILE_FRAMES: usize = 4;
 pub trait NamedTag {
     fn get_label(&self) -> &str;
 #[derive(Debug, Clone)]
 pub struct GpuTimer<T> {
@@ -500,22 +499,24 @@ pub struct GpuTimer<T> {
 #[derive(Debug, Clone)]
 pub struct GpuSampler<T> {
     pub tag: T,
     pub count: u64,
+#[cfg(feature = "query")]
 pub struct QuerySet<T> {
     set: Vec<gl::GLuint>,
     data: Vec<T>,
     pending: gl::GLuint,
+#[cfg(feature = "query")]
 impl<T> QuerySet<T> {
     fn new(set: Vec<gl::GLuint>) -> Self {
         QuerySet {
             data: Vec::new(),
             pending: 0,
@@ -538,33 +539,38 @@ impl<T> QuerySet<T> {
         let mut data = mem::replace(&mut self.data, Vec::new());
         for (value, &query) in data.iter_mut().zip(self.set.iter()) {
             fun(value, query)
+#[cfg(feature = "query")]
 pub struct GpuFrameProfile<T> {
     gl: Rc<gl::Gl>,
     timers: QuerySet<GpuTimer<T>>,
     samplers: QuerySet<GpuSampler<T>>,
     frame_id: FrameId,
     inside_frame: bool,
+#[cfg(feature = "query")]
 impl<T> GpuFrameProfile<T> {
+    const MAX_TIMERS_PER_FRAME: usize = 256;
+    // disable samplers on OSX due to driver bugs
+    #[cfg(target_os = "macos")]
+    const MAX_SAMPLERS_PER_FRAME: usize = 0;
+    #[cfg(not(target_os = "macos"))]
+    const MAX_SAMPLERS_PER_FRAME: usize = 16;
     fn new(gl: Rc<gl::Gl>) -> Self {
-        let (time_queries, sample_queries) = match gl.get_type() {
-            gl::GlType::Gl => (
-                gl.gen_queries(MAX_TIMERS_PER_FRAME as gl::GLint),
-                gl.gen_queries(MAX_SAMPLERS_PER_FRAME as gl::GLint),
-            ),
-            gl::GlType::Gles => (Vec::new(), Vec::new()),
-        };
+        assert_eq!(gl.get_type(), gl::GlType::Gl);
+        let time_queries = gl.gen_queries(Self::MAX_TIMERS_PER_FRAME as _);
+        let sample_queries = gl.gen_queries(Self::MAX_SAMPLERS_PER_FRAME as _);
         GpuFrameProfile {
             timers: QuerySet::new(time_queries),
             samplers: QuerySet::new(sample_queries),
             frame_id: FrameId(0),
             inside_frame: false,
@@ -602,36 +608,32 @@ impl<T> GpuFrameProfile<T> {
         if let Some(query) = self.timers.add(GpuTimer { tag, time_ns: 0 }) {
             self.gl.begin_query(gl::TIME_ELAPSED, query);
     fn done_sampler(&mut self) {
-        /* FIXME: samplers crash on MacOS
         if self.samplers.pending != 0 {
             self.samplers.pending = 0;
-        */
-    fn add_sampler(&mut self, _tag: T)
+    fn add_sampler(&mut self, tag: T)
         T: NamedTag,
-        /* FIXME: samplers crash on MacOS
         if let Some(query) = self.samplers.add(GpuSampler { tag, count: 0 }) {
             self.gl.begin_query(gl::SAMPLES_PASSED, query);
-        */
     fn is_valid(&self) -> bool {
         !self.timers.set.is_empty() || !self.samplers.set.is_empty()
     fn build_samples(&mut self) -> (Vec<GpuTimer<T>>, Vec<GpuSampler<T>>) {
@@ -643,35 +645,37 @@ impl<T> GpuFrameProfile<T> {
             self.samplers.take(|sampler, query| {
                 sampler.count = gl.get_query_object_ui64v(query, gl::QUERY_RESULT)
+#[cfg(feature = "query")]
 impl<T> Drop for GpuFrameProfile<T> {
     fn drop(&mut self) {
-        match self.gl.get_type() {
-            gl::GlType::Gl => {
-                self.gl.delete_queries(&self.timers.set);
-                self.gl.delete_queries(&self.samplers.set);
-            }
-            gl::GlType::Gles => {}
+        if !self.timers.set.is_empty() {
+            self.gl.delete_queries(&self.timers.set);
+        }
+        if !self.samplers.set.is_empty() {
+            self.gl.delete_queries(&self.samplers.set);
+#[cfg(feature = "query")]
 pub struct GpuProfiler<T> {
     frames: [GpuFrameProfile<T>; MAX_PROFILE_FRAMES],
     next_frame: usize,
+#[cfg(feature = "query")]
 impl<T> GpuProfiler<T> {
-    pub fn new(gl: &Rc<gl::Gl>) -> GpuProfiler<T> {
+    pub fn new(gl: &Rc<gl::Gl>) -> Self {
         GpuProfiler {
             next_frame: 0,
             frames: [
@@ -713,54 +717,80 @@ impl<T> GpuProfiler<T> {
     pub fn done_sampler(&mut self) {
+#[cfg(not(feature = "query"))]
+pub struct GpuProfiler<T>(Option<T>);
+#[cfg(not(feature = "query"))]
+impl<T> GpuProfiler<T> {
+    pub fn new(_: &Rc<gl::Gl>) -> Self {
+        GpuProfiler(None)
+    }
+    pub fn build_samples(&mut self) -> Option<(FrameId, Vec<GpuTimer<T>>, Vec<GpuSampler<T>>)> {
+        None
+    }
+    pub fn begin_frame(&mut self, _: FrameId) {}
+    pub fn end_frame(&mut self) {}
+    pub fn add_marker(&mut self, _: T) -> GpuMarker {
+        GpuMarker {}
+    }
+    pub fn add_sampler(&mut self, _: T) {}
+    pub fn done_sampler(&mut self) {}
 pub struct GpuMarker {
+    #[cfg(feature = "query")]
     gl: Rc<gl::Gl>,
+#[cfg(feature = "query")]
 impl GpuMarker {
-    pub fn new(gl: &Rc<gl::Gl>, message: &str) -> GpuMarker {
-        match gl.get_type() {
-            gl::GlType::Gl => {
-                gl.push_group_marker_ext(message);
-                GpuMarker { gl: Rc::clone(gl) }
-            }
-            gl::GlType::Gles => GpuMarker { gl: Rc::clone(gl) },
-        }
+    pub fn new(gl: &Rc<gl::Gl>, message: &str) -> Self {
+        debug_assert_eq!(gl.get_type(), gl::GlType::Gl);
+        gl.push_group_marker_ext(message);
+        GpuMarker { gl: Rc::clone(gl) }
     pub fn fire(gl: &gl::Gl, message: &str) {
-        match gl.get_type() {
-            gl::GlType::Gl => {
-                gl.insert_event_marker_ext(message);
-            }
-            gl::GlType::Gles => {}
-        }
+        debug_assert_eq!(gl.get_type(), gl::GlType::Gl);
+        gl.insert_event_marker_ext(message);
-#[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
+#[cfg(feature = "query")]
 impl Drop for GpuMarker {
     fn drop(&mut self) {
-        match self.gl.get_type() {
-            gl::GlType::Gl => {
-                self.gl.pop_group_marker_ext();
-            }
-            gl::GlType::Gles => {}
-        }
+        self.gl.pop_group_marker_ext();
+#[cfg(not(feature = "query"))]
+impl GpuMarker {
+    #[inline]
+    pub fn new(_: &Rc<gl::Gl>, _: &str) -> Self {
+        GpuMarker{}
+    }
+    #[inline]
+    pub fn fire(_: &gl::Gl, _: &str) {}
 #[derive(Debug, Copy, Clone)]
 pub enum VertexUsageHint {
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -217,16 +217,20 @@ impl Frame {
         self.frame_builder = Some(frame_builder);
+    pub fn update_epoch(&mut self, pipeline_id: PipelineId, epoch: Epoch) {
+        self.pipeline_epoch_map.insert(pipeline_id, epoch);
+    }
     fn flatten_clip<'a>(
         &mut self,
         context: &mut FlattenContext,
         pipeline_id: PipelineId,
         parent_id: &ClipId,
         new_clip_id: &ClipId,
         clip_region: ClipRegion,
     ) {
@@ -1090,59 +1094,37 @@ impl Frame {
-    pub fn build(
+    pub fn build_renderer_frame(
         &mut self,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         device_pixel_ratio: f32,
         pan: LayerPoint,
         output_pipelines: &FastHashSet<PipelineId>,
         texture_cache_profile: &mut TextureCacheProfileCounters,
         gpu_cache_profile: &mut GpuCacheProfileCounters,
     ) -> RendererFrame {
-        self.clip_scroll_tree.update_all_node_transforms(pan);
-        let frame = self.build_frame(
-            resource_cache,
-            gpu_cache,
-            pipelines,
-            device_pixel_ratio,
-            output_pipelines,
-            texture_cache_profile,
-            gpu_cache_profile,
-        );
-        frame
-    }
-    fn build_frame(
-        &mut self,
-        resource_cache: &mut ResourceCache,
-        gpu_cache: &mut GpuCache,
-        pipelines: &FastHashMap<PipelineId, ScenePipeline>,
-        device_pixel_ratio: f32,
-        output_pipelines: &FastHashSet<PipelineId>,
-        texture_cache_profile: &mut TextureCacheProfileCounters,
-        gpu_cache_profile: &mut GpuCacheProfileCounters,
-    ) -> RendererFrame {
         let mut frame_builder = self.frame_builder.take();
         let frame = frame_builder.as_mut().map(|builder| {
                 &mut self.clip_scroll_tree,
+                pan,
         self.frame_builder = frame_builder;
         let nodes_bouncing_back = self.clip_scroll_tree.collect_nodes_bouncing_back();
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -1,67 +1,83 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 use api::{BorderDetails, BorderDisplayItem, BorderRadius, BoxShadowClipMode, BuiltDisplayList};
-use api::{ClipAndScrollInfo, ClipId, ColorF};
+use api::{ComplexClipRegion, ClipAndScrollInfo, ClipId, ColorF};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceUintRect, DeviceUintSize};
-use api::{ExtendMode, FIND_ALL, FilterOp, FontInstance, FontRenderMode};
+use api::{ExtendMode, FilterOp, FontInstance, FontRenderMode};
 use api::{GlyphInstance, GlyphOptions, GradientStop, HitTestFlags, HitTestItem, HitTestResult};
 use api::{ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerPrimitiveInfo, LayerRect};
 use api::{LayerPixel, LayerSize, LayerToScrollTransform, LayerVector2D, LayoutVector2D, LineOrientation};
-use api::{LineStyle, LocalClip, POINT_RELATIVE_TO_PIPELINE_VIEWPORT, PipelineId, RepeatMode};
+use api::{LineStyle, LocalClip, PipelineId, RepeatMode};
 use api::{ScrollSensitivity, Shadow, TileOffset, TransformStyle};
 use api::{WorldPixel, WorldPoint, YuvColorSpace, YuvData, device_length};
 use app_units::Au;
 use border::ImageBorderSegment;
 use clip::{ClipMode, ClipRegion, ClipSource, ClipSources, ClipStore, Contains};
 use clip_scroll_node::{ClipInfo, ClipScrollNode, NodeType};
-use clip_scroll_tree::ClipScrollTree;
-use euclid::{SideOffsets2D, vec2, vec3};
+use clip_scroll_tree::{ClipScrollTree, CoordinateSystemId};
+use euclid::{SideOffsets2D, TypedTransform3D, vec2, vec3};
 use frame::FrameId;
 use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet, HardwareCompositeOp};
-use picture::PicturePrimitive;
+use picture::{PicturePrimitive};
 use plane_split::{BspSplitter, Polygon, Splitter};
-use prim_store::{BoxShadowPrimitiveCpu, TexelRect, YuvImagePrimitiveCpu};
+use prim_store::{BrushPrimitive, TexelRect, YuvImagePrimitiveCpu};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, LinePrimitive, PrimitiveKind};
 use prim_store::{PrimitiveContainer, PrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
 use prim_store::{RectanglePrimitive, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
-use render_task::{AlphaRenderItem, ClipWorkItem, RenderTask};
-use render_task::{RenderTaskId, RenderTaskLocation, RenderTaskTree};
+use render_task::{AlphaRenderItem, ClipChain, RenderTask, RenderTaskId, RenderTaskLocation};
+use render_task::RenderTaskTree;
 use resource_cache::ResourceCache;
 use scene::ScenePipeline;
 use std::{mem, usize, f32, i32};
 use tiling::{ClipScrollGroup, ClipScrollGroupIndex, CompositeOps, Frame};
-use tiling::{ContextIsolation, StackingContextIndex};
+use tiling::{ContextIsolation, RenderTargetKind, StackingContextIndex};
 use tiling::{PackedLayer, PackedLayerIndex, PrimitiveFlags, PrimitiveRunCmd, RenderPass};
 use tiling::{RenderTargetContext, ScrollbarPrimitive, StackingContext};
-use util::{self, pack_as_float, recycle_vec, subtract_rect};
-use util::{MatrixHelpers, RectHelpers};
+use util::{self, pack_as_float, RectHelpers, recycle_vec};
 /// Construct a polygon from stacking context boundaries.
 /// `anchor` here is an index that's going to be preserved in all the
 /// splits of the polygon.
 fn make_polygon(
     stacking_context: &StackingContext,
     node: &ClipScrollNode,
     anchor: usize,
-) -> Polygon<f32, WorldPixel> {
+) -> Polygon<f64, WorldPixel> {
     //TODO: only work with `isolated_items_bounds.size` worth of space
     // This can be achieved by moving the `origin` shift
     // from the primitive local coordinates into the layer transformation.
     // Which in turn needs it to be a render task property obeyed by all primitives
     // upon rendering, possibly not limited to `write_*_vertex` implementations.
     let size = stacking_context.isolated_items_bounds.bottom_right();
     let bounds = LayerRect::new(LayerPoint::zero(), LayerSize::new(size.x, size.y));
-    Polygon::from_transformed_rect(bounds, node.world_content_transform, anchor)
+    let mat = TypedTransform3D::row_major(
+        node.world_content_transform.m11 as f64,
+        node.world_content_transform.m12 as f64,
+        node.world_content_transform.m13 as f64,
+        node.world_content_transform.m14 as f64,
+        node.world_content_transform.m21 as f64,
+        node.world_content_transform.m22 as f64,
+        node.world_content_transform.m23 as f64,
+        node.world_content_transform.m24 as f64,
+        node.world_content_transform.m31 as f64,
+        node.world_content_transform.m32 as f64,
+        node.world_content_transform.m33 as f64,
+        node.world_content_transform.m34 as f64,
+        node.world_content_transform.m41 as f64,
+        node.world_content_transform.m42 as f64,
+        node.world_content_transform.m43 as f64,
+        node.world_content_transform.m44 as f64);
+    Polygon::from_transformed_rect(bounds.cast().unwrap(), mat, anchor)
 #[derive(Clone, Copy)]
 pub struct FrameBuilderConfig {
     pub enable_scrollbars: bool,
     pub default_font_render_mode: FontRenderMode,
     pub debug: bool,
@@ -121,95 +137,44 @@ pub struct FrameBuilder {
     /// Whether or not we've pushed a root stacking context for the current pipeline.
     has_root_stacking_context: bool,
 pub struct PrimitiveContext<'a> {
     pub packed_layer_index: PackedLayerIndex,
     pub packed_layer: &'a PackedLayer,
     pub device_pixel_ratio: f32,
-    // Clip items that apply for this primitive run.
-    // In the future, we'll build these once at the
-    // start of the frame when updating the
-    // clip-scroll tree.
-    pub current_clip_stack: Vec<ClipWorkItem>,
+    pub clip_chain: ClipChain,
     pub clip_bounds: DeviceIntRect,
     pub clip_id: ClipId,
+    pub coordinate_system_id: CoordinateSystemId,
     pub display_list: &'a BuiltDisplayList,
 impl<'a> PrimitiveContext<'a> {
     fn new(
         packed_layer_index: PackedLayerIndex,
         packed_layer: &'a PackedLayer,
         clip_id: ClipId,
-        screen_rect: &DeviceIntRect,
-        clip_scroll_tree: &ClipScrollTree,
-        clip_store: &ClipStore,
+        clip_chain: ClipChain,
+        clip_bounds: DeviceIntRect,
+        coordinate_system_id: CoordinateSystemId,
         device_pixel_ratio: f32,
         display_list: &'a BuiltDisplayList,
-    ) -> Option<Self> {
-        let mut current_clip_stack = Vec::new();
-        let mut clip_bounds = *screen_rect;
-        let mut current_id = Some(clip_id);
-        // Indicates if the next non-reference-frame that we encounter needs to have its
-        // local combined clip rectangle backed into the clip mask.
-        let mut next_node_needs_region_mask = false;
-        while let Some(id) = current_id {
-            let node = &clip_scroll_tree.nodes.get(&id).unwrap();
-            current_id = node.parent;
-            let clip = match node.node_type {
-                NodeType::ReferenceFrame(ref info) => {
-                    // if the transform is non-aligned, bake the next LCCR into the clip mask
-                    next_node_needs_region_mask |= !info.transform.preserves_2d_axis_alignment();
-                    continue;
-                }
-                NodeType::Clip(ref clip) => clip,
-                NodeType::StickyFrame(..) | NodeType::ScrollFrame(..) => {
-                    continue;
-                }
-            };
-            let clip_sources = clip_store.get(&clip.clip_sources);
-            if !clip_sources.is_masking() {
-                continue;
-            }
-            // apply the outer device bounds of the clip stack
-            if let Some(ref outer) = clip_sources.bounds.outer {
-                clip_bounds = match clip_bounds.intersection(&outer.device_rect) {
-                    Some(rect) => rect,
-                    None => return None,
-                }
-            }
-            //TODO-LCCR: bake a single LCCR instead of all aligned rects?
-            current_clip_stack.push(ClipWorkItem {
-                layer_index: clip.packed_layer_index,
-                clip_sources: clip.clip_sources.weak(),
-                apply_rectangles: next_node_needs_region_mask,
-            });
-            next_node_needs_region_mask = false;
-        }
-        current_clip_stack.reverse();
-        Some(PrimitiveContext {
+    ) -> Self {
+        PrimitiveContext {
-            current_clip_stack,
+            clip_chain,
+            coordinate_system_id,
-        })
+        }
 impl FrameBuilder {
     pub fn new(
         previous: Option<FrameBuilder>,
         screen_size: DeviceUintSize,
         background_color: Option<ColorF>,
@@ -366,16 +331,17 @@ impl FrameBuilder {
         let group_id = self.clip_scroll_group_store.len();
         self.clip_scroll_group_store.push(ClipScrollGroup {
             scroll_node_id: info.scroll_node_id,
             clip_node_id: info.clip_node_id(),
             screen_bounding_rect: None,
+            coordinate_system_id: CoordinateSystemId(0),
     pub fn notify_waiting_for_root_stacking_context(&mut self) {
         self.has_root_stacking_context = false;
@@ -586,17 +552,17 @@ impl FrameBuilder {
     pub fn push_shadow(
         &mut self,
         shadow: Shadow,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
     ) {
-        let prim = PicturePrimitive::new_shadow(shadow);
+        let prim = PicturePrimitive::new_shadow(shadow, RenderTargetKind::Color);
         // Create an empty shadow primitive. Insert it into
         // the draw lists immediately so that it will be drawn
         // before any visual text elements that are added as
         // part of this shadow context.
         let prim_index = self.create_primitive(
@@ -1191,17 +1157,17 @@ impl FrameBuilder {
-        let mut prim = TextRunPrimitiveCpu {
+        let prim = TextRunPrimitiveCpu {
             font: prim_font,
             glyph_gpu_blocks: Vec::new(),
             glyph_keys: Vec::new(),
             offset: run_offset,
@@ -1216,22 +1182,16 @@ impl FrameBuilder {
         let mut fast_shadow_prims = Vec::new();
         for (idx, &(shadow_prim_index, _)) in self.shadow_prim_stack.iter().enumerate() {
             let shadow_metadata = &self.prim_store.cpu_metadata[shadow_prim_index.0];
             let picture_prim = &self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
             let shadow = picture_prim.as_shadow();
             if shadow.blur_radius == 0.0 {
                 let mut text_prim = prim.clone();
                 text_prim.font.color = shadow.color.into();
-                // If we have translucent text, we need to ensure it won't go
-                // through the subpixel blend mode, which doesn't work with
-                // traditional alpha blending.
-                if shadow.color.a != 1.0 {
-                    text_prim.font.render_mode = text_prim.font.render_mode.limit_by(FontRenderMode::Alpha);
-                }
                 text_prim.offset += shadow.offset;
                 fast_shadow_prims.push((idx, text_prim));
         for (idx, text_prim) in fast_shadow_prims {
             let rect = info.rect;
             let mut info = info.clone();
@@ -1240,22 +1200,16 @@ impl FrameBuilder {
             self.shadow_prim_stack[idx].1.push((prim_index, clip_and_scroll));
-        // We defer this until after fast-shadows so that shadows of transparent text
-        // get subpixel-aa
-        if color.a != 1.0 {
-            prim.font.render_mode = FontRenderMode::Alpha;
-        }
         // Create (and add to primitive store) the primitive that will be
         // used for both the visual element and also the shadow(s).
         let prim_index = self.create_primitive(
@@ -1291,224 +1245,177 @@ impl FrameBuilder {
                 shadow_metadata.local_rect = shadow_metadata.local_rect.union(&shadow_rect);
                 picture_prim.add_primitive(prim_index, clip_and_scroll);
-    pub fn fill_box_shadow_rect(
-        &mut self,
-        clip_and_scroll: ClipAndScrollInfo,
-        info: &LayerPrimitiveInfo,
-        bs_rect: LayerRect,
-        color: &ColorF,
-        border_radius: f32,
-        clip_mode: BoxShadowClipMode,
-    ) {
-        // We can draw a rectangle instead with the proper border radius clipping.
-        let (bs_clip_mode, rect_to_draw) = match clip_mode {
-            BoxShadowClipMode::Outset | BoxShadowClipMode::None => (ClipMode::Clip, bs_rect),
-            BoxShadowClipMode::Inset => (ClipMode::ClipOut, info.rect),
-        };
-        let box_clip_mode = !bs_clip_mode;
-        // Clip the inside and then the outside of the box.
-        let border_radius = BorderRadius::uniform(border_radius);
-        let extra_clips = vec![
-            ClipSource::RoundedRectangle(bs_rect, border_radius, bs_clip_mode),
-            ClipSource::RoundedRectangle(info.rect, border_radius, box_clip_mode),
-        ];
-        let prim = RectanglePrimitive { color: *color };
-        let mut info = info.clone();
-        info.rect = rect_to_draw;
-        self.add_primitive(
-            clip_and_scroll,
-            &info,
-            extra_clips,
-            PrimitiveContainer::Rectangle(prim),
-        );
-    }
     pub fn add_box_shadow(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
-        info: &LayerPrimitiveInfo,
+        prim_info: &LayerPrimitiveInfo,
         box_offset: &LayerVector2D,
         color: &ColorF,
         blur_radius: f32,
         spread_radius: f32,
         border_radius: f32,
         clip_mode: BoxShadowClipMode,
     ) {
         if color.a == 0.0 {
-        // The local space box shadow rect. It is the element rect
-        // translated by the box shadow offset and inflated by the
-        // box shadow spread.
-        let inflate_amount = match clip_mode {
-            BoxShadowClipMode::Outset | BoxShadowClipMode::None => spread_radius,
-            BoxShadowClipMode::Inset => -spread_radius,
-        };
-        let bs_rect = info.rect
-            .translate(box_offset)
-            .inflate(inflate_amount, inflate_amount);
-        // If we have negative inflate amounts.
-        // Have to explicitly check this since euclid::TypedRect relies on negative rects
-        let bs_rect_empty = bs_rect.size.width <= 0.0 || bs_rect.size.height <= 0.0;
-        // Just draw a rectangle
-        if (blur_radius == 0.0 && spread_radius == 0.0 && clip_mode == BoxShadowClipMode::None) ||
-            bs_rect_empty
-        {
-            self.add_solid_rectangle(clip_and_scroll, info, color, PrimitiveFlags::None);
-            return;
-        }
-        if blur_radius == 0.0 && border_radius != 0.0 {
-            self.fill_box_shadow_rect(
-                clip_and_scroll,
-                info,
-                bs_rect,
-                color,
-                border_radius,
-                clip_mode,
-            );
-            return;
-        }
-        // Get the outer rectangle, based on the blur radius.
-        let outside_edge_size = 2.0 * blur_radius;
-        let inside_edge_size = outside_edge_size.max(border_radius);
-        let edge_size = outside_edge_size + inside_edge_size;
-        let outer_rect = bs_rect.inflate(outside_edge_size, outside_edge_size);
-        // Box shadows are often used for things like text underline and other
-        // simple primitives, so we want to draw these simple cases with the
-        // solid rectangle shader wherever possible, to avoid invoking the
-        // expensive box-shadow shader.
-        enum BoxShadowKind {
-            Simple(Vec<LayerRect>), // Can be drawn via simple rectangles only
-            Shadow(Vec<LayerRect>), // Requires the full box-shadow code path
-        }
-        let shadow_kind = match clip_mode {
-            BoxShadowClipMode::Outset | BoxShadowClipMode::None => {
-                // If a border radius is set, we need to draw inside
-                // the original box in order to draw where the border
-                // corners are. A clip-out mask applied below will
-                // ensure that we don't draw on the box itself.
-                let inner_box_bounds = info.rect.inflate(-border_radius, -border_radius);
-                // For outset shadows, subtracting the element rectangle
-                // from the outer rectangle gives the rectangles we need
-                // to draw. In the simple case (no blur radius), we can
-                // just draw these as solid colors.
-                let mut rects = Vec::new();
-                subtract_rect(&outer_rect, &inner_box_bounds, &mut rects);
-                if edge_size == 0.0 {
-                    BoxShadowKind::Simple(rects)
-                } else {
-                    BoxShadowKind::Shadow(rects)
-                }
+        let spread_amount = match clip_mode {
+            BoxShadowClipMode::Outset => {
+                spread_radius
             BoxShadowClipMode::Inset => {
-                // For inset shadows, in the simple case (no blur) we
-                // can draw the shadow area by subtracting the box
-                // shadow rect from the element rect (since inset box
-                // shadows never extend past the element rect). However,
-                // in the case of an inset box shadow with blur, we
-                // currently just draw the box shadow over the entire
-                // rect. The opaque parts of the shadow (past the outside
-                // edge of the box-shadow) are handled by the shadow
-                // shader.
-                // TODO(gw): We should be able to optimize the complex
-                //           inset shadow case to touch fewer pixels. We
-                //           can probably calculate the inner rect that
-                //           can't be affected, and subtract that from
-                //           the element rect?
-                let mut rects = Vec::new();
-                if edge_size == 0.0 {
-                    subtract_rect(&info.rect, &bs_rect, &mut rects);
-                    BoxShadowKind::Simple(rects)
-                } else {
-                    rects.push(info.rect);
-                    BoxShadowKind::Shadow(rects)
-                }
+                -spread_radius
-        match shadow_kind {
-            BoxShadowKind::Simple(rects) => for rect in &rects {
-                let mut info = info.clone();
-                info.rect = *rect;
-                self.add_solid_rectangle(clip_and_scroll, &info, color, PrimitiveFlags::None)
-            },
-            BoxShadowKind::Shadow(rects) => {
-                assert!(blur_radius > 0.0);
-                if clip_mode == BoxShadowClipMode::Inset {
-                    self.fill_box_shadow_rect(
-                        clip_and_scroll,
-                        info,
-                        bs_rect,
-                        color,
-                        border_radius,
-                        clip_mode,
-                    );
+        // Adjust the shadow box radius as per:
+        // https://drafts.csswg.org/css-backgrounds-3/#shadow-shape
+        let sharpness_scale = if border_radius < spread_radius {
+            let r = border_radius / spread_amount;
+            1.0 + (r - 1.0) * (r - 1.0) * (r - 1.0)
+        } else {
+            1.0
+        };
+        let shadow_radius = (border_radius + spread_amount * sharpness_scale).max(0.0);
+        let shadow_rect = prim_info.rect
+                                   .translate(box_offset)
+                                   .inflate(spread_amount, spread_amount);
+        if blur_radius == 0.0 {
+            let mut clips = Vec::new();
+            let fast_info = match clip_mode {
+                BoxShadowClipMode::Outset => {
+                    // TODO(gw): Add a fast path for ClipOut + zero border radius!
+                    clips.push(ClipSource::RoundedRectangle(
+                        prim_info.rect,
+                        BorderRadius::uniform(border_radius),
+                        ClipMode::ClipOut
+                    ));
+                    LayerPrimitiveInfo::with_clip(
+                        shadow_rect,
+                        LocalClip::RoundedRect(
+                            shadow_rect,
+                            ComplexClipRegion::new(shadow_rect, BorderRadius::uniform(shadow_radius)),
+                        ),
+                    )
+                BoxShadowClipMode::Inset => {
+                    clips.push(ClipSource::RoundedRectangle(
+                        shadow_rect,
+                        BorderRadius::uniform(shadow_radius),
+                        ClipMode::ClipOut
+                    ));
-                let inverted = match clip_mode {
-                    BoxShadowClipMode::Outset | BoxShadowClipMode::None => 0.0,
-                    BoxShadowClipMode::Inset => 1.0,
-                };
+                    LayerPrimitiveInfo::with_clip(
+                        prim_info.rect,
+                        LocalClip::RoundedRect(
+                            prim_info.rect,
+                            ComplexClipRegion::new(prim_info.rect, BorderRadius::uniform(border_radius)),
+                        ),
+                    )
+                }
+            };
-                // Outset box shadows with border radius
-                // need a clip out of the center box.
-                let extra_clip_mode = match clip_mode {
-                    BoxShadowClipMode::Outset | BoxShadowClipMode::None => ClipMode::ClipOut,
-                    BoxShadowClipMode::Inset => ClipMode::Clip,
-                };
+            self.add_primitive(
+                clip_and_scroll,
+                &fast_info,
+                clips,
+                PrimitiveContainer::Rectangle(RectanglePrimitive {
+                    color: *color,
+                }),
+            );
+        } else {
+            let shadow = Shadow {
+                blur_radius,
+                color: *color,
+                offset: LayerVector2D::zero(),
+            };
+            let blur_offset = 2.0 * blur_radius;
+            let mut extra_clips = vec![];
+            let mut pic_prim = PicturePrimitive::new_shadow(shadow, RenderTargetKind::Alpha);
-                let mut extra_clips = Vec::new();
-                if border_radius >= 0.0 {
+            let pic_info = match clip_mode {
+                BoxShadowClipMode::Outset => {
+                    let brush_prim = BrushPrimitive {
+                        clip_mode: ClipMode::Clip,
+                        radius: shadow_radius,
+                    };
+                    let brush_rect = LayerRect::new(LayerPoint::new(blur_offset, blur_offset),
+                                                    shadow_rect.size);
+                    let brush_info = LayerPrimitiveInfo::new(brush_rect);
+                    let brush_prim_index = self.create_primitive(
+                        clip_and_scroll,
+                        &brush_info,
+                        Vec::new(),
+                        PrimitiveContainer::Brush(brush_prim),
+                    );
+                    pic_prim.add_primitive(brush_prim_index, clip_and_scroll);
-                        info.rect,
+                        prim_info.rect,
-                        extra_clip_mode,
+                        ClipMode::ClipOut,
-                }
-                let prim_cpu = BoxShadowPrimitiveCpu {
-                    src_rect: info.rect,
-                    bs_rect,
-                    color: *color,
-                    blur_radius,
-                    border_radius,
-                    edge_size,
-                    inverted,
-                    rects,
-                    render_task_id: None,
-                };
+                    let pic_rect = shadow_rect.inflate(blur_offset, blur_offset);
+                    LayerPrimitiveInfo::new(pic_rect)
+                }
+                BoxShadowClipMode::Inset => {
+                    let brush_prim = BrushPrimitive {
+                        clip_mode: ClipMode::ClipOut,
+                        radius: shadow_radius,
+                    };
+                    let mut brush_rect = shadow_rect;
+                    brush_rect.origin.x = brush_rect.origin.x - prim_info.rect.origin.x + blur_offset;
+                    brush_rect.origin.y = brush_rect.origin.y - prim_info.rect.origin.y + blur_offset;
+                    let brush_info = LayerPrimitiveInfo::new(brush_rect);
-                let mut info = info.clone();
-                info.rect = outer_rect;
-                self.add_primitive(
-                    clip_and_scroll,
-                    &info,
-                    extra_clips,
-                    PrimitiveContainer::BoxShadow(prim_cpu),
-                );
-            }
+                    let brush_prim_index = self.create_primitive(
+                        clip_and_scroll,
+                        &brush_info,
+                        Vec::new(),
+                        PrimitiveContainer::Brush(brush_prim),
+                    );
+                    pic_prim.add_primitive(brush_prim_index, clip_and_scroll);
+                    extra_clips.push(ClipSource::RoundedRectangle(
+                        prim_info.rect,
+                        BorderRadius::uniform(border_radius),
+                        ClipMode::Clip,
+                    ));
+                    let pic_rect = prim_info.rect.inflate(blur_offset, blur_offset);
+                    LayerPrimitiveInfo::with_clip_rect(pic_rect, prim_info.rect)
+                }
+            };
+            self.add_primitive(
+                clip_and_scroll,
+                &pic_info,
+                extra_clips,
+                PrimitiveContainer::Picture(pic_prim),
+            );
     pub fn add_image(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
         stretch_size: &LayerSize,
@@ -1604,17 +1511,17 @@ impl FrameBuilder {
     pub fn hit_test(
         clip_scroll_tree: &ClipScrollTree,
         pipeline_id: Option<PipelineId>,
         point: WorldPoint,
         flags: HitTestFlags
     ) -> HitTestResult {
-        let point = if flags.contains(POINT_RELATIVE_TO_PIPELINE_VIEWPORT) {
+        let point = if flags.contains(HitTestFlags::POINT_RELATIVE_TO_PIPELINE_VIEWPORT) {
             let point = LayerPoint::new(point.x, point.y);
             clip_scroll_tree.make_node_relative_point_absolute(pipeline_id, &point)
         } else {
         let mut node_cache = FastHashMap::default();
         let mut result = HitTestResult::default();
@@ -1654,18 +1561,19 @@ impl FrameBuilder {
                     Some(&Some(point)) => point,
                     _ => unreachable!("Hittest target's root reference frame not hit."),
                 result.items.push(HitTestItem {
                     pipeline: clip_and_scroll.clip_node_id().pipeline_id(),
                     tag: item.tag,
+                    point_relative_to_item: point_in_layer - item.rect.origin.to_vector(),
-                if !flags.contains(FIND_ALL) {
+                if !flags.contains(HitTestFlags::FIND_ALL) {
                     return result;
         return result;
@@ -1676,33 +1584,50 @@ impl FrameBuilder {
         base_prim_index: PrimitiveIndex,
         prim_count: usize,
         clip_and_scroll: ClipAndScrollInfo,
         render_tasks: &mut RenderTaskTree,
         gpu_cache: &mut GpuCache,
         resource_cache: &mut ResourceCache,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         clip_scroll_tree: &ClipScrollTree,
-        screen_rect: &DeviceIntRect,
         device_pixel_ratio: f32,
         profile_counters: &mut FrameProfileCounters,
     ) -> bool {
         let stacking_context_index = *self.stacking_context_stack.last().unwrap();
         let packed_layer_index =
             match self.get_packed_layer_index_if_visible(&clip_and_scroll) {
             Some(index) => index,
             None => {
                 debug!("{:?} of invisible {:?}", base_prim_index, stacking_context_index);
                 return false;
+        let (clip_chain, clip_bounds, coordinate_system_id) =
+            match clip_scroll_tree.nodes.get(&clip_and_scroll.clip_node_id()) {
+            Some(node) if node.combined_clip_outer_bounds != DeviceIntRect::zero() => {
+                let group_id = self.clip_scroll_group_indices[&clip_and_scroll];
+                (
+                    node.clip_chain_node.clone(),
+                    node.combined_clip_outer_bounds,
+                    self.clip_scroll_group_store[group_id].coordinate_system_id,
+                )
+            }
+            _ => {
+                let group_id = self.clip_scroll_group_indices[&clip_and_scroll];
+                self.clip_scroll_group_store[group_id].screen_bounding_rect = None;
+                debug!("{:?} of clipped out {:?}", base_prim_index, stacking_context_index);
+                return false;
+            }
+        };
+        let stacking_context = &mut self.stacking_context_store[stacking_context_index.0];
         let pipeline_id = {
-            let stacking_context =
-                &mut self.stacking_context_store[stacking_context_index.0];
             if !stacking_context.can_contribute_to_scene() {
                 return false;
             // At least one primitive in this stacking context is visible, so the stacking
             // context is visible.
             stacking_context.is_visible = true;
@@ -1710,48 +1635,37 @@ impl FrameBuilder {
             "\t{:?} of {:?} at {:?}",
-        let stacking_context =
-            &mut self.stacking_context_store[stacking_context_index.0];
         let packed_layer = &self.packed_layers[packed_layer_index.0];
         let display_list = &pipelines
             .expect("No display list?")
         if !stacking_context.is_backface_visible && packed_layer.transform.is_backface_visible() {
             return false;
         let prim_context = PrimitiveContext::new(
-            screen_rect,
-            clip_scroll_tree,
-            &self.clip_store,
+            clip_chain,
+            clip_bounds,
+            coordinate_system_id,
-        let prim_context = match prim_context {
-            Some(prim_context) => prim_context,
-            None => {
-                let group_id = self.clip_scroll_group_indices[&clip_and_scroll];
-                self.clip_scroll_group_store[group_id].screen_bounding_rect = None;
-                return false
-            },
-        };
             "\tclip_bounds {:?}, layer_local_clip {:?}",
         for i in 0 .. prim_count {
             let prim_index = PrimitiveIndex(base_prim_index.0 + i);
@@ -1831,62 +1745,16 @@ impl FrameBuilder {
                 parent.isolated_items_bounds = parent.isolated_items_bounds.union(&child_bounds);
             // Per-primitive stacking context visibility checks do not take into account
             // visibility of child stacking contexts, so do that now.
             parent.is_visible = parent.is_visible || is_visible;
-    fn recalculate_clip_scroll_nodes(
-        &mut self,
-        clip_scroll_tree: &mut ClipScrollTree,
-        gpu_cache: &mut GpuCache,
-        resource_cache: &mut ResourceCache,
-        screen_rect: &DeviceIntRect,
-        device_pixel_ratio: f32
-    ) {
-        for (_, ref mut node) in clip_scroll_tree.nodes.iter_mut() {
-            let node_clip_info = match node.node_type {
-                NodeType::Clip(ref mut clip_info) => clip_info,
-                _ => continue,
-            };
-            let packed_layer_index = node_clip_info.packed_layer_index;
-            let packed_layer = &mut self.packed_layers[packed_layer_index.0];
-            // The coordinates of the mask are relative to the origin of the node itself,
-            // so we need to account for that origin in the transformation we assign to
-            // the packed layer.
-            let transform = node.world_viewport_transform
-                .pre_translate(node.local_viewport_rect.origin.to_vector().to_3d());
-            if packed_layer.set_transform(transform) {
-                // Meanwhile, the combined viewport rect is relative to the reference frame, so
-                // we move it into the local coordinate system of the node.
-                let local_viewport_rect = node.combined_local_viewport_rect
-                    .translate(&-node.local_viewport_rect.origin.to_vector());
-                packed_layer.set_rect(
-                    &local_viewport_rect,
-                    screen_rect,
-                    device_pixel_ratio,
-                );
-            }
-            let clip_sources = self.clip_store.get_mut(&node_clip_info.clip_sources);
-            clip_sources.update(
-                &transform,
-                gpu_cache,
-                resource_cache,
-                device_pixel_ratio,
-            );
-        }
-    }
     fn recalculate_clip_scroll_groups(
         &mut self,
         clip_scroll_tree: &ClipScrollTree,
         screen_rect: &DeviceIntRect,
         device_pixel_ratio: f32
     ) {
         for ref mut group in &mut self.clip_scroll_group_store {
@@ -1915,16 +1783,18 @@ impl FrameBuilder {
             group.screen_bounding_rect = packed_layer.set_rect(
+            group.coordinate_system_id = scroll_node.coordinate_system_id;
                 "\t\tlocal viewport {:?} screen bound {:?}",
@@ -1938,23 +1808,16 @@ impl FrameBuilder {
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         profile_counters: &mut FrameProfileCounters,
         device_pixel_ratio: f32,
     ) {
-        self.recalculate_clip_scroll_nodes(
-            clip_scroll_tree,
-            gpu_cache,
-            resource_cache,
-            screen_rect,
-            device_pixel_ratio
-        );
         debug!("processing commands...");
         let commands = mem::replace(&mut self.cmds, Vec::new());
@@ -1968,17 +1831,16 @@ impl FrameBuilder {
-                        screen_rect,
                 PrimitiveRunCmd::PopStackingContext => {
                     self.handle_pop_stacking_context(screen_rect, clip_scroll_tree);
@@ -2171,20 +2033,23 @@ impl FrameBuilder {
                     for filter in &stacking_context.composite_ops.filters {
                         let mut prev_task = alpha_task_stack.pop().unwrap();
                         let screen_origin = current_task.as_alpha_batch().screen_origin;
                         let current_task_id = render_tasks.add(current_task);
                         match *filter {
                             FilterOp::Blur(blur_radius) => {
                                 let blur_radius = device_length(blur_radius, device_pixel_ratio);
+                                render_tasks.get_mut(current_task_id)
+                                            .inflate(blur_radius.0);
                                 let blur_render_task = RenderTask::new_blur(
+                                    RenderTargetKind::Color,
                                 let blur_render_task_id = render_tasks.add(blur_render_task);
                                 let item = AlphaRenderItem::HardwareComposite(
                                         screen_origin.x - blur_radius.0,
@@ -2258,19 +2123,19 @@ impl FrameBuilder {
                         // Z axis is directed at the screen, `sort` is ascending, and we need back-to-front order.
                         for poly in splitter.sort(vec3(0.0, 0.0, 1.0)) {
                             let sc_index = StackingContextIndex(poly.anchor);
                             let task_id = preserve_3d_map_stack.last().unwrap()[&sc_index];
                             debug!("\t\tproduce {:?} -> {:?} for {:?}", sc_index, poly, task_id);
                             let pp = &poly.points;
                             let gpu_blocks = [
-                                [pp[0].x, pp[0].y, pp[0].z, pp[1].x].into(),
-                                [pp[1].y, pp[1].z, pp[2].x, pp[2].y].into(),
-                                [pp[2].z, pp[3].x, pp[3].y, pp[3].z].into(),
+                                [pp[0].x as f32, pp[0].y as f32, pp[0].z as f32, pp[1].x as f32].into(),
+                                [pp[1].y as f32, pp[1].z as f32, pp[2].x as f32, pp[2].y as f32].into(),
+                                [pp[2].z as f32, pp[3].x as f32, pp[3].y as f32, pp[3].z as f32].into(),
                             let handle = gpu_cache.push_per_frame_blocks(&gpu_blocks);
                             let item =
                                 AlphaRenderItem::SplitComposite(sc_index, task_id, handle, next_z);
                         next_z += 1;
@@ -2338,16 +2203,17 @@ impl FrameBuilder {
     pub fn build(
         &mut self,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         frame_id: FrameId,
         clip_scroll_tree: &mut ClipScrollTree,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         device_pixel_ratio: f32,
+        pan: LayerPoint,
         output_pipelines: &FastHashSet<PipelineId>,
         texture_cache_profile: &mut TextureCacheProfileCounters,
         gpu_cache_profile: &mut GpuCacheProfileCounters,
     ) -> Frame {
         let mut profile_counters = FrameProfileCounters::new();
@@ -2360,16 +2226,26 @@ impl FrameBuilder {
         let screen_rect = DeviceIntRect::new(
                 self.screen_size.width as i32,
                 self.screen_size.height as i32,
+        clip_scroll_tree.update_all_node_transforms(
+            &screen_rect,
+            device_pixel_ratio,
+            &mut self.packed_layers,
+            &mut self.clip_store,
+            resource_cache,
+            gpu_cache,
+            pan
+        );
         self.update_scroll_bars(clip_scroll_tree, gpu_cache);
         let mut render_tasks = RenderTaskTree::new();
--- a/gfx/webrender/src/glyph_rasterizer.rs
+++ b/gfx/webrender/src/glyph_rasterizer.rs
@@ -183,17 +183,17 @@ impl GlyphRasterizer {
         let mut glyphs = Vec::new();
         let glyph_key_cache = glyph_cache.get_glyph_key_cache_for_font_mut(font.clone());
         // select glyphs that have not been requested yet.
         for key in glyph_keys {
             match glyph_key_cache.entry(key.clone()) {
                 Entry::Occupied(mut entry) => {
-                    if let Some(ref mut glyph_info) = *entry.get_mut() {
+                    if let Ok(Some(ref mut glyph_info)) = *entry.get_mut() {
                         if texture_cache.request(&mut glyph_info.texture_cache_handle, gpu_cache) {
                             // This case gets hit when we have already rasterized
                             // the glyph and stored it in CPU memory, but the glyph
                             // has been evicted from the texture cache. In which case
                             // we need to re-upload it to the GPU.
                                 &mut glyph_info.texture_cache_handle,
                                 ImageDescriptor {
@@ -347,17 +347,17 @@ impl GlyphRasterizer {
                         scale: glyph.scale,
                 } else {
             let glyph_key_cache = glyph_cache.get_glyph_key_cache_for_font_mut(job.request.font);
-            glyph_key_cache.insert(job.request.key, glyph_info);
+            glyph_key_cache.insert(job.request.key, Ok(glyph_info));
         // Now that we are done with the critical path (rendering the glyphs),
         // we can schedule removing the fonts if needed.
         if !self.fonts_to_remove.is_empty() {
             let font_contexts = Arc::clone(&self.font_contexts);
             let fonts_to_remove = mem::replace(&mut self.fonts_to_remove, Vec::new());
             self.workers.spawn(move || {
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -12,24 +12,16 @@ use tiling::PackedLayerIndex;
 pub struct PackedLayerAddress(i32);
 impl From<PackedLayerIndex> for PackedLayerAddress {
     fn from(index: PackedLayerIndex) -> PackedLayerAddress {
         PackedLayerAddress(index.0 as i32)
-// Instance structure for box shadows being drawn into target cache.
-pub struct BoxShadowCacheInstance {
-    pub prim_address: GpuCacheAddress,
-    pub task_index: RenderTaskAddress,
 pub enum BlurDirection {
     Horizontal = 0,
@@ -140,8 +132,43 @@ impl From<CompositePrimitiveInstance> fo
+pub struct BrushInstance {
+    picture_address: RenderTaskAddress,
+    prim_address: GpuCacheAddress,
+impl BrushInstance {
+    pub fn new(
+        picture_address: RenderTaskAddress,
+        prim_address: GpuCacheAddress
+    ) -> BrushInstance {
+        BrushInstance {
+            picture_address,
+            prim_address,
+        }
+    }
+impl From<BrushInstance> for PrimitiveInstance {
+    fn from(instance: BrushInstance) -> PrimitiveInstance {
+        PrimitiveInstance {
+            data: [
+                instance.picture_address.0 as i32,
+                instance.prim_address.as_int(),
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+            ]
+        }
+    }
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -145,14 +145,13 @@ extern crate time;
 extern crate ws;
 pub extern crate webrender_api;
 #[cfg(any(target_os = "macos", target_os = "windows"))]
 extern crate gamma_lut;
 pub use device::build_shader_strings;
 pub use renderer::{CpuProfile, DebugFlags, GpuProfile, OutputImageHandler, RendererKind};
 pub use renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource};
 pub use renderer::{GraphicsApi, GraphicsApiInfo, ReadPixelsFormat, Renderer, RendererOptions};
 pub use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 pub use webrender_api as api;
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -1,15 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 use api::{ClipAndScrollInfo, Shadow};
 use prim_store::PrimitiveIndex;
 use render_task::RenderTaskId;
+use tiling::RenderTargetKind;
  A picture represents a dynamically rendered image. It consists of:
  * A number of primitives that are drawn onto the picture.
  * A composite operation describing how to composite this
    picture into its parent.
  * A configuration describing how to draw the primitives on
@@ -31,28 +32,33 @@ pub enum CompositeOp {
     //           as blur, blend etc.
 pub struct PicturePrimitive {
     pub prim_runs: Vec<PrimitiveRun>,
     pub composite_op: CompositeOp,
     pub render_task_id: Option<RenderTaskId>,
+    pub kind: RenderTargetKind,
     // TODO(gw): Add a mode that specifies if this
     //           picture should be rasterized in
     //           screen-space or local-space.
 impl PicturePrimitive {
-    pub fn new_shadow(shadow: Shadow) -> PicturePrimitive {
+    pub fn new_shadow(
+        shadow: Shadow,
+        kind: RenderTargetKind,
+    ) -> PicturePrimitive {
         PicturePrimitive {
             prim_runs: Vec::new(),
             composite_op: CompositeOp::Shadow(shadow),
             render_task_id: None,
+            kind,
     pub fn as_shadow(&self) -> &Shadow {
         match self.composite_op {
             CompositeOp::Shadow(ref shadow) => shadow,
--- a/gfx/webrender/src/platform/unix/font.rs
+++ b/gfx/webrender/src/platform/unix/font.rs
@@ -273,22 +273,16 @@ impl FontContext {
         slot: FT_GlyphSlot,
         font: &FontInstance,
         glyph: &GlyphKey,
         scale_bitmaps: bool,
     ) -> Option<GlyphDimensions> {
         let metrics = unsafe { &(*slot).metrics };
-        // If there's no advance, no need to consider this glyph
-        // for layout.
-        if metrics.horiAdvance == 0 {
-            return None
-        }
         let advance = metrics.horiAdvance as f32 / 64.0;
         match unsafe { (*slot).format } {
             FT_Glyph_Format::FT_GLYPH_FORMAT_BITMAP => {
                 let left = unsafe { (*slot).bitmap_left };
                 let top = unsafe { (*slot).bitmap_top };
                 let width = unsafe { (*slot).bitmap.width };
                 let height = unsafe { (*slot).bitmap.rows };
                 if scale_bitmaps {
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -2,28 +2,27 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 use api::{BorderRadius, BuiltDisplayList, ColorF, ComplexClipRegion, DeviceIntRect, DeviceIntSize};
 use api::{DevicePoint, ExtendMode, FontInstance, FontRenderMode, GlyphInstance, GlyphKey};
 use api::{GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerRect};
 use api::{LayerSize, LayerVector2D, LineOrientation, LineStyle};
 use api::{TileOffset, YuvColorSpace, YuvFormat, device_length};
-use app_units::Au;
 use border::BorderCornerInstance;
 use clip::{ClipMode, ClipSourcesHandle, ClipStore, Geometry};
-use euclid::Size2D;
 use frame_builder::PrimitiveContext;
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
 use picture::PicturePrimitive;
-use render_task::{ClipWorkItem, RenderTask, RenderTaskId, RenderTaskTree};
+use render_task::{ClipWorkItem, ClipChainNode, RenderTask, RenderTaskId, RenderTaskTree};
 use resource_cache::{ImageProperties, ResourceCache};
 use std::{mem, usize};
+use std::rc::Rc;
 use util::{MatrixHelpers, pack_as_float, recycle_vec, TransformedRect};
 #[derive(Debug, Copy, Clone)]
 pub struct PrimitiveOpacity {
     pub is_opaque: bool,
 impl PrimitiveOpacity {
@@ -105,19 +104,19 @@ pub enum PrimitiveKind {
-    BoxShadow,
+    Brush,
 impl GpuCacheHandle {
     pub fn as_int(&self, gpu_cache: &GpuCache) -> i32 {
@@ -160,16 +159,33 @@ pub struct RectanglePrimitive {
 impl ToGpuBlocks for RectanglePrimitive {
     fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
+pub struct BrushPrimitive { // CPU-side data for a brush primitive; stored in PrimitiveStore::cpu_brushes.
+    pub clip_mode: ClipMode,
+    pub radius: f32,
+impl ToGpuBlocks for BrushPrimitive {
+    fn write_gpu_blocks(&self, mut request: GpuDataRequest) { // Pushes this brush's data as one four-element float array.
+        request.push([
+            self.clip_mode as u32 as f32, // clip mode converted to an integer, then stored as a float
+            self.radius,
+            0.0, // unused slot (presumably padding to four floats; confirm against the shader)
+            0.0 // unused slot
+        ]);
+    }
 #[derive(Debug, Clone)]
 pub struct LinePrimitive {
     pub color: ColorF,
     pub style: LineStyle,
     pub orientation: LineOrientation,
@@ -226,56 +242,16 @@ pub struct BorderPrimitiveCpu {
 impl ToGpuBlocks for BorderPrimitiveCpu {
     fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
-pub struct BoxShadowPrimitiveCacheKey {
-    pub shadow_rect_size: Size2D<Au>,
-    pub border_radius: Au,
-    pub blur_radius: Au,
-    pub inverted: bool,
-pub struct BoxShadowPrimitiveCpu {
-    // todo(gw): generate on demand
-    // gpu data
-    pub src_rect: LayerRect,
-    pub bs_rect: LayerRect,
-    pub color: ColorF,
-    pub border_radius: f32,
-    pub edge_size: f32,
-    pub blur_radius: f32,
-    pub inverted: f32,
-    pub rects: Vec<LayerRect>,
-    pub render_task_id: Option<RenderTaskId>,
-impl ToGpuBlocks for BoxShadowPrimitiveCpu {
-    fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
-        request.push(self.src_rect);
-        request.push(self.bs_rect);
-        request.push(self.color);
-        request.push([
-            self.border_radius,
-            self.edge_size,
-            self.blur_radius,
-            self.inverted,
-        ]);
-        for &rect in &self.rects {
-            request.push(rect);
-        }
-    }
 pub struct GradientPrimitiveCpu {
     pub stops_range: ItemRange<GradientStop>,
     pub stops_count: usize,
     pub extend_mode: ExtendMode,
     pub reverse_stops: bool,
     pub gpu_blocks: [GpuBlockData; 3],
@@ -799,65 +775,65 @@ pub enum PrimitiveContainer {
-    BoxShadow(BoxShadowPrimitiveCpu),
+    Brush(BrushPrimitive),
 pub struct PrimitiveStore {
     /// CPU side information only.
     pub cpu_rectangles: Vec<RectanglePrimitive>,
+    pub cpu_brushes: Vec<BrushPrimitive>,
     pub cpu_text_runs: Vec<TextRunPrimitiveCpu>,
     pub cpu_pictures: Vec<PicturePrimitive>,
     pub cpu_images: Vec<ImagePrimitiveCpu>,
     pub cpu_yuv_images: Vec<YuvImagePrimitiveCpu>,
     pub cpu_gradients: Vec<GradientPrimitiveCpu>,
     pub cpu_radial_gradients: Vec<RadialGradientPrimitiveCpu>,
     pub cpu_metadata: Vec<PrimitiveMetadata>,
     pub cpu_borders: Vec<BorderPrimitiveCpu>,
-    pub cpu_box_shadows: Vec<BoxShadowPrimitiveCpu>,
     pub cpu_lines: Vec<LinePrimitive>,
 impl PrimitiveStore {
     pub fn new() -> PrimitiveStore {
         PrimitiveStore {
             cpu_metadata: Vec::new(),
             cpu_rectangles: Vec::new(),
+            cpu_brushes: Vec::new(),
             cpu_text_runs: Vec::new(),
             cpu_pictures: Vec::new(),
             cpu_images: Vec::new(),
             cpu_yuv_images: Vec::new(),
             cpu_gradients: Vec::new(),
             cpu_radial_gradients: Vec::new(),
             cpu_borders: Vec::new(),
-            cpu_box_shadows: Vec::new(),
             cpu_lines: Vec::new(),
     pub fn recycle(self) -> Self {
         PrimitiveStore {
             cpu_metadata: recycle_vec(self.cpu_metadata),
             cpu_rectangles: recycle_vec(self.cpu_rectangles),
+            cpu_brushes: recycle_vec(self.cpu_brushes),
             cpu_text_runs: recycle_vec(self.cpu_text_runs),
             cpu_pictures: recycle_vec(self.cpu_pictures),
             cpu_images: recycle_vec(self.cpu_images),
             cpu_yuv_images: recycle_vec(self.cpu_yuv_images),
             cpu_gradients: recycle_vec(self.cpu_gradients),
             cpu_radial_gradients: recycle_vec(self.cpu_radial_gradients),
             cpu_borders: recycle_vec(self.cpu_borders),
-            cpu_box_shadows: recycle_vec(self.cpu_box_shadows),
             cpu_lines: recycle_vec(self.cpu_lines),
     pub fn add_primitive(
         &mut self,
         local_rect: &LayerRect,
         local_clip_rect: &LayerRect,
@@ -891,16 +867,28 @@ impl PrimitiveStore {
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_rectangles.len()),
+            PrimitiveContainer::Brush(brush) => { // Registers a brush: builds its metadata, then stores the CPU data.
+                let metadata = PrimitiveMetadata {
+                    opacity: PrimitiveOpacity::translucent(), // brushes are always recorded as translucent here
+                    prim_kind: PrimitiveKind::Brush,
+                    cpu_prim_index: SpecificPrimitiveIndex(self.cpu_brushes.len()), // index the brush occupies after the push below
+                    ..base_metadata // every other field comes from base_metadata
+                };
+                self.cpu_brushes.push(brush);
+                metadata
+            }
             PrimitiveContainer::Line(line) => {
                 let metadata = PrimitiveMetadata {
                     opacity: PrimitiveOpacity::translucent(),
                     prim_kind: PrimitiveKind::Line,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_lines.len()),
@@ -992,27 +980,16 @@ impl PrimitiveStore {
                     prim_kind: PrimitiveKind::RadialGradient,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_radial_gradients.len()),
-            PrimitiveContainer::BoxShadow(box_shadow) => {
-                let metadata = PrimitiveMetadata {
-                    opacity: PrimitiveOpacity::translucent(),
-                    prim_kind: PrimitiveKind::BoxShadow,
-                    cpu_prim_index: SpecificPrimitiveIndex(self.cpu_box_shadows.len()),
-                    ..base_metadata
-                };
-                self.cpu_box_shadows.push(box_shadow);
-                metadata
-            }
     pub fn get_metadata(&self, index: PrimitiveIndex) -> &PrimitiveMetadata {
@@ -1024,99 +1001,56 @@ impl PrimitiveStore {
     /// Add any task dependencies for this primitive to the provided task.
     pub fn add_render_tasks_for_prim(&self, prim_index: PrimitiveIndex, task: &mut RenderTask) {
         // Add any dynamic render tasks needed to render this primitive
         let metadata = &self.cpu_metadata[prim_index.0];
         let render_task_id = match metadata.prim_kind {
-            PrimitiveKind::BoxShadow => {
-                let box_shadow = &self.cpu_box_shadows[metadata.cpu_prim_index.0];
-                box_shadow.render_task_id
-            }
             PrimitiveKind::Picture => {
                 let picture = &self.cpu_pictures[metadata.cpu_prim_index.0];
             PrimitiveKind::Rectangle |
             PrimitiveKind::TextRun |
             PrimitiveKind::Image |
             PrimitiveKind::AlignedGradient |
             PrimitiveKind::YuvImage |
             PrimitiveKind::Border |
             PrimitiveKind::AngleGradient |
             PrimitiveKind::RadialGradient |
-            PrimitiveKind::Line => None,
+            PrimitiveKind::Line |
+            PrimitiveKind::Brush => None,
         if let Some(render_task_id) = render_task_id {
         if let Some(clip_task_id) = metadata.clip_task_id {
-    /// Returns true if the bounding box needs to be updated.
     fn prepare_prim_for_render_inner(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_context: &PrimitiveContext,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         // For some primitives, we need to mark dependencies as needed for rendering
         // without spawning new tasks, since there will be another call to
         // `prepare_prim_for_render_inner` specifically for this primitive later on.
         render_tasks: Option<&mut RenderTaskTree>,
         text_run_mode: TextRunMode,
     ) {
         let metadata = &mut self.cpu_metadata[prim_index.0];
         match metadata.prim_kind {
             PrimitiveKind::Rectangle | PrimitiveKind::Border | PrimitiveKind::Line => {}
-            PrimitiveKind::BoxShadow => {
-                // TODO(gw): Account for zoom factor!
-                // Here, we calculate the size of the patch required in order
-                // to create the box shadow corner. First, scale it by the
-                // device pixel ratio since the cache shader expects vertices
-                // in device space. The shader adds a 1-pixel border around
-                // the patch, in order to prevent bilinear filter artifacts as
-                // the patch is clamped / mirrored across the box shadow rect.
-                let box_shadow = &mut self.cpu_box_shadows[metadata.cpu_prim_index.0];
-                let edge_size = box_shadow.edge_size.ceil() * prim_context.device_pixel_ratio;
-                let edge_size = edge_size as i32 + 2; // Account for bilinear filtering
-                let cache_size = DeviceIntSize::new(edge_size, edge_size);
-                let cache_key = BoxShadowPrimitiveCacheKey {
-                    blur_radius: Au::from_f32_px(box_shadow.blur_radius),
-                    border_radius: Au::from_f32_px(box_shadow.border_radius),
-                    inverted: box_shadow.inverted != 0.0,
-                    shadow_rect_size: Size2D::new(
-                        Au::from_f32_px(box_shadow.bs_rect.size.width),
-                        Au::from_f32_px(box_shadow.bs_rect.size.height),
-                    ),
-                };
-                // Create a render task for this box shadow primitive. This renders a small
-                // portion of the box shadow to a render target. That portion is then
-                // stretched over the actual primitive rect by the box shadow primitive
-                // shader, to reduce the number of pixels that the expensive box
-                // shadow shader needs to run on.
-                // TODO(gw): In the future, we can probably merge the box shadow
-                // primitive (stretch) shader with the generic cached primitive shader.
-                let render_task = RenderTask::new_box_shadow(
-                    cache_key,
-                    cache_size,
-                    prim_index
-                );
-                // ignore the new task if we are in a dependency context
-                box_shadow.render_task_id = render_tasks.map(|rt| rt.add(render_task));
-            }
             PrimitiveKind::Picture => {
                 let picture = &mut self.cpu_pictures[metadata.cpu_prim_index.0];
                 // This is a shadow element. Create a render task that will
                 // render the text run to a target, and then apply a gaussian
                 // blur to that text run in order to build the actual primitive
                 // which will be blitted to the framebuffer.
                 let cache_width =
@@ -1124,20 +1058,28 @@ impl PrimitiveStore {
                 let cache_height =
                     (metadata.local_rect.size.height * prim_context.device_pixel_ratio).ceil() as i32;
                 let cache_size = DeviceIntSize::new(cache_width, cache_height);
                 let blur_radius = picture.as_shadow().blur_radius;
                 let blur_radius = device_length(blur_radius, prim_context.device_pixel_ratio);
                 // ignore new tasks if we are in a dependency context
                 picture.render_task_id = render_tasks.map(|rt| {
-                    let picture_task = RenderTask::new_picture(cache_size, prim_index);
+                    let picture_task = RenderTask::new_picture(
+                        cache_size,
+                        prim_index,
+                        picture.kind,
+                    );
                     let picture_task_id = rt.add(picture_task);
-                    let render_task =
-                        RenderTask::new_blur(blur_radius, picture_task_id, rt);
+                    let render_task = RenderTask::new_blur(
+                        blur_radius,
+                        picture_task_id,
+                        rt,
+                        picture.kind
+                    );
             PrimitiveKind::TextRun => {
                 let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
@@ -1179,17 +1121,18 @@ impl PrimitiveStore {
             PrimitiveKind::AlignedGradient |
             PrimitiveKind::AngleGradient |
-            PrimitiveKind::RadialGradient => {}
+            PrimitiveKind::RadialGradient |
+            PrimitiveKind::Brush => {}
         // Mark this GPU resource as required for this frame.
         if let Some(mut request) = gpu_cache.request(&mut metadata.gpu_location) {
             match metadata.prim_kind {
@@ -1200,20 +1143,16 @@ impl PrimitiveStore {
                 PrimitiveKind::Line => {
                     let line = &self.cpu_lines[metadata.cpu_prim_index.0];
                 PrimitiveKind::Border => {
                     let border = &self.cpu_borders[metadata.cpu_prim_index.0];
-                PrimitiveKind::BoxShadow => {
-                    let box_shadow = &self.cpu_box_shadows[metadata.cpu_prim_index.0];
-                    box_shadow.write_gpu_blocks(request);
-                }
                 PrimitiveKind::Image => {
                     let image = &self.cpu_images[metadata.cpu_prim_index.0];
                 PrimitiveKind::YuvImage => {
                     let yuv_image = &self.cpu_yuv_images[metadata.cpu_prim_index.0];
@@ -1239,16 +1178,20 @@ impl PrimitiveStore {
+                PrimitiveKind::Brush => {
+                    let brush = &self.cpu_brushes[metadata.cpu_prim_index.0];
+                    brush.write_gpu_blocks(request);
+                }
     fn update_clip_task(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_context: &PrimitiveContext,
@@ -1278,54 +1221,58 @@ impl PrimitiveStore {
                     None => {
                         metadata.screen_rect = None;
                         return false;
                 _ => prim_screen_rect,
-            let extra = ClipWorkItem {
-                layer_index: prim_context.packed_layer_index,
-                clip_sources: metadata.clip_sources.weak(),
-                apply_rectangles: false,
-            };
+            let extra_clip = Some(Rc::new(ClipChainNode {
+                work_item: ClipWorkItem {
+                    layer_index: prim_context.packed_layer_index,
+                    clip_sources: metadata.clip_sources.weak(),
+                    coordinate_system_id: prim_context.coordinate_system_id,
+                },
+                prev: None,
+            }));
-                &prim_context.current_clip_stack,
-                Some(extra),
+                prim_context.clip_chain.clone(),
+                extra_clip,
+                prim_context.coordinate_system_id,
-        } else if !prim_context.current_clip_stack.is_empty() {
+        } else if prim_context.clip_chain.is_some() {
             // If the primitive doesn't have a specific clip, key the task ID off the
             // stacking context. This means that two primitives which are only clipped
             // by the stacking context stack can share clip masks during render task
             // assignment to targets.
-                &prim_context.current_clip_stack,
+                prim_context.clip_chain.clone(),
+                prim_context.coordinate_system_id,
         } else {
         metadata.clip_task_id = clip_task.map(|clip_task| render_tasks.add(clip_task));
-    /// Returns true if the bounding box needs to be updated.
     pub fn prepare_prim_for_render(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_context: &PrimitiveContext,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         clip_store: &mut ClipStore,
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -98,17 +98,17 @@ impl Document {
         gpu_cache: &mut GpuCache,
         resource_profile: &mut ResourceProfileCounters,
     ) -> RendererFrame {
         let accumulated_scale_factor = self.accumulated_scale_factor();
         let pan = LayerPoint::new(
             self.pan.x as f32 / accumulated_scale_factor,
             self.pan.y as f32 / accumulated_scale_factor,
-        self.frame.build(
+        self.frame.build_renderer_frame(
             &mut resource_profile.texture_cache,
             &mut resource_profile.gpu_cache,
@@ -292,16 +292,27 @@ impl RenderBackend {
+            DocumentMsg::UpdatePipelineResources { resources, pipeline_id, epoch } => { // Applies resource updates and records a new epoch for one pipeline.
+                profile_scope!("UpdateResources");
+                self.resource_cache
+                    .update_resources(resources, &mut profile_counters.resources);
+                doc.scene.update_epoch(pipeline_id, epoch); // the epoch is recorded on the scene ...
+                doc.frame.update_epoch(pipeline_id, epoch); // ... and on the frame, with the same values
+                DocumentOp::Nop // NOTE(review): presumably means no further document work is requested; confirm
+            }
             DocumentMsg::SetRootPipeline(pipeline_id) => {
                 if doc.scene.pipelines.get(&pipeline_id).is_some() {
                     let _timer = profile_counters.total_time.timer();
                     doc.build_scene(&mut self.resource_cache);
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -1,20 +1,22 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 use api::{ClipId, DeviceIntLength, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::{FilterOp, MixBlendMode};
 use api::PipelineId;
 use clip::{ClipSource, ClipSourcesWeakHandle, ClipStore};
+use clip_scroll_tree::CoordinateSystemId;
 use gpu_cache::GpuCacheHandle;
 use internal_types::HardwareCompositeOp;
-use prim_store::{BoxShadowPrimitiveCacheKey, PrimitiveIndex};
+use prim_store::PrimitiveIndex;
 use std::{cmp, usize, f32, i32};
+use std::rc::Rc;
 use tiling::{ClipScrollGroupIndex, PackedLayerIndex, RenderPass, RenderTargetIndex};
 use tiling::{RenderTargetKind, StackingContextIndex};
 const FLOATS_PER_RENDER_TASK_INFO: usize = 12;
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub struct RenderTaskId(pub u32); // TODO(gw): Make private when using GPU cache!
@@ -23,16 +25,41 @@ pub struct RenderTaskId(pub u32); // TOD
 pub struct RenderTaskAddress(pub u32);
 pub struct RenderTaskTree {
     pub tasks: Vec<RenderTask>,
     pub task_data: Vec<RenderTaskData>,
+pub type ClipChain = Option<Rc<ClipChainNode>>; // A possibly empty chain: `None`, or a reference-counted first node.
+pub struct ClipChainNode { // One link in a chain of clip work items.
+    pub work_item: ClipWorkItem,
+    pub prev: ClipChain, // the link followed next when the chain is walked; `None` ends the chain
+struct ClipChainNodeIter { // Walks a ClipChain by following each node's `prev` link.
+    current: ClipChain, // node to yield on the next call; `None` once the walk is finished
+impl Iterator for ClipChainNodeIter {
+    type Item = Rc<ClipChainNode>;
+    fn next(&mut self) -> ClipChain { // Yields the current node, then moves to its `prev` link.
+        let previous = self.current.clone(); // keep a handle to the node being yielded
+        self.current = match self.current {
+            Some(ref item) => item.prev.clone(), // step along the chain
+            None => return None, // nothing left to yield
+        };
+        previous
+    }
 impl RenderTaskTree {
     pub fn new() -> RenderTaskTree {
         RenderTaskTree {
             tasks: Vec::new(),
             task_data: Vec::new(),
@@ -107,18 +134,16 @@ impl RenderTaskTree {
         for task in &mut self.tasks {
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub enum RenderTaskKey {
-    /// Draw this box shadow to a cache target.
-    BoxShadow(BoxShadowPrimitiveCacheKey),
     /// Draw the alpha mask for a shared clip.
 pub enum RenderTaskLocation {
     Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
@@ -172,70 +197,92 @@ pub enum MaskGeometryKind {
     CornersOnly, // Draw the corners (simple axis aligned mask)
              // TODO(gw): Add more types here (e.g. 4 rectangles outside the inner rect)
 #[derive(Debug, Clone)]
 pub struct ClipWorkItem {
     pub layer_index: PackedLayerIndex,
     pub clip_sources: ClipSourcesWeakHandle,
-    pub apply_rectangles: bool,
+    pub coordinate_system_id: CoordinateSystemId,
 impl ClipWorkItem {
-    fn get_geometry_kind(&self, clip_store: &ClipStore) -> MaskGeometryKind {
+    fn get_geometry_kind(
+        &self,
+        clip_store: &ClipStore,
+        prim_coordinate_system_id: CoordinateSystemId
+    ) -> MaskGeometryKind {
         let clips = clip_store
             .expect("bug: clip handle should be valid")
         let mut rounded_rect_count = 0;
         for &(ref clip, _) in clips {
             match *clip {
-                ClipSource::Rectangle(..) => if self.apply_rectangles {
-                    return MaskGeometryKind::Default;
+                ClipSource::Rectangle(..) => {
+                    if self.has_compatible_coordinate_system(prim_coordinate_system_id) {
+                        return MaskGeometryKind::Default;
+                    }
                 ClipSource::RoundedRectangle(..) => {
                     rounded_rect_count += 1;
                 ClipSource::Image(..) | ClipSource::BorderCorner(..) => {
                     return MaskGeometryKind::Default;
         if rounded_rect_count == 1 {
         } else {
+    fn has_compatible_coordinate_system(&self, other_id: CoordinateSystemId) -> bool { // True when this work item's coordinate system id equals `other_id`.
+        self.coordinate_system_id == other_id // compatibility is plain equality of the ids
+    }
 pub struct CacheMaskTask {
     actual_rect: DeviceIntRect,
     inner_rect: DeviceIntRect,
     pub clips: Vec<ClipWorkItem>,
     pub geometry_kind: MaskGeometryKind,
+    pub coordinate_system_id: CoordinateSystemId,
+pub struct PictureTask { // Payload of RenderTaskKind::Picture.
+    pub prim_index: PrimitiveIndex,
+    pub target_kind: RenderTargetKind,
+pub struct BlurTask { // Payload shared by RenderTaskKind::VerticalBlur and RenderTaskKind::HorizontalBlur.
+    pub blur_radius: DeviceIntLength,
+    pub target_kind: RenderTargetKind,
 pub struct RenderTaskData {
     pub data: [f32; FLOATS_PER_RENDER_TASK_INFO],
 pub enum RenderTaskKind {
-    Picture(PrimitiveIndex),
-    BoxShadow(PrimitiveIndex),
+    Picture(PictureTask),
-    VerticalBlur(DeviceIntLength),
-    HorizontalBlur(DeviceIntLength),
+    VerticalBlur(BlurTask),
+    HorizontalBlur(BlurTask),
 pub struct RenderTask {
     pub cache_key: Option<RenderTaskKey>,
     pub location: RenderTaskLocation,
@@ -264,123 +311,119 @@ impl RenderTask {
     pub fn new_dynamic_alpha_batch(
         rect: &DeviceIntRect,
         frame_output_pipeline_id: Option<PipelineId>,
     ) -> RenderTask {
         let location = RenderTaskLocation::Dynamic(None, rect.size);
         Self::new_alpha_batch(rect.origin, location, frame_output_pipeline_id)
-    pub fn new_picture(size: DeviceIntSize, prim_index: PrimitiveIndex) -> RenderTask {
+    pub fn new_picture(
+        size: DeviceIntSize,
+        prim_index: PrimitiveIndex,
+        target_kind: RenderTargetKind,
+    ) -> RenderTask {
         RenderTask {
             cache_key: None,
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, size),
-            kind: RenderTaskKind::Picture(prim_index),
-        }
-    }
-    pub fn new_box_shadow(
-        key: BoxShadowPrimitiveCacheKey,
-        size: DeviceIntSize,
-        prim_index: PrimitiveIndex,
-    ) -> RenderTask {
-        RenderTask {
-            cache_key: Some(RenderTaskKey::BoxShadow(key)),
-            children: Vec::new(),
-            location: RenderTaskLocation::Dynamic(None, size),
-            kind: RenderTaskKind::BoxShadow(prim_index),
+            kind: RenderTaskKind::Picture(PictureTask {
+                prim_index,
+                target_kind,
+            }),
     pub fn new_readback(screen_rect: DeviceIntRect) -> RenderTask {
         RenderTask {
             cache_key: None,
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, screen_rect.size),
             kind: RenderTaskKind::Readback(screen_rect),
     pub fn new_mask(
         key: Option<ClipId>,
         task_rect: DeviceIntRect,
-        raw_clips: &[ClipWorkItem],
-        extra_clip: Option<ClipWorkItem>,
+        raw_clips: ClipChain,
+        extra_clip: ClipChain,
         prim_rect: DeviceIntRect,
         clip_store: &ClipStore,
         is_axis_aligned: bool,
+        prim_coordinate_system_id: CoordinateSystemId,
     ) -> Option<RenderTask> {
         // Filter out all the clip instances that don't contribute to the result
+        let mut current_coordinate_system_id = prim_coordinate_system_id;
         let mut inner_rect = Some(task_rect);
-        let clips: Vec<_> = raw_clips
-            .iter()
-            .chain(extra_clip.iter())
-            .filter(|work_item| {
-                let clip_info = clip_store
-                    .get_opt(&work_item.clip_sources)
-                    .expect("bug: clip item should exist");
+        let clips: Vec<_> = ClipChainNodeIter { current: raw_clips }
+            .chain(ClipChainNodeIter { current: extra_clip })
+            .filter_map(|node| {
+                let work_item = node.work_item.clone();
-                // If this clip does not contribute to a mask, then ensure
-                // it gets filtered out here. Otherwise, if a mask is
-                // created (by a different clip in the list), the allocated
-                // rectangle for the mask could end up being much bigger
-                // than is actually required.
-                if !clip_info.is_masking() {
-                    return false;
+                // FIXME(1828): This is a workaround until we can fix the inconsistency between
+                // the shader and the CPU code around how inner_rects are handled.
+                if !node.work_item.has_compatible_coordinate_system(current_coordinate_system_id) {
+                    current_coordinate_system_id = node.work_item.coordinate_system_id;
+                    inner_rect = None;
+                    return Some(work_item)
+                let clip_info = clip_store
+                    .get_opt(&node.work_item.clip_sources)
+                    .expect("bug: clip item should exist");
+                debug_assert!(clip_info.is_masking());
                 match clip_info.bounds.inner {
-                    // Inner rects aren't valid if the item is not axis-aligned, which can
-                    // be determined by the apply_rectangles field. This is mostly a band-aid
-                    // until we have better handling of inner rectangles for transformed clips.
-                    Some(ref inner) if !work_item.apply_rectangles && !inner.device_rect.is_empty() => {
+                    Some(ref inner) if !inner.device_rect.is_empty() => {
                         inner_rect = inner_rect.and_then(|r| r.intersection(&inner.device_rect));
-                        !inner.device_rect.contains_rect(&task_rect)
+                        if inner.device_rect.contains_rect(&task_rect) {
+                            return None;
+                        }
-                    _ => {
-                        inner_rect = None;
-                        true
-                    }
+                    _ => inner_rect = None,
+                Some(work_item)
-            .cloned()
         // Nothing to do, all clips are irrelevant for this case
         if clips.is_empty() {
             return None;
         // TODO(gw): This optimization is very conservative for now.
         //           For now, only draw optimized geometry if it is
         //           a single aligned rect mask with rounded corners.
         //           In the future, we'll expand this to handle the
         //           more complex types of clip mask geometry.
         let mut geometry_kind = MaskGeometryKind::Default;
         if let Some(inner_rect) = inner_rect {
             // If the inner rect completely contains the primitive
             // rect, then this mask can't affect the primitive.
             if inner_rect.contains_rect(&prim_rect) {
                 return None;
             if is_axis_aligned && clips.len() == 1 {
-                geometry_kind = clips[0].get_geometry_kind(clip_store);
+                geometry_kind = clips[0].get_geometry_kind(clip_store, prim_coordinate_system_id);
         Some(RenderTask {
             cache_key: key.map(RenderTaskKey::CacheMask),
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, task_rect.size),
             kind: RenderTaskKind::CacheMask(CacheMaskTask {
                 actual_rect: task_rect,
                 inner_rect: inner_rect.unwrap_or(DeviceIntRect::zero()),
+                coordinate_system_id: prim_coordinate_system_id,
     // Construct a render task to apply a blur to a primitive. For now,
     // this is only used for text runs, but we can probably extend this
     // to handle general blurs to any render task in the future.
     // The render task chain that is constructed looks like:
@@ -394,58 +437,61 @@ impl RenderTask {
     //    HorizontalBlurTask: Apply the separable horizontal blur to the vertical blur.
     //           |
     //           +---- This is stored as the input task to the primitive shader.
     pub fn new_blur(
         blur_radius: DeviceIntLength,
         src_task_id: RenderTaskId,
         render_tasks: &mut RenderTaskTree,
+        target_kind: RenderTargetKind,
     ) -> RenderTask {
-        let src_size = render_tasks.get(src_task_id).get_dynamic_size();
-        let blur_target_size = src_size + DeviceIntSize::new(2 * blur_radius.0, 2 * blur_radius.0);
+        let blur_target_size = render_tasks.get(src_task_id).get_dynamic_size();
         let blur_task_v = RenderTask {
             cache_key: None,
             children: vec![src_task_id],
             location: RenderTaskLocation::Dynamic(None, blur_target_size),
-            kind: RenderTaskKind::VerticalBlur(blur_radius),
+            kind: RenderTaskKind::VerticalBlur(BlurTask {
+                blur_radius,
+                target_kind,
+            }),
         let blur_task_v_id = render_tasks.add(blur_task_v);
         let blur_task_h = RenderTask {
             cache_key: None,
             children: vec![blur_task_v_id],
             location: RenderTaskLocation::Dynamic(None, blur_target_size),
-            kind: RenderTaskKind::HorizontalBlur(blur_radius),
+            kind: RenderTaskKind::HorizontalBlur(BlurTask {
+                blur_radius,
+                target_kind,
+            }),
     pub fn as_alpha_batch_mut<'a>(&'a mut self) -> &'a mut AlphaRenderTask {
         match self.kind {
             RenderTaskKind::Alpha(ref mut task) => task,
             RenderTaskKind::Picture(..) |
-            RenderTaskKind::BoxShadow(..) |
             RenderTaskKind::CacheMask(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::HorizontalBlur(..) |
             RenderTaskKind::Alias(..) => unreachable!(),
     pub fn as_alpha_batch<'a>(&'a self) -> &'a AlphaRenderTask {
         match self.kind {
             RenderTaskKind::Alpha(ref task) => task,
             RenderTaskKind::Picture(..) |
-            RenderTaskKind::BoxShadow(..) |
             RenderTaskKind::CacheMask(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::HorizontalBlur(..) |
             RenderTaskKind::Alias(..) => unreachable!(),
@@ -476,17 +522,17 @@ impl RenderTask {
-            RenderTaskKind::Picture(..) | RenderTaskKind::BoxShadow(..) => {
+            RenderTaskKind::Picture(..) => {
                 let (target_rect, target_index) = self.get_target_rect();
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         target_rect.size.width as f32,
                         target_rect.size.height as f32,
                         target_index.0 as f32,
@@ -514,27 +560,27 @@ impl RenderTask {
                         task.inner_rect.origin.x as f32,
                         task.inner_rect.origin.y as f32,
                         (task.inner_rect.origin.x + task.inner_rect.size.width) as f32,
                         (task.inner_rect.origin.y + task.inner_rect.size.height) as f32,
-            RenderTaskKind::VerticalBlur(blur_radius) |
-            RenderTaskKind::HorizontalBlur(blur_radius) => {
+            RenderTaskKind::VerticalBlur(ref task_info) |
+            RenderTaskKind::HorizontalBlur(ref task_info) => {
                 let (target_rect, target_index) = self.get_target_rect();
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         target_rect.size.width as f32,
                         target_rect.size.height as f32,
                         target_index.0 as f32,
-                        blur_radius.0 as f32,
+                        task_info.blur_radius.0 as f32,
@@ -557,16 +603,43 @@ impl RenderTask {
             RenderTaskKind::Alias(..) => RenderTaskData { data: [0.0; 12] },
+    pub fn inflate(&mut self, device_radius: i32) {
+        match self.kind {
+            RenderTaskKind::Alpha(ref mut info) => {
+                match self.location {
+                    RenderTaskLocation::Fixed => {
+                        panic!("bug: inflate only supported for dynamic tasks");
+                    }
+                    RenderTaskLocation::Dynamic(_, ref mut size) => {
+                        size.width += device_radius * 2;
+                        size.height += device_radius * 2;
+                        info.screen_origin.x -= device_radius;
+                        info.screen_origin.y -= device_radius;
+                    }
+                }
+            }
+            RenderTaskKind::Readback(..) |
+            RenderTaskKind::CacheMask(..) |
+            RenderTaskKind::VerticalBlur(..) |
+            RenderTaskKind::HorizontalBlur(..) |
+            RenderTaskKind::Picture(..) |
+            RenderTaskKind::Alias(..) => {
+                panic!("bug: inflate only supported for alpha tasks");
+            }
+        }
+    }
     pub fn get_dynamic_size(&self) -> DeviceIntSize {
         match self.location {
             RenderTaskLocation::Fixed => DeviceIntSize::zero(),
             RenderTaskLocation::Dynamic(_, size) => size,
     pub fn get_target_rect(&self) -> (DeviceIntRect, RenderTargetIndex) {
@@ -578,23 +651,29 @@ impl RenderTask {
                 (DeviceIntRect::new(origin, size), target_index)
     pub fn target_kind(&self) -> RenderTargetKind {
         match self.kind {
             RenderTaskKind::Alpha(..) |
-            RenderTaskKind::Picture(..) |
-            RenderTaskKind::VerticalBlur(..) |
-            RenderTaskKind::Readback(..) |
-            RenderTaskKind::HorizontalBlur(..) => RenderTargetKind::Color,
+            RenderTaskKind::Readback(..) => RenderTargetKind::Color,
+            RenderTaskKind::CacheMask(..) => {
+                RenderTargetKind::Alpha
+            }
-            RenderTaskKind::CacheMask(..) | RenderTaskKind::BoxShadow(..) => {
-                RenderTargetKind::Alpha
+            RenderTaskKind::VerticalBlur(ref task_info) |
+            RenderTaskKind::HorizontalBlur(ref task_info) => {
+                task_info.target_kind
+            }
+            RenderTaskKind::Picture(ref task_info) => {
+                task_info.target_kind
             RenderTaskKind::Alias(..) => {
                 panic!("BUG: target_kind() called on invalidated task");
@@ -607,17 +686,17 @@ impl RenderTask {
     pub fn is_shared(&self) -> bool {
         match self.kind {
             RenderTaskKind::Alpha(..) |
             RenderTaskKind::Picture(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::HorizontalBlur(..) => false,
-            RenderTaskKind::CacheMask(..) | RenderTaskKind::BoxShadow(..) => true,
+            RenderTaskKind::CacheMask(..) => true,
             RenderTaskKind::Alias(..) => {
                 panic!("BUG: is_shared() called on aliased task");
     pub fn set_alias(&mut self, id: RenderTaskId) {
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -62,18 +62,18 @@ use texture_cache::TextureCache;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use tiling::{AlphaRenderTarget, ColorRenderTarget, RenderTargetKind};
 use tiling::{BatchKey, BatchKind, Frame, RenderTarget, TransformBatchKind};
 use time::precise_time_ns;
 use util::TransformedRectKind;
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
-const GPU_TAG_CACHE_BOX_SHADOW: GpuProfileTag = GpuProfileTag {
-    label: "C_BoxShadow",
+const GPU_TAG_BRUSH_MASK: GpuProfileTag = GpuProfileTag {
+    label: "B_Mask",
     color: debug_colors::BLACK,
 const GPU_TAG_CACHE_CLIP: GpuProfileTag = GpuProfileTag {
     label: "C_Clip",
     color: debug_colors::PURPLE,
 const GPU_TAG_CACHE_TEXT_RUN: GpuProfileTag = GpuProfileTag {
     label: "C_TextRun",
@@ -134,20 +134,16 @@ const GPU_TAG_PRIM_GRADIENT: GpuProfileT
 const GPU_TAG_PRIM_ANGLE_GRADIENT: GpuProfileTag = GpuProfileTag {
     label: "AngleGradient",
     color: debug_colors::POWDERBLUE,
 const GPU_TAG_PRIM_RADIAL_GRADIENT: GpuProfileTag = GpuProfileTag {
     label: "RadialGradient",
     color: debug_colors::LIGHTPINK,
-const GPU_TAG_PRIM_BOX_SHADOW: GpuProfileTag = GpuProfileTag {
-    label: "BoxShadow",
-    color: debug_colors::CYAN,
 const GPU_TAG_PRIM_BORDER_CORNER: GpuProfileTag = GpuProfileTag {
     label: "BorderCorner",
     color: debug_colors::DARKSLATEGREY,
 const GPU_TAG_PRIM_BORDER_EDGE: GpuProfileTag = GpuProfileTag {
     label: "BorderEdge",
     color: debug_colors::LAVENDER,
@@ -189,18 +185,17 @@ impl BatchKind {
                     ImageBufferKind::TextureRect => "Image (Rect)",
                     ImageBufferKind::TextureExternal => "Image (External)",
                     ImageBufferKind::Texture2DArray => "Image (Array)",
                 TransformBatchKind::YuvImage(..) => "YuvImage",
                 TransformBatchKind::AlignedGradient => "AlignedGradient",
                 TransformBatchKind::AngleGradient => "AngleGradient",
                 TransformBatchKind::RadialGradient => "RadialGradient",
-                TransformBatchKind::BoxShadow => "BoxShadow",
-                TransformBatchKind::CacheImage => "CacheImage",
+                TransformBatchKind::CacheImage(..) => "CacheImage",
                 TransformBatchKind::BorderCorner => "BorderCorner",
                 TransformBatchKind::BorderEdge => "BorderEdge",
                 TransformBatchKind::Line => "Line",
@@ -360,45 +355,21 @@ const DESC_CLIP: VertexDescriptor = Vert
         VertexAttribute {
             name: "aClipDataResourceAddress",
             count: 4,
             kind: VertexAttributeKind::U16,
-const DESC_CACHE_BOX_SHADOW: VertexDescriptor = VertexDescriptor {
-    vertex_attributes: &[
-        VertexAttribute {
-            name: "aPosition",
-            count: 2,
-            kind: VertexAttributeKind::F32,
-        },
-    ],
-    instance_attributes: &[
-        VertexAttribute {
-            name: "aPrimAddress",
-            count: 2,
-            kind: VertexAttributeKind::U16,
-        },
-        VertexAttribute {
-            name: "aTaskIndex",
-            count: 1,
-            kind: VertexAttributeKind::I32,
-        },
-    ],
 #[derive(Debug, Copy, Clone)]
 enum VertexArrayKind {
-    CacheBoxShadow,
 #[derive(Clone, Debug, PartialEq)]
 pub enum GraphicsApi {
 #[derive(Clone, Debug)]
@@ -859,16 +830,17 @@ impl VertexDataTexture {
 const CLIP_FEATURE: &str = "CLIP";
 enum ShaderKind {
+    Brush,
 struct LazilyCompiledShader {
     program: Option<Program>,
     name: &'static str,
     kind: ShaderKind,
     features: Vec<&'static str>,
@@ -912,17 +884,17 @@ impl LazilyCompiledShader {
         device.set_uniforms(program, projection, mode.into());
     fn get(&mut self, device: &mut Device) -> Result<&Program, ShaderError> {
         if self.program.is_none() {
             let program = try!{
                 match self.kind {
-                    ShaderKind::Primitive => {
+                    ShaderKind::Primitive | ShaderKind::Brush => {
                     ShaderKind::Cache(format) => {
@@ -1039,17 +1011,16 @@ fn create_prim_shader(
     debug!("PrimShader {}", name);
     let vertex_descriptor = match vertex_format {
         VertexArrayKind::Primitive => DESC_PRIM_INSTANCES,
         VertexArrayKind::Blur => DESC_BLUR,
         VertexArrayKind::Clip => DESC_CLIP,
-        VertexArrayKind::CacheBoxShadow => DESC_CACHE_BOX_SHADOW,
     let program = device.create_program(name, &prefix, &vertex_descriptor);
     if let Ok(ref program) = program {
@@ -1117,20 +1088,21 @@ pub struct Renderer {
     pending_texture_updates: Vec<TextureUpdateList>,
     pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
     pending_shader_updates: Vec<PathBuf>,
     current_frame: Option<RendererFrame>,
     // These are "cache shaders". These shaders are used to
     // draw intermediate results to cache targets. The results
     // of these shaders are then used by the primitive shaders.
-    cs_box_shadow: LazilyCompiledShader,
     cs_text_run: LazilyCompiledShader,
     cs_line: LazilyCompiledShader,
-    cs_blur: LazilyCompiledShader,
+    cs_blur_a8: LazilyCompiledShader,
+    cs_blur_rgba8: LazilyCompiledShader,
+    brush_mask: LazilyCompiledShader,
     /// These are "cache clip shaders". These shaders are used to
     /// draw clip instances into the cached clip mask. The results
     /// of these shaders are also used by the primitive shaders.
     cs_clip_rectangle: LazilyCompiledShader,
     cs_clip_image: LazilyCompiledShader,
     cs_clip_border: LazilyCompiledShader,
@@ -1146,18 +1118,18 @@ pub struct Renderer {
     ps_text_run: PrimitiveShader,
     ps_image: Vec<Option<PrimitiveShader>>,
     ps_yuv_image: Vec<Option<PrimitiveShader>>,
     ps_border_corner: PrimitiveShader,
     ps_border_edge: PrimitiveShader,
     ps_gradient: PrimitiveShader,
     ps_angle_gradient: PrimitiveShader,
     ps_radial_gradient: PrimitiveShader,
-    ps_box_shadow: PrimitiveShader,
-    ps_cache_image: PrimitiveShader,
+    ps_cache_image_rgba8: PrimitiveShader,
+    ps_cache_image_a8: PrimitiveShader,
     ps_line: PrimitiveShader,
     ps_blend: LazilyCompiledShader,
     ps_hw_composite: LazilyCompiledShader,
     ps_split_composite: LazilyCompiledShader,
     ps_composite: LazilyCompiledShader,
     notifier: Arc<Mutex<Option<Box<RenderNotifier>>>>,
@@ -1178,17 +1150,16 @@ pub struct Renderer {
     color_render_targets: Vec<Texture>,
     alpha_render_targets: Vec<Texture>,
     gpu_profile: GpuProfiler<GpuProfileTag>,
     prim_vao: VAO,
     blur_vao: VAO,
     clip_vao: VAO,
-    box_shadow_vao: VAO,
     layer_texture: VertexDataTexture,
     render_task_texture: VertexDataTexture,
     gpu_cache_texture: CacheTexture,
     pipeline_epoch_map: FastHashMap<PipelineId, Epoch>,
     // Manages and resolves source textures IDs to real texture IDs.
@@ -1299,44 +1270,52 @@ impl Renderer {
         // device-pixel ratio doesn't matter here - we are just creating resources.
-        let cs_box_shadow = try!{
-            LazilyCompiledShader::new(ShaderKind::Cache(VertexArrayKind::CacheBoxShadow),
-                                      "cs_box_shadow",
-                                      &[],
-                                      &mut device,
-                                      options.precache_shaders)
-        };
         let cs_text_run = try!{
                                       &mut device,
         let cs_line = try!{
                                       &mut device,
-        let cs_blur = try!{
+        let brush_mask = try!{
+            LazilyCompiledShader::new(ShaderKind::Brush,
+                                      "brush_mask",
+                                      &[],
+                                      &mut device,
+                                      options.precache_shaders)
+        };
+        let cs_blur_a8 = try!{
-                                      &[],
+                                      &["ALPHA"],
+                                      &mut device,
+                                      options.precache_shaders)
+        };
+        let cs_blur_rgba8 = try!{
+            LazilyCompiledShader::new(ShaderKind::Cache(VertexArrayKind::Blur),
+                                     "cs_blur",
+                                      &["COLOR"],
                                       &mut device,
         let cs_clip_rectangle = try!{
@@ -1465,23 +1444,16 @@ impl Renderer {
         let ps_border_edge = try!{
                                  &mut device,
-        let ps_box_shadow = try!{
-            PrimitiveShader::new("ps_box_shadow",
-                                 &mut device,
-                                 &[],
-                                 options.precache_shaders)
-        };
         let dithering_feature = ["DITHERING"];
         let ps_gradient = try!{
                                  &mut device,
                                  if options.enable_dithering {
                                  } else {
@@ -1507,20 +1479,27 @@ impl Renderer {
                                  if options.enable_dithering {
                                  } else {
-        let ps_cache_image = try!{
+        let ps_cache_image_a8 = try!{
                                  &mut device,
-                                 &[],
+                                 &["ALPHA"],
+                                 options.precache_shaders)
+        };
+        let ps_cache_image_rgba8 = try!{
+            PrimitiveShader::new("ps_cache_image",
+                                 &mut device,
+                                 &["COLOR"],
         let ps_blend = try!{
                                      &mut device,
@@ -1658,18 +1637,16 @@ impl Renderer {
         let prim_vao = device.create_vao(&DESC_PRIM_INSTANCES);
         device.update_vao_indices(&prim_vao, &quad_indices, VertexUsageHint::Static);
         device.update_vao_main_vertices(&prim_vao, &quad_vertices, VertexUsageHint::Static);
         let blur_vao = device.create_vao_with_new_instances(&DESC_BLUR, &prim_vao);
         let clip_vao = device.create_vao_with_new_instances(&DESC_CLIP, &prim_vao);
-        let box_shadow_vao =
-            device.create_vao_with_new_instances(&DESC_CACHE_BOX_SHADOW, &prim_vao);
         let texture_cache_upload_pbo = device.create_pbo();
         let texture_resolver = SourceTextureResolver::new(&mut device);
         let layer_texture = VertexDataTexture::new(&mut device);
         let render_task_texture = VertexDataTexture::new(&mut device);
@@ -1728,35 +1705,36 @@ impl Renderer {
         let renderer = Renderer {
             current_frame: None,
             pending_texture_updates: Vec::new(),
             pending_gpu_cache_updates: Vec::new(),
             pending_shader_updates: Vec::new(),
-            cs_box_shadow,
-            cs_blur,
+            cs_blur_a8,
+            cs_blur_rgba8,
+            brush_mask,
-            ps_box_shadow,
-            ps_cache_image,
+            ps_cache_image_rgba8,
+            ps_cache_image_a8,
             debug: debug_renderer,
@@ -1770,17 +1748,16 @@ impl Renderer {
             clear_color: options.clear_color,
             enable_clear_scissor: options.enable_clear_scissor,
             last_time: 0,
             color_render_targets: Vec::new(),
             alpha_render_targets: Vec::new(),
-            box_shadow_vao,
             pipeline_epoch_map: FastHashMap::default(),
             external_image_handler: None,
             output_image_handler: None,
             output_targets: FastHashMap::default(),
@@ -1928,28 +1905,38 @@ impl Renderer {
+                        debug_server::BatchKind::Cache,
+                        "Vertical Blur",
+                        target.vertical_blurs.len(),
+                    );
+                    debug_target.add(
+                        debug_server::BatchKind::Cache,
+                        "Horizontal Blur",
+                        target.horizontal_blurs.len(),
+                    );
+                    debug_target.add(
+                    debug_target.add(
+                        debug_server::BatchKind::Cache,
+                        "Rectangle Brush",
+                        target.rect_cache_prims.len(),
+                    );
                     for (_, items) in target.clip_batcher.images.iter() {
                         debug_target.add(debug_server::BatchKind::Clip, "Image mask", items.len());
-                    debug_target.add(
-                        debug_server::BatchKind::Cache,
-                        "Box Shadow",
-                        target.box_shadow_cache_prims.len(),
-                    );
                 for target in &pass.color_targets.targets {
                     let mut debug_target = debug_server::Target::new("RGBA8");
@@ -2006,34 +1993,34 @@ impl Renderer {
     fn handle_debug_command(&mut self, command: DebugCommand) {
         match command {
             DebugCommand::EnableProfiler(enable) => if enable {
-                self.debug_flags.insert(PROFILER_DBG);
+                self.debug_flags.insert(DebugFlags::PROFILER_DBG);
             } else {
-                self.debug_flags.remove(PROFILER_DBG);
+                self.debug_flags.remove(DebugFlags::PROFILER_DBG);
             DebugCommand::EnableTextureCacheDebug(enable) => if enable {
-                self.debug_flags.insert(TEXTURE_CACHE_DBG);
+                self.debug_flags.insert(DebugFlags::TEXTURE_CACHE_DBG);
             } else {
-                self.debug_flags.remove(TEXTURE_CACHE_DBG);
+                self.debug_flags.remove(DebugFlags::TEXTURE_CACHE_DBG);
             DebugCommand::EnableRenderTargetDebug(enable) => if enable {
-                self.debug_flags.insert(RENDER_TARGET_DBG);
+                self.debug_flags.insert(DebugFlags::RENDER_TARGET_DBG);
             } else {
-                self.debug_flags.remove(RENDER_TARGET_DBG);
+                self.debug_flags.remove(DebugFlags::RENDER_TARGET_DBG);
             DebugCommand::EnableAlphaRectsDebug(enable) => if enable {
-                self.debug_flags.insert(ALPHA_PRIM_DBG);
+                self.debug_flags.insert(DebugFlags::ALPHA_PRIM_DBG);
             } else {
-                self.debug_flags.remove(ALPHA_PRIM_DBG);
+                self.debug_flags.remove(DebugFlags::ALPHA_PRIM_DBG);
             DebugCommand::FetchDocuments => {}
             DebugCommand::FetchClipScrollTree => {}
             DebugCommand::FetchPasses => {
                 let json = self.get_passes_for_debugger();
@@ -2128,17 +2115,17 @@ impl Renderer {
-                if self.debug_flags.contains(PROFILER_DBG) {
+                if self.debug_flags.contains(DebugFlags::PROFILER_DBG) {
                     let screen_fraction = 1.0 / //TODO: take device/pixel ratio into equation?
                         (framebuffer_size.width as f32 * framebuffer_size.height as f32);
                         &mut self.device,
                         &mut profile_timers,
@@ -2309,17 +2296,16 @@ impl Renderer {
         if let Some(ref texture) = self.dither_matrix_texture {
             self.device.bind_texture(TextureSampler::Dither, texture);
         let vao = match vertex_array_kind {
             VertexArrayKind::Primitive => &self.prim_vao,
             VertexArrayKind::Clip => &self.clip_vao,
             VertexArrayKind::Blur => &self.blur_vao,
-            VertexArrayKind::CacheBoxShadow => &self.box_shadow_vao,
         if self.enable_batcher {
                 .update_vao_instances(vao, data, VertexUsageHint::Stream);
@@ -2487,34 +2473,37 @@ impl Renderer {
                         &mut self.device,
                         &mut self.renderer_errors,
-                TransformBatchKind::BoxShadow => {
-                    self.ps_box_shadow.bind(
-                        &mut self.device,
-                        transform_kind,
-                        projection,
-                        0,
-                        &mut self.renderer_errors,
-                    );
-                    GPU_TAG_PRIM_BOX_SHADOW
-                }
-                TransformBatchKind::CacheImage => {
-                    self.ps_cache_image.bind(
-                        &mut self.device,
-                        transform_kind,
-                        projection,
-                        0,
-                        &mut self.renderer_errors,
-                    );
+                TransformBatchKind::CacheImage(target_kind) => {
+                    match target_kind {
+                        RenderTargetKind::Alpha => {
+                            self.ps_cache_image_a8.bind(
+                                &mut self.device,
+                                transform_kind,
+                                projection,
+                                0,
+                                &mut self.renderer_errors,
+                            );
+                        }
+                        RenderTargetKind::Color => {
+                            self.ps_cache_image_rgba8.bind(
+                                &mut self.device,
+                                transform_kind,
+                                projection,
+                                0,
+                                &mut self.renderer_errors,
+                            );
+                        }
+                    }
         // Handle special case readback for composites.
         match key.kind {
             BatchKind::Composite {
@@ -2634,17 +2623,17 @@ impl Renderer {
         // separable implementation.
         // TODO(gw): In the future, consider having
         //           fast path blur shaders for common
         //           blur radii with fixed weights.
         if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_BLUR);
-            self.cs_blur
+            self.cs_blur_rgba8
                 .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             if !target.vertical_blurs.is_empty() {
@@ -2729,17 +2718,17 @@ impl Renderer {
             for batch in &target.alpha_batcher.batch_list.alpha_batch_list.batches {
-                if self.debug_flags.contains(ALPHA_PRIM_DBG) {
+                if self.debug_flags.contains(DebugFlags::ALPHA_PRIM_DBG) {
                     let color = match batch.key.blend_mode {
                         BlendMode::None => ColorF::new(0.3, 0.3, 0.3, 1.0),
                         BlendMode::Alpha => ColorF::new(0.0, 0.9, 0.1, 1.0),
                         BlendMode::PremultipliedAlpha => ColorF::new(0.0, 0.3, 0.7, 1.0),
                         BlendMode::Subpixel => ColorF::new(0.5, 0.0, 0.4, 1.0),
                     for item_rect in &batch.item_rects {
                         self.debug.add_rect(item_rect, color);
@@ -2916,25 +2905,55 @@ impl Renderer {
             // GPUs that I have tested with. It's possible it may be a
             // performance penalty on other GPU types - we should test this
             // and consider different code paths.
             let clear_color = [1.0, 1.0, 1.0, 0.0];
                 .clear_target_rect(Some(clear_color), None, target.used_rect());
-        // Draw any box-shadow caches for this target.
-        if !target.box_shadow_cache_prims.is_empty() {
+        // Draw any blurs for this target.
+        // Blurs are rendered as a standard 2-pass
+        // separable implementation.
+        // TODO(gw): In the future, consider having
+        //           fast path blur shaders for common
+        //           blur radii with fixed weights.
+        if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
+            let _gm = self.gpu_profile.add_marker(GPU_TAG_BLUR);
-            let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_BOX_SHADOW);
-            self.cs_box_shadow
+            self.cs_blur_a8
+                .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
+            if !target.vertical_blurs.is_empty() {
+                self.draw_instanced_batch(
+                    &target.vertical_blurs,
+                    VertexArrayKind::Blur,
+                    &BatchTextures::no_texture(),
+                );
+            }
+            if !target.horizontal_blurs.is_empty() {
+                self.draw_instanced_batch(
+                    &target.horizontal_blurs,
+                    VertexArrayKind::Blur,
+                    &BatchTextures::no_texture(),
+                );
+            }
+        }
+        if !target.rect_cache_prims.is_empty() {
+            self.device.set_blend(false);
+            let _gm = self.gpu_profile.add_marker(GPU_TAG_BRUSH_MASK);
+            self.brush_mask
                 .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
-                &target.box_shadow_cache_prims,
-                VertexArrayKind::CacheBoxShadow,
+                &target.rect_cache_prims,
+                VertexArrayKind::Primitive,
         // Draw the clip items into the tiled alpha mask.
             let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_CLIP);
@@ -3325,17 +3344,17 @@ impl Renderer {
         self.debug_flags = flags;
     pub fn save_cpu_profile(&self, filename: &str) {
     fn draw_render_target_debug(&mut self, framebuffer_size: DeviceUintSize) {
-        if !self.debug_flags.contains(RENDER_TARGET_DBG) {
+        if !self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) {
         let mut spacing = 16;
         let mut size = 512;
         let fb_width = framebuffer_size.width as i32;
         let num_textures = self.color_render_targets
@@ -3365,17 +3384,17 @@ impl Renderer {
                 let dest_rect = rect(x, y, size, size);
                 self.device.blit_render_target(src_rect, dest_rect);
     fn draw_texture_cache_debug(&mut self, framebuffer_size: DeviceUintSize) {
-        if !self.debug_flags.contains(TEXTURE_CACHE_DBG) {
+        if !self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) {
         let mut spacing = 16;
         let mut size = 512;
         let fb_width = framebuffer_size.width as i32;
         let num_layers: i32 = self.texture_resolver
@@ -3386,17 +3405,17 @@ impl Renderer {
         if num_layers * (size + spacing) > fb_width {
             let factor = fb_width as f32 / (num_layers * (size + spacing)) as f32;
             size = (size as f32 * factor) as i32;
             spacing = (spacing as f32 * factor) as i32;
         let mut i = 0;
         for texture in &self.texture_resolver.cache_texture_map {
-            let y = spacing + if self.debug_flags.contains(RENDER_TARGET_DBG) {
+            let y = spacing + if self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) {
             } else {
             let dimensions = texture.get_dimensions();
             let src_rect = DeviceIntRect::new(
                 DeviceIntSize::new(dimensions.width as i32, dimensions.height as i32),
@@ -3468,22 +3487,22 @@ impl Renderer {
         for texture in self.color_render_targets {
         self.texture_resolver.deinit(&mut self.device);
-        self.device.delete_vao(self.box_shadow_vao);
         self.debug.deinit(&mut self.device);
-        self.cs_box_shadow.deinit(&mut self.device);
         self.cs_text_run.deinit(&mut self.device);
         self.cs_line.deinit(&mut self.device);
-        self.cs_blur.deinit(&mut self.device);
+        self.cs_blur_a8.deinit(&mut self.device);
+        self.cs_blur_rgba8.deinit(&mut self.device);
+        self.brush_mask.deinit(&mut self.device);
         self.cs_clip_rectangle.deinit(&mut self.device);
         self.cs_clip_image.deinit(&mut self.device);
         self.cs_clip_border.deinit(&mut self.device);
         self.ps_rectangle.deinit(&mut self.device);
         self.ps_rectangle_clip.deinit(&mut self.device);
         self.ps_text_run.deinit(&mut self.device);
         for shader in self.ps_image {
             if let Some(shader) = shader {
@@ -3498,18 +3517,18 @@ impl Renderer {
         for (_, target) in self.output_targets {
         self.ps_border_corner.deinit(&mut self.device);
         self.ps_border_edge.deinit(&mut self.device);
         self.ps_gradient.deinit(&mut self.device);
         self.ps_angle_gradient.deinit(&mut self.device);
         self.ps_radial_gradient.deinit(&mut self.device);
-        self.ps_box_shadow.deinit(&mut self.device);
-        self.ps_cache_image.deinit(&mut self.device);
+        self.ps_cache_image_rgba8.deinit(&mut self.device);
+        self.ps_cache_image_a8.deinit(&mut self.device);
         self.ps_line.deinit(&mut self.device);
         self.ps_blend.deinit(&mut self.device);
         self.ps_hw_composite.deinit(&mut self.device);
         self.ps_split_composite.deinit(&mut self.device);
         self.ps_composite.deinit(&mut self.device);
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -17,16 +17,17 @@ use device::TextureFilter;
 use frame::FrameId;
 use glyph_cache::GlyphCache;
 use glyph_rasterizer::{GlyphRasterizer, GlyphRequest};
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
 use internal_types::{FastHashMap, FastHashSet, SourceTexture, TextureUpdateList};
 use profiler::{ResourceProfileCounters, TextureCacheProfileCounters};
 use rayon::ThreadPool;
 use std::collections::hash_map::Entry::{self, Occupied, Vacant};
+use std::cmp;
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::mem;
 use std::sync::Arc;
 use texture_cache::{TextureCache, TextureCacheHandle};
 const DEFAULT_TILE_SIZE: TileSize = 512;
@@ -108,45 +109,52 @@ impl ImageTemplates {
 struct CachedImageInfo {
     texture_cache_handle: TextureCacheHandle,
     epoch: Epoch,
+pub enum ResourceClassCacheError {
+    OverLimitSize,
+pub type ResourceCacheResult<V> = Result<V, ResourceClassCacheError>;
 pub struct ResourceClassCache<K, V> {
-    resources: FastHashMap<K, V>,
+    resources: FastHashMap<K, ResourceCacheResult<V>>,
 impl<K, V> ResourceClassCache<K, V>
     K: Clone + Hash + Eq + Debug,
     pub fn new() -> ResourceClassCache<K, V> {
         ResourceClassCache {
             resources: FastHashMap::default(),
-    fn get(&self, key: &K) -> &V {
-        self.resources
-            .get(key)
+    fn get(&self, key: &K) -> &ResourceCacheResult<V> {
+        self.resources.get(key)
             .expect("Didn't find a cached resource with that ID!")
-    pub fn insert(&mut self, key: K, value: V) {
+    pub fn insert(&mut self, key: K, value: ResourceCacheResult<V>) {
         self.resources.insert(key, value);
-    pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
+    pub fn get_mut(&mut self, key: &K) -> &mut ResourceCacheResult<V> {
+            .expect("Didn't find a cached resource with that ID!")
-    pub fn entry(&mut self, key: K) -> Entry<K, V> {
+    pub fn entry(&mut self, key: K) -> Entry<K, ResourceCacheResult<V>> {
     pub fn clear(&mut self) {
     fn clear_keys<F>(&mut self, key_fun: F)
@@ -154,17 +162,17 @@ where
         for<'r> F: Fn(&'r &K) -> bool,
         let resources_to_destroy = self.resources
         for key in resources_to_destroy {
-            self.resources.remove(&key).unwrap();
+            let _ = self.resources.remove(&key).unwrap();
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 struct ImageRequest {
     key: ImageKey,
@@ -478,35 +486,48 @@ impl ResourceCache {
         match self.resources.image_templates.get(key) {
             Some(template) => {
                 // Images that don't use the texture cache can early out.
                 if !template.data.uses_texture_cache() {
+                let side_size =
+                    template.tiling.map_or(cmp::max(template.descriptor.width, template.descriptor.height),
+                                           |tile_size| tile_size as u32);
+                if side_size > self.texture_cache.max_texture_size() {
+                    // The image or tiling size is too big for hardware texture size.
+                    warn!("Dropping image, image:(w:{},h:{}, tile:{}) is too big for hardware!",
+                          template.descriptor.width, template.descriptor.height, template.tiling.unwrap_or(0));
+                    self.cached_images.insert(request, Err(ResourceClassCacheError::OverLimitSize));
+                    return;
+                }
                 // If this image exists in the texture cache, *and* the epoch
                 // in the cache matches that of the template, then it is
                 // valid to use as-is.
                 let (entry, needs_update) = match self.cached_images.entry(request) {
                     Occupied(entry) => {
-                        let needs_update = entry.get().epoch != template.epoch;
+                        let needs_update = entry.get().as_ref().unwrap().epoch != template.epoch;
                         (entry.into_mut(), needs_update)
                     Vacant(entry) => (
-                        entry.insert(CachedImageInfo {
-                            epoch: template.epoch,
-                            texture_cache_handle: TextureCacheHandle::new(),
-                        }),
+                        entry.insert(Ok(
+                            CachedImageInfo {
+                                epoch: template.epoch,
+                                texture_cache_handle: TextureCacheHandle::new(),
+                            }
+                        )),
                 let needs_upload = self.texture_cache
-                    .request(&mut entry.texture_cache_handle, gpu_cache);
+                    .request(&mut entry.as_mut().unwrap().texture_cache_handle, gpu_cache);
                 if !needs_upload && !needs_update {
                 // We can start a worker thread rasterizing right now, if:
                 //  - The image is a blob.
                 //  - The blob hasn't already been requested this frame.
@@ -595,17 +616,17 @@ impl ResourceCache {
         self.glyph_rasterizer.prepare_font(&mut font);
         let glyph_key_cache = self.cached_glyphs.get_glyph_key_cache_for_font(&font);
         let mut current_texture_id = SourceTexture::Invalid;
         for (loop_index, key) in glyph_keys.iter().enumerate() {
-            if let Some(ref glyph) = *glyph_key_cache.get(key) {
+            if let Ok(Some(ref glyph)) = *glyph_key_cache.get(key) {
                 let cache_item = self.texture_cache.get(&glyph.texture_cache_handle);
                 if current_texture_id != cache_item.texture_id {
                     if !fetch_buffer.is_empty() {
                         f(current_texture_id, fetch_buffer);
                     current_texture_id = cache_item.texture_id;
@@ -643,25 +664,34 @@ impl ResourceCache {
     pub fn get_cached_image(
         image_key: ImageKey,
         image_rendering: ImageRendering,
         tile: Option<TileOffset>,
-    ) -> CacheItem {
+    ) -> Result<CacheItem, ()> {
         debug_assert_eq!(self.state, State::QueryResources);
         let key = ImageRequest {
             key: image_key,
             rendering: image_rendering,
-        let image_info = &self.cached_images.get(&key);
-        self.texture_cache.get(&image_info.texture_cache_handle)
+        // TODO(Jerry): add a debug option to visualize the corresponding area for
+        // the Err() case of CacheItem.
+        match *self.cached_images.get(&key) {
+          Ok(ref image_info) => {
+              Ok(self.texture_cache.get(&image_info.texture_cache_handle))
+          }
+          Err(_) => {
+              Err(())
+          }
+        }
     pub fn get_image_properties(&self, image_key: ImageKey) -> Option<ImageProperties> {
         let image_template = &self.resources.image_templates.get(image_key);
         image_template.map(|image_template| {
             let external_image = match image_template.data {
                 ImageData::External(ext_image) => {
@@ -811,17 +841,17 @@ impl ResourceCache {
                     format: image_descriptor.format,
                     is_opaque: image_descriptor.is_opaque,
             } else {
-            let entry = self.cached_images.get_mut(&request).unwrap();
+            let entry = self.cached_images.get_mut(&request).as_mut().unwrap();
                 &mut entry.texture_cache_handle,
                 [0.0; 3],
--- a/gfx/webrender/src/scene.rs
+++ b/gfx/webrender/src/scene.rs
@@ -128,16 +128,22 @@ impl Scene {
     pub fn remove_pipeline(&mut self, pipeline_id: PipelineId) {
         if self.root_pipeline_id == Some(pipeline_id) {
             self.root_pipeline_id = None;
+    pub fn update_epoch(&mut self, pipeline_id: PipelineId, epoch: Epoch) {
+        if let Some(pipeline) = self.pipelines.get_mut(&pipeline_id) {
+            pipeline.epoch = epoch;
+        }
+    }
 pub trait FilterOpHelpers {
     fn resolve(self, properties: &SceneProperties) -> FilterOp;
     fn is_noop(&self) -> bool;
 impl FilterOpHelpers for FilterOp {
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -4,19 +4,20 @@
 use api::{ClipAndScrollInfo, ClipId, ColorF, DeviceIntPoint, ImageKey};
 use api::{DeviceIntRect, DeviceIntSize, DeviceUintPoint, DeviceUintSize};
 use api::{ExternalImageType, FilterOp, FontRenderMode, ImageRendering, LayerRect};
 use api::{LayerToWorldTransform, MixBlendMode, PipelineId, PropertyBinding, TransformStyle};
 use api::{LayerVector2D, TileOffset, WorldToLayerTransform, YuvColorSpace, YuvFormat};
 use border::{BorderCornerInstance, BorderCornerSide};
 use clip::{ClipSource, ClipStore};
+use clip_scroll_tree::CoordinateSystemId;
 use device::Texture;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle, GpuCacheUpdateList};
-use gpu_types::{BlurDirection, BlurInstance, BoxShadowCacheInstance, ClipMaskInstance};
+use gpu_types::{BlurDirection, BlurInstance, BrushInstance, ClipMaskInstance};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
 use internal_types::{FastHashMap, SourceTexture};
 use internal_types::BatchTextures;
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
 use prim_store::{DeferredResolve, TextRunMode};
 use profiler::FrameProfileCounters;
 use render_task::{AlphaRenderItem, ClipWorkItem, MaskGeometryKind, MaskSegment};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKey, RenderTaskKind};
@@ -395,16 +396,19 @@ impl AlphaRenderItem {
                 let blend_mode = ctx.prim_store.get_blend_mode(prim_metadata, transform_kind);
                 match prim_metadata.prim_kind {
+                    PrimitiveKind::Brush => {
+                        panic!("BUG: brush type not expected in an alpha task (yet)");
+                    }
                     PrimitiveKind::Border => {
                         let border_cpu =
                         // TODO(gw): Select correct blend mode for edges and corners!!
                         let corner_kind = BatchKind::Transformable(
@@ -476,16 +480,17 @@ impl AlphaRenderItem {
                         if color_texture_id == SourceTexture::Invalid {
+                            warn!("Warnings: skip a PrimitiveKind::Image at {:?}.\n", item_bounding_rect);
                         let batch_kind = match color_texture_id {
                             SourceTexture::External(ext_image) => {
                                 match ext_image.image_type {
                                     ExternalImageType::Texture2DHandle => {
@@ -534,16 +539,18 @@ impl AlphaRenderItem {
                         let font = text_cpu.get_font(TextRunMode::Normal, ctx.device_pixel_ratio);
                             |texture_id, glyphs| {
+                                debug_assert_ne!(texture_id, SourceTexture::Invalid);
                                 let textures = BatchTextures {
                                     colors: [
@@ -568,17 +575,17 @@ impl AlphaRenderItem {
                     PrimitiveKind::Picture => {
                         let picture =
                         let cache_task_id = picture.render_task_id.expect("no render task!");
                         let cache_task_address = render_tasks.get_task_address(cache_task_id);
                         let textures = BatchTextures::render_target_cache();
                         let kind = BatchKind::Transformable(
-                            TransformBatchKind::CacheImage,
+                            TransformBatchKind::CacheImage(picture.kind),
                         let key = BatchKey::new(kind, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
                         batch.push(base_instance.build(0, cache_task_address.0 as i32, 0));
                     PrimitiveKind::AlignedGradient => {
                         let gradient_cpu =
@@ -627,16 +634,17 @@ impl AlphaRenderItem {
                             if texture == SourceTexture::Invalid {
+                                warn!("Warnings: skip a PrimitiveKind::YuvImage at {:?}.\n", item_bounding_rect);
                             textures.colors[channel] = texture;
                             uv_rect_addresses[channel] = address.as_int(gpu_cache);
                         let get_buffer_kind = |texture: SourceTexture| {
@@ -686,36 +694,16 @@ impl AlphaRenderItem {
                         let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
-                    PrimitiveKind::BoxShadow => {
-                        let box_shadow =
-                            &ctx.prim_store.cpu_box_shadows[prim_metadata.cpu_prim_index.0];
-                        let cache_task_id = box_shadow.render_task_id.unwrap();
-                        let cache_task_address = render_tasks.get_task_address(cache_task_id);
-                        let textures = BatchTextures::render_target_cache();
-                        let kind =
-                            BatchKind::Transformable(transform_kind, TransformBatchKind::BoxShadow);
-                        let key = BatchKey::new(kind, blend_mode, textures);
-                        let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
-                        for rect_index in 0 .. box_shadow.rects.len() {
-                            batch.push(base_instance.build(
-                                rect_index as i32,
-                                cache_task_address.0 as i32,
-                                0,
-                            ));
-                        }
-                    }
             AlphaRenderItem::SplitComposite(sc_index, task_id, gpu_handle, z) => {
                 let key = BatchKey::new(
@@ -807,21 +795,23 @@ impl ClipBatcher {
             borders: Vec::new(),
     fn add(
         &mut self,
         task_address: RenderTaskAddress,
         clips: &[ClipWorkItem],
+        coordinate_system_id: CoordinateSystemId,
         resource_cache: &ResourceCache,
         gpu_cache: &GpuCache,
         geometry_kind: MaskGeometryKind,
         clip_store: &ClipStore,
     ) {
+        let mut coordinate_system_id = coordinate_system_id;
         for work_item in clips.iter() {
             let instance = ClipMaskInstance {
                 render_task_address: task_address,
                 layer_address: work_item.layer_index.into(),
                 segment: 0,
                 clip_data_address: GpuCacheAddress::invalid(),
                 resource_address: GpuCacheAddress::invalid(),
@@ -829,33 +819,39 @@ impl ClipBatcher {
                 .expect("bug: clip handle should be valid");
             for &(ref source, ref handle) in &info.clips {
                 let gpu_address = gpu_cache.get_address(handle);
                 match *source {
                     ClipSource::Image(ref mask) => {
-                        let cache_item =
-                            resource_cache.get_cached_image(mask.image, ImageRendering::Auto, None);
-                        self.images
-                            .entry(cache_item.texture_id)
-                            .or_insert(Vec::new())
-                            .push(ClipMaskInstance {
+                        if let Ok(cache_item) = resource_cache.get_cached_image(mask.image, ImageRendering::Auto, None) {
+                            self.images
+                                .entry(cache_item.texture_id)
+                                .or_insert(Vec::new())
+                                .push(ClipMaskInstance {
+                                    clip_data_address: gpu_address,
+                                    resource_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
+                                    ..instance
+                                });
+                        } else {
+                            warn!("Warnings: skip a image mask. Key:{:?} Rect::{:?}.\n", mask.image, mask.rect);
+                            continue;
+                        }
+                    }
+                    ClipSource::Rectangle(..) => {
+                        if work_item.coordinate_system_id != coordinate_system_id {
+                            self.rectangles.push(ClipMaskInstance {
                                 clip_data_address: gpu_address,
-                                resource_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
+                                segment: MaskSegment::All as i32,
-                    }
-                    ClipSource::Rectangle(..) => if work_item.apply_rectangles {
-                        self.rectangles.push(ClipMaskInstance {
-                            clip_data_address: gpu_address,
-                            segment: MaskSegment::All as i32,
-                            ..instance
-                        });
+                            coordinate_system_id = work_item.coordinate_system_id;
+                        }
                     ClipSource::RoundedRectangle(..) => match geometry_kind {
                         MaskGeometryKind::Default => {
                             self.rectangles.push(ClipMaskInstance {
                                 clip_data_address: gpu_address,
                                 segment: MaskSegment::All as i32,
@@ -966,17 +962,17 @@ pub trait RenderTarget {
         ctx: &RenderTargetContext,
         gpu_cache: &GpuCache,
         render_tasks: &RenderTaskTree,
         clip_store: &ClipStore,
     fn used_rect(&self) -> DeviceIntRect;
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub enum RenderTargetKind {
     Color, // RGBA32
     Alpha, // R8
 pub struct RenderTargetList<T> {
     pub targets: Vec<T>,
@@ -1154,18 +1150,18 @@ impl RenderTarget for ColorRenderTarget 
                 // Find the child render task that we are applying
                 // a horizontal blur on.
                 self.horizontal_blurs.push(BlurInstance {
                     task_address: render_tasks.get_task_address(task_id),
                     src_task_address: render_tasks.get_task_address(task.children[0]),
                     blur_direction: BlurDirection::Horizontal,
-            RenderTaskKind::Picture(prim_index) => {
-                let prim_metadata = ctx.prim_store.get_metadata(prim_index);
+            RenderTaskKind::Picture(ref task_info) => {
+                let prim_metadata = ctx.prim_store.get_metadata(task_info.prim_index);
                 let prim_address = prim_metadata.gpu_location.as_int(gpu_cache);
                 match prim_metadata.prim_kind {
                     PrimitiveKind::Picture => {
                         let prim = &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
                         let task_index = render_tasks.get_task_address(task_id);
@@ -1227,41 +1223,46 @@ impl RenderTarget for ColorRenderTarget 
                     _ => {
                         // No other primitives make use of primitive caching yet!
-            RenderTaskKind::CacheMask(..) | RenderTaskKind::BoxShadow(..) => {
+            RenderTaskKind::CacheMask(..) => {
                 panic!("Should not be added to color target!");
             RenderTaskKind::Readback(device_rect) => {
 pub struct AlphaRenderTarget {
     pub clip_batcher: ClipBatcher,
-    pub box_shadow_cache_prims: Vec<BoxShadowCacheInstance>,
+    pub rect_cache_prims: Vec<PrimitiveInstance>,
+    // List of blur operations to apply for this render target.
+    pub vertical_blurs: Vec<BlurInstance>,
+    pub horizontal_blurs: Vec<BlurInstance>,
     allocator: TextureAllocator,
 impl RenderTarget for AlphaRenderTarget {
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint> {
     fn new(size: Option<DeviceUintSize>) -> AlphaRenderTarget {
         AlphaRenderTarget {
             clip_batcher: ClipBatcher::new(),
-            box_shadow_cache_prims: Vec::new(),
+            rect_cache_prims: Vec::new(),
+            vertical_blurs: Vec::new(),
+            horizontal_blurs: Vec::new(),
             allocator: TextureAllocator::new(size.expect("bug: alpha targets need size")),
     fn used_rect(&self) -> DeviceIntRect {
@@ -1274,42 +1275,78 @@ impl RenderTarget for AlphaRenderTarget 
         clip_store: &ClipStore,
     ) {
         let task = render_tasks.get(task_id);
         match task.kind {
             RenderTaskKind::Alias(..) => {
                 panic!("BUG: add_task() called on invalidated task");
             RenderTaskKind::Alpha(..) |
-            RenderTaskKind::VerticalBlur(..) |
-            RenderTaskKind::HorizontalBlur(..) |
-            RenderTaskKind::Picture(..) |
             RenderTaskKind::Readback(..) => {
                 panic!("Should not be added to alpha target!");
-            RenderTaskKind::BoxShadow(prim_index) => {
-                let prim_metadata = ctx.prim_store.get_metadata(prim_index);
+            RenderTaskKind::VerticalBlur(..) => {
+                // Find the child render task that we are applying
+                // a vertical blur on.
+                self.vertical_blurs.push(BlurInstance {
+                    task_address: render_tasks.get_task_address(task_id),
+                    src_task_address: render_tasks.get_task_address(task.children[0]),
+                    blur_direction: BlurDirection::Vertical,
+                });
+            }
+            RenderTaskKind::HorizontalBlur(..) => {
+                // Find the child render task that we are applying
+                // a horizontal blur on.
+                self.horizontal_blurs.push(BlurInstance {
+                    task_address: render_tasks.get_task_address(task_id),
+                    src_task_address: render_tasks.get_task_address(task.children[0]),
+                    blur_direction: BlurDirection::Horizontal,
+                });
+            }
+            RenderTaskKind::Picture(ref task_info) => {
+                let prim_metadata = ctx.prim_store.get_metadata(task_info.prim_index);
                 match prim_metadata.prim_kind {
-                    PrimitiveKind::BoxShadow => {
-                        self.box_shadow_cache_prims.push(BoxShadowCacheInstance {
-                            prim_address: gpu_cache.get_address(&prim_metadata.gpu_location),
-                            task_index: render_tasks.get_task_address(task_id),
-                        });
+                    PrimitiveKind::Picture => {
+                        let prim = &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
+                        let task_index = render_tasks.get_task_address(task_id);
+                        for run in &prim.prim_runs {
+                            for i in 0 .. run.count {
+                                let sub_prim_index = PrimitiveIndex(run.prim_index.0 + i);
+                                let sub_metadata = ctx.prim_store.get_metadata(sub_prim_index);
+                                let sub_prim_address =
+                                    gpu_cache.get_address(&sub_metadata.gpu_location);
+                                match sub_metadata.prim_kind {
+                                    PrimitiveKind::Brush => {
+                                        let instance = BrushInstance::new(task_index, sub_prim_address);
+                                        self.rect_cache_prims.push(PrimitiveInstance::from(instance));
+                                    }
+                                    _ => {
+                                        unreachable!("Unexpected sub primitive type");
+                                    }
+                                }
+                            }
+                        }
                     _ => {
-                        panic!("BUG: invalid prim kind");
+                        // No other primitives make use of primitive caching yet!
+                        unreachable!()
             RenderTaskKind::CacheMask(ref task_info) => {
                 let task_address = render_tasks.get_task_address(task_id);
+                    task_info.coordinate_system_id,
@@ -1483,18 +1520,17 @@ impl RenderPass {
 pub enum TransformBatchKind {
     YuvImage(ImageBufferKind, YuvFormat, YuvColorSpace),
-    BoxShadow,
-    CacheImage,
+    CacheImage(RenderTargetKind),
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum BatchKind {
     Composite {
@@ -1681,16 +1717,17 @@ impl StackingContext {
 pub struct ClipScrollGroupIndex(pub usize, pub ClipAndScrollInfo);
 pub struct ClipScrollGroup {
     pub scroll_node_id: ClipId,
     pub clip_node_id: ClipId,
     pub packed_layer_index: PackedLayerIndex,
     pub screen_bounding_rect: Option<(TransformedRectKind, DeviceIntRect)>,
+    pub coordinate_system_id: CoordinateSystemId,
 impl ClipScrollGroup {
     pub fn is_visible(&self) -> bool {
@@ -1819,18 +1856,20 @@ fn resolve_image(
                     deferred_resolves.push(DeferredResolve {
                         address: gpu_cache.get_address(&cache_handle),
                     (SourceTexture::External(external_image), cache_handle)
                 None => {
-                    let cache_item =
-                        resource_cache.get_cached_image(image_key, image_rendering, tile_offset);
-                    (cache_item.texture_id, cache_item.uv_rect_handle)
+                    if let Ok(cache_item) = resource_cache.get_cached_image(image_key, image_rendering, tile_offset) {
+                        (cache_item.texture_id, cache_item.uv_rect_handle)
+                    } else {
+                        // There is no usable texture entry for the image key. Just return an invalid texture here.
+                        (SourceTexture::Invalid, GpuCacheHandle::new())
+                    }
         None => (SourceTexture::Invalid, GpuCacheHandle::new()),
--- a/gfx/webrender/src/util.rs
+++ b/gfx/webrender/src/util.rs
@@ -1,18 +1,20 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{BorderRadius, ComplexClipRegion, DeviceIntRect, DevicePoint, DeviceRect, DeviceSize};
-use api::{LayerRect, LayerToWorldTransform, LayoutRect, WorldPoint3D};
+use api::{BorderRadius, ComplexClipRegion, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
+use api::{DevicePoint, DeviceRect, DeviceSize, LayerRect, LayerToWorldTransform, LayoutRect};
+use api::WorldPoint3D;
 use euclid::{Point2D, Rect, Size2D, TypedPoint2D, TypedRect, TypedSize2D, TypedTransform2D};
 use euclid::TypedTransform3D;
 use num_traits::Zero;
 use std::f32::consts::FRAC_1_SQRT_2;
+use std::i32;
 // Matches the definition of SK_ScalarNearlyZero in Skia.
 const NEARLY_ZERO: f32 = 1.0 / 4096.0;
 // TODO: Implement these in euclid!
 pub trait MatrixHelpers<Src, Dst> {
     fn transform_rect(&self, rect: &TypedRect<f32, Src>) -> TypedRect<f32, Dst>;
     fn is_identity(&self) -> bool;
@@ -202,23 +204,16 @@ pub enum TransformedRectKind {
 pub struct TransformedRect {
     pub local_rect: LayerRect,
     pub bounding_rect: DeviceIntRect,
     pub inner_rect: DeviceIntRect,
     pub vertices: [WorldPoint3D; 4],
     pub kind: TransformedRectKind,
-// Having an unlimited bounding box is fine up until we try
-// to cast it to `i32`, where we get `-2147483648` for any
-// values larger than or equal to 2^31.
-//Note: clamping to i32::MIN and i32::MAX is not a solution,
-// with explanation left as an exercise for the reader.
-const MAX_COORD: f32 = 1.0e9;
 impl TransformedRect {
     pub fn new(
         rect: &LayerRect,
         transform: &LayerToWorldTransform,
         device_pixel_ratio: f32,
     ) -> TransformedRect {
         let kind = if transform.preserves_2d_axis_alignment() {
@@ -244,20 +239,17 @@ impl TransformedRect {
         xs.sort_by(|a, b| a.partial_cmp(b).unwrap());
         ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
         let outer_min_dp = (DevicePoint::new(xs[0], ys[0]) * device_pixel_ratio).floor();
         let outer_max_dp = (DevicePoint::new(xs[3], ys[3]) * device_pixel_ratio).ceil();
         let inner_min_dp = (DevicePoint::new(xs[1], ys[1]) * device_pixel_ratio).ceil();
         let inner_max_dp = (DevicePoint::new(xs[2], ys[2]) * device_pixel_ratio).floor();
-        let max_rect = DeviceRect::new(
-            DevicePoint::new(-MAX_COORD, -MAX_COORD),
-            DeviceSize::new(2.0 * MAX_COORD, 2.0 * MAX_COORD),
-        );
+        let max_rect = DeviceRect::max_rect();
         let bounding_rect = DeviceRect::new(outer_min_dp, (outer_max_dp - outer_min_dp).to_size())
         let inner_rect = DeviceRect::new(inner_min_dp, (inner_max_dp - inner_min_dp).to_size())
@@ -357,8 +349,38 @@ pub mod test {
         let p0 = Point2D::new(1.0, 2.0);
         // an identical transform doesn't need any inverse projection
         assert_eq!(m0.inverse_project(&p0), Some(p0));
         let m1 = Transform3D::create_rotation(0.0, 1.0, 0.0, Radians::new(PI / 3.0));
         // rotation by 60 degrees would imply scaling of X component by a factor of 2
         assert_eq!(m1.inverse_project(&p0), Some(Point2D::new(2.0, 2.0)));
+pub trait MaxRect {
+    fn max_rect() -> Self;
+impl MaxRect for DeviceIntRect {
+    fn max_rect() -> Self {
+        DeviceIntRect::new(
+            DeviceIntPoint::new(i32::MIN / 2, i32::MIN / 2),
+            DeviceIntSize::new(i32::MAX, i32::MAX),
+        )
+    }
+impl MaxRect for DeviceRect {
+    fn max_rect() -> Self {
+        // Having an unlimited bounding box is fine up until we try
+        // to cast it to `i32`, where we get `-2147483648` for any
+        // values larger than or equal to 2^31.
+        //
+        // Note: clamping to i32::MIN and i32::MAX is not a solution,
+        // with explanation left as an exercise for the reader.
+        const MAX_COORD: f32 = 1.0e9;
+        DeviceRect::new(
+            DevicePoint::new(-MAX_COORD, -MAX_COORD),
+            DeviceSize::new(2.0 * MAX_COORD, 2.0 * MAX_COORD),
+        )
+    }
--- a/gfx/webrender/tests/angle_shader_validation.rs
+++ b/gfx/webrender/tests/angle_shader_validation.rs
@@ -36,52 +36,44 @@ const SHADERS: &[Shader] = &[
     Shader {
         name: "cs_blur",
         features: CACHE_FEATURES,
     Shader {
         name: "cs_text_run",
         features: CACHE_FEATURES,
-    Shader {
-        name: "cs_box_shadow",
-        features: CACHE_FEATURES,
-    },
     // Prim shaders
     Shader {
         name: "ps_line",
         features: &["", "TRANSFORM", "CACHE"],
     Shader {
         name: "ps_border_corner",
         features: PRIM_FEATURES,
     Shader {
         name: "ps_border_edge",
         features: PRIM_FEATURES,
     Shader {
-        name: "ps_box_shadow",
-        features: PRIM_FEATURES,
-    },
-    Shader {
         name: "ps_gradient",
         features: PRIM_FEATURES,
     Shader {
         name: "ps_angle_gradient",
         features: PRIM_FEATURES,
     Shader {
         name: "ps_radial_gradient",
         features: PRIM_FEATURES,
     Shader {
         name: "ps_cache_image",
-        features: PRIM_FEATURES,
+        features: &["COLOR", "ALPHA"],
     Shader {
         name: "ps_blend",
         features: PRIM_FEATURES,
     Shader {
         name: "ps_composite",
         features: PRIM_FEATURES,
@@ -105,16 +97,21 @@ const SHADERS: &[Shader] = &[
     Shader {
         name: "ps_text_run",
         features: PRIM_FEATURES,
     Shader {
         name: "ps_rectangle",
         features: &["", "TRANSFORM", "CLIP_FEATURE", "TRANSFORM,CLIP_FEATURE"],
+    // Brush shaders
+    Shader {
+        name: "brush_mask",
+        features: &[],
+    },
 const VERSION_STRING: &str = "#version 300 es\n";
 fn validate_shaders() {
--- a/gfx/webrender_api/Cargo.toml
+++ b/gfx/webrender_api/Cargo.toml
@@ -6,18 +6,18 @@ license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 nightly = ["euclid/unstable", "serde/unstable"]
 ipc = ["ipc-channel"]
 app_units = "0.5.6"
+bitflags = "1.0"
 bincode = "0.9"
-bitflags = "0.9"
 byteorder = "1.0"
 euclid = "0.15"
 heapsize = ">= 0.3.6, < 0.5"
 ipc-channel = {version = "0.9", optional = true}
 serde = { version = "1.0", features = ["rc", "derive"] }
 time = "0.1"
 [target.'cfg(target_os = "macos")'.dependencies]
--- a/gfx/webrender_api/src/api.rs
+++ b/gfx/webrender_api/src/api.rs
@@ -149,16 +149,20 @@ pub struct HitTestItem {
     /// The tag of the hit display item.
     pub tag: ItemTag,
     /// The hit point in the coordinate space of the "viewport" of the display item. The
     /// viewport is the scroll node formed by the root reference frame of the display item's
     /// pipeline.
     pub point_in_viewport: LayoutPoint,
+    /// The coordinates of the original hit test point relative to the origin of this item.
+    /// This is useful for calculating things like text offsets in the client.
+    pub point_relative_to_item: LayoutPoint,
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
 pub struct HitTestResult {
     pub items: Vec<HitTestItem>,
 bitflags! {
@@ -187,16 +191,21 @@ pub enum DocumentMsg {
         epoch: Epoch,
         pipeline_id: PipelineId,
         background: Option<ColorF>,
         viewport_size: LayoutSize,
         content_size: LayoutSize,
         preserve_frame_state: bool,
         resources: ResourceUpdates,
+    UpdatePipelineResources {
+        resources: ResourceUpdates,
+        pipeline_id: PipelineId,
+        epoch: Epoch,
+    },
     EnableFrameOutput(PipelineId, bool),
     SetWindowParameters {
         window_size: DeviceUintSize,
@@ -209,16 +218,17 @@ pub enum DocumentMsg {
 impl fmt::Debug for DocumentMsg {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         f.write_str(match *self {
             DocumentMsg::SetDisplayList { .. } => "DocumentMsg::SetDisplayList",
+            DocumentMsg::UpdatePipelineResources { .. } => "DocumentMsg::UpdatePipelineResources",
             DocumentMsg::HitTest(..) => "DocumentMsg::HitTest",
             DocumentMsg::SetPageZoom(..) => "DocumentMsg::SetPageZoom",
             DocumentMsg::SetPinchZoom(..) => "DocumentMsg::SetPinchZoom",
             DocumentMsg::SetPan(..) => "DocumentMsg::SetPan",
             DocumentMsg::SetRootPipeline(..) => "DocumentMsg::SetRootPipeline",
             DocumentMsg::RemovePipeline(..) => "DocumentMsg::RemovePipeline",
             DocumentMsg::SetWindowParameters { .. } => "DocumentMsg::SetWindowParameters",
             DocumentMsg::Scroll(..) => "DocumentMsg::Scroll",
@@ -456,26 +466,44 @@ impl RenderApi {
     /// Creates an `ImageKey`.
     pub fn generate_image_key(&self) -> ImageKey {
         let new_id = self.next_unique_id();
         ImageKey::new(self.namespace_id, new_id)
-    /// Adds an image identified by the `ImageKey`.
+    /// Add/remove/update resources such as images and fonts.
     pub fn update_resources(&self, resources: ResourceUpdates) {
         if resources.updates.is_empty() {
+    /// Add/remove/update resources such as images and fonts, tagged with a pipeline and epoch.
+    ///
+    /// This is similar to `update_resources` with the addition that it allows updating
+    /// a pipeline's epoch. Sends `DocumentMsg::UpdatePipelineResources` to `document_id`.
+    pub fn update_pipeline_resources(
+        &self,
+        resources: ResourceUpdates,
+        document_id: DocumentId,
+        pipeline_id: PipelineId,
+        epoch: Epoch,
+    ) {
+        self.send(document_id, DocumentMsg::UpdatePipelineResources { // no empty-`resources` check in this method
+            resources,
+            pipeline_id,
+            epoch,
+        });
+    }
     pub fn send_external_event(&self, evt: ExternalEvent) {
         let msg = ApiMsg::ExternalEvent(evt);
     pub fn notify_memory_pressure(&self) {
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -281,19 +281,18 @@ pub enum BorderStyle {
     Ridge = 7,
     Inset = 8,
     Outset = 9,
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub enum BoxShadowClipMode {
-    None = 0,
-    Outset = 1,
-    Inset = 2,
+    Outset = 0,
+    Inset = 1,
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct BoxShadowDisplayItem {
     pub box_bounds: LayoutRect,
     pub offset: LayoutVector2D,
     pub color: ColorF,
     pub blur_radius: f32,
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -394,17 +394,17 @@ impl<'a, T: for<'de> Deserialize<'de>> I
     fn size_hint(&self) -> (usize, Option<usize>) {
         (self.size, Some(self.size))
 impl<'a, T: for<'de> Deserialize<'de>> ::std::iter::ExactSizeIterator for AuxIter<'a, T> {}
-// This is purely for the JSON writer in wrench
+// This is purely for the JSON/RON writers in wrench
 impl Serialize for BuiltDisplayList {
     fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
         let mut seq = serializer.serialize_seq(None)?;
         let mut traversal = self.iter();
         while let Some(item) = traversal.next() {