Bug 1439565 - Update webrender to commit 8a19316a733a484bf9bafb8257e3008b1418bfe4. r?jrmuizel draft
author Kartikaya Gupta <kgupta@mozilla.com>
Fri, 23 Feb 2018 09:29:44 -0500
changeset 758979 4c922f5d657dfe9809aa14429bbe951acf9889f5
parent 758927 6661c077325c35af028f1cdaa660f673cbea39be
child 758980 f05794aa1baf5e33284ac905b0e171f1b2c78810
push id 100239
push user kgupta@mozilla.com
push date Fri, 23 Feb 2018 14:32:07 +0000
reviewers jrmuizel
bugs 1439565
milestone 60.0a1
Bug 1439565 - Update webrender to commit 8a19316a733a484bf9bafb8257e3008b1418bfe4. r?jrmuizel

MozReview-Commit-ID: BZIK3GEG0ER
gfx/webrender/res/brush_blend.glsl
gfx/webrender/res/brush_image.glsl
gfx/webrender/res/brush_line.glsl
gfx/webrender/res/brush_linear_gradient.glsl
gfx/webrender/res/brush_mix_blend.glsl
gfx/webrender/res/brush_picture.glsl
gfx/webrender/res/brush_yuv_image.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_angle_gradient.glsl
gfx/webrender/res/ps_gradient.glsl
gfx/webrender/res/shared.glsl
gfx/webrender/src/batch.rs
gfx/webrender/src/clip.rs
gfx/webrender/src/clip_scroll_node.rs
gfx/webrender/src/clip_scroll_tree.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/hit_test.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/tiling.rs
gfx/webrender/src/util.rs
gfx/webrender/tests/angle_shader_validation.rs
gfx/webrender_bindings/revision.txt
--- a/gfx/webrender/res/brush_blend.glsl
+++ b/gfx/webrender/res/brush_blend.glsl
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#define VECS_PER_SPECIFIC_BRUSH 5
+#define VECS_PER_SPECIFIC_BRUSH 1
 #define FORCE_NO_PERSPECTIVE
 
 #include shared,prim_shared,brush
 
 varying vec3 vUv;
 
 flat varying float vAmount;
 flat varying int vOp;
@@ -78,19 +78,20 @@ void brush_vs(
                              vec4(0.769 - 0.769 * amount.y, 0.686 + 0.314 * amount.y, 0.534 - 0.534 * amount.y, 0.0),
                              vec4(0.189 - 0.189 * amount.y, 0.168 - 0.168 * amount.y, 0.131 + 0.869 * amount.y, 0.0),
                              vec4(0.0, 0.0, 0.0, 1.0));
             vColorOffset = vec4(0.0);
             break;
         }
         case 10: {
             // Color Matrix
-            vec4 data[4] = fetch_from_resource_cache_4(prim_address + 1);
-            vColorMat = mat4(amount, data[0], data[1], data[2]);
-            vColorOffset = data[3];
+            vec4 mat_data[4] = fetch_from_resource_cache_4(user_data.z);
+            vec4 offset_data = fetch_from_resource_cache_1(user_data.z + 4);
+            vColorMat = mat4(mat_data[0], mat_data[1], mat_data[2], mat_data[3]);
+            vColorOffset = offset_data;
             break;
         }
         default: break;
     }
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
--- a/gfx/webrender/res/brush_image.glsl
+++ b/gfx/webrender/res/brush_image.glsl
@@ -35,17 +35,22 @@ void brush_vs(
     vec2 uv1 = res.uv_rect.p1;
 
     vUv.z = res.layer;
 
     vec2 f = (vi.local_pos - local_rect.p0) / local_rect.size;
     vUv.xy = mix(uv0, uv1, f);
     vUv.xy /= texture_size;
 
-    vUvBounds = vec4(uv0 + vec2(0.5), uv1 - vec2(0.5)) / texture_size.xyxy;
+    // Handle case where the UV coords are inverted (e.g. from an
+    // external image).
+    vUvBounds = vec4(
+        min(uv0, uv1) + vec2(0.5),
+        max(uv0, uv1) - vec2(0.5)
+    ) / texture_size.xyxy;
 
 #ifdef WR_FEATURE_ALPHA_PASS
     vLocalPos = vi.local_pos;
 #endif
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
--- a/gfx/webrender/res/brush_line.glsl
+++ b/gfx/webrender/res/brush_line.glsl
@@ -114,48 +114,17 @@ void brush_vs(
         default:
             vParams = vec4(0.0);
     }
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 
-#define MAGIC_WAVY_LINE_AA_SNAP         0.7
-
-float det(vec2 a, vec2 b) {
-    return a.x * b.y - b.x * a.y;
-}
-
-// From: http://research.microsoft.com/en-us/um/people/hoppe/ravg.pdf
-vec2 get_distance_vector(vec2 b0, vec2 b1, vec2 b2) {
-    float a = det(b0, b2);
-    float b = 2.0 * det(b1, b0);
-    float d = 2.0 * det(b2, b1);
-
-    float f = b * d - a * a;
-    vec2 d21 = b2 - b1;
-    vec2 d10 = b1 - b0;
-    vec2 d20 = b2 - b0;
-
-    vec2 gf = 2.0 * (b *d21 + d * d10 + a * d20);
-    gf = vec2(gf.y,-gf.x);
-    vec2 pp = -f * gf / dot(gf, gf);
-    vec2 d0p = b0 - pp;
-    float ap = det(d0p, d20);
-    float bp = 2.0 * det(d10, d0p);
-
-    float t = clamp((ap + bp) / (2.0 * a + b + d), 0.0, 1.0);
-    return mix(mix(b0, b1, t), mix(b1,b2,t), t);
-}
-
-// Approximate distance from point to quadratic bezier.
-float approx_distance(vec2 p, vec2 b0, vec2 b1, vec2 b2) {
-    return length(get_distance_vector(b0 - p, b1 - p, b2 - p));
-}
+#define MAGIC_WAVY_LINE_AA_SNAP         0.5
 
 vec4 brush_fs() {
     // Find the appropriate distance to apply the step over.
     vec2 local_pos = vLocalPos;
     float aa_range = compute_aa_range(local_pos);
     float alpha = 1.0;
 
     // Select the x/y coord, depending on which axis this edge is.
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/brush_linear_gradient.glsl
@@ -0,0 +1,76 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 2
+
+#include shared,prim_shared,brush
+
+flat varying int vGradientAddress;
+flat varying float vGradientRepeat;
+
+flat varying vec2 vScaledDir;
+flat varying vec2 vStartPoint;
+
+varying vec2 vPos;
+
+#ifdef WR_FEATURE_ALPHA_PASS
+varying vec2 vLocalPos;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+
+struct Gradient {
+    vec4 start_end_point;
+    vec4 extend_mode;
+};
+
+Gradient fetch_gradient(int address) {
+    vec4 data[2] = fetch_from_resource_cache_2(address);
+    return Gradient(data[0], data[1]);
+}
+
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithSize local_rect,
+    ivec3 user_data,
+    PictureTask pic_task
+) {
+    Gradient gradient = fetch_gradient(prim_address);
+
+    vPos = vi.local_pos - local_rect.p0;
+
+    vec2 start_point = gradient.start_end_point.xy;
+    vec2 end_point = gradient.start_end_point.zw;
+    vec2 dir = end_point - start_point;
+
+    vStartPoint = start_point;
+    vScaledDir = dir / dot(dir, dir);
+
+    vGradientAddress = user_data.x;
+
+    // Whether to repeat the gradient instead of clamping.
+    vGradientRepeat = float(int(gradient.extend_mode.x) != EXTEND_MODE_CLAMP);
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    vLocalPos = vi.local_pos;
+#endif
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+vec4 brush_fs() {
+    float offset = dot(vPos - vStartPoint, vScaledDir);
+
+    vec4 color = sample_gradient(vGradientAddress,
+                                 offset,
+                                 vGradientRepeat);
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    color *= init_transform_fs(vLocalPos);
+#endif
+
+    return color;
+}
+#endif
--- a/gfx/webrender/res/brush_mix_blend.glsl
+++ b/gfx/webrender/res/brush_mix_blend.glsl
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#define VECS_PER_SPECIFIC_BRUSH 5
+#define VECS_PER_SPECIFIC_BRUSH 1
 
 #include shared,prim_shared,brush
 
 varying vec3 vSrcUv;
 varying vec3 vBackdropUv;
 flat varying int vOp;
 
 #ifdef WR_VERTEX_SHADER
--- a/gfx/webrender/res/brush_picture.glsl
+++ b/gfx/webrender/res/brush_picture.glsl
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#define VECS_PER_SPECIFIC_BRUSH 5
+#define VECS_PER_SPECIFIC_BRUSH 1
 
 #include shared,prim_shared,brush
 
 #ifdef WR_FEATURE_ALPHA_PASS
 varying vec2 vLocalPos;
 #endif
 
 varying vec3 vUv;
--- a/gfx/webrender/res/brush_yuv_image.glsl
+++ b/gfx/webrender/res/brush_yuv_image.glsl
@@ -36,16 +36,22 @@ varying vec2 vLocalPos;
 
     varying vec3 vUv_UV;
     flat varying vec4 vUvBounds_UV;
 #elif defined (WR_FEATURE_YUV_INTERLEAVED)
     varying vec3 vUv_YUV;
     flat varying vec4 vUvBounds_YUV;
 #endif
 
+#ifdef WR_FEATURE_TEXTURE_RECT
+    #define TEX_SIZE(sampler) vec2(1.0)
+#else
+    #define TEX_SIZE(sampler) vec2(textureSize(sampler, 0).xy)
+#endif
+
 #ifdef WR_VERTEX_SHADER
 void write_uv_rect(
     int resource_id,
     vec2 f,
     vec2 texture_size,
     out vec3 uv,
     out vec4 uv_bounds
 ) {
@@ -73,24 +79,24 @@ void brush_vs(
 ) {
     vec2 f = (vi.local_pos - local_rect.p0) / local_rect.size;
 
 #ifdef WR_FEATURE_ALPHA_PASS
     vLocalPos = vi.local_pos;
 #endif
 
 #if defined (WR_FEATURE_YUV_PLANAR)
-    write_uv_rect(user_data.x, f, vec2(textureSize(sColor0, 0).xy), vUv_Y, vUvBounds_Y);
-    write_uv_rect(user_data.y, f, vec2(textureSize(sColor1, 0).xy), vUv_U, vUvBounds_U);
-    write_uv_rect(user_data.z, f, vec2(textureSize(sColor2, 0).xy), vUv_V, vUvBounds_V);
+    write_uv_rect(user_data.x, f, TEX_SIZE(sColor0), vUv_Y, vUvBounds_Y);
+    write_uv_rect(user_data.y, f, TEX_SIZE(sColor1), vUv_U, vUvBounds_U);
+    write_uv_rect(user_data.z, f, TEX_SIZE(sColor2), vUv_V, vUvBounds_V);
 #elif defined (WR_FEATURE_YUV_NV12)
-    write_uv_rect(user_data.x, f, vec2(textureSize(sColor0, 0).xy), vUv_Y, vUvBounds_Y);
-    write_uv_rect(user_data.y, f, vec2(textureSize(sColor1, 0).xy), vUv_UV, vUvBounds_UV);
+    write_uv_rect(user_data.x, f, TEX_SIZE(sColor0), vUv_Y, vUvBounds_Y);
+    write_uv_rect(user_data.y, f, TEX_SIZE(sColor1), vUv_UV, vUvBounds_UV);
 #elif defined (WR_FEATURE_YUV_INTERLEAVED)
-    write_uv_rect(user_data.x, f, vec2(textureSize(sColor0, 0).xy), vUv_YUV, vUvBounds_YUV);
+    write_uv_rect(user_data.x, f, TEX_SIZE(sColor0), vUv_YUV, vUvBounds_YUV);
 #endif //WR_FEATURE_YUV_*
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 
 #if !defined(WR_FEATURE_YUV_REC601) && !defined(WR_FEATURE_YUV_REC709)
 #define WR_FEATURE_YUV_REC601
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -11,16 +11,18 @@
 #define LINE_STYLE_DOTTED       1
 #define LINE_STYLE_DASHED       2
 #define LINE_STYLE_WAVY         3
 
 #define SUBPX_DIR_NONE        0
 #define SUBPX_DIR_HORIZONTAL  1
 #define SUBPX_DIR_VERTICAL    2
 
+#define EPSILON     0.0001
+
 uniform sampler2DArray sCacheA8;
 uniform sampler2DArray sCacheRGBA8;
 
 // An A8 target for standalone tasks that is available to all passes.
 uniform sampler2DArray sSharedCacheA8;
 
 uniform sampler2D sGradients;
 
@@ -69,17 +71,16 @@ vec4[2] fetch_from_resource_cache_2(int 
 
 #ifdef WR_VERTEX_SHADER
 
 #define VECS_PER_CLIP_SCROLL_NODE   5
 #define VECS_PER_LOCAL_CLIP_RECT    1
 #define VECS_PER_RENDER_TASK        3
 #define VECS_PER_PRIM_HEADER        2
 #define VECS_PER_TEXT_RUN           3
-#define VECS_PER_GRADIENT           3
 #define VECS_PER_GRADIENT_STOP      2
 
 uniform HIGHP_SAMPLER_FLOAT sampler2D sClipScrollNodes;
 uniform HIGHP_SAMPLER_FLOAT sampler2D sLocalClipRects;
 uniform HIGHP_SAMPLER_FLOAT sampler2D sRenderTasks;
 
 // Instanced attributes
 in ivec4 aData0;
@@ -299,27 +300,16 @@ ClipArea fetch_clip_area(int index) {
 
         area.common_data = task_data.common_data;
         area.screen_origin = task_data.data1.xy;
     }
 
     return area;
 }
 
-struct Gradient {
-    vec4 start_end_point;
-    vec4 tile_size_repeat;
-    vec4 extend_mode;
-};
-
-Gradient fetch_gradient(int address) {
-    vec4 data[3] = fetch_from_resource_cache_3(address);
-    return Gradient(data[0], data[1], data[2]);
-}
-
 struct Glyph {
     vec2 offset;
 };
 
 Glyph fetch_glyph(int specific_prim_address,
                   int glyph_index,
                   int subpx_dir) {
     // Two glyphs are packed in each texel in the GPU cache.
@@ -743,43 +733,62 @@ void write_clip(vec2 global_pos, ClipAre
         area.common_data.task_rect.p0 + area.common_data.task_rect.size
     );
     vClipMaskUv = vec3(uv, area.common_data.texture_layer_index);
 }
 #endif //WR_VERTEX_SHADER
 
 #ifdef WR_FRAGMENT_SHADER
 
-/// Find the appropriate half range to apply the AA smoothstep over.
+/// Find the appropriate half range to apply the AA approximation over.
 /// This range represents a coefficient to go from one CSS pixel to half a device pixel.
 float compute_aa_range(vec2 position) {
     // The constant factor is chosen to compensate for the fact that length(fw) is equal
     // to sqrt(2) times the device pixel ratio in the typical case. 0.5/sqrt(2) = 0.35355.
     //
     // This coefficient is chosen to ensure that any sample 0.5 pixels or more inside of
     // the shape has no anti-aliasing applied to it (since pixels are sampled at their center,
     // such a pixel (axis aligned) is fully inside the border). We need this so that antialiased
     // curves properly connect with non-antialiased vertical or horizontal lines, among other things.
     //
-    // Using larger aa steps is quite common when rendering shapes with distance fields.
-    // It gives a smoother (although blurrier look) by extending the range that is smoothed
-    // to produce the anti aliasing. In our case, however, extending the range inside of
-    // the shape causes noticeable artifacts at the junction between an antialiased corner
-    // and a straight edge.
+    // Lines over a half-pixel away from the pixel center *can* intersect with the pixel square;
+    // indeed, unless they are horizontal or vertical, they are guaranteed to. However, choosing
+    // a nonzero area for such pixels causes noticeable artifacts at the junction between an anti-
+    // aliased corner and a straight edge.
+    //
     // We may want to adjust this constant in specific scenarios (for example keep the principled
     // value for straight edges where we want pixel-perfect equivalence with non antialiased lines
     // when axis aligned, while selecting a larger and smoother aa range on curves).
     return 0.35355 * length(fwidth(position));
 }
 
-/// Return the blending coefficient to for distance antialiasing.
+/// Return the blending coefficient for distance antialiasing.
 ///
 /// 0.0 means inside the shape, 1.0 means outside.
+///
+/// This cubic polynomial approximates the area of a 1x1 pixel square under a
+/// line, given the signed Euclidean distance from the center of the square to
+/// that line. Calculating the *exact* area would require taking into account
+/// not only this distance but also the angle of the line. However, in
+/// practice, this complexity is not required, as the area is roughly the same
+/// regardless of the angle.
+///
+/// The coefficients of this polynomial were determined through least-squares
+/// regression and are accurate to within 2.16% of the total area of the pixel
+/// square 95% of the time, with a maximum error of 3.53%.
+///
+/// See the comments in `compute_aa_range()` for more information on the
+/// cutoff values of -0.5 and 0.5.
 float distance_aa(float aa_range, float signed_distance) {
-    return 1.0 - smoothstep(-aa_range, aa_range, signed_distance);
+    float dist = 0.5 * signed_distance / aa_range;
+    if (dist <= -0.5 + EPSILON)
+        return 1.0;
+    if (dist >= 0.5 - EPSILON)
+        return 0.0;
+    return 0.5 + dist * (0.8431027 * dist * dist - 1.14453603);
 }
 
 float signed_distance_rect(vec2 pos, vec2 p0, vec2 p1) {
     vec2 d = max(p0 - pos, pos - p1);
     return length(max(vec2(0.0), d)) + min(0.0, max(d.x, d.y));
 }
 
 float init_transform_fs(vec2 local_pos) {
deleted file mode 100644
--- a/gfx/webrender/res/ps_angle_gradient.glsl
+++ /dev/null
@@ -1,68 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include shared,prim_shared
-
-flat varying int vGradientAddress;
-flat varying float vGradientRepeat;
-
-flat varying vec2 vScaledDir;
-flat varying vec2 vStartPoint;
-
-flat varying vec2 vTileSize;
-flat varying vec2 vTileRepeat;
-
-varying vec2 vPos;
-
-#ifdef WR_VERTEX_SHADER
-void main(void) {
-    Primitive prim = load_primitive();
-    Gradient gradient = fetch_gradient(prim.specific_prim_address);
-
-    VertexInfo vi = write_vertex(prim.local_rect,
-                                 prim.local_clip_rect,
-                                 prim.z,
-                                 prim.scroll_node,
-                                 prim.task,
-                                 prim.local_rect);
-
-    vPos = vi.local_pos - prim.local_rect.p0;
-
-    vec2 start_point = gradient.start_end_point.xy;
-    vec2 end_point = gradient.start_end_point.zw;
-    vec2 dir = end_point - start_point;
-
-    vStartPoint = start_point;
-    vScaledDir = dir / dot(dir, dir);
-
-    vTileSize = gradient.tile_size_repeat.xy;
-    vTileRepeat = gradient.tile_size_repeat.zw;
-
-    vGradientAddress = prim.specific_prim_address + VECS_PER_GRADIENT;
-
-    // Whether to repeat the gradient instead of clamping.
-    vGradientRepeat = float(int(gradient.extend_mode.x) != EXTEND_MODE_CLAMP);
-
-    write_clip(vi.screen_pos, prim.clip_area);
-}
-#endif
-
-#ifdef WR_FRAGMENT_SHADER
-void main(void) {
-    vec2 pos = mod(vPos, vTileRepeat);
-
-    if (pos.x >= vTileSize.x ||
-        pos.y >= vTileSize.y) {
-        discard;
-    }
-
-    float offset = dot(pos - vStartPoint, vScaledDir);
-
-    vec4 color = sample_gradient(vGradientAddress,
-                                 offset,
-                                 vGradientRepeat);
-
-    oFragColor = color * do_clip();
-}
-#endif
deleted file mode 100644
--- a/gfx/webrender/res/ps_gradient.glsl
+++ /dev/null
@@ -1,119 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include shared,prim_shared
-
-varying vec4 vColor;
-
-varying vec2 vLocalPos;
-
-#ifdef WR_VERTEX_SHADER
-struct GradientStop {
-    vec4 color;
-    vec4 offset;
-};
-
-GradientStop fetch_gradient_stop(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
-    return GradientStop(data[0], data[1]);
-}
-
-void main(void) {
-    Primitive prim = load_primitive();
-    Gradient gradient = fetch_gradient(prim.specific_prim_address);
-
-    vec4 abs_start_end_point = gradient.start_end_point + prim.local_rect.p0.xyxy;
-
-    int stop_address = prim.specific_prim_address +
-                       VECS_PER_GRADIENT +
-                       VECS_PER_GRADIENT_STOP * prim.user_data0;
-
-    GradientStop g0 = fetch_gradient_stop(stop_address);
-    GradientStop g1 = fetch_gradient_stop(stop_address + VECS_PER_GRADIENT_STOP);
-
-    RectWithSize segment_rect;
-    vec2 axis;
-    vec4 adjusted_color_g0 = g0.color;
-    vec4 adjusted_color_g1 = g1.color;
-    if (abs_start_end_point.y == abs_start_end_point.w) {
-        // Calculate the x coord of the gradient stops
-        vec2 g01_x = mix(abs_start_end_point.xx, abs_start_end_point.zz,
-                         vec2(g0.offset.x, g1.offset.x));
-
-        // The gradient stops might exceed the geometry rect so clamp them
-        vec2 g01_x_clamped = clamp(g01_x,
-                                   prim.local_rect.p0.xx,
-                                   prim.local_rect.p0.xx + prim.local_rect.size.xx);
-
-        // Calculate the segment rect using the clamped coords
-        segment_rect.p0 = vec2(g01_x_clamped.x, prim.local_rect.p0.y);
-        segment_rect.size = vec2(g01_x_clamped.y - g01_x_clamped.x, prim.local_rect.size.y);
-        axis = vec2(1.0, 0.0);
-
-        // Adjust the stop colors by how much they were clamped
-        vec2 adjusted_offset = (g01_x_clamped - g01_x.xx) / (g01_x.y - g01_x.x);
-        adjusted_color_g0 = mix(g0.color, g1.color, adjusted_offset.x);
-        adjusted_color_g1 = mix(g0.color, g1.color, adjusted_offset.y);
-    } else {
-        // Calculate the y coord of the gradient stops
-        vec2 g01_y = mix(abs_start_end_point.yy, abs_start_end_point.ww,
-                         vec2(g0.offset.x, g1.offset.x));
-
-        // The gradient stops might exceed the geometry rect so clamp them
-        vec2 g01_y_clamped = clamp(g01_y,
-                                   prim.local_rect.p0.yy,
-                                   prim.local_rect.p0.yy + prim.local_rect.size.yy);
-
-        // Calculate the segment rect using the clamped coords
-        segment_rect.p0 = vec2(prim.local_rect.p0.x, g01_y_clamped.x);
-        segment_rect.size = vec2(prim.local_rect.size.x, g01_y_clamped.y - g01_y_clamped.x);
-        axis = vec2(0.0, 1.0);
-
-        // Adjust the stop colors by how much they were clamped
-        vec2 adjusted_offset = (g01_y_clamped - g01_y.xx) / (g01_y.y - g01_y.x);
-        adjusted_color_g0 = mix(g0.color, g1.color, adjusted_offset.x);
-        adjusted_color_g1 = mix(g0.color, g1.color, adjusted_offset.y);
-    }
-
-#ifdef WR_FEATURE_TRANSFORM
-    VertexInfo vi = write_transform_vertex(segment_rect,
-                                           prim.local_rect,
-                                           prim.local_clip_rect,
-                                           vec4(1.0),
-                                           prim.z,
-                                           prim.scroll_node,
-                                           prim.task,
-                                           true);
-    vLocalPos = vi.local_pos;
-    vec2 f = (vi.local_pos.xy - prim.local_rect.p0) / prim.local_rect.size;
-#else
-    VertexInfo vi = write_vertex(segment_rect,
-                                 prim.local_clip_rect,
-                                 prim.z,
-                                 prim.scroll_node,
-                                 prim.task,
-                                 prim.local_rect);
-
-    vec2 f = (vi.local_pos - segment_rect.p0) / segment_rect.size;
-    vLocalPos = vi.local_pos;
-#endif
-
-    write_clip(vi.screen_pos, prim.clip_area);
-
-    vColor = mix(adjusted_color_g0, adjusted_color_g1, dot(f, axis));
-}
-#endif
-
-#ifdef WR_FRAGMENT_SHADER
-void main(void) {
-#ifdef WR_FEATURE_TRANSFORM
-    float alpha = init_transform_fs(vLocalPos);
-#else
-    float alpha = 1.0;
-#endif
-
-    alpha *= do_clip();
-    oFragColor = dither(vColor * alpha);
-}
-#endif
--- a/gfx/webrender/res/shared.glsl
+++ b/gfx/webrender/res/shared.glsl
@@ -9,26 +9,19 @@
 #endif
 
 #ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
 #extension GL_ARB_explicit_attrib_location : require
 #endif
 
 #include base
 
-// The textureLod() doesn't support samplerExternalOES for WR_FEATURE_TEXTURE_EXTERNAL.
-// https://www.khronos.org/registry/OpenGL/extensions/OES/OES_EGL_image_external_essl3.txt
-//
-// The textureLod() doesn't support sampler2DRect for WR_FEATURE_TEXTURE_RECT, too.
-//
-// Use texture() instead.
 #if defined(WR_FEATURE_TEXTURE_EXTERNAL) || defined(WR_FEATURE_TEXTURE_RECT) || defined(WR_FEATURE_TEXTURE_2D)
 #define TEX_SAMPLE(sampler, tex_coord) texture(sampler, tex_coord.xy)
 #else
-// In normal case, we use textureLod(). We haven't used the lod yet. So, we always pass 0.0 now.
 #define TEX_SAMPLE(sampler, tex_coord) texture(sampler, tex_coord)
 #endif
 
 //======================================================================================
 // Vertex shader attributes and uniforms
 //======================================================================================
 #ifdef WR_VERTEX_SHADER
     // A generic uniform that shaders can optionally use to configure
--- a/gfx/webrender/src/batch.rs
+++ b/gfx/webrender/src/batch.rs
@@ -13,17 +13,17 @@ use euclid::{TypedTransform3D, vec3};
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheAddress};
 use gpu_types::{BrushFlags, BrushImageKind, BrushInstance, ClipChainRectIndex};
 use gpu_types::{ClipMaskInstance, ClipScrollNodeIndex};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
 use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
 use picture::{ContentOrigin, PictureCompositeMode, PictureKind, PicturePrimitive, PictureSurface};
 use plane_split::{BspSplitter, Polygon, Splitter};
-use prim_store::{ImageSource, PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
+use prim_store::{CachedGradient, ImageSource, PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
 use prim_store::{BrushPrimitive, BrushKind, DeferredResolve, EdgeAaSegmentMask, PrimitiveRun};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKind, RenderTaskTree};
 use renderer::{BlendMode, ImageBufferKind};
 use renderer::BLOCKS_PER_UV_RECT;
 use resource_cache::{CacheItem, GlyphFetchResult, ImageRequest, ResourceCache};
 use std::{usize, f32, i32};
 use tiling::{RenderTargetContext, RenderTargetKind};
 use util::{MatrixHelpers, TransformedRectKind};
@@ -33,18 +33,16 @@ use util::{MatrixHelpers, TransformedRec
 const OPAQUE_TASK_ADDRESS: RenderTaskAddress = RenderTaskAddress(0x7fff);
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum TransformBatchKind {
     TextRun(GlyphFormat),
     Image(ImageBufferKind),
-    AlignedGradient,
-    AngleGradient,
     BorderCorner,
     BorderEdge,
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum BrushImageSourceKind {
@@ -73,16 +71,17 @@ pub enum BrushBatchKind {
     Blend,
     MixBlend {
         task_id: RenderTaskId,
         source_id: RenderTaskId,
         backdrop_id: RenderTaskId,
     },
     YuvImage(ImageBufferKind, YuvFormat, YuvColorSpace),
     RadialGradient,
+    LinearGradient,
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum BatchKind {
     HardwareComposite,
     SplitComposite,
@@ -677,16 +676,17 @@ impl AlphaBatchBuilder {
 
         match prim_metadata.prim_kind {
             PrimitiveKind::Brush => {
                 let brush = &ctx.prim_store.cpu_brushes[prim_metadata.cpu_prim_index.0];
                 if let Some((batch_kind, textures, user_data)) = brush.get_batch_params(
                     ctx.resource_cache,
                     gpu_cache,
                     deferred_resolves,
+                    &ctx.cached_gradients,
                 ) {
                     self.add_brush_to_batch(
                         brush,
                         prim_metadata,
                         batch_kind,
                         specified_blend_mode,
                         non_segmented_blend_mode,
                         textures,
@@ -806,17 +806,17 @@ impl AlphaBatchBuilder {
                     PictureKind::BoxShadow { .. } | PictureKind::Image { .. } => false,
                 };
 
                 // TODO(gw): It probably makes sense to base this decision on the content
                 //           origin field in the future (once that's configurable).
                 let font_transform = if is_shadow {
                     None
                 } else {
-                    Some(&scroll_node.transform)
+                    Some(scroll_node.transform)
                 };
 
                 let font = text_cpu.get_font(
                     ctx.device_pixel_scale,
                     font_transform,
                 );
 
                 let glyph_fetch_buffer = &mut self.glyph_fetch_buffer;
@@ -964,26 +964,29 @@ impl AlphaBatchBuilder {
                                 panic!("BUG: should be handled as a texture cache surface");
                             }
                             PictureKind::Image {
                                 composite_mode,
                                 secondary_render_task_id,
                                 is_in_3d_context,
                                 reference_frame_id,
                                 real_local_rect,
+                                ref extra_gpu_data_handle,
                                 ..
                             } => {
                                 // If this picture is participating in a 3D rendering context,
                                 // then don't add it to any batches here. Instead, create a polygon
                                 // for it and add it to the current plane splitter.
                                 if is_in_3d_context {
                                     // Push into parent plane splitter.
 
-                                    let real_xf = &ctx.clip_scroll_tree.nodes[&reference_frame_id].world_content_transform;
-
+                                    let real_xf = &ctx.clip_scroll_tree
+                                        .nodes[&reference_frame_id]
+                                        .world_content_transform
+                                        .into();
                                     let polygon = make_polygon(
                                         real_local_rect,
                                         &real_xf,
                                         prim_index.0,
                                     );
 
                                     splitter.add(polygon);
 
@@ -1088,44 +1091,46 @@ impl AlphaBatchBuilder {
                                             }
                                             _ => {
                                                 let key = BatchKey::new(
                                                     BatchKind::Brush(BrushBatchKind::Blend),
                                                     BlendMode::PremultipliedAlpha,
                                                     BatchTextures::render_target_cache(),
                                                 );
 
-                                                let filter_mode = match filter {
-                                                    FilterOp::Blur(..) => 0,
-                                                    FilterOp::Contrast(..) => 1,
-                                                    FilterOp::Grayscale(..) => 2,
-                                                    FilterOp::HueRotate(..) => 3,
-                                                    FilterOp::Invert(..) => 4,
-                                                    FilterOp::Saturate(..) => 5,
-                                                    FilterOp::Sepia(..) => 6,
-                                                    FilterOp::Brightness(..) => 7,
-                                                    FilterOp::Opacity(..) => 8,
-                                                    FilterOp::DropShadow(..) => 9,
-                                                    FilterOp::ColorMatrix(..) => 10,
+                                                let (filter_mode, extra_cache_address) = match filter {
+                                                    FilterOp::Blur(..) => (0, 0),
+                                                    FilterOp::Contrast(..) => (1, 0),
+                                                    FilterOp::Grayscale(..) => (2, 0),
+                                                    FilterOp::HueRotate(..) => (3, 0),
+                                                    FilterOp::Invert(..) => (4, 0),
+                                                    FilterOp::Saturate(..) => (5, 0),
+                                                    FilterOp::Sepia(..) => (6, 0),
+                                                    FilterOp::Brightness(..) => (7, 0),
+                                                    FilterOp::Opacity(..) => (8, 0),
+                                                    FilterOp::DropShadow(..) => (9, 0),
+                                                    FilterOp::ColorMatrix(..) => {
+                                                        (10, extra_gpu_data_handle.as_int(gpu_cache))
+                                                    }
                                                 };
 
                                                 let instance = BrushInstance {
                                                     picture_address: task_address,
                                                     prim_address: prim_cache_address,
                                                     clip_chain_rect_index,
                                                     scroll_id,
                                                     clip_task_address,
                                                     z,
                                                     segment_index: 0,
                                                     edge_flags: EdgeAaSegmentMask::empty(),
                                                     brush_flags: BrushFlags::empty(),
                                                     user_data: [
                                                         cache_task_address.0 as i32,
                                                         filter_mode,
-                                                        0,
+                                                        extra_cache_address,
                                                     ],
                                                 };
 
                                                 let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
                                                 batch.push(PrimitiveInstance::from(instance));
                                             }
                                         }
                                     }
@@ -1201,38 +1206,16 @@ impl AlphaBatchBuilder {
                             ctx,
                             gpu_cache,
                             render_tasks,
                             deferred_resolves,
                         );
                     }
                 }
             }
-            PrimitiveKind::AlignedGradient => {
-                let gradient_cpu =
-                    &ctx.prim_store.cpu_gradients[prim_metadata.cpu_prim_index.0];
-                let kind = BatchKind::Transformable(
-                    transform_kind,
-                    TransformBatchKind::AlignedGradient,
-                );
-                let key = BatchKey::new(kind, non_segmented_blend_mode, no_textures);
-                let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
-                for part_index in 0 .. (gradient_cpu.stops_count - 1) {
-                    batch.push(base_instance.build(part_index as i32, 0, 0));
-                }
-            }
-            PrimitiveKind::AngleGradient => {
-                let kind = BatchKind::Transformable(
-                    transform_kind,
-                    TransformBatchKind::AngleGradient,
-                );
-                let key = BatchKey::new(kind, non_segmented_blend_mode, no_textures);
-                let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
-                batch.push(base_instance.build(0, 0, 0));
-            }
         }
     }
 
     fn add_brush_to_batch(
         &mut self,
         brush: &BrushPrimitive,
         prim_metadata: &PrimitiveMetadata,
         batch_kind: BrushBatchKind,
@@ -1325,16 +1308,17 @@ impl AlphaBatchBuilder {
 }
 
 impl BrushPrimitive {
     fn get_batch_params(
         &self,
         resource_cache: &ResourceCache,
         gpu_cache: &mut GpuCache,
         deferred_resolves: &mut Vec<DeferredResolve>,
+        cached_gradients: &[CachedGradient],
     ) -> Option<(BrushBatchKind, BatchTextures, [i32; 3])> {
         match self.kind {
             BrushKind::Line { .. } => {
                 Some((
                     BrushBatchKind::Line,
                     BatchTextures::no_texture(),
                     [0; 3],
                 ))
@@ -1371,27 +1355,40 @@ impl BrushPrimitive {
             }
             BrushKind::Clear => {
                 Some((
                     BrushBatchKind::Solid,
                     BatchTextures::no_texture(),
                     [0; 3],
                 ))
             }
-            BrushKind::RadialGradient { ref stops_handle, .. } => {
+            BrushKind::RadialGradient { gradient_index, .. } => {
+                let stops_handle = &cached_gradients[gradient_index.0].handle;
                 Some((
                     BrushBatchKind::RadialGradient,
                     BatchTextures::no_texture(),
                     [
                         stops_handle.as_int(gpu_cache),
                         0,
                         0,
                     ],
                 ))
             }
+            BrushKind::LinearGradient { gradient_index, .. } => {
+                let stops_handle = &cached_gradients[gradient_index.0].handle;
+                Some((
+                    BrushBatchKind::LinearGradient,
+                    BatchTextures::no_texture(),
+                    [
+                        stops_handle.as_int(gpu_cache),
+                        0,
+                        0,
+                    ],
+                ))
+            }
             BrushKind::YuvImage { format, yuv_key, image_rendering, color_space } => {
                 let mut textures = BatchTextures::no_texture();
                 let mut uv_rect_addresses = [0; 3];
 
                 //yuv channel
                 let channel_count = format.get_plane_num();
                 debug_assert!(channel_count <= 3);
                 for channel in 0 .. channel_count {
@@ -1459,18 +1456,16 @@ impl AlphaBatchHelpers for PrimitiveStor
     fn get_blend_mode(&self, metadata: &PrimitiveMetadata) -> BlendMode {
         match metadata.prim_kind {
             // Can only resolve the TextRun's blend mode once glyphs are fetched.
             PrimitiveKind::TextRun => {
                 BlendMode::PremultipliedAlpha
             }
 
             PrimitiveKind::Border |
-            PrimitiveKind::AlignedGradient |
-            PrimitiveKind::AngleGradient |
             PrimitiveKind::Picture => {
                 BlendMode::PremultipliedAlpha
             }
 
             PrimitiveKind::Brush => {
                 let brush = &self.cpu_brushes[metadata.cpu_prim_index.0];
                 match brush.kind {
                     BrushKind::Clear => {
@@ -1482,16 +1477,17 @@ impl AlphaBatchHelpers for PrimitiveStor
                             AlphaType::Alpha => BlendMode::Alpha,
                         }
                     }
                     BrushKind::Solid { .. } |
                     BrushKind::Mask { .. } |
                     BrushKind::Line { .. } |
                     BrushKind::YuvImage { .. } |
                     BrushKind::RadialGradient { .. } |
+                    BrushKind::LinearGradient { .. } |
                     BrushKind::Picture => {
                         BlendMode::PremultipliedAlpha
                     }
                 }
             }
             PrimitiveKind::Image => {
                 let image_cpu = &self.cpu_images[metadata.cpu_prim_index.0];
                 match image_cpu.alpha_type {
--- a/gfx/webrender/src/clip.rs
+++ b/gfx/webrender/src/clip.rs
@@ -1,24 +1,24 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, ClipMode, ComplexClipRegion, DeviceIntRect, DevicePixelScale, ImageMask};
-use api::{ImageRendering, LayerRect, LayerToWorldTransform, LayoutPoint, LayoutVector2D};
-use api::LocalClip;
+use api::{ImageRendering, LayerRect, LayoutPoint, LayoutVector2D, LocalClip};
 use border::{BorderCornerClipSource, ensure_no_corner_overlap};
 use clip_scroll_tree::{ClipChainIndex, CoordinateSystemId};
 use ellipse::Ellipse;
 use freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle, ToGpuBlocks};
 use gpu_types::ClipScrollNodeIndex;
 use prim_store::{ClipData, ImageMaskData};
 use resource_cache::{ImageRequest, ResourceCache};
-use util::{MaxRect, MatrixHelpers, calculate_screen_bounding_rect, extract_inner_rect_safe};
+use util::{LayerToWorldFastTransform, MaxRect, calculate_screen_bounding_rect};
+use util::extract_inner_rect_safe;
 use std::rc::Rc;
 
 pub type ClipStore = FreeList<ClipSources>;
 pub type ClipSourcesHandle = FreeListHandle<ClipSources>;
 pub type ClipSourcesWeakHandle = WeakFreeListHandle<ClipSources>;
 
 #[derive(Clone, Debug)]
 pub struct ClipRegion {
@@ -247,17 +247,17 @@ impl ClipSources {
 
     /// Whether or not this ClipSources has any clips (does any clipping).
     pub fn has_clips(&self) -> bool {
         !self.clips.is_empty()
     }
 
     pub fn get_screen_bounds(
         &self,
-        transform: &LayerToWorldTransform,
+        transform: &LayerToWorldFastTransform,
         device_pixel_scale: DevicePixelScale,
     ) -> (DeviceIntRect, Option<DeviceIntRect>) {
         // If this translation isn't axis aligned or has a perspective component, don't try to
         // calculate the inner rectangle. The rectangle that we produce would include potentially
         // clipped screen area.
         // TODO(mrobinson): We should eventually try to calculate an inner region or some inner
         // rectangle so that we can do screen inner rectangle optimizations for these kind of
         // clips.
--- a/gfx/webrender/src/clip_scroll_node.rs
+++ b/gfx/webrender/src/clip_scroll_node.rs
@@ -1,26 +1,27 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ClipId, DevicePixelScale, ExternalScrollId, LayerPixel, LayerPoint, LayerRect};
-use api::{LayerSize, LayerToWorldTransform, LayerTransform, LayerVector2D, LayoutTransform};
-use api::{LayoutVector2D, PipelineId, PropertyBinding, ScrollClamping, ScrollEventPhase};
-use api::{ScrollLocation, ScrollSensitivity, StickyOffsetBounds, WorldPoint};
+use api::{LayerSize, LayerVector2D, LayoutTransform, LayoutVector2D, PipelineId, PropertyBinding};
+use api::{ScrollClamping, ScrollEventPhase, ScrollLocation, ScrollSensitivity, StickyOffsetBounds};
+use api::WorldPoint;
 use clip::{ClipChain, ClipSourcesHandle, ClipStore, ClipWorkItem};
 use clip_scroll_tree::{ClipChainIndex, CoordinateSystemId, TransformUpdateState};
 use euclid::SideOffsets2D;
 use geometry::ray_intersects_rect;
 use gpu_cache::GpuCache;
 use gpu_types::{ClipScrollNodeIndex, ClipScrollNodeData};
 use resource_cache::ResourceCache;
 use scene::SceneProperties;
 use spring::{DAMPING, STIFFNESS, Spring};
-use util::{MatrixHelpers, TransformOrOffset, TransformedRectKind};
+use util::{LayerToWorldFastTransform, LayerFastTransform, LayoutFastTransform};
+use util::{TransformedRectKind};
 
 #[cfg(target_os = "macos")]
 const CAN_OVERSCROLL: bool = true;
 
 #[cfg(not(target_os = "macos"))]
 const CAN_OVERSCROLL: bool = false;
 
 #[derive(Debug)]
@@ -86,20 +87,20 @@ pub struct ClipScrollNode {
     /// Viewing rectangle in the coordinate system of the parent reference frame.
     pub local_viewport_rect: LayerRect,
 
     /// The transformation for this viewport in world coordinates is the transformation for
     /// our parent reference frame, plus any accumulated scrolling offsets from nodes
     /// between our reference frame and this node. For reference frames, we also include
     /// whatever local transformation this reference frame provides. This can be combined
     /// with the local_viewport_rect to get its position in world space.
-    pub world_viewport_transform: LayerToWorldTransform,
+    pub world_viewport_transform: LayerToWorldFastTransform,
 
     /// World transform for content transformed by this node.
-    pub world_content_transform: LayerToWorldTransform,
+    pub world_content_transform: LayerToWorldFastTransform,
 
     /// Pipeline that this layer belongs to
     pub pipeline_id: PipelineId,
 
     /// Parent layer. If this is None, we are the root node.
     pub parent: Option<ClipId>,
 
     /// Child layers
@@ -114,41 +115,41 @@ pub struct ClipScrollNode {
     pub invertible: bool,
 
     /// The axis-aligned coordinate system id of this node.
     pub coordinate_system_id: CoordinateSystemId,
 
     /// The transformation from the coordinate system which established our compatible coordinate
     /// system (same coordinate system id) and us. This can change via scroll offsets and via new
     /// reference frame transforms.
-    pub coordinate_system_relative_transform: TransformOrOffset,
+    pub coordinate_system_relative_transform: LayerFastTransform,
 
     /// A linear ID / index of this clip-scroll node. Used as a reference to
     /// pass to shaders, to allow them to fetch a given clip-scroll node.
     pub node_data_index: ClipScrollNodeIndex,
 }
 
 impl ClipScrollNode {
     pub fn new(
         pipeline_id: PipelineId,
         parent_id: Option<ClipId>,
         rect: &LayerRect,
         node_type: NodeType
     ) -> Self {
         ClipScrollNode {
             local_viewport_rect: *rect,
-            world_viewport_transform: LayerToWorldTransform::identity(),
-            world_content_transform: LayerToWorldTransform::identity(),
+            world_viewport_transform: LayerToWorldFastTransform::identity(),
+            world_content_transform: LayerToWorldFastTransform::identity(),
             parent: parent_id,
             children: Vec::new(),
             pipeline_id,
             node_type: node_type,
             invertible: true,
             coordinate_system_id: CoordinateSystemId(0),
-            coordinate_system_relative_transform: TransformOrOffset::zero(),
+            coordinate_system_relative_transform: LayerFastTransform::identity(),
             node_data_index: ClipScrollNodeIndex(0),
         }
     }
 
     pub fn new_scroll_frame(
         pipeline_id: PipelineId,
         parent_id: ClipId,
         external_id: Option<ExternalScrollId>,
@@ -172,20 +173,22 @@ impl ClipScrollNode {
         parent_id: Option<ClipId>,
         frame_rect: &LayerRect,
         source_transform: Option<PropertyBinding<LayoutTransform>>,
         source_perspective: Option<LayoutTransform>,
         origin_in_parent_reference_frame: LayerVector2D,
         pipeline_id: PipelineId,
     ) -> Self {
         let identity = LayoutTransform::identity();
+        let source_perspective = source_perspective.map_or_else(
+            LayoutFastTransform::identity, |perspective| perspective.into());
         let info = ReferenceFrameInfo {
-            resolved_transform: LayerTransform::identity(),
+            resolved_transform: LayerFastTransform::identity(),
             source_transform: source_transform.unwrap_or(PropertyBinding::Value(identity)),
-            source_perspective: source_perspective.unwrap_or(identity),
+            source_perspective: source_perspective,
             origin_in_parent_reference_frame,
             invertible: true,
         };
         Self::new(pipeline_id, parent_id, frame_rect, NodeType::ReferenceFrame(info))
     }
 
     pub fn new_sticky_frame(
         parent_id: ClipId,
@@ -253,33 +256,33 @@ impl ClipScrollNode {
         scrolling.offset = new_offset;
         scrolling.bouncing_back = false;
         scrolling.started_bouncing_back = false;
         true
     }
 
     pub fn mark_uninvertible(&mut self) {
         self.invertible = false;
-        self.world_content_transform = LayerToWorldTransform::identity();
-        self.world_viewport_transform = LayerToWorldTransform::identity();
+        self.world_content_transform = LayerToWorldFastTransform::identity();
+        self.world_viewport_transform = LayerToWorldFastTransform::identity();
     }
 
     pub fn push_gpu_node_data(&mut self, node_data: &mut Vec<ClipScrollNodeData>) {
         if !self.invertible {
             node_data.push(ClipScrollNodeData::invalid());
             return;
         }
 
         let transform_kind = if self.world_content_transform.preserves_2d_axis_alignment() {
             TransformedRectKind::AxisAligned
         } else {
             TransformedRectKind::Complex
         };
         let data = ClipScrollNodeData {
-            transform: self.world_content_transform,
+            transform: self.world_content_transform.into(),
             transform_kind: transform_kind as u32 as f32,
             padding: [0.0; 3],
         };
 
         // Write the data that will be made available to the GPU for this node.
         node_data.push(data);
     }
 
@@ -298,19 +301,19 @@ impl ClipScrollNode {
         // quit here.
         if !state.invertible {
             self.mark_uninvertible();
             return;
         }
 
         self.update_transform(state, next_coordinate_system_id, scene_properties);
 
-        // If this node is a reference frame, we check if the determinant is 0, which means it
-        // has a non-invertible matrix. For non-reference-frames we assume that they will
-        // produce only additional translations which should be invertible.
+        // If this node is a reference frame, we check if it has a non-invertible matrix.
+        // For non-reference-frames we assume that they will produce only additional
+        // translations which should be invertible.
         match self.node_type {
             NodeType::ReferenceFrame(info) if !info.invertible => {
                 self.mark_uninvertible();
                 return;
             }
             _ => self.invertible = true,
         }
 
@@ -357,17 +360,17 @@ impl ClipScrollNode {
         let work_item = ClipWorkItem {
             scroll_node_data_index: self.node_data_index,
             clip_sources: clip_sources_handle.weak(),
             coordinate_system_id: state.current_coordinate_system_id,
         };
 
         let mut clip_chain = clip_chains[state.parent_clip_chain_index.0].new_with_added_node(
             work_item,
-            self.coordinate_system_relative_transform.apply(&local_outer_rect),
+            self.coordinate_system_relative_transform.transform_rect(&local_outer_rect),
             screen_outer_rect,
             screen_inner_rect,
         );
 
         clip_chain.parent_index = Some(state.parent_clip_chain_index);
         clip_chains[clip_chain_index.0] = clip_chain;
         state.parent_clip_chain_index = clip_chain_index;
     }
@@ -393,26 +396,26 @@ impl ClipScrollNode {
             &state.nearest_scrolling_ancestor_viewport,
         );
 
         // The transformation for the bounds of our viewport is the parent reference frame
         // transform, plus any accumulated scroll offset from our parents, plus any offset
         // provided by our own sticky positioning.
         let accumulated_offset = state.parent_accumulated_scroll_offset + sticky_offset;
         self.world_viewport_transform = if accumulated_offset != LayerVector2D::zero() {
-            state.parent_reference_frame_transform.pre_translate(accumulated_offset.to_3d())
+            state.parent_reference_frame_transform.pre_translate(&accumulated_offset)
         } else {
             state.parent_reference_frame_transform
         };
 
         // The transformation for any content inside of us is the viewport transformation, plus
         // whatever scrolling offset we supply as well.
         let scroll_offset = self.scroll_offset();
         self.world_content_transform = if scroll_offset != LayerVector2D::zero() {
-            self.world_viewport_transform.pre_translate(scroll_offset.to_3d())
+            self.world_viewport_transform.pre_translate(&scroll_offset)
         } else {
             self.world_viewport_transform
         };
 
         let added_offset = state.parent_accumulated_scroll_offset + sticky_offset + scroll_offset;
         self.coordinate_system_relative_transform =
             state.coordinate_system_relative_transform.offset(added_offset);
 
@@ -432,45 +435,45 @@ impl ClipScrollNode {
     ) {
         let info = match self.node_type {
             NodeType::ReferenceFrame(ref mut info) => info,
             _ => unreachable!("Called update_transform_for_reference_frame on non-ReferenceFrame"),
         };
 
         // Resolve the transform against any property bindings.
         let source_transform = scene_properties.resolve_layout_transform(&info.source_transform);
-        info.resolved_transform = LayerTransform::create_translation(
-            info.origin_in_parent_reference_frame.x,
-            info.origin_in_parent_reference_frame.y,
-            0.0
-        ).pre_mul(&source_transform)
-         .pre_mul(&info.source_perspective);
+        info.resolved_transform =
+            LayerFastTransform::with_vector(info.origin_in_parent_reference_frame)
+            .pre_mul(&source_transform.into())
+            .pre_mul(&info.source_perspective);
 
         // The transformation for this viewport in world coordinates is the transformation for
         // our parent reference frame, plus any accumulated scrolling offsets from nodes
         // between our reference frame and this node. Finally, we also include
         // whatever local transformation this reference frame provides. This can be combined
         // with the local_viewport_rect to get its position in world space.
         let relative_transform = info.resolved_transform
-            .post_translate(state.parent_accumulated_scroll_offset.to_3d());
-        self.world_viewport_transform = state.parent_reference_frame_transform
-            .pre_mul(&relative_transform.with_destination::<LayerPixel>());
+            .post_translate(state.parent_accumulated_scroll_offset)
+            .to_transform()
+            .with_destination::<LayerPixel>();
+        self.world_viewport_transform =
+            state.parent_reference_frame_transform.pre_mul(&relative_transform.into());
         self.world_content_transform = self.world_viewport_transform;
 
-        info.invertible = relative_transform.determinant() != 0.0;
+        info.invertible = self.world_viewport_transform.is_invertible();
         if !info.invertible {
             return;
         }
 
         // Try to update our compatible coordinate system transform. If we cannot, start a new
         // incompatible coordinate system.
         match state.coordinate_system_relative_transform.update(relative_transform) {
             Some(offset) => self.coordinate_system_relative_transform = offset,
             None => {
-                self.coordinate_system_relative_transform = TransformOrOffset::zero();
+                self.coordinate_system_relative_transform = LayerFastTransform::identity();
                 state.current_coordinate_system_id = *next_coordinate_system_id;
                 next_coordinate_system_id.advance();
             }
         }
 
         self.coordinate_system_id = state.current_coordinate_system_id;
     }
 
@@ -839,24 +842,24 @@ impl ScrollFrameInfo {
     }
 }
 
 /// Contains information about reference frames.
 #[derive(Copy, Clone, Debug)]
 pub struct ReferenceFrameInfo {
     /// The transformation that establishes this reference frame, relative to the parent
     /// reference frame. The origin of the reference frame is included in the transformation.
-    pub resolved_transform: LayerTransform,
+    pub resolved_transform: LayerFastTransform,
 
     /// The source transform and perspective matrices provided by the stacking context
     /// that forms this reference frame. We maintain the property binding information
     /// here so that we can resolve the animated transform and update the tree each
     /// frame.
     pub source_transform: PropertyBinding<LayoutTransform>,
-    pub source_perspective: LayoutTransform,
+    pub source_perspective: LayoutFastTransform,
 
     /// The original, not including the transform and relative to the parent reference frame,
     /// origin of this reference frame. This is already rolled into the `transform' property, but
     /// we also store it here to properly transform the viewport for sticky positioning.
     pub origin_in_parent_reference_frame: LayerVector2D,
 
     /// True if the resolved transform is invertible.
     pub invertible: bool,
--- a/gfx/webrender/src/clip_scroll_tree.rs
+++ b/gfx/webrender/src/clip_scroll_tree.rs
@@ -1,24 +1,24 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ClipId, DeviceIntRect, DevicePixelScale, ExternalScrollId, LayerPoint, LayerRect};
-use api::{LayerToWorldTransform, LayerVector2D, PipelineId, ScrollClamping, ScrollEventPhase};
-use api::{ScrollLocation, ScrollNodeState, WorldPoint};
+use api::{LayerVector2D, PipelineId, ScrollClamping, ScrollEventPhase, ScrollLocation};
+use api::{ScrollNodeState, WorldPoint};
 use clip::{ClipChain, ClipSourcesHandle, ClipStore};
 use clip_scroll_node::{ClipScrollNode, NodeType, ScrollFrameInfo, StickyFrameInfo};
 use gpu_cache::GpuCache;
 use gpu_types::{ClipScrollNodeIndex, ClipScrollNodeData};
 use internal_types::{FastHashMap, FastHashSet};
 use print_tree::{PrintTree, PrintTreePrinter};
 use resource_cache::ResourceCache;
 use scene::SceneProperties;
-use util::TransformOrOffset;
+use util::{LayerFastTransform, LayerToWorldFastTransform};
 
 pub type ScrollStates = FastHashMap<ExternalScrollId, ScrollFrameInfo>;
 
 /// An id that identifies coordinate systems in the ClipScrollTree. Each
 /// coordinate system has an id and those ids will be shared when the coordinate
 /// systems are the same or are in the same axis-aligned space. This allows
 /// for optimizing mask generation.
 #[derive(Debug, Copy, Clone, PartialEq)]
@@ -83,32 +83,32 @@ pub struct ClipScrollTree {
 
     /// A set of pipelines which should be discarded the next time this
     /// tree is drained.
     pub pipelines_to_discard: FastHashSet<PipelineId>,
 }
 
 #[derive(Clone)]
 pub struct TransformUpdateState {
-    pub parent_reference_frame_transform: LayerToWorldTransform,
+    pub parent_reference_frame_transform: LayerToWorldFastTransform,
     pub parent_accumulated_scroll_offset: LayerVector2D,
     pub nearest_scrolling_ancestor_offset: LayerVector2D,
     pub nearest_scrolling_ancestor_viewport: LayerRect,
 
     /// The index of the current parent's clip chain.
     pub parent_clip_chain_index: ClipChainIndex,
 
     /// An id for keeping track of the axis-aligned space of this node. This is used in
     /// order to track what kinds of clip optimizations can be done for a particular
     /// display list item, since optimizations can usually only be done among
     /// coordinate systems which are relatively axis aligned.
     pub current_coordinate_system_id: CoordinateSystemId,
 
     /// Transform from the coordinate system that started this compatible coordinate system.
-    pub coordinate_system_relative_transform: TransformOrOffset,
+    pub coordinate_system_relative_transform: LayerFastTransform,
 
     /// True if this node is transformed by an invertible transform.  If not, display items
     /// transformed by this node will not be displayed and display items not transformed by this
     /// node will not be clipped by clips that are transformed by this node.
     pub invertible: bool,
 }
 
 impl ClipScrollTree {
@@ -324,27 +324,23 @@ impl ClipScrollTree {
         if self.nodes.is_empty() {
             return;
         }
 
         self.clip_chains[0] = ClipChain::empty(screen_rect);
 
         let root_reference_frame_id = self.root_reference_frame_id();
         let mut state = TransformUpdateState {
-            parent_reference_frame_transform: LayerToWorldTransform::create_translation(
-                pan.x,
-                pan.y,
-                0.0,
-            ),
+            parent_reference_frame_transform: LayerVector2D::new(pan.x, pan.y).into(),
             parent_accumulated_scroll_offset: LayerVector2D::zero(),
             nearest_scrolling_ancestor_offset: LayerVector2D::zero(),
             nearest_scrolling_ancestor_viewport: LayerRect::zero(),
             parent_clip_chain_index: ClipChainIndex(0),
             current_coordinate_system_id: CoordinateSystemId::root(),
-            coordinate_system_relative_transform: TransformOrOffset::zero(),
+            coordinate_system_relative_transform: LayerFastTransform::identity(),
             invertible: true,
         };
         let mut next_coordinate_system_id = state.current_coordinate_system_id.next();
         self.update_node(
             root_reference_frame_id,
             &mut state,
             &mut next_coordinate_system_id,
             device_pixel_scale,
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -1,46 +1,45 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AlphaType, BorderDetails, BorderDisplayItem, BuiltDisplayList, ClipId, ColorF};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DevicePixelScale, DeviceUintPoint};
 use api::{DeviceUintRect, DeviceUintSize, DocumentLayer, Epoch, ExtendMode, ExternalScrollId};
 use api::{FontRenderMode, GlyphInstance, GlyphOptions, GradientStop, ImageKey, ImageRendering};
-use api::{ItemRange, LayerPoint, LayerPrimitiveInfo, LayerRect, LayerSize, LayerTransform};
-use api::{LayerVector2D, LayoutTransform, LayoutVector2D, LineOrientation, LineStyle, LocalClip};
-use api::{PipelineId, PremultipliedColorF, PropertyBinding, RepeatMode, ScrollSensitivity, Shadow};
-use api::{TexelRect, TileOffset, TransformStyle, WorldPoint, WorldToLayerTransform, YuvColorSpace};
-use api::YuvData;
+use api::{ItemRange, LayerPoint, LayerPrimitiveInfo, LayerRect, LayerSize, LayerVector2D};
+use api::{LayoutTransform, LayoutVector2D, LineOrientation, LineStyle, LocalClip, PipelineId};
+use api::{PremultipliedColorF, PropertyBinding, RepeatMode, ScrollSensitivity, Shadow, TexelRect};
+use api::{TileOffset, TransformStyle, WorldPoint, YuvColorSpace, YuvData};
 use app_units::Au;
 use border::ImageBorderSegment;
 use clip::{ClipChain, ClipRegion, ClipSource, ClipSources, ClipStore};
 use clip_scroll_node::{ClipScrollNode, NodeType};
 use clip_scroll_tree::{ClipScrollTree, ClipChainIndex};
 use euclid::{SideOffsets2D, vec2};
 use frame::{FrameId, ClipIdToIndexMapper};
 use glyph_rasterizer::FontInstance;
-use gpu_cache::{GpuCache, GpuCacheHandle};
+use gpu_cache::GpuCache;
 use gpu_types::{ClipChainRectIndex, ClipScrollNodeData, PictureType};
 use hit_test::{HitTester, HitTestingItem, HitTestingRun};
 use internal_types::{FastHashMap, FastHashSet};
 use picture::{ContentOrigin, PictureCompositeMode, PictureKind, PicturePrimitive, PictureSurface};
-use prim_store::{BrushKind, BrushPrimitive, BrushSegmentDescriptor, GradientPrimitiveCpu};
+use prim_store::{BrushKind, BrushPrimitive, BrushSegmentDescriptor, CachedGradient, CachedGradientIndex};
 use prim_store::{ImageCacheKey, ImagePrimitiveCpu, ImageSource, PrimitiveContainer};
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveRun, PrimitiveStore};
 use prim_store::{ScrollNodeAndClipChain, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_task::{ClearMode, RenderTask, RenderTaskId, RenderTaskLocation, RenderTaskTree};
 use resource_cache::{ImageRequest, ResourceCache};
 use scene::{ScenePipeline, SceneProperties};
 use std::{mem, usize, f32};
 use tiling::{CompositeOps, Frame, RenderPass, RenderTargetKind};
 use tiling::{RenderPassKind, RenderTargetContext, ScrollbarPrimitive};
-use util::{self, MaxRect, pack_as_float, RectHelpers, recycle_vec};
+use util::{self, MaxRect, RectHelpers, WorldToLayerFastTransform, recycle_vec};
 
 #[derive(Debug)]
 pub struct ScrollbarInfo(pub ClipId, pub LayerRect);
 
 /// Properties of a stacking context that are maintained
 /// during creation of the scene. These structures are
 /// not persisted after the initial scene build.
 struct StackingContext {
@@ -81,16 +80,17 @@ pub struct FrameBuilderConfig {
 /// A builder structure for `tiling::Frame`
 pub struct FrameBuilder {
     screen_rect: DeviceUintRect,
     background_color: Option<ColorF>,
     prim_store: PrimitiveStore,
     pub clip_store: ClipStore,
     hit_testing_runs: Vec<HitTestingRun>,
     pub config: FrameBuilderConfig,
+    pub cached_gradients: Vec<CachedGradient>,
 
     // A stack of the current shadow primitives.
     // The sub-Vec stores a buffer of fast-path primitives to be appended on pop.
     shadow_prim_stack: Vec<(PrimitiveIndex, Vec<(PrimitiveIndex, ScrollNodeAndClipChain)>)>,
     // If we're doing any fast-path shadows, we buffer the "real"
     // content here, to be appended when the shadow stack is empty.
     pending_shadow_contents: Vec<(PrimitiveIndex, ScrollNodeAndClipChain, LayerPrimitiveInfo)>,
 
@@ -119,26 +119,27 @@ pub struct FrameContext<'a> {
 
 pub struct FrameState<'a> {
     pub render_tasks: &'a mut RenderTaskTree,
     pub profile_counters: &'a mut FrameProfileCounters,
     pub clip_store: &'a mut ClipStore,
     pub local_clip_rects: &'a mut Vec<LayerRect>,
     pub resource_cache: &'a mut ResourceCache,
     pub gpu_cache: &'a mut GpuCache,
+    pub cached_gradients: &'a mut [CachedGradient],
 }
 
 pub struct PictureContext<'a> {
     pub pipeline_id: PipelineId,
     pub perform_culling: bool,
     pub prim_runs: Vec<PrimitiveRun>,
     pub original_reference_frame_id: Option<ClipId>,
     pub display_list: &'a BuiltDisplayList,
     pub draw_text_transformed: bool,
-    pub inv_world_transform: Option<WorldToLayerTransform>,
+    pub inv_world_transform: Option<WorldToLayerFastTransform>,
 }
 
 pub struct PictureState {
     pub tasks: Vec<RenderTaskId>,
 }
 
 impl PictureState {
     pub fn new() -> PictureState {
@@ -168,16 +169,17 @@ impl<'a> PrimitiveRunContext<'a> {
     }
 }
 
 impl FrameBuilder {
     pub fn empty() -> Self {
         FrameBuilder {
             hit_testing_runs: Vec::new(),
             shadow_prim_stack: Vec::new(),
+            cached_gradients: Vec::new(),
             pending_shadow_contents: Vec::new(),
             scrollbar_prims: Vec::new(),
             reference_frame_stack: Vec::new(),
             picture_stack: Vec::new(),
             sc_stack: Vec::new(),
             prim_store: PrimitiveStore::new(),
             clip_store: ClipStore::new(),
             screen_rect: DeviceUintRect::zero(),
@@ -196,16 +198,17 @@ impl FrameBuilder {
         self,
         screen_rect: DeviceUintRect,
         background_color: Option<ColorF>,
         config: FrameBuilderConfig,
     ) -> Self {
         FrameBuilder {
             hit_testing_runs: recycle_vec(self.hit_testing_runs),
             shadow_prim_stack: recycle_vec(self.shadow_prim_stack),
+            cached_gradients: recycle_vec(self.cached_gradients),
             pending_shadow_contents: recycle_vec(self.pending_shadow_contents),
             scrollbar_prims: recycle_vec(self.scrollbar_prims),
             reference_frame_stack: recycle_vec(self.reference_frame_stack),
             picture_stack: recycle_vec(self.picture_stack),
             sc_stack: recycle_vec(self.sc_stack),
             prim_store: self.prim_store.recycle(),
             clip_store: self.clip_store.recycle(),
             screen_rect,
@@ -668,21 +671,18 @@ impl FrameBuilder {
         inner_rect: DeviceUintRect,
         device_pixel_scale: DevicePixelScale,
         clip_scroll_tree: &mut ClipScrollTree,
     ) {
         let viewport_offset = (inner_rect.origin.to_vector().to_f32() / device_pixel_scale).round();
         let root_id = clip_scroll_tree.root_reference_frame_id();
         if let Some(root_node) = clip_scroll_tree.nodes.get_mut(&root_id) {
             if let NodeType::ReferenceFrame(ref mut info) = root_node.node_type {
-                info.resolved_transform = LayerTransform::create_translation(
-                    viewport_offset.x,
-                    viewport_offset.y,
-                    0.0,
-                );
+                info.resolved_transform =
+                    LayerVector2D::new(viewport_offset.x, viewport_offset.y).into();
             }
         }
     }
 
     pub fn push_root(
         &mut self,
         pipeline_id: PipelineId,
         viewport_size: &LayerSize,
@@ -1231,108 +1231,134 @@ impl FrameBuilder {
                         segment.size,
                         LayerSize::zero(),
                     );
                 }
             }
         }
     }
 
+    fn add_gradient_impl(
+        &mut self,
+        clip_and_scroll: ScrollNodeAndClipChain,
+        info: &LayerPrimitiveInfo,
+        start_point: LayerPoint,
+        end_point: LayerPoint,
+        stops: ItemRange<GradientStop>,
+        stops_count: usize,
+        extend_mode: ExtendMode,
+        gradient_index: CachedGradientIndex,
+    ) {
+        // Try to ensure that if the gradient is specified in reverse, then so long as the stops
+        // are also supplied in reverse, the rendered result will be equivalent. To do this,
+        // a reference orientation for the gradient line must be chosen, somewhat arbitrarily, so
+        // just designate the reference orientation as start < end (x is compared first, then y).
+        // The flag is computed for every linear gradient built here; there is no aligned-gradient
+        // special case that skips the reversal.
+        let reverse_stops = start_point.x > end_point.x ||
+            (start_point.x == end_point.x && start_point.y > end_point.y);
+
+        // To get reftests exactly matching with reverse start/end
+        // points, swap the endpoints of the gradient line whenever
+        // `reverse_stops` is set.
+        let (sp, ep) = if reverse_stops {
+            (end_point, start_point)
+        } else {
+            (start_point, end_point)
+        };
+
+        let prim = BrushPrimitive::new(
+            BrushKind::LinearGradient {
+                stops_range: stops,
+                stops_count,
+                extend_mode,
+                reverse_stops,
+                start_point: sp,
+                end_point: ep,
+                gradient_index,
+            },
+            None,
+        );
+
+        let prim = PrimitiveContainer::Brush(prim);
+
+        self.add_primitive(clip_and_scroll, info, Vec::new(), prim);
+    }
+
     pub fn add_gradient(
         &mut self,
         clip_and_scroll: ScrollNodeAndClipChain,
         info: &LayerPrimitiveInfo,
         start_point: LayerPoint,
         end_point: LayerPoint,
         stops: ItemRange<GradientStop>,
         stops_count: usize,
         extend_mode: ExtendMode,
         tile_size: LayerSize,
         tile_spacing: LayerSize,
     ) {
-        let tile_repeat = tile_size + tile_spacing;
-        let is_not_tiled = tile_repeat.width >= info.rect.size.width &&
-            tile_repeat.height >= info.rect.size.height;
-
-        let aligned_and_fills_rect = (start_point.x == end_point.x &&
-            start_point.y.min(end_point.y) <= 0.0 &&
-            start_point.y.max(end_point.y) >= info.rect.size.height) ||
-            (start_point.y == end_point.y && start_point.x.min(end_point.x) <= 0.0 &&
-                start_point.x.max(end_point.x) >= info.rect.size.width);
+        let gradient_index = CachedGradientIndex(self.cached_gradients.len());
+        self.cached_gradients.push(CachedGradient::new());
 
-        // Fast path for clamped, axis-aligned gradients, with gradient lines intersecting all of rect:
-        let aligned = extend_mode == ExtendMode::Clamp && is_not_tiled && aligned_and_fills_rect;
-
-        // Try to ensure that if the gradient is specified in reverse, then so long as the stops
-        // are also supplied in reverse that the rendered result will be equivalent. To do this,
-        // a reference orientation for the gradient line must be chosen, somewhat arbitrarily, so
-        // just designate the reference orientation as start < end. Aligned gradient rendering
-        // manages to produce the same result regardless of orientation, so don't worry about
-        // reversing in that case.
-        let reverse_stops = !aligned &&
-            (start_point.x > end_point.x ||
-                (start_point.x == end_point.x && start_point.y > end_point.y));
+        let prim_infos = info.decompose(
+            tile_size,
+            tile_spacing,
+            64 * 64,
+        );
 
-        // To get reftests exactly matching with reverse start/end
-        // points, it's necessary to reverse the gradient
-        // line in some cases.
-        let (sp, ep) = if reverse_stops {
-            (end_point, start_point)
+        if prim_infos.is_empty() {
+            self.add_gradient_impl(
+                clip_and_scroll,
+                info,
+                start_point,
+                end_point,
+                stops,
+                stops_count,
+                extend_mode,
+                gradient_index,
+            );
         } else {
-            (start_point, end_point)
-        };
-
-        let gradient_cpu = GradientPrimitiveCpu {
-            stops_range: stops,
-            stops_count,
-            extend_mode,
-            reverse_stops,
-            gpu_blocks: [
-                [sp.x, sp.y, ep.x, ep.y].into(),
-                [
-                    tile_size.width,
-                    tile_size.height,
-                    tile_repeat.width,
-                    tile_repeat.height,
-                ].into(),
-                [pack_as_float(extend_mode as u32), 0.0, 0.0, 0.0].into(),
-            ],
-        };
-
-        let prim = if aligned {
-            PrimitiveContainer::AlignedGradient(gradient_cpu)
-        } else {
-            PrimitiveContainer::AngleGradient(gradient_cpu)
-        };
-
-        self.add_primitive(clip_and_scroll, info, Vec::new(), prim);
+            for prim_info in prim_infos {
+                self.add_gradient_impl(
+                    clip_and_scroll,
+                    &prim_info,
+                    start_point,
+                    end_point,
+                    stops,
+                    stops_count,
+                    extend_mode,
+                    gradient_index,
+                );
+            }
+        }
     }
 
     fn add_radial_gradient_impl(
         &mut self,
         clip_and_scroll: ScrollNodeAndClipChain,
         info: &LayerPrimitiveInfo,
         start_center: LayerPoint,
         start_radius: f32,
         end_center: LayerPoint,
         end_radius: f32,
         ratio_xy: f32,
         stops: ItemRange<GradientStop>,
         extend_mode: ExtendMode,
+        gradient_index: CachedGradientIndex,
     ) {
         let prim = BrushPrimitive::new(
             BrushKind::RadialGradient {
                 stops_range: stops,
                 extend_mode,
-                stops_handle: GpuCacheHandle::new(),
                 start_center,
                 end_center,
                 start_radius,
                 end_radius,
                 ratio_xy,
+                gradient_index,
             },
             None,
         );
 
         self.add_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
@@ -1349,16 +1375,19 @@ impl FrameBuilder {
         end_center: LayerPoint,
         end_radius: f32,
         ratio_xy: f32,
         stops: ItemRange<GradientStop>,
         extend_mode: ExtendMode,
         tile_size: LayerSize,
         tile_spacing: LayerSize,
     ) {
+        let gradient_index = CachedGradientIndex(self.cached_gradients.len());
+        self.cached_gradients.push(CachedGradient::new());
+
         let prim_infos = info.decompose(
             tile_size,
             tile_spacing,
             64 * 64,
         );
 
         if prim_infos.is_empty() {
             self.add_radial_gradient_impl(
@@ -1366,29 +1395,31 @@ impl FrameBuilder {
                 info,
                 start_center,
                 start_radius,
                 end_center,
                 end_radius,
                 ratio_xy,
                 stops,
                 extend_mode,
+                gradient_index,
             );
         } else {
             for prim_info in prim_infos {
                 self.add_radial_gradient_impl(
                     clip_and_scroll,
                     &prim_info,
                     start_center,
                     start_radius,
                     end_center,
                     end_radius,
                     ratio_xy,
                     stops,
                     extend_mode,
+                    gradient_index,
                 );
             }
         }
     }
 
     pub fn add_text(
         &mut self,
         clip_and_scroll: ScrollNodeAndClipChain,
@@ -1698,16 +1729,17 @@ impl FrameBuilder {
 
         let mut frame_state = FrameState {
             render_tasks,
             profile_counters,
             clip_store: &mut self.clip_store,
             local_clip_rects,
             resource_cache,
             gpu_cache,
+            cached_gradients: &mut self.cached_gradients,
         };
 
         let pic_context = PictureContext {
             pipeline_id: root_clip_scroll_node.pipeline_id,
             perform_culling: true,
             prim_runs: mem::replace(&mut self.prim_store.cpu_pictures[0].runs, Vec::new()),
             original_reference_frame_id: None,
             display_list,
@@ -1866,16 +1898,17 @@ impl FrameBuilder {
         for pass in &mut passes {
             let ctx = RenderTargetContext {
                 device_pixel_scale,
                 prim_store: &self.prim_store,
                 resource_cache,
                 clip_scroll_tree,
                 use_dual_source_blending,
                 node_data: &node_data,
+                cached_gradients: &self.cached_gradients,
             };
 
             pass.build(
                 &ctx,
                 gpu_cache,
                 &mut render_tasks,
                 &mut deferred_resolves,
                 &self.clip_store,
--- a/gfx/webrender/src/hit_test.rs
+++ b/gfx/webrender/src/hit_test.rs
@@ -1,34 +1,34 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, ClipId, ClipMode, HitTestFlags, HitTestItem, HitTestResult, ItemTag};
-use api::{LayerPoint, LayerPrimitiveInfo, LayerRect, LayerToWorldTransform, LocalClip, PipelineId};
-use api::WorldPoint;
+use api::{LayerPoint, LayerPrimitiveInfo, LayerRect, LocalClip, PipelineId, WorldPoint};
 use clip::{ClipSource, ClipStore, Contains, rounded_rectangle_contains_point};
 use clip_scroll_node::{ClipScrollNode, NodeType};
 use clip_scroll_tree::{ClipChainIndex, ClipScrollTree};
 use internal_types::FastHashMap;
 use prim_store::ScrollNodeAndClipChain;
+use util::LayerToWorldFastTransform;
 
 /// A copy of important clip scroll node data to use during hit testing. This a copy of
 /// data from the ClipScrollTree that will persist as a new frame is under construction,
 /// allowing hit tests consistent with the currently rendered frame.
 pub struct HitTestClipScrollNode {
     /// A particular point must be inside all of these regions to be considered clipped in
     /// for the purposes of a hit test.
     regions: Vec<HitTestRegion>,
 
     /// World transform for content transformed by this node.
-    world_content_transform: LayerToWorldTransform,
+    world_content_transform: LayerToWorldFastTransform,
 
     /// World viewport transform for content transformed by this node.
-    world_viewport_transform: LayerToWorldTransform,
+    world_viewport_transform: LayerToWorldFastTransform,
 
     /// Origin of the viewport of the node, used to calculate node-relative positions.
     node_origin: LayerPoint,
 }
 
 /// A description of a clip chain in the HitTester. This is used to describe
 /// hierarchical clip scroll nodes as well as ClipChains, so that they can be
 /// handled the same way during hit testing. Once we represent all ClipChains
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -2,17 +2,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BoxShadowClipMode, ClipId, ColorF, DeviceIntPoint, DeviceIntRect, FilterOp, LayerPoint};
 use api::{LayerRect, LayerToWorldScale, LayerVector2D, MixBlendMode, PipelineId};
 use api::{PremultipliedColorF, Shadow};
 use box_shadow::{BLUR_SAMPLE_SCALE, BoxShadowCacheKey};
 use frame_builder::{FrameContext, FrameState, PictureState};
-use gpu_cache::GpuDataRequest;
+use gpu_cache::{GpuCacheHandle, GpuDataRequest};
 use gpu_types::{BrushImageKind, PictureType};
 use prim_store::{BrushKind, BrushPrimitive, PrimitiveIndex, PrimitiveRun, PrimitiveRunLocalRect};
 use prim_store::ScrollNodeAndClipChain;
 use render_task::{ClearMode, RenderTask, RenderTaskCacheKey};
 use render_task::{RenderTaskCacheKeyKind, RenderTaskId, RenderTaskLocation};
 use resource_cache::CacheItem;
 use scene::{FilterOpHelpers, SceneProperties};
 use tiling::RenderTargetKind;
@@ -83,16 +83,20 @@ pub enum PictureKind {
         // pages to a texture), this is the pipeline this
         // picture is the root of.
         frame_output_pipeline_id: Option<PipelineId>,
         // The original reference frame ID for this picture.
         // It is only different if this is part of a 3D
         // rendering context.
         reference_frame_id: ClipId,
         real_local_rect: LayerRect,
+        // A cache handle for storing extra data in the GPU
+        // cache. Whether it is used depends on the type of
+        // picture (e.g. the color matrix of a filter).
+        extra_gpu_data_handle: GpuCacheHandle,
     },
 }
 
 // The type of surface that a picture can be drawn to.
 // RenderTask surfaces are not retained across frames.
 // TextureCache surfaces are stored across frames, and
 // also shared between display lists.
 #[derive(Debug)]
@@ -212,16 +216,17 @@ impl PicturePrimitive {
             surface: None,
             kind: PictureKind::Image {
                 secondary_render_task_id: None,
                 composite_mode,
                 is_in_3d_context,
                 frame_output_pipeline_id,
                 reference_frame_id,
                 real_local_rect: LayerRect::zero(),
+                extra_gpu_data_handle: GpuCacheHandle::new(),
             },
             pipeline_id,
             cull_children: true,
             brush: BrushPrimitive::new(
                 BrushKind::Picture,
                 None,
             ),
         }
@@ -328,16 +333,17 @@ impl PicturePrimitive {
         frame_context: &FrameContext,
         frame_state: &mut FrameState,
     ) {
         let content_scale = LayerToWorldScale::new(1.0) * frame_context.device_pixel_scale;
 
         match self.kind {
             PictureKind::Image {
                 ref mut secondary_render_task_id,
+                ref mut extra_gpu_data_handle,
                 composite_mode,
                 ..
             } => {
                 let content_origin = ContentOrigin::Screen(prim_screen_rect.origin);
                 match composite_mode {
                     Some(PictureCompositeMode::Filter(FilterOp::Blur(blur_radius))) => {
                         let picture_task = RenderTask::new_picture(
                             RenderTaskLocation::Dynamic(None, prim_screen_rect.size),
@@ -424,16 +430,25 @@ impl PicturePrimitive {
                         // the picture, just collapse this picture into the
                         // current render task. This most commonly occurs
                         // when opacity == 1.0, but can also occur on other
                         // filters and be a significant performance win.
                         if filter.is_noop() {
                             pic_state.tasks.extend(pic_state_for_children.tasks);
                             self.surface = None;
                         } else {
+
+                            if let FilterOp::ColorMatrix(m) = filter {
+                                if let Some(mut request) = frame_state.gpu_cache.request(extra_gpu_data_handle) {
+                                    for i in 0..5 {
+                                        request.push([m[i*4], m[i*4+1], m[i*4+2], m[i*4+3]]);
+                                    }
+                                }
+                            }
+
                             let picture_task = RenderTask::new_picture(
                                 RenderTaskLocation::Dynamic(None, prim_screen_rect.size),
                                 prim_index,
                                 RenderTargetKind::Color,
                                 content_origin,
                                 PremultipliedColorF::TRANSPARENT,
                                 ClearMode::Transparent,
                                 pic_state_for_children.tasks,
@@ -589,27 +604,20 @@ impl PicturePrimitive {
     pub fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
         // TODO(gw): It's unfortunate that we pay a fixed cost
         //           of 5 GPU blocks / picture, just due to the size
         //           of the color matrix. There aren't typically very
         //           many pictures in a scene, but we should consider
         //           making this more efficient for the common case.
         match self.kind {
             PictureKind::TextShadow { .. } => {
-                for _ in 0 .. 5 {
-                    request.push([0.0; 4]);
-                }
+                request.push([0.0; 4]);
             }
             PictureKind::Image { composite_mode, .. } => {
                 match composite_mode {
-                    Some(PictureCompositeMode::Filter(FilterOp::ColorMatrix(m))) => {
-                        for i in 0..5 {
-                            request.push([m[i*4], m[i*4+1], m[i*4+2], m[i*4+3]]);
-                        }
-                    }
                     Some(PictureCompositeMode::Filter(filter)) => {
                         let amount = match filter {
                             FilterOp::Contrast(amount) => amount,
                             FilterOp::Grayscale(amount) => amount,
                             FilterOp::HueRotate(angle) => 0.01745329251 * angle,
                             FilterOp::Invert(amount) => amount,
                             FilterOp::Saturate(amount) => amount,
                             FilterOp::Sepia(amount) => amount,
@@ -618,33 +626,24 @@ impl PicturePrimitive {
 
                             // Go through different paths
                             FilterOp::Blur(..) |
                             FilterOp::DropShadow(..) |
                             FilterOp::ColorMatrix(_) => 0.0,
                         };
 
                         request.push([amount, 1.0 - amount, 0.0, 0.0]);
-
-                        for _ in 0 .. 4 {
-                            request.push([0.0; 4]);
-                        }
                     }
                     _ => {
-                        for _ in 0 .. 5 {
-                            request.push([0.0; 4]);
-                        }
+                        request.push([0.0; 4]);
                     }
                 }
             }
             PictureKind::BoxShadow { color, .. } => {
                 request.push(color.premultiplied());
-                for _ in 0 .. 4 {
-                    request.push([0.0; 4]);
-                }
             }
         }
     }
 
     pub fn target_kind(&self) -> RenderTargetKind {
         match self.kind {
             PictureKind::TextShadow { .. } => RenderTargetKind::Color,
             PictureKind::BoxShadow { .. } => RenderTargetKind::Alpha,
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -1,17 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AlphaType, BorderRadius, BuiltDisplayList, ClipId, ClipMode, ColorF, ComplexClipRegion};
 use api::{DeviceIntRect, DeviceIntSize, DevicePixelScale, Epoch, ExtendMode, FontRenderMode};
 use api::{GlyphInstance, GlyphKey, GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag};
 use api::{LayerPoint, LayerRect, LayerSize, LayerToWorldTransform, LayerVector2D, LineOrientation};
-use api::{LineStyle, PremultipliedColorF, WorldToLayerTransform, YuvColorSpace, YuvFormat};
+use api::{LineStyle, PremultipliedColorF, YuvColorSpace, YuvFormat};
 use border::{BorderCornerInstance, BorderEdgeKind};
 use clip_scroll_tree::{ClipChainIndex, CoordinateSystemId};
 use clip_scroll_node::ClipScrollNode;
 use clip::{ClipChain, ClipChainNode, ClipChainNodeIter, ClipChainNodeRef, ClipSource};
 use clip::{ClipSourcesHandle, ClipWorkItem};
 use frame_builder::{FrameContext, FrameState, PictureContext, PictureState, PrimitiveRunContext};
 use glyph_rasterizer::{FontInstance, FontTransform};
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
@@ -20,21 +20,21 @@ use gpu_types::{ClipChainRectIndex};
 use picture::{PictureKind, PicturePrimitive};
 use render_task::{BlitSource, RenderTask, RenderTaskCacheKey, RenderTaskCacheKeyKind};
 use render_task::RenderTaskId;
 use renderer::{MAX_VERTEX_TEXTURE_WIDTH};
 use resource_cache::{CacheItem, ImageProperties, ImageRequest, ResourceCache};
 use segment::SegmentBuilder;
 use std::{mem, usize};
 use std::rc::Rc;
-use util::{MatrixHelpers, calculate_screen_bounding_rect, pack_as_float};
-use util::recycle_vec;
+use util::{MatrixHelpers, WorldToLayerFastTransform, calculate_screen_bounding_rect};
+use util::{pack_as_float, recycle_vec};
 
 
-const MIN_BRUSH_SPLIT_AREA: f32 = 128.0 * 128.0;
+const MIN_BRUSH_SPLIT_AREA: f32 = 256.0 * 256.0;
 
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub struct ScrollNodeAndClipChain {
     pub scroll_node_id: ClipId,
     pub clip_chain_index: ClipChainIndex,
 }
 
 impl ScrollNodeAndClipChain {
@@ -64,19 +64,30 @@ impl PrimitiveOpacity {
         PrimitiveOpacity { is_opaque: false }
     }
 
     pub fn from_alpha(alpha: f32) -> PrimitiveOpacity {
         PrimitiveOpacity {
             is_opaque: alpha == 1.0,
         }
     }
+}
 
-    pub fn accumulate(&mut self, alpha: f32) {
-        self.is_opaque = self.is_opaque && alpha == 1.0;
+#[derive(Debug, Copy, Clone)]
+pub struct CachedGradientIndex(pub usize);
+
+pub struct CachedGradient {
+    pub handle: GpuCacheHandle,
+}
+
+impl CachedGradient {
+    pub fn new() -> CachedGradient {
+        CachedGradient {
+            handle: GpuCacheHandle::new(),
+        }
     }
 }
 
 // Represents the local space rect of a list of
 // primitive runs. For most primitive runs, the
 // primitive runs are attached to the parent they
 // are declared in. However, when a primitive run
 // is part of a 3d rendering context, it may get
@@ -119,18 +130,16 @@ pub struct SpecificPrimitiveIndex(pub us
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct PrimitiveIndex(pub usize);
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub enum PrimitiveKind {
     TextRun,
     Image,
     Border,
-    AlignedGradient,
-    AngleGradient,
     Picture,
     Brush,
 }
 
 impl GpuCacheHandle {
     pub fn as_int(&self, gpu_cache: &GpuCache) -> i32 {
         gpu_cache.get_address(self).as_int()
     }
@@ -206,35 +215,45 @@ pub enum BrushKind {
     },
     YuvImage {
         yuv_key: [ImageKey; 3],
         format: YuvFormat,
         color_space: YuvColorSpace,
         image_rendering: ImageRendering,
     },
     RadialGradient {
+        gradient_index: CachedGradientIndex,
         stops_range: ItemRange<GradientStop>,
         extend_mode: ExtendMode,
-        stops_handle: GpuCacheHandle,
         start_center: LayerPoint,
         end_center: LayerPoint,
         start_radius: f32,
         end_radius: f32,
         ratio_xy: f32,
+    },
+    LinearGradient {
+        gradient_index: CachedGradientIndex,
+        stops_range: ItemRange<GradientStop>,
+        stops_count: usize,
+        extend_mode: ExtendMode,
+        reverse_stops: bool,
+        start_point: LayerPoint,
+        end_point: LayerPoint,
     }
 }
 
 impl BrushKind {
     fn supports_segments(&self) -> bool {
         match *self {
             BrushKind::Solid { .. } |
             BrushKind::Picture |
             BrushKind::Image { .. } |
             BrushKind::YuvImage { .. } |
-            BrushKind::RadialGradient { .. } => true,
+            BrushKind::RadialGradient { .. } |
+            BrushKind::LinearGradient { .. } => true,
 
             BrushKind::Mask { .. } |
             BrushKind::Clear |
             BrushKind::Line { .. } => false,
         }
     }
 }
 
@@ -354,16 +373,30 @@ impl BrushPrimitive {
                 request.push(color);
                 request.push([
                     wavy_line_thickness,
                     pack_as_float(style as u32),
                     pack_as_float(orientation as u32),
                     0.0,
                 ]);
             }
+            BrushKind::LinearGradient { start_point, end_point, extend_mode, .. } => {
+                request.push([
+                    start_point.x,
+                    start_point.y,
+                    end_point.x,
+                    end_point.y,
+                ]);
+                request.push([
+                    pack_as_float(extend_mode as u32),
+                    0.0,
+                    0.0,
+                    0.0,
+                ]);
+            }
             BrushKind::RadialGradient { start_center, end_center, start_radius, end_radius, ratio_xy, extend_mode, .. } => {
                 request.push([
                     start_center.x,
                     start_center.y,
                     end_center.x,
                     end_center.y,
                 ]);
                 request.push([
@@ -427,56 +460,16 @@ pub struct BorderPrimitiveCpu {
 }
 
 impl ToGpuBlocks for BorderPrimitiveCpu {
     fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
         request.extend_from_slice(&self.gpu_blocks);
     }
 }
 
-#[derive(Debug)]
-pub struct GradientPrimitiveCpu {
-    pub stops_range: ItemRange<GradientStop>,
-    pub stops_count: usize,
-    pub extend_mode: ExtendMode,
-    pub reverse_stops: bool,
-    pub gpu_blocks: [GpuBlockData; 3],
-}
-
-impl GradientPrimitiveCpu {
-    fn build_gpu_blocks_for_aligned(
-        &self,
-        display_list: &BuiltDisplayList,
-        mut request: GpuDataRequest,
-    ) -> PrimitiveOpacity {
-        let mut opacity = PrimitiveOpacity::opaque();
-        request.extend_from_slice(&self.gpu_blocks);
-        let src_stops = display_list.get(self.stops_range);
-
-        for src in src_stops {
-            request.push(src.color.premultiplied());
-            request.push([src.offset, 0.0, 0.0, 0.0]);
-            opacity.accumulate(src.color.a);
-        }
-
-        opacity
-    }
-
-    fn build_gpu_blocks_for_angle_radial(
-        &self,
-        display_list: &BuiltDisplayList,
-        mut request: GpuDataRequest,
-    ) {
-        request.extend_from_slice(&self.gpu_blocks);
-
-        let gradient_builder = GradientGpuBlockBuilder::new(self.stops_range, display_list);
-        gradient_builder.build(self.reverse_stops, &mut request);
-    }
-}
-
 // The gradient entry index for the first color stop
 pub const GRADIENT_DATA_FIRST_STOP: usize = 0;
 // The gradient entry index for the last color stop
 pub const GRADIENT_DATA_LAST_STOP: usize = GRADIENT_DATA_SIZE - 1;
 
 // The start of the gradient data table
 pub const GRADIENT_DATA_TABLE_BEGIN: usize = GRADIENT_DATA_FIRST_STOP + 1;
 // The exclusive bound of the gradient data table
@@ -665,35 +658,35 @@ pub struct TextRunPrimitiveCpu {
     pub glyph_gpu_blocks: Vec<GpuBlockData>,
     pub shadow: bool,
 }
 
 impl TextRunPrimitiveCpu {
     pub fn get_font(
         &self,
         device_pixel_scale: DevicePixelScale,
-        transform: Option<&LayerToWorldTransform>,
+        transform: Option<LayerToWorldTransform>,
     ) -> FontInstance {
         let mut font = self.font.clone();
         font.size = font.size.scale_by(device_pixel_scale.0);
         if let Some(transform) = transform {
             if transform.has_perspective_component() || !transform.has_2d_inverse() {
                 font.render_mode = font.render_mode.limit_by(FontRenderMode::Alpha);
             } else {
-                font.transform = FontTransform::from(transform).quantize();
+                font.transform = FontTransform::from(&transform).quantize();
             }
         }
         font
     }
 
     fn prepare_for_render(
         &mut self,
         resource_cache: &mut ResourceCache,
         device_pixel_scale: DevicePixelScale,
-        transform: Option<&LayerToWorldTransform>,
+        transform: Option<LayerToWorldTransform>,
         display_list: &BuiltDisplayList,
         gpu_cache: &mut GpuCache,
     ) {
         let font = self.get_font(device_pixel_scale, transform);
 
         // Cache the glyph positions, if not in the cache already.
         // TODO(gw): In the future, remove `glyph_instances`
         //           completely, and just reference the glyphs
@@ -932,54 +925,49 @@ impl ClipData {
     }
 }
 
 #[derive(Debug)]
 pub enum PrimitiveContainer {
     TextRun(TextRunPrimitiveCpu),
     Image(ImagePrimitiveCpu),
     Border(BorderPrimitiveCpu),
-    AlignedGradient(GradientPrimitiveCpu),
-    AngleGradient(GradientPrimitiveCpu),
     Picture(PicturePrimitive),
     Brush(BrushPrimitive),
 }
 
 pub struct PrimitiveStore {
     /// CPU side information only.
     pub cpu_brushes: Vec<BrushPrimitive>,
     pub cpu_text_runs: Vec<TextRunPrimitiveCpu>,
     pub cpu_pictures: Vec<PicturePrimitive>,
     pub cpu_images: Vec<ImagePrimitiveCpu>,
-    pub cpu_gradients: Vec<GradientPrimitiveCpu>,
     pub cpu_metadata: Vec<PrimitiveMetadata>,
     pub cpu_borders: Vec<BorderPrimitiveCpu>,
 }
 
 impl PrimitiveStore {
     pub fn new() -> PrimitiveStore {
         PrimitiveStore {
             cpu_metadata: Vec::new(),
             cpu_brushes: Vec::new(),
             cpu_text_runs: Vec::new(),
             cpu_pictures: Vec::new(),
             cpu_images: Vec::new(),
-            cpu_gradients: Vec::new(),
             cpu_borders: Vec::new(),
         }
     }
 
     pub fn recycle(self) -> Self {
         PrimitiveStore {
             cpu_metadata: recycle_vec(self.cpu_metadata),
             cpu_brushes: recycle_vec(self.cpu_brushes),
             cpu_text_runs: recycle_vec(self.cpu_text_runs),
             cpu_pictures: recycle_vec(self.cpu_pictures),
             cpu_images: recycle_vec(self.cpu_images),
-            cpu_gradients: recycle_vec(self.cpu_gradients),
             cpu_borders: recycle_vec(self.cpu_borders),
         }
     }
 
     pub fn add_primitive(
         &mut self,
         local_rect: &LayerRect,
         local_clip_rect: &LayerRect,
@@ -1010,16 +998,17 @@ impl PrimitiveStore {
                 let opacity = match brush.kind {
                     BrushKind::Clear => PrimitiveOpacity::translucent(),
                     BrushKind::Solid { ref color } => PrimitiveOpacity::from_alpha(color.a),
                     BrushKind::Mask { .. } => PrimitiveOpacity::translucent(),
                     BrushKind::Line { .. } => PrimitiveOpacity::translucent(),
                     BrushKind::Image { .. } => PrimitiveOpacity::translucent(),
                     BrushKind::YuvImage { .. } => PrimitiveOpacity::opaque(),
                     BrushKind::RadialGradient { .. } => PrimitiveOpacity::translucent(),
+                    BrushKind::LinearGradient { .. } => PrimitiveOpacity::translucent(),
                     BrushKind::Picture => {
                         // TODO(gw): This is not currently used. In the future
                         //           we should detect opaque pictures.
                         unreachable!();
                     }
                 };
 
                 let metadata = PrimitiveMetadata {
@@ -1072,39 +1061,16 @@ impl PrimitiveStore {
                     prim_kind: PrimitiveKind::Border,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_borders.len()),
                     ..base_metadata
                 };
 
                 self.cpu_borders.push(border_cpu);
                 metadata
             }
-            PrimitiveContainer::AlignedGradient(gradient_cpu) => {
-                let metadata = PrimitiveMetadata {
-                    opacity: PrimitiveOpacity::translucent(),
-                    prim_kind: PrimitiveKind::AlignedGradient,
-                    cpu_prim_index: SpecificPrimitiveIndex(self.cpu_gradients.len()),
-                    ..base_metadata
-                };
-
-                self.cpu_gradients.push(gradient_cpu);
-                metadata
-            }
-            PrimitiveContainer::AngleGradient(gradient_cpu) => {
-                let metadata = PrimitiveMetadata {
-                    // TODO: calculate if the gradient is actually opaque
-                    opacity: PrimitiveOpacity::translucent(),
-                    prim_kind: PrimitiveKind::AngleGradient,
-                    cpu_prim_index: SpecificPrimitiveIndex(self.cpu_gradients.len()),
-                    ..base_metadata
-                };
-
-                self.cpu_gradients.push(gradient_cpu);
-                metadata
-            }
         };
 
         self.cpu_metadata.push(metadata);
 
         PrimitiveIndex(prim_index)
     }
 
     pub fn get_metadata(&self, index: PrimitiveIndex) -> &PrimitiveMetadata {
@@ -1141,17 +1107,17 @@ impl PrimitiveStore {
                         frame_context,
                         frame_state,
                     );
             }
             PrimitiveKind::TextRun => {
                 let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
                 // The transform only makes sense for screen space rasterization
                 let transform = if pic_context.draw_text_transformed {
-                    Some(&prim_run_context.scroll_node.world_content_transform)
+                    Some(prim_run_context.scroll_node.world_content_transform.into())
                 } else {
                     None
                 };
                 text.prepare_for_render(
                     frame_state.resource_cache,
                     frame_context.device_pixel_scale,
                     transform,
                     pic_context.display_list,
@@ -1299,37 +1265,49 @@ impl PrimitiveStore {
                                     key: yuv_key[channel],
                                     rendering: image_rendering,
                                     tile: None,
                                 },
                                 frame_state.gpu_cache,
                             );
                         }
                     }
-                    BrushKind::RadialGradient { ref mut stops_handle, stops_range, .. } => {
+                    BrushKind::RadialGradient { gradient_index, stops_range, .. } => {
+                        let stops_handle = &mut frame_state.cached_gradients[gradient_index.0].handle;
                         if let Some(mut request) = frame_state.gpu_cache.request(stops_handle) {
                             let gradient_builder = GradientGpuBlockBuilder::new(
                                 stops_range,
                                 pic_context.display_list,
                             );
                             gradient_builder.build(
                                 false,
                                 &mut request,
                             );
                         }
                     }
+                    BrushKind::LinearGradient { gradient_index, stops_range, reverse_stops, .. } => {
+                        let stops_handle = &mut frame_state.cached_gradients[gradient_index.0].handle;
+                        if let Some(mut request) = frame_state.gpu_cache.request(stops_handle) {
+                            let gradient_builder = GradientGpuBlockBuilder::new(
+                                stops_range,
+                                pic_context.display_list,
+                            );
+                            gradient_builder.build(
+                                reverse_stops,
+                                &mut request,
+                            );
+                        }
+                    }
                     BrushKind::Mask { .. } |
                     BrushKind::Solid { .. } |
                     BrushKind::Clear |
                     BrushKind::Line { .. } |
                     BrushKind::Picture { .. } => {}
                 }
             }
-            PrimitiveKind::AlignedGradient |
-            PrimitiveKind::AngleGradient => {}
         }
 
         // Mark this GPU resource as required for this frame.
         if let Some(mut request) = frame_state.gpu_cache.request(&mut metadata.gpu_location) {
             // has to match VECS_PER_BRUSH_PRIM
             request.push(metadata.local_rect);
             request.push(metadata.local_clip_rect);
 
@@ -1337,30 +1315,16 @@ impl PrimitiveStore {
                 PrimitiveKind::Border => {
                     let border = &self.cpu_borders[metadata.cpu_prim_index.0];
                     border.write_gpu_blocks(request);
                 }
                 PrimitiveKind::Image => {
                     let image = &self.cpu_images[metadata.cpu_prim_index.0];
                     image.write_gpu_blocks(request);
                 }
-                PrimitiveKind::AlignedGradient => {
-                    let gradient = &self.cpu_gradients[metadata.cpu_prim_index.0];
-                    metadata.opacity = gradient.build_gpu_blocks_for_aligned(
-                        pic_context.display_list,
-                        request,
-                    );
-                }
-                PrimitiveKind::AngleGradient => {
-                    let gradient = &self.cpu_gradients[metadata.cpu_prim_index.0];
-                    gradient.build_gpu_blocks_for_angle_radial(
-                        pic_context.display_list,
-                        request,
-                    );
-                }
                 PrimitiveKind::TextRun => {
                     let text = &self.cpu_text_runs[metadata.cpu_prim_index.0];
                     text.write_gpu_blocks(&mut request);
                 }
                 PrimitiveKind::Picture => {
                     let pic = &self.cpu_pictures[metadata.cpu_prim_index.0];
                     pic.write_gpu_blocks(&mut request);
 
@@ -1466,24 +1430,24 @@ impl PrimitiveStore {
 
                 // If the scroll node transforms are different between the clip
                 // node and the primitive, we need to get the clip rect in the
                 // local space of the primitive, in order to generate correct
                 // local segments.
                 let local_clip_rect = if clip_item.scroll_node_data_index == prim_run_context.scroll_node.node_data_index {
                     local_clip_rect
                 } else {
-                    let clip_transform_data = &frame_context
-                        .node_data[clip_item.scroll_node_data_index.0 as usize];
+                    let clip_transform = frame_context
+                        .node_data[clip_item.scroll_node_data_index.0 as usize]
+                        .transform;
                     let prim_transform = &prim_run_context.scroll_node.world_content_transform;
-
                     let relative_transform = prim_transform
                         .inverse()
-                        .unwrap_or(WorldToLayerTransform::identity())
-                        .pre_mul(&clip_transform_data.transform);
+                        .unwrap_or(WorldToLayerFastTransform::identity())
+                        .pre_mul(&clip_transform.into());
 
                     relative_transform.transform_rect(&local_clip_rect)
                 };
 
                 segment_builder.push_rect(local_clip_rect, radius, mode);
             }
         }
 
@@ -2056,18 +2020,17 @@ fn get_local_clip_rect_for_nodes(
                 Some(combined_rect) =>
                     combined_rect.intersection(&node.local_clip_rect).unwrap_or_else(LayerRect::zero),
                 None => node.local_clip_rect,
             })
         }
     );
 
     match local_rect {
-        Some(local_rect) =>
-            Some(scroll_node.coordinate_system_relative_transform.unapply(&local_rect)),
+        Some(local_rect) => scroll_node.coordinate_system_relative_transform.unapply(&local_rect),
         None => None,
     }
 }
 
 impl<'a> GpuDataRequest<'a> {
     // Write the GPU cache data for an individual segment.
     // TODO(gw): The second block is currently unused. In
     //           the future, it will be used to store a
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -1002,16 +1002,19 @@ impl RenderBackend {
                     doc.render(
                         &mut self.resource_cache,
                         &mut self.gpu_cache,
                         &mut profile_counters.resources,
                     )
                 }
             };
 
+            let msg_update = ResultMsg::UpdateGpuCache(self.gpu_cache.extract_updates());
+            self.result_tx.send(msg_update).unwrap();
+
             let msg_publish = ResultMsg::PublishDocument(
                 id,
                 render_doc,
                 self.resource_cache.pending_updates(),
                 profile_counters.clone(),
             );
             self.result_tx.send(msg_publish).unwrap();
             profile_counters.reset();
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -78,16 +78,20 @@ pub const MAX_VERTEX_TEXTURE_WIDTH: usiz
 /// Enabling this toggle would force the GPU cache scattered texture to
 /// be resized every frame, which enables GPU debuggers to see if this
 /// is performed correctly.
 const GPU_CACHE_RESIZE_TEST: bool = false;
 
 /// Number of GPU blocks per UV rectangle provided for an image.
 pub const BLOCKS_PER_UV_RECT: usize = 2;
 
+const GPU_TAG_BRUSH_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag {
+    label: "B_LinearGradient",
+    color: debug_colors::POWDERBLUE,
+};
 const GPU_TAG_BRUSH_RADIAL_GRADIENT: GpuProfileTag = GpuProfileTag {
     label: "B_RadialGradient",
     color: debug_colors::LIGHTPINK,
 };
 const GPU_TAG_BRUSH_YUV_IMAGE: GpuProfileTag = GpuProfileTag {
     label: "B_YuvImage",
     color: debug_colors::DARKGREEN,
 };
@@ -146,24 +150,16 @@ const GPU_TAG_PRIM_HW_COMPOSITE: GpuProf
 const GPU_TAG_PRIM_SPLIT_COMPOSITE: GpuProfileTag = GpuProfileTag {
     label: "SplitComposite",
     color: debug_colors::DARKBLUE,
 };
 const GPU_TAG_PRIM_TEXT_RUN: GpuProfileTag = GpuProfileTag {
     label: "TextRun",
     color: debug_colors::BLUE,
 };
-const GPU_TAG_PRIM_GRADIENT: GpuProfileTag = GpuProfileTag {
-    label: "Gradient",
-    color: debug_colors::YELLOW,
-};
-const GPU_TAG_PRIM_ANGLE_GRADIENT: GpuProfileTag = GpuProfileTag {
-    label: "AngleGradient",
-    color: debug_colors::POWDERBLUE,
-};
 const GPU_TAG_PRIM_BORDER_CORNER: GpuProfileTag = GpuProfileTag {
     label: "BorderCorner",
     color: debug_colors::DARKSLATEGREY,
 };
 const GPU_TAG_PRIM_BORDER_EDGE: GpuProfileTag = GpuProfileTag {
     label: "BorderEdge",
     color: debug_colors::LAVENDER,
 };
@@ -195,31 +191,27 @@ impl TransformBatchKind {
         match *self {
             TransformBatchKind::TextRun(..) => "TextRun",
             TransformBatchKind::Image(image_buffer_kind, ..) => match image_buffer_kind {
                 ImageBufferKind::Texture2D => "Image (2D)",
                 ImageBufferKind::TextureRect => "Image (Rect)",
                 ImageBufferKind::TextureExternal => "Image (External)",
                 ImageBufferKind::Texture2DArray => "Image (Array)",
             },
-            TransformBatchKind::AlignedGradient => "AlignedGradient",
-            TransformBatchKind::AngleGradient => "AngleGradient",
             TransformBatchKind::BorderCorner => "BorderCorner",
             TransformBatchKind::BorderEdge => "BorderEdge",
         }
     }
 
     fn gpu_sampler_tag(&self) -> GpuProfileTag {
         match *self {
             TransformBatchKind::TextRun(..) => GPU_TAG_PRIM_TEXT_RUN,
             TransformBatchKind::Image(..) => GPU_TAG_PRIM_IMAGE,
             TransformBatchKind::BorderCorner => GPU_TAG_PRIM_BORDER_CORNER,
             TransformBatchKind::BorderEdge => GPU_TAG_PRIM_BORDER_EDGE,
-            TransformBatchKind::AlignedGradient => GPU_TAG_PRIM_GRADIENT,
-            TransformBatchKind::AngleGradient => GPU_TAG_PRIM_ANGLE_GRADIENT,
         }
     }
 }
 
 impl BatchKind {
     #[cfg(feature = "debugger")]
     fn debug_name(&self) -> &'static str {
         match *self {
@@ -230,16 +222,17 @@ impl BatchKind {
                     BrushBatchKind::Picture(..) => "Brush (Picture)",
                     BrushBatchKind::Solid => "Brush (Solid)",
                     BrushBatchKind::Line => "Brush (Line)",
                     BrushBatchKind::Image(..) => "Brush (Image)",
                     BrushBatchKind::Blend => "Brush (Blend)",
                     BrushBatchKind::MixBlend { .. } => "Brush (Composite)",
                     BrushBatchKind::YuvImage(..) => "Brush (YuvImage)",
                     BrushBatchKind::RadialGradient => "Brush (RadialGradient)",
+                    BrushBatchKind::LinearGradient => "Brush (LinearGradient)",
                 }
             }
             BatchKind::Transformable(_, batch_kind) => batch_kind.debug_name(),
         }
     }
 
     fn gpu_sampler_tag(&self) -> GpuProfileTag {
         match *self {
@@ -250,16 +243,17 @@ impl BatchKind {
                     BrushBatchKind::Picture(..) => GPU_TAG_BRUSH_PICTURE,
                     BrushBatchKind::Solid => GPU_TAG_BRUSH_SOLID,
                     BrushBatchKind::Line => GPU_TAG_BRUSH_LINE,
                     BrushBatchKind::Image(..) => GPU_TAG_BRUSH_IMAGE,
                     BrushBatchKind::Blend => GPU_TAG_BRUSH_BLEND,
                     BrushBatchKind::MixBlend { .. } => GPU_TAG_BRUSH_MIXBLEND,
                     BrushBatchKind::YuvImage(..) => GPU_TAG_BRUSH_YUV_IMAGE,
                     BrushBatchKind::RadialGradient => GPU_TAG_BRUSH_RADIAL_GRADIENT,
+                    BrushBatchKind::LinearGradient => GPU_TAG_BRUSH_LINEAR_GRADIENT,
                 }
             }
             BatchKind::Transformable(_, batch_kind) => batch_kind.gpu_sampler_tag(),
         }
     }
 }
 
 bitflags! {
@@ -1613,16 +1607,17 @@ pub struct Renderer {
     brush_picture_a8: BrushShader,
     brush_solid: BrushShader,
     brush_line: BrushShader,
     brush_image: Vec<Option<BrushShader>>,
     brush_blend: BrushShader,
     brush_mix_blend: BrushShader,
     brush_yuv_image: Vec<Option<BrushShader>>,
     brush_radial_gradient: BrushShader,
+    brush_linear_gradient: BrushShader,
 
     /// These are "cache clip shaders". These shaders are used to
     /// draw clip instances into the cached clip mask. The results
     /// of these shaders are also used by the primitive shaders.
     cs_clip_rectangle: LazilyCompiledShader,
     cs_clip_image: LazilyCompiledShader,
     cs_clip_border: LazilyCompiledShader,
 
@@ -1633,18 +1628,16 @@ pub struct Renderer {
     // shadow primitive shader stretches the box shadow cache
     // output, and the cache_image shader blits the results of
     // a cache shader (e.g. blur) to the screen.
     ps_text_run: TextShader,
     ps_text_run_dual_source: TextShader,
     ps_image: Vec<Option<PrimitiveShader>>,
     ps_border_corner: PrimitiveShader,
     ps_border_edge: PrimitiveShader,
-    ps_gradient: PrimitiveShader,
-    ps_angle_gradient: PrimitiveShader,
 
     ps_hw_composite: LazilyCompiledShader,
     ps_split_composite: LazilyCompiledShader,
 
     max_texture_size: u32,
 
     max_recorded_profiles: usize,
     clear_color: Option<ColorF>,
@@ -1879,16 +1872,27 @@ impl Renderer {
                              if options.enable_dithering {
                                 &dithering_feature
                              } else {
                                 &[]
                              },
                              options.precache_shaders)
         };
 
+        let brush_linear_gradient = try!{
+            BrushShader::new("brush_linear_gradient",
+                             &mut device,
+                             if options.enable_dithering {
+                                &dithering_feature
+                             } else {
+                                &[]
+                             },
+                             options.precache_shaders)
+        };
+
         let cs_blur_a8 = try!{
             LazilyCompiledShader::new(ShaderKind::Cache(VertexArrayKind::Blur),
                                      "cs_blur",
                                       &["ALPHA_TARGET"],
                                       &mut device,
                                       options.precache_shaders)
         };
 
@@ -2025,38 +2029,16 @@ impl Renderer {
 
         let ps_border_edge = try!{
             PrimitiveShader::new("ps_border_edge",
                                  &mut device,
                                  &[],
                                  options.precache_shaders)
         };
 
-        let ps_gradient = try!{
-            PrimitiveShader::new("ps_gradient",
-                                 &mut device,
-                                 if options.enable_dithering {
-                                    &dithering_feature
-                                 } else {
-                                    &[]
-                                 },
-                                 options.precache_shaders)
-        };
-
-        let ps_angle_gradient = try!{
-            PrimitiveShader::new("ps_angle_gradient",
-                                 &mut device,
-                                 if options.enable_dithering {
-                                    &dithering_feature
-                                 } else {
-                                    &[]
-                                 },
-                                 options.precache_shaders)
-        };
-
         let ps_hw_composite = try!{
             LazilyCompiledShader::new(ShaderKind::Primitive,
                                      "ps_hardware_composite",
                                      &[],
                                      &mut device,
                                      options.precache_shaders)
         };
 
@@ -2298,26 +2280,25 @@ impl Renderer {
             brush_picture_a8,
             brush_solid,
             brush_line,
             brush_image,
             brush_blend,
             brush_mix_blend,
             brush_yuv_image,
             brush_radial_gradient,
+            brush_linear_gradient,
             cs_clip_rectangle,
             cs_clip_border,
             cs_clip_image,
             ps_text_run,
             ps_text_run_dual_source,
             ps_image,
             ps_border_corner,
             ps_border_edge,
-            ps_gradient,
-            ps_angle_gradient,
             ps_hw_composite,
             ps_split_composite,
             debug: debug_renderer,
             debug_flags,
             backend_profile_counters: BackendProfileCounters::new(),
             profile_counters: RendererProfileCounters::new(),
             profiler: Profiler::new(),
             max_texture_size: max_texture_size,
@@ -2887,17 +2868,19 @@ impl Renderer {
             #[cfg(feature = "replay")]
             self.texture_resolver.external_images.extend(
                 self.owned_external_images.iter().map(|(key, value)| (*key, value.clone()))
             );
 
             for &mut (_, RenderedDocument { ref mut frame, .. }) in &mut active_documents {
                 frame.profile_counters.reset_targets();
                 self.prepare_gpu_cache(frame);
-                assert!(frame.gpu_cache_frame_id <= self.gpu_cache_frame_id);
+                assert!(frame.gpu_cache_frame_id <= self.gpu_cache_frame_id,
+                    "Received frame depends on a later GPU cache epoch ({:?}) than one we received last via `UpdateGpuCache` ({:?})",
+                    frame.gpu_cache_frame_id, self.gpu_cache_frame_id);
 
                 self.draw_tile_frame(
                     frame,
                     framebuffer_size,
                     clear_depth_value.is_some(),
                     cpu_frame_id,
                     &mut stats
                 );
@@ -3267,16 +3250,25 @@ impl Renderer {
                         self.brush_radial_gradient.bind(
                             &mut self.device,
                             key.blend_mode,
                             projection,
                             0,
                             &mut self.renderer_errors,
                         );
                     }
+                    BrushBatchKind::LinearGradient => {
+                        self.brush_linear_gradient.bind(
+                            &mut self.device,
+                            key.blend_mode,
+                            projection,
+                            0,
+                            &mut self.renderer_errors,
+                        );
+                    }
                     BrushBatchKind::YuvImage(image_buffer_kind, format, color_space) => {
                         let shader_index =
                             Renderer::get_yuv_shader_index(image_buffer_kind, format, color_space);
                         self.brush_yuv_image[shader_index]
                             .as_mut()
                             .expect("Unsupported YUV shader kind")
                             .bind(
                                 &mut self.device,
@@ -3317,34 +3309,16 @@ impl Renderer {
                     self.ps_border_edge.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
                         0,
                         &mut self.renderer_errors,
                     );
                 }
-                TransformBatchKind::AlignedGradient => {
-                    self.ps_gradient.bind(
-                        &mut self.device,
-                        transform_kind,
-                        projection,
-                        0,
-                        &mut self.renderer_errors,
-                    );
-                }
-                TransformBatchKind::AngleGradient => {
-                    self.ps_angle_gradient.bind(
-                        &mut self.device,
-                        transform_kind,
-                        projection,
-                        0,
-                        &mut self.renderer_errors,
-                    );
-                }
             },
         };
 
         // Handle special case readback for composites.
         if let BatchKind::Brush(BrushBatchKind::MixBlend { task_id, source_id, backdrop_id }) = key.kind {
             if scissor_rect.is_some() {
                 self.device.disable_scissor();
             }
@@ -4736,16 +4710,17 @@ impl Renderer {
         self.brush_picture_rgba8.deinit(&mut self.device);
         self.brush_picture_rgba8_alpha_mask.deinit(&mut self.device);
         self.brush_picture_a8.deinit(&mut self.device);
         self.brush_solid.deinit(&mut self.device);
         self.brush_line.deinit(&mut self.device);
         self.brush_blend.deinit(&mut self.device);
         self.brush_mix_blend.deinit(&mut self.device);
         self.brush_radial_gradient.deinit(&mut self.device);
+        self.brush_linear_gradient.deinit(&mut self.device);
         self.cs_clip_rectangle.deinit(&mut self.device);
         self.cs_clip_image.deinit(&mut self.device);
         self.cs_clip_border.deinit(&mut self.device);
         self.ps_text_run.deinit(&mut self.device);
         self.ps_text_run_dual_source.deinit(&mut self.device);
         for shader in self.brush_image {
             if let Some(shader) = shader {
                 shader.deinit(&mut self.device);
@@ -4761,18 +4736,16 @@ impl Renderer {
                 shader.deinit(&mut self.device);
             }
         }
         for (_, target) in self.output_targets {
             self.device.delete_fbo(target.fbo_id);
         }
         self.ps_border_corner.deinit(&mut self.device);
         self.ps_border_edge.deinit(&mut self.device);
-        self.ps_gradient.deinit(&mut self.device);
-        self.ps_angle_gradient.deinit(&mut self.device);
         self.ps_hw_composite.deinit(&mut self.device);
         self.ps_split_composite.deinit(&mut self.device);
         #[cfg(feature = "capture")]
         self.device.delete_fbo(self.read_fbo);
         #[cfg(feature = "replay")]
         for (_, ext) in self.owned_external_images {
             self.device.delete_external_texture(ext);
         }
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -11,17 +11,17 @@ use clip::{ClipStore};
 use clip_scroll_tree::{ClipScrollTree};
 use device::{FrameId, Texture};
 use gpu_cache::{GpuCache};
 use gpu_types::{BlurDirection, BlurInstance, BrushFlags, BrushInstance, ClipChainRectIndex};
 use gpu_types::{ClipScrollNodeData, ClipScrollNodeIndex};
 use gpu_types::{PrimitiveInstance};
 use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
 use picture::{PictureKind};
-use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveStore};
+use prim_store::{CachedGradient, PrimitiveIndex, PrimitiveKind, PrimitiveStore};
 use prim_store::{BrushMaskKind, BrushKind, DeferredResolve, EdgeAaSegmentMask};
 use profiler::FrameProfileCounters;
 use render_task::{BlitSource, RenderTaskAddress, RenderTaskId, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, RenderTaskLocation, RenderTaskTree};
 use resource_cache::ResourceCache;
 use std::{cmp, usize, f32, i32};
 use texture_allocator::GuillotineAllocator;
 
@@ -41,16 +41,17 @@ pub struct RenderTargetIndex(pub usize);
 
 pub struct RenderTargetContext<'a> {
     pub device_pixel_scale: DevicePixelScale,
     pub prim_store: &'a PrimitiveStore,
     pub resource_cache: &'a ResourceCache,
     pub clip_scroll_tree: &'a ClipScrollTree,
     pub use_dual_source_blending: bool,
     pub node_data: &'a [ClipScrollNodeData],
+    pub cached_gradients: &'a [CachedGradient],
 }
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 struct TextureAllocator {
     // TODO(gw): Replace this with a simpler allocator for
     // render target allocation - this use case doesn't need
     // to deal with coalescing etc that the general texture
@@ -594,16 +595,17 @@ impl RenderTarget for AlphaRenderTarget 
                                         let brush = &ctx.prim_store.cpu_brushes[sub_metadata.cpu_prim_index.0];
                                         let batch = match brush.kind {
                                             BrushKind::Solid { .. } |
                                             BrushKind::Clear |
                                             BrushKind::Picture |
                                             BrushKind::Line { .. } |
                                             BrushKind::YuvImage { .. } |
                                             BrushKind::RadialGradient { .. } |
+                                            BrushKind::LinearGradient { .. } |
                                             BrushKind::Image { .. } => {
                                                 unreachable!("bug: unexpected brush here");
                                             }
                                             BrushKind::Mask { ref kind, .. } => {
                                                 match *kind {
                                                     BrushMaskKind::Corner(..) => &mut self.brush_mask_corners,
                                                     BrushMaskKind::RoundedRect(..) => &mut self.brush_mask_rounded_rects,
                                                 }
--- a/gfx/webrender/src/util.rs
+++ b/gfx/webrender/src/util.rs
@@ -1,32 +1,33 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, DeviceIntPoint, DeviceIntRect, DeviceIntSize, DevicePixelScale};
-use api::{DevicePoint, DeviceRect, DeviceSize, LayerPoint, LayerRect, LayerSize};
-use api::{LayerToWorldTransform, LayerTransform, LayerVector2D, WorldRect};
-use euclid::{Point2D, Rect, Size2D, TypedPoint2D, TypedRect, TypedSize2D, TypedTransform2D};
-use euclid::TypedTransform3D;
+use api::{DevicePoint, DeviceRect, DeviceSize, LayerPixel, LayerPoint, LayerRect, LayerSize};
+use api::{LayoutPixel, WorldPixel, WorldRect};
+use euclid::{Point2D, Rect, Size2D, TypedPoint2D, TypedPoint3D, TypedRect, TypedSize2D};
+use euclid::{TypedTransform2D, TypedTransform3D, TypedVector2D};
 use num_traits::Zero;
 use std::{i32, f32};
 
 // Matches the definition of SK_ScalarNearlyZero in Skia.
 const NEARLY_ZERO: f32 = 1.0 / 4096.0;
 
 // TODO: Implement these in euclid!
 pub trait MatrixHelpers<Src, Dst> {
     fn preserves_2d_axis_alignment(&self) -> bool;
     fn has_perspective_component(&self) -> bool;
     fn has_2d_inverse(&self) -> bool;
     fn inverse_project(&self, target: &TypedPoint2D<f32, Dst>) -> Option<TypedPoint2D<f32, Src>>;
     fn inverse_rect_footprint(&self, rect: &TypedRect<f32, Dst>) -> TypedRect<f32, Src>;
     fn transform_kind(&self) -> TransformedRectKind;
     fn is_simple_translation(&self) -> bool;
+    fn is_simple_2d_translation(&self) -> bool;
 }
 
 impl<Src, Dst> MatrixHelpers<Src, Dst> for TypedTransform3D<f32, Src, Dst> {
     // A port of the preserves2dAxisAlignment function in Skia.
     // Defined in the SkMatrix44 class.
     fn preserves_2d_axis_alignment(&self) -> bool {
         if self.m14 != 0.0 || self.m24 != 0.0 {
             return false;
@@ -100,16 +101,24 @@ impl<Src, Dst> MatrixHelpers<Src, Dst> f
             return false;
         }
         self.m12.abs() < NEARLY_ZERO && self.m13.abs() < NEARLY_ZERO &&
             self.m14.abs() < NEARLY_ZERO && self.m21.abs() < NEARLY_ZERO &&
             self.m23.abs() < NEARLY_ZERO && self.m24.abs() < NEARLY_ZERO &&
             self.m31.abs() < NEARLY_ZERO && self.m32.abs() < NEARLY_ZERO &&
             self.m34.abs() < NEARLY_ZERO
     }
+
+    fn is_simple_2d_translation(&self) -> bool {
+        if !self.is_simple_translation() {
+            return false;
+        }
+
+        self.m43.abs() < NEARLY_ZERO
+    }
 }
 
 pub trait RectHelpers<U>
 where
     Self: Sized,
 {
     fn from_floats(x0: f32, y0: f32, x1: f32, y1: f32) -> Self;
     fn is_well_formed_and_nonempty(&self) -> bool;
@@ -140,17 +149,17 @@ pub fn rect_from_points_f(x0: f32, y0: f
     Rect::new(Point2D::new(x0, y0), Size2D::new(x1 - x0, y1 - y0))
 }
 
 pub fn lerp(a: f32, b: f32, t: f32) -> f32 {
     (b - a) * t + a
 }
 
 pub fn calculate_screen_bounding_rect(
-    transform: &LayerToWorldTransform,
+    transform: &LayerToWorldFastTransform,
     rect: &LayerRect,
     device_pixel_scale: DevicePixelScale,
 ) -> DeviceIntRect {
     let points = [
         transform.transform_point2d(&rect.origin),
         transform.transform_point2d(&rect.top_right()),
         transform.transform_point2d(&rect.bottom_left()),
         transform.transform_point2d(&rect.bottom_right()),
@@ -329,76 +338,221 @@ impl MaxRect for DeviceRect {
             DevicePoint::new(-MAX_COORD, -MAX_COORD),
             DeviceSize::new(2.0 * MAX_COORD, 2.0 * MAX_COORD),
         )
     }
 }
 
 /// An enum that tries to avoid expensive transformation matrix calculations
 /// when possible when dealing with non-perspective axis-aligned transformations.
-#[derive(Debug, Clone)]
-pub enum TransformOrOffset {
+#[derive(Debug, Clone, Copy)]
+pub enum FastTransform<Src, Dst> {
     /// A simple offset, which can be used without doing any matrix math.
-    Offset(LayerVector2D),
+    Offset(TypedVector2D<f32, Src>),
 
-    /// A transformation with an inverse. If the inverse isn't present, this isn't a 2D
-    /// transformation, which means we need to fall back to using inverse_rect_footprint.
-    /// Since this operation is so expensive, we avoid it for the 2D case.
+    /// A general transformation, its cached inverse (`None` if not invertible), and whether it is 2D.
     Transform {
-        transform: LayerTransform,
-        inverse: Option<LayerTransform>,
-    }
+        transform: TypedTransform3D<f32, Src, Dst>,
+        inverse: Option<TypedTransform3D<f32, Dst, Src>>,
+        is_2d: bool,
+    },
 }
 
-impl TransformOrOffset {
-    pub fn zero() -> TransformOrOffset {
-        TransformOrOffset::Offset(LayerVector2D::zero())
+impl<Src, Dst> FastTransform<Src, Dst> {
+    pub fn identity() -> Self {
+        FastTransform::Offset(TypedVector2D::zero())
+    }
+
+    pub fn with_vector(offset: TypedVector2D<f32, Src>) -> Self {
+        FastTransform::Offset(offset)
     }
 
-    fn new_transform(transform: LayerTransform) -> TransformOrOffset {
-        if transform.is_2d() {
-            TransformOrOffset::Transform {
-                transform,
-                inverse: Some(transform.inverse().expect("Expected invertible matrix."))
-            }
-        } else {
-            TransformOrOffset::Transform { transform, inverse: None }
+    #[inline(always)]
+    pub fn with_transform(transform: TypedTransform3D<f32, Src, Dst>) -> Self {
+        if transform.is_simple_2d_translation() {
+            return FastTransform::Offset(TypedVector2D::new(transform.m41, transform.m42));
+        }
+        let inverse = transform.inverse();
+        let is_2d = transform.is_2d();
+        FastTransform::Transform { transform, inverse, is_2d}
+    }
+
+    pub fn to_transform(&self) -> TypedTransform3D<f32, Src, Dst> {
+        match *self {
+            FastTransform::Offset(offset) =>
+                TypedTransform3D::create_translation(offset.x, offset.y, 0.0),
+            FastTransform::Transform { transform, .. } => transform
         }
     }
 
-    pub fn apply(&self, rect: &LayerRect) -> LayerRect {
+    pub fn is_invertible(&self) -> bool {
         match *self {
-            TransformOrOffset::Offset(offset) => rect.translate(&offset),
-            TransformOrOffset::Transform {transform, .. } => transform.transform_rect(&rect),
+            FastTransform::Offset(..) => true,
+            FastTransform::Transform { ref inverse, .. } => inverse.is_some(),
         }
     }
 
-    pub fn unapply(&self, rect: &LayerRect) -> LayerRect {
-        match *self {
-            TransformOrOffset::Offset(offset) => rect.translate(&-offset),
-            TransformOrOffset::Transform { inverse: Some(inverse), .. }  =>
-                inverse.transform_rect(&rect),
-            TransformOrOffset::Transform { transform, inverse: None } =>
-                transform.inverse_rect_footprint(rect),
-        }
-    }
-
-    pub fn offset(&self, new_offset: LayerVector2D) -> TransformOrOffset {
-        match *self {
-            TransformOrOffset::Offset(offset) => TransformOrOffset::Offset(offset + new_offset),
-            TransformOrOffset::Transform { transform, .. } => {
-                let transform = transform.pre_translate(new_offset.to_3d());
-                TransformOrOffset::new_transform(transform)
+    #[inline(always)]
+    pub fn pre_mul<NewSrc>(
+        &self,
+        other: &FastTransform<NewSrc, Src>
+    ) -> FastTransform<NewSrc, Dst> {
+        match (self, other) {
+            (&FastTransform::Offset(ref offset), &FastTransform::Offset(ref other_offset)) => {
+                let offset = TypedVector2D::from_untyped(&offset.to_untyped());
+                FastTransform::Offset((offset + *other_offset))
+            }
+            _ => {
+                let new_transform = self.to_transform().pre_mul(&other.to_transform());
+                FastTransform::with_transform(new_transform)
             }
         }
     }
 
-    pub fn update(&self, transform: LayerTransform) -> Option<TransformOrOffset> {
-        if transform.is_simple_translation() {
-            let offset = LayerVector2D::new(transform.m41, transform.m42);
-            Some(self.offset(offset))
+    #[inline(always)]
+    pub fn pre_translate(&self, other_offset: &TypedVector2D<f32, Src>) -> Self {
+        match self {
+            &FastTransform::Offset(ref offset) =>
+                return FastTransform::Offset(*offset + *other_offset),
+            &FastTransform::Transform { transform, .. } =>
+                FastTransform::with_transform(transform.pre_translate(other_offset.to_3d()))
+        }
+    }
+
+    #[inline(always)]
+    pub fn preserves_2d_axis_alignment(&self) -> bool {
+        match *self {
+            FastTransform::Offset(..) => true,
+            FastTransform::Transform { ref transform, .. } =>
+                transform.preserves_2d_axis_alignment(),
+        }
+    }
+
+    #[inline(always)]
+    pub fn has_perspective_component(&self) -> bool {
+        match *self {
+            FastTransform::Offset(..) => false,
+            FastTransform::Transform { ref transform, .. } => transform.has_perspective_component(),
+        }
+    }
+
+    #[inline(always)]
+    pub fn is_backface_visible(&self) -> bool {
+        match *self {
+            FastTransform::Offset(..) => false,
+            FastTransform::Transform { ref transform, .. } => transform.is_backface_visible(),
+        }
+    }
+
+    #[inline(always)]
+    pub fn transform_point2d(&self, point: &TypedPoint2D<f32, Src>) -> TypedPoint2D<f32, Dst> {
+        match *self {
+            FastTransform::Offset(offset) => {
+                let new_point = *point + offset;
+                TypedPoint2D::from_untyped(&new_point.to_untyped())
+            }
+            FastTransform::Transform { ref transform, .. } => transform.transform_point2d(point),
+        }
+    }
+
+    #[inline(always)]
+    pub fn transform_point3d(&self, point: &TypedPoint3D<f32, Src>) -> TypedPoint3D<f32, Dst> {
+        match *self {
+            FastTransform::Offset(offset) =>
+                TypedPoint3D::new(point.x + offset.x, point.y + offset.y, point.z),
+            FastTransform::Transform { ref transform, .. } => transform.transform_point3d(point),
+        }
+    }
+
+    #[inline(always)]
+    pub fn transform_rect(&self, rect: &TypedRect<f32, Src>) -> TypedRect<f32, Dst> {
+        match *self {
+            FastTransform::Offset(offset) =>
+                TypedRect::from_untyped(&rect.to_untyped().translate(&offset.to_untyped())),
+            FastTransform::Transform { ref transform, .. } => transform.transform_rect(rect),
+        }
+    }
+
+    pub fn unapply(&self, rect: &TypedRect<f32, Dst>) -> Option<TypedRect<f32, Src>> {
+        match *self {
+            FastTransform::Offset(offset) =>
+                Some(TypedRect::from_untyped(&rect.to_untyped().translate(&-offset.to_untyped()))),
+            FastTransform::Transform { inverse: Some(ref inverse), is_2d: true, .. }  =>
+                Some(inverse.transform_rect(&rect)),
+            FastTransform::Transform { ref transform, is_2d: false, .. } =>
+                Some(transform.inverse_rect_footprint(rect)),
+            FastTransform::Transform { inverse: None, .. }  => None,
+        }
+    }
+
+    #[inline(always)]
+    pub fn offset(&self, new_offset: TypedVector2D<f32, Src>) -> Self {
+        match *self {
+            FastTransform::Offset(offset) => FastTransform::Offset(offset + new_offset),
+            FastTransform::Transform { ref transform, .. } => {
+                let transform = transform.pre_translate(new_offset.to_3d());
+                FastTransform::with_transform(transform)
+            }
+        }
+    }
+
+    pub fn post_translate(&self, new_offset: TypedVector2D<f32, Dst>) -> Self {
+        match *self {
+            FastTransform::Offset(offset) => {
+                let offset = offset.to_untyped() + new_offset.to_untyped();
+                FastTransform::Offset(TypedVector2D::from_untyped(&offset))
+            }
+            FastTransform::Transform { ref transform, .. } => {
+                let transform = transform.post_translate(new_offset.to_3d());
+                FastTransform::with_transform(transform)
+            }
+        }
+    }
+
+    #[inline(always)]
+    pub fn inverse(&self) -> Option<FastTransform<Dst, Src>> {
+        match *self {
+            FastTransform::Offset(offset) =>
+                Some(FastTransform::Offset(TypedVector2D::new(-offset.x, -offset.y))),
+            FastTransform::Transform { transform, inverse: Some(inverse), is_2d, } =>
+                Some(FastTransform::Transform {
+                    transform: inverse,
+                    inverse: Some(transform),
+                    is_2d
+                }),
+            FastTransform::Transform { inverse: None, .. } => None,
+
+        }
+    }
+
+    pub fn update(&self, transform: TypedTransform3D<f32, Src, Dst>) -> Option<Self> {
+        if transform.is_simple_2d_translation() {
+            Some(self.offset(TypedVector2D::new(transform.m41, transform.m42)))
         } else {
             // If we break 2D axis alignment or have a perspective component, we need to start a
             // new incompatible coordinate system with which we cannot share clips without masking.
             None
         }
     }
 }
+
+impl<Src, Dst> From<TypedTransform3D<f32, Src, Dst>> for FastTransform<Src, Dst> {
+    fn from(transform: TypedTransform3D<f32, Src, Dst>) -> FastTransform<Src, Dst> {
+        FastTransform::with_transform(transform)
+    }
+}
+
+impl<Src, Dst> Into<TypedTransform3D<f32, Src, Dst>> for FastTransform<Src, Dst> {
+    fn into(self) -> TypedTransform3D<f32, Src, Dst> {
+        self.to_transform()
+    }
+}
+
+impl<Src, Dst> From<TypedVector2D<f32, Src>> for FastTransform<Src, Dst> {
+    fn from(vector: TypedVector2D<f32, Src>) -> FastTransform<Src, Dst> {
+        FastTransform::with_vector(vector)
+    }
+}
+
+pub type LayoutFastTransform = FastTransform<LayoutPixel, LayoutPixel>;
+pub type LayerFastTransform = FastTransform<LayerPixel, LayerPixel>;
+pub type LayerToWorldFastTransform = FastTransform<LayerPixel, WorldPixel>;
+pub type WorldToLayerFastTransform = FastTransform<WorldPixel, LayerPixel>;
--- a/gfx/webrender/tests/angle_shader_validation.rs
+++ b/gfx/webrender/tests/angle_shader_validation.rs
@@ -50,24 +50,16 @@ const SHADERS: &[Shader] = &[
         name: "ps_border_corner",
         features: PRIM_FEATURES,
     },
     Shader {
         name: "ps_border_edge",
         features: PRIM_FEATURES,
     },
     Shader {
-        name: "ps_gradient",
-        features: PRIM_FEATURES,
-    },
-    Shader {
-        name: "ps_angle_gradient",
-        features: PRIM_FEATURES,
-    },
-    Shader {
         name: "ps_hardware_composite",
         features: PRIM_FEATURES,
     },
     Shader {
         name: "ps_split_composite",
         features: PRIM_FEATURES,
     },
     Shader {
@@ -76,17 +68,17 @@ const SHADERS: &[Shader] = &[
     },
     Shader {
         name: "ps_text_run",
         features: PRIM_FEATURES,
     },
     // Brush shaders
     Shader {
         name: "brush_yuv_image",
-        features: &["", "YUV_NV12", "YUV_PLANAR", "YUV_INTERLEAVED"],
+        features: &["", "YUV_NV12", "YUV_PLANAR", "YUV_INTERLEAVED", "YUV_NV12,TEXTURE_RECT"],
     },
     Shader {
         name: "brush_mask",
         features: &[],
     },
     Shader {
         name: "brush_solid",
         features: &[],
@@ -104,16 +96,20 @@ const SHADERS: &[Shader] = &[
         features: &[],
     },
     Shader {
         name: "brush_line",
         features: &[],
     },
     Shader {
         name: "brush_radial_gradient",
+        features: &[ "DITHERING" ],
+    },
+    Shader {
+        name: "brush_linear_gradient",
         features: &[],
     },
 ];
 
 const VERSION_STRING: &str = "#version 300 es\n";
 
 #[test]
 fn validate_shaders() {
--- a/gfx/webrender_bindings/revision.txt
+++ b/gfx/webrender_bindings/revision.txt
@@ -1,1 +1,1 @@
-e8d2ffb404a85651fe08a6d09abbece9bd2b9182
+8a19316a733a484bf9bafb8257e3008b1418bfe4