Bug 1391614 - Update webrender to commit 310af2613e7508b22cad11e734b8c47e66447cc7. r?jrmuizel draft
author Kartikaya Gupta <kgupta@mozilla.com>
Thu, 24 Aug 2017 10:27:22 -0400
changeset 652175 85fbdd97a7750adb7ef1ad3a45f1a31656a39bd2
parent 652168 c66094c3ea2ae521dcd07f4b2e393b3c0e00bd51
child 728000 29a6f91a3884ef22b5b6839b702491f614d776d2
push id 75964
push user kgupta@mozilla.com
push date Thu, 24 Aug 2017 14:27:50 +0000
reviewers jrmuizel
bugs 1391614
milestone 57.0a1
Bug 1391614 - Update webrender to commit 310af2613e7508b22cad11e734b8c47e66447cc7. r?jrmuizel MozReview-Commit-ID: 5eHEaGU5jRv
gfx/doc/README.webrender
gfx/webrender/res/clip_shared.glsl
gfx/webrender/res/cs_blur.fs.glsl
gfx/webrender/res/cs_blur.glsl
gfx/webrender/res/cs_blur.vs.glsl
gfx/webrender/res/cs_box_shadow.fs.glsl
gfx/webrender/res/cs_box_shadow.glsl
gfx/webrender/res/cs_box_shadow.vs.glsl
gfx/webrender/res/cs_clip_border.fs.glsl
gfx/webrender/res/cs_clip_border.glsl
gfx/webrender/res/cs_clip_border.vs.glsl
gfx/webrender/res/cs_clip_image.glsl
gfx/webrender/res/cs_clip_image.vs.glsl
gfx/webrender/res/cs_clip_rectangle.glsl
gfx/webrender/res/cs_clip_rectangle.vs.glsl
gfx/webrender/res/cs_text_run.fs.glsl
gfx/webrender/res/cs_text_run.glsl
gfx/webrender/res/cs_text_run.vs.glsl
gfx/webrender/res/debug_color.fs.glsl
gfx/webrender/res/debug_color.glsl
gfx/webrender/res/debug_color.vs.glsl
gfx/webrender/res/debug_font.fs.glsl
gfx/webrender/res/debug_font.glsl
gfx/webrender/res/debug_font.vs.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_angle_gradient.fs.glsl
gfx/webrender/res/ps_angle_gradient.glsl
gfx/webrender/res/ps_angle_gradient.vs.glsl
gfx/webrender/res/ps_blend.glsl
gfx/webrender/res/ps_blend.vs.glsl
gfx/webrender/res/ps_border_corner.fs.glsl
gfx/webrender/res/ps_border_corner.glsl
gfx/webrender/res/ps_border_corner.vs.glsl
gfx/webrender/res/ps_border_edge.fs.glsl
gfx/webrender/res/ps_border_edge.glsl
gfx/webrender/res/ps_border_edge.vs.glsl
gfx/webrender/res/ps_box_shadow.glsl
gfx/webrender/res/ps_box_shadow.vs.glsl
gfx/webrender/res/ps_cache_image.fs.glsl
gfx/webrender/res/ps_cache_image.glsl
gfx/webrender/res/ps_cache_image.vs.glsl
gfx/webrender/res/ps_clear.vs.glsl
gfx/webrender/res/ps_composite.fs.glsl
gfx/webrender/res/ps_composite.glsl
gfx/webrender/res/ps_composite.vs.glsl
gfx/webrender/res/ps_gradient.glsl
gfx/webrender/res/ps_gradient.vs.glsl
gfx/webrender/res/ps_hardware_composite.glsl
gfx/webrender/res/ps_hardware_composite.vs.glsl
gfx/webrender/res/ps_image.fs.glsl
gfx/webrender/res/ps_image.glsl
gfx/webrender/res/ps_image.vs.glsl
gfx/webrender/res/ps_line.fs.glsl
gfx/webrender/res/ps_line.glsl
gfx/webrender/res/ps_line.vs.glsl
gfx/webrender/res/ps_radial_gradient.glsl
gfx/webrender/res/ps_radial_gradient.vs.glsl
gfx/webrender/res/ps_rectangle.glsl
gfx/webrender/res/ps_rectangle.vs.glsl
gfx/webrender/res/ps_split_composite.fs.glsl
gfx/webrender/res/ps_split_composite.glsl
gfx/webrender/res/ps_split_composite.vs.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/res/ps_text_run.vs.glsl
gfx/webrender/res/ps_yuv_image.fs.glsl
gfx/webrender/res/ps_yuv_image.glsl
gfx/webrender/res/ps_yuv_image.vs.glsl
gfx/webrender/res/shared_border.glsl
gfx/webrender/src/debug_render.rs
gfx/webrender/src/device.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/tiling.rs
gfx/webrender/tests/angle_shader_validation.rs
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -74,9 +74,9 @@ there is another crate in m-c called moz
 the same folder to store its rust dependencies. If one of the libraries that is
 required by both mozjs_sys and webrender is updated without updating the other
 project's Cargo.lock file, that results in build bustage.
 This means that any time you do this sort of manual update of packages, you need
 to make sure that mozjs_sys also has its Cargo.lock file updated if needed, hence
 the need to run the cargo update command in js/src as well. Hopefully this will
 be resolved soon.
 
-Latest Commit: 1007a65c6dd1fdfb8b39d57d7faff3cae7b32e0c
+Latest Commit: 310af2613e7508b22cad11e734b8c47e66447cc7
--- a/gfx/webrender/res/clip_shared.glsl
+++ b/gfx/webrender/res/clip_shared.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifdef WR_VERTEX_SHADER
 
 #define SEGMENT_ALL         0
 #define SEGMENT_CORNER_TL   1
--- a/gfx/webrender/res/cs_blur.fs.glsl
+++ b/gfx/webrender/res/cs_blur.fs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 // TODO(gw): Write a fast path blur that handles smaller blur radii
 //           with a offset / weight uniform table and a constant
 //           loop iteration count!
 
--- a/gfx/webrender/res/cs_blur.glsl
+++ b/gfx/webrender/res/cs_blur.glsl
@@ -1,10 +1,11 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 varying vec3 vUv;
 flat varying vec4 vUvRect;
 flat varying vec2 vOffsetScale;
 flat varying float vSigma;
 flat varying int vBlurRadius;
--- a/gfx/webrender/res/cs_blur.vs.glsl
+++ b/gfx/webrender/res/cs_blur.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 // Applies a separable gaussian blur in one direction, as specified
 // by the dir field in the blur command.
 
 #define DIR_HORIZONTAL  0
--- a/gfx/webrender/res/cs_box_shadow.fs.glsl
+++ b/gfx/webrender/res/cs_box_shadow.fs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 // See http://asciimath.org to render the equations here.
 
 // The Gaussian function used for blurring:
 //
--- a/gfx/webrender/res/cs_box_shadow.glsl
+++ b/gfx/webrender/res/cs_box_shadow.glsl
@@ -1,10 +1,11 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 varying vec2 vPos;
 flat varying vec2 vBorderRadii;
 flat varying float vBlurRadius;
 flat varying vec4 vBoxShadowRect;
 flat varying float vInverted;
--- a/gfx/webrender/res/cs_box_shadow.vs.glsl
+++ b/gfx/webrender/res/cs_box_shadow.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     PrimitiveInstance pi = fetch_prim_instance();
     RenderTaskData task = fetch_render_task(pi.render_task_index);
     BoxShadow bs = fetch_boxshadow(pi.specific_prim_address);
--- a/gfx/webrender/res/cs_clip_border.fs.glsl
+++ b/gfx/webrender/res/cs_clip_border.fs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     vec2 local_pos = vPos.xy / vPos.z;
 
     // Get local space position relative to the clip center.
--- a/gfx/webrender/res/cs_clip_border.glsl
+++ b/gfx/webrender/res/cs_clip_border.glsl
@@ -1,14 +1,14 @@
-#line 1
-
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared,clip_shared
+
 varying vec3 vPos;
 
 flat varying vec2 vClipCenter;
 
 flat varying vec4 vPoint_Tangent0;
 flat varying vec4 vPoint_Tangent1;
 flat varying vec3 vDotParams;
 flat varying vec2 vAlphaMask;
--- a/gfx/webrender/res/cs_clip_border.vs.glsl
+++ b/gfx/webrender/res/cs_clip_border.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 // Matches BorderCorner enum in border.rs
 #define CORNER_TOP_LEFT     0
 #define CORNER_TOP_RIGHT    1
 #define CORNER_BOTTOM_LEFT  2
--- a/gfx/webrender/res/cs_clip_image.glsl
+++ b/gfx/webrender/res/cs_clip_image.glsl
@@ -1,10 +1,10 @@
-#line 1
-
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared,clip_shared
+
 varying vec3 vPos;
 flat varying vec4 vClipMaskUvRect;
 flat varying vec4 vClipMaskUvInnerRect;
 flat varying float vLayer;
--- a/gfx/webrender/res/cs_clip_image.vs.glsl
+++ b/gfx/webrender/res/cs_clip_image.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 struct ImageMaskData {
     RectWithSize local_rect;
 };
 
--- a/gfx/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.glsl
@@ -1,12 +1,12 @@
-#line 1
-
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared,clip_shared
+
 varying vec3 vPos;
 flat varying float vClipMode;
 flat varying vec4 vClipCenter_Radius_TL;
 flat varying vec4 vClipCenter_Radius_TR;
 flat varying vec4 vClipCenter_Radius_BL;
 flat varying vec4 vClipCenter_Radius_BR;
--- a/gfx/webrender/res/cs_clip_rectangle.vs.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 struct ClipRect {
     RectWithSize rect;
     vec4 mode;
 };
--- a/gfx/webrender/res/cs_text_run.fs.glsl
+++ b/gfx/webrender/res/cs_text_run.fs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     float a = texture(sColor0, vUv).a;
     oFragColor = vec4(vColor.rgb, vColor.a * a);
 }
--- a/gfx/webrender/res/cs_text_run.glsl
+++ b/gfx/webrender/res/cs_text_run.glsl
@@ -1,7 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 varying vec3 vUv;
 flat varying vec4 vColor;
--- a/gfx/webrender/res/cs_text_run.vs.glsl
+++ b/gfx/webrender/res/cs_text_run.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 // Draw a text run to a cache target. These are always
 // drawn un-transformed. These are used for effects such
 // as text-shadow.
 
deleted file mode 100644
--- a/gfx/webrender/res/debug_color.fs.glsl
+++ /dev/null
@@ -1,10 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-varying vec4 vColor;
-
-void main(void)
-{
-    oFragColor = vColor;
-}
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/debug_color.glsl
@@ -0,0 +1,24 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,shared_other
+
+varying vec4 vColor;
+
+#ifdef WR_VERTEX_SHADER
+in vec4 aColor;
+
+void main(void) {
+    vColor = aColor;
+    vec4 pos = vec4(aPosition, 1.0);
+    pos.xy = floor(pos.xy * uDevicePixelRatio + 0.5) / uDevicePixelRatio;
+    gl_Position = uTransform * pos;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) {
+    oFragColor = vColor;
+}
+#endif
deleted file mode 100644
--- a/gfx/webrender/res/debug_color.vs.glsl
+++ /dev/null
@@ -1,14 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-in vec4 aColor;
-varying vec4 vColor;
-
-void main(void)
-{
-    vColor = aColor;
-    vec4 pos = vec4(aPosition, 1.0);
-    pos.xy = floor(pos.xy * uDevicePixelRatio + 0.5) / uDevicePixelRatio;
-    gl_Position = uTransform * pos;
-}
deleted file mode 100644
--- a/gfx/webrender/res/debug_font.fs.glsl
+++ /dev/null
@@ -1,12 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-varying vec2 vColorTexCoord;
-varying vec4 vColor;
-
-void main(void)
-{
-    float alpha = texture(sColor0, vec3(vColorTexCoord.xy, 0.0)).r;
-    oFragColor = vec4(vColor.xyz, vColor.w * alpha);
-}
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/debug_font.glsl
@@ -0,0 +1,28 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,shared_other
+
+varying vec2 vColorTexCoord;
+varying vec4 vColor;
+
+#ifdef WR_VERTEX_SHADER
+in vec4 aColor;
+in vec4 aColorTexCoord;
+
+void main(void) {
+    vColor = aColor;
+    vColorTexCoord = aColorTexCoord.xy;
+    vec4 pos = vec4(aPosition, 1.0);
+    pos.xy = floor(pos.xy * uDevicePixelRatio + 0.5) / uDevicePixelRatio;
+    gl_Position = uTransform * pos;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) {
+    float alpha = texture(sColor0, vec3(vColorTexCoord.xy, 0.0)).r;
+    oFragColor = vec4(vColor.xyz, vColor.w * alpha);
+}
+#endif
deleted file mode 100644
--- a/gfx/webrender/res/debug_font.vs.glsl
+++ /dev/null
@@ -1,18 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-in vec4 aColor;
-in vec4 aColorTexCoord;
-
-varying vec2 vColorTexCoord;
-varying vec4 vColor;
-
-void main(void)
-{
-    vColor = aColor;
-    vColorTexCoord = aColorTexCoord.xy;
-    vec4 pos = vec4(aPosition, 1.0);
-    pos.xy = floor(pos.xy * uDevicePixelRatio + 0.5) / uDevicePixelRatio;
-    gl_Position = uTransform * pos;
-}
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -1,42 +1,12 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#define PST_TOP_LEFT     0
-#define PST_TOP          1
-#define PST_TOP_RIGHT    2
-#define PST_RIGHT        3
-#define PST_BOTTOM_RIGHT 4
-#define PST_BOTTOM       5
-#define PST_BOTTOM_LEFT  6
-#define PST_LEFT         7
-
-#define BORDER_LEFT      0
-#define BORDER_TOP       1
-#define BORDER_RIGHT     2
-#define BORDER_BOTTOM    3
-
-// Border styles as defined in webrender_api/types.rs
-#define BORDER_STYLE_NONE         0
-#define BORDER_STYLE_SOLID        1
-#define BORDER_STYLE_DOUBLE       2
-#define BORDER_STYLE_DOTTED       3
-#define BORDER_STYLE_DASHED       4
-#define BORDER_STYLE_HIDDEN       5
-#define BORDER_STYLE_GROOVE       6
-#define BORDER_STYLE_RIDGE        7
-#define BORDER_STYLE_INSET        8
-#define BORDER_STYLE_OUTSET       9
-
-#define UV_NORMALIZED    uint(0)
-#define UV_PIXEL         uint(1)
-
 #define EXTEND_MODE_CLAMP  0
 #define EXTEND_MODE_REPEAT 1
 
 #define LINE_STYLE_SOLID        0
 #define LINE_STYLE_DOTTED       1
 #define LINE_STYLE_DASHED       2
 #define LINE_STYLE_WAVY         3
 
@@ -327,95 +297,16 @@ struct RadialGradient {
     vec4 tile_size_repeat;
 };
 
 RadialGradient fetch_radial_gradient(int address) {
     vec4 data[3] = fetch_from_resource_cache_3(address);
     return RadialGradient(data[0], data[1], data[2]);
 }
 
-struct Border {
-    vec4 style;
-    vec4 widths;
-    vec4 colors[4];
-    vec4 radii[2];
-};
-
-vec4 get_effective_border_widths(Border border, int style) {
-    switch (style) {
-        case BORDER_STYLE_DOUBLE:
-            // Calculate the width of a border segment in a style: double
-            // border. Round to the nearest CSS pixel.
-
-            // The CSS spec doesn't define what width each of the segments
-            // in a style: double border should be. It only says that the
-            // sum of the segments should be equal to the total border
-            // width. We pick to make the segments (almost) equal thirds
-            // for now - we can adjust this if we find other browsers pick
-            // different values in some cases.
-            // SEE: https://drafts.csswg.org/css-backgrounds-3/#double
-            return floor(0.5 + border.widths / 3.0);
-        case BORDER_STYLE_GROOVE:
-        case BORDER_STYLE_RIDGE:
-            return floor(0.5 + border.widths * 0.5);
-        default:
-            return border.widths;
-    }
-}
-
-Border fetch_border(int address) {
-    vec4 data[8] = fetch_from_resource_cache_8(address);
-    return Border(data[0], data[1],
-                  vec4[4](data[2], data[3], data[4], data[5]),
-                  vec4[2](data[6], data[7]));
-}
-
-struct BorderCorners {
-    vec2 tl_outer;
-    vec2 tl_inner;
-    vec2 tr_outer;
-    vec2 tr_inner;
-    vec2 br_outer;
-    vec2 br_inner;
-    vec2 bl_outer;
-    vec2 bl_inner;
-};
-
-BorderCorners get_border_corners(Border border, RectWithSize local_rect) {
-    vec2 tl_outer = local_rect.p0;
-    vec2 tl_inner = tl_outer + vec2(max(border.radii[0].x, border.widths.x),
-                                    max(border.radii[0].y, border.widths.y));
-
-    vec2 tr_outer = vec2(local_rect.p0.x + local_rect.size.x,
-                         local_rect.p0.y);
-    vec2 tr_inner = tr_outer + vec2(-max(border.radii[0].z, border.widths.z),
-                                    max(border.radii[0].w, border.widths.y));
-
-    vec2 br_outer = vec2(local_rect.p0.x + local_rect.size.x,
-                         local_rect.p0.y + local_rect.size.y);
-    vec2 br_inner = br_outer - vec2(max(border.radii[1].x, border.widths.z),
-                                    max(border.radii[1].y, border.widths.w));
-
-    vec2 bl_outer = vec2(local_rect.p0.x,
-                         local_rect.p0.y + local_rect.size.y);
-    vec2 bl_inner = bl_outer + vec2(max(border.radii[1].z, border.widths.x),
-                                    -max(border.radii[1].w, border.widths.w));
-
-    return BorderCorners(
-        tl_outer,
-        tl_inner,
-        tr_outer,
-        tr_inner,
-        br_outer,
-        br_inner,
-        bl_outer,
-        bl_inner
-    );
-}
-
 struct Glyph {
     vec2 offset;
 };
 
 Glyph fetch_glyph(int specific_prim_address,
                   int glyph_index,
                   int subpx_dir) {
     // Two glyphs are packed in each texel in the GPU cache.
--- a/gfx/webrender/res/ps_angle_gradient.fs.glsl
+++ b/gfx/webrender/res/ps_angle_gradient.fs.glsl
@@ -1,10 +1,8 @@
-#line 1
-
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     vec2 pos = mod(vPos, vTileRepeat);
 
     if (pos.x >= vTileSize.x ||
--- a/gfx/webrender/res/ps_angle_gradient.glsl
+++ b/gfx/webrender/res/ps_angle_gradient.glsl
@@ -1,12 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 flat varying int vGradientAddress;
 flat varying float vGradientRepeat;
 
 flat varying vec2 vScaledDir;
 flat varying vec2 vStartPoint;
 
 flat varying vec2 vTileSize;
 flat varying vec2 vTileRepeat;
--- a/gfx/webrender/res/ps_angle_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_angle_gradient.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
     Gradient gradient = fetch_gradient(prim.specific_prim_address);
 
--- a/gfx/webrender/res/ps_blend.glsl
+++ b/gfx/webrender/res/ps_blend.glsl
@@ -1,8 +1,10 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 varying vec3 vUv;
 flat varying vec4 vUvBounds;
 flat varying float vAmount;
 flat varying int vOp;
--- a/gfx/webrender/res/ps_blend.vs.glsl
+++ b/gfx/webrender/res/ps_blend.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     CompositeInstance ci = fetch_composite_instance();
     AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
     AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
--- a/gfx/webrender/res/ps_border_corner.fs.glsl
+++ b/gfx/webrender/res/ps_border_corner.fs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     float alpha = 1.0;
 #ifdef WR_FEATURE_TRANSFORM
     alpha = 0.0;
--- a/gfx/webrender/res/ps_border_corner.glsl
+++ b/gfx/webrender/res/ps_border_corner.glsl
@@ -1,13 +1,14 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared,shared_border
+
 // Edge color transition
 flat varying vec4 vColor00;
 flat varying vec4 vColor01;
 flat varying vec4 vColor10;
 flat varying vec4 vColor11;
 flat varying vec4 vColorEdgeLine;
 
 // Border radius
--- a/gfx/webrender/res/ps_border_corner.vs.glsl
+++ b/gfx/webrender/res/ps_border_corner.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 // Matches BorderCornerSide enum in border.rs
 #define SIDE_BOTH       0
 #define SIDE_FIRST      1
 #define SIDE_SECOND     2
--- a/gfx/webrender/res/ps_border_edge.fs.glsl
+++ b/gfx/webrender/res/ps_border_edge.fs.glsl
@@ -1,10 +1,8 @@
-#line 1
-
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     float alpha = 1.0;
 #ifdef WR_FEATURE_TRANSFORM
     alpha = 0.0;
--- a/gfx/webrender/res/ps_border_edge.glsl
+++ b/gfx/webrender/res/ps_border_edge.glsl
@@ -1,12 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared,shared_border
+
 flat varying vec4 vColor0;
 flat varying vec4 vColor1;
 flat varying vec2 vEdgeDistance;
 flat varying float vAxisSelect;
 flat varying float vAlphaSelect;
 flat varying vec4 vClipParams;
 flat varying float vClipSelect;
 
--- a/gfx/webrender/res/ps_border_edge.vs.glsl
+++ b/gfx/webrender/res/ps_border_edge.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void write_edge_distance(float p0,
                          float original_width,
                          float adjusted_width,
                          float style,
--- a/gfx/webrender/res/ps_box_shadow.glsl
+++ b/gfx/webrender/res/ps_box_shadow.glsl
@@ -1,9 +1,11 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 flat varying vec4 vColor;
 
 varying vec3 vUv;
 flat varying vec2 vMirrorPoint;
 flat varying vec4 vCacheUvRectCoords;
--- a/gfx/webrender/res/ps_box_shadow.vs.glsl
+++ b/gfx/webrender/res/ps_box_shadow.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #define BS_HEADER_VECS 4
 
 void main(void) {
     Primitive prim = load_primitive();
--- a/gfx/webrender/res/ps_cache_image.fs.glsl
+++ b/gfx/webrender/res/ps_cache_image.fs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     vec2 uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
     oFragColor = texture(sColor0, vec3(uv, vUv.z));
 }
--- a/gfx/webrender/res/ps_cache_image.glsl
+++ b/gfx/webrender/res/ps_cache_image.glsl
@@ -1,6 +1,8 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 varying vec3 vUv;
 flat varying vec4 vUvBounds;
--- a/gfx/webrender/res/ps_cache_image.vs.glsl
+++ b/gfx/webrender/res/ps_cache_image.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 // Draw a cached primitive (e.g. a blurred text run) from the
 // target cache to the framebuffer, applying tile clip boundaries.
 
 void main(void) {
--- a/gfx/webrender/res/ps_clear.vs.glsl
+++ b/gfx/webrender/res/ps_clear.vs.glsl
@@ -1,10 +1,8 @@
-#line 1
-
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 in ivec4 aClearRectangle;
 
 void main() {
     vec4 rect = vec4(aClearRectangle);
--- a/gfx/webrender/res/ps_composite.fs.glsl
+++ b/gfx/webrender/res/ps_composite.fs.glsl
@@ -1,10 +1,8 @@
-#line 1
-
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 float gauss(float x, float sigma) {
     if (sigma == 0.0)
         return 1.0;
     return (1.0 / sqrt(6.283185307179586 * sigma * sigma)) * exp(-(x * x) / (2.0 * sigma * sigma));
--- a/gfx/webrender/res/ps_composite.glsl
+++ b/gfx/webrender/res/ps_composite.glsl
@@ -1,7 +1,9 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 varying vec3 vUv0;
 varying vec3 vUv1;
 flat varying int vOp;
--- a/gfx/webrender/res/ps_composite.vs.glsl
+++ b/gfx/webrender/res/ps_composite.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     CompositeInstance ci = fetch_composite_instance();
     AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
     ReadbackTask backdrop_task = fetch_readback_task(ci.backdrop_task_index);
--- a/gfx/webrender/res/ps_gradient.glsl
+++ b/gfx/webrender/res/ps_gradient.glsl
@@ -1,11 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 varying vec4 vColor;
 
 #ifdef WR_FEATURE_TRANSFORM
 varying vec3 vLocalPos;
 #else
 varying vec2 vPos;
 #endif
--- a/gfx/webrender/res/ps_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_gradient.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
     Gradient gradient = fetch_gradient(prim.specific_prim_address);
 
--- a/gfx/webrender/res/ps_hardware_composite.glsl
+++ b/gfx/webrender/res/ps_hardware_composite.glsl
@@ -1,5 +1,7 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 varying vec3 vUv;
--- a/gfx/webrender/res/ps_hardware_composite.vs.glsl
+++ b/gfx/webrender/res/ps_hardware_composite.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     CompositeInstance ci = fetch_composite_instance();
     AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
     AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
--- a/gfx/webrender/res/ps_image.fs.glsl
+++ b/gfx/webrender/res/ps_image.fs.glsl
@@ -1,10 +1,8 @@
-#line 1
-
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
 #ifdef WR_FEATURE_TRANSFORM
     float alpha = 0.0;
     vec2 pos = init_transform_fs(vLocalPos, alpha);
--- a/gfx/webrender/res/ps_image.glsl
+++ b/gfx/webrender/res/ps_image.glsl
@@ -1,12 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 // If this is in WR_FEATURE_TEXTURE_RECT mode, the rect and size use non-normalized
 // texture coordinates. Otherwise, it uses normalized texture coordinates. Please
 // check GL_TEXTURE_RECTANGLE.
 flat varying vec2 vTextureOffset; // Offset of this image into the texture atlas.
 flat varying vec2 vTextureSize;   // Size of the image in the texture atlas.
 flat varying vec2 vTileSpacing;   // Amount of space between tiled instances of this image.
 flat varying vec4 vStRect;        // Rectangle of valid texture rect.
 flat varying float vLayer;
--- a/gfx/webrender/res/ps_image.vs.glsl
+++ b/gfx/webrender/res/ps_image.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
     Image image = fetch_image(prim.specific_prim_address);
     ImageResource res = fetch_image_resource(prim.user_data0);
--- a/gfx/webrender/res/ps_line.fs.glsl
+++ b/gfx/webrender/res/ps_line.fs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 float det(vec2 a, vec2 b) {
     return a.x * b.y - b.x * a.y;
 }
 
--- a/gfx/webrender/res/ps_line.glsl
+++ b/gfx/webrender/res/ps_line.glsl
@@ -1,12 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 varying vec4 vColor;
 flat varying int vStyle;
 flat varying float vAxisSelect;
 flat varying vec4 vParams;
 flat varying vec2 vLocalOrigin;
 
 #ifdef WR_FEATURE_TRANSFORM
 varying vec3 vLocalPos;
--- a/gfx/webrender/res/ps_line.vs.glsl
+++ b/gfx/webrender/res/ps_line.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #define LINE_ORIENTATION_VERTICAL       0
 #define LINE_ORIENTATION_HORIZONTAL     1
 
 void main(void) {
--- a/gfx/webrender/res/ps_radial_gradient.glsl
+++ b/gfx/webrender/res/ps_radial_gradient.glsl
@@ -1,12 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 flat varying int vGradientAddress;
 flat varying float vGradientRepeat;
 
 flat varying vec2 vStartCenter;
 flat varying vec2 vEndCenter;
 flat varying float vStartRadius;
 flat varying float vEndRadius;
 
--- a/gfx/webrender/res/ps_radial_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_radial_gradient.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
     RadialGradient gradient = fetch_radial_gradient(prim.specific_prim_address);
 
--- a/gfx/webrender/res/ps_rectangle.glsl
+++ b/gfx/webrender/res/ps_rectangle.glsl
@@ -1,9 +1,11 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 varying vec4 vColor;
 
 #ifdef WR_FEATURE_TRANSFORM
 varying vec3 vLocalPos;
 #endif
--- a/gfx/webrender/res/ps_rectangle.vs.glsl
+++ b/gfx/webrender/res/ps_rectangle.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
     Rectangle rect = fetch_rectangle(prim.specific_prim_address);
     vColor = rect.color;
--- a/gfx/webrender/res/ps_split_composite.fs.glsl
+++ b/gfx/webrender/res/ps_split_composite.fs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     bvec4 inside = lessThanEqual(vec4(vUvTaskBounds.xy, vUv.xy),
                                  vec4(vUv.xy, vUvTaskBounds.zw));
     if (all(inside)) {
--- a/gfx/webrender/res/ps_split_composite.glsl
+++ b/gfx/webrender/res/ps_split_composite.glsl
@@ -1,8 +1,9 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 varying vec3 vUv;
 flat varying vec4 vUvTaskBounds;
 flat varying vec4 vUvSampleBounds;
--- a/gfx/webrender/res/ps_split_composite.vs.glsl
+++ b/gfx/webrender/res/ps_split_composite.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 struct SplitGeometry {
     vec3 points[4];
 };
 
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -1,11 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 flat varying vec4 vColor;
 varying vec3 vUv;
 flat varying vec4 vUvBorder;
 
 #ifdef WR_FEATURE_TRANSFORM
 varying vec3 vLocalPos;
 #endif
--- a/gfx/webrender/res/ps_text_run.vs.glsl
+++ b/gfx/webrender/res/ps_text_run.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.specific_prim_address);
 
--- a/gfx/webrender/res/ps_yuv_image.fs.glsl
+++ b/gfx/webrender/res/ps_yuv_image.fs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #if !defined(WR_FEATURE_YUV_REC601) && !defined(WR_FEATURE_YUV_REC709)
 #define WR_FEATURE_YUV_REC601
 #endif
 
--- a/gfx/webrender/res/ps_yuv_image.glsl
+++ b/gfx/webrender/res/ps_yuv_image.glsl
@@ -1,12 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include shared,prim_shared
+
 // If this is in WR_FEATURE_TEXTURE_RECT mode, the rect and size use non-normalized
 // texture coordinates. Otherwise, it uses normalized texture coordinates. Please
 // check GL_TEXTURE_RECTANGLE.
 flat varying vec2 vTextureOffsetY; // Offset of the y plane into the texture atlas.
 flat varying vec2 vTextureOffsetU; // Offset of the u plane into the texture atlas.
 flat varying vec2 vTextureOffsetV; // Offset of the v plane into the texture atlas.
 flat varying vec2 vTextureSizeY;   // Size of the y plane in the texture atlas.
 flat varying vec2 vTextureSizeUv;  // Size of the u and v planes in the texture atlas.
--- a/gfx/webrender/res/ps_yuv_image.vs.glsl
+++ b/gfx/webrender/res/ps_yuv_image.vs.glsl
@@ -1,9 +1,8 @@
-#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/shared_border.glsl
@@ -0,0 +1,98 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef WR_VERTEX_SHADER
+
+// Border styles as defined in webrender_api/types.rs
+#define BORDER_STYLE_NONE         0
+#define BORDER_STYLE_SOLID        1
+#define BORDER_STYLE_DOUBLE       2
+#define BORDER_STYLE_DOTTED       3
+#define BORDER_STYLE_DASHED       4
+#define BORDER_STYLE_HIDDEN       5
+#define BORDER_STYLE_GROOVE       6
+#define BORDER_STYLE_RIDGE        7
+#define BORDER_STYLE_INSET        8
+#define BORDER_STYLE_OUTSET       9
+
+struct Border {
+    vec4 style;
+    vec4 widths;
+    vec4 colors[4];
+    vec4 radii[2];
+};
+
+struct BorderCorners {
+    vec2 tl_outer;
+    vec2 tl_inner;
+    vec2 tr_outer;
+    vec2 tr_inner;
+    vec2 br_outer;
+    vec2 br_inner;
+    vec2 bl_outer;
+    vec2 bl_inner;
+};
+
+vec4 get_effective_border_widths(Border border, int style) {
+    switch (style) {
+        case BORDER_STYLE_DOUBLE:
+            // Calculate the width of a border segment in a style: double
+            // border. Round to the nearest CSS pixel.
+
+            // The CSS spec doesn't define what width each of the segments
+            // in a style: double border should be. It only says that the
+            // sum of the segments should be equal to the total border
+            // width. We choose to make the segments (almost) equal thirds
+            // for now - we can adjust this if we find other browsers pick
+            // different values in some cases.
+            // SEE: https://drafts.csswg.org/css-backgrounds-3/#double
+            return floor(0.5 + border.widths / 3.0);
+        case BORDER_STYLE_GROOVE:
+        case BORDER_STYLE_RIDGE:
+            return floor(0.5 + border.widths * 0.5);
+        default:
+            return border.widths;
+    }
+}
+
+Border fetch_border(int address) {
+    vec4 data[8] = fetch_from_resource_cache_8(address);
+    return Border(data[0], data[1],
+                  vec4[4](data[2], data[3], data[4], data[5]),
+                  vec4[2](data[6], data[7]));
+}
+
+BorderCorners get_border_corners(Border border, RectWithSize local_rect) {
+    vec2 tl_outer = local_rect.p0;
+    vec2 tl_inner = tl_outer + vec2(max(border.radii[0].x, border.widths.x),
+                                    max(border.radii[0].y, border.widths.y));
+
+    vec2 tr_outer = vec2(local_rect.p0.x + local_rect.size.x,
+                         local_rect.p0.y);
+    vec2 tr_inner = tr_outer + vec2(-max(border.radii[0].z, border.widths.z),
+                                    max(border.radii[0].w, border.widths.y));
+
+    vec2 br_outer = vec2(local_rect.p0.x + local_rect.size.x,
+                         local_rect.p0.y + local_rect.size.y);
+    vec2 br_inner = br_outer - vec2(max(border.radii[1].x, border.widths.z),
+                                    max(border.radii[1].y, border.widths.w));
+
+    vec2 bl_outer = vec2(local_rect.p0.x,
+                         local_rect.p0.y + local_rect.size.y);
+    vec2 bl_inner = bl_outer + vec2(max(border.radii[1].z, border.widths.x),
+                                    -max(border.radii[1].w, border.widths.w));
+
+    return BorderCorners(
+        tl_outer,
+        tl_inner,
+        tr_outer,
+        tr_inner,
+        br_outer,
+        br_inner,
+        bl_outer,
+        bl_inner
+    );
+}
+
+#endif
--- a/gfx/webrender/src/debug_render.rs
+++ b/gfx/webrender/src/debug_render.rs
@@ -78,18 +78,22 @@ pub struct DebugRenderer {
     tri_vao: VAO,
     line_vertices: Vec<DebugColorVertex>,
     line_vao: VAO,
     color_program: Program,
 }
 
 impl DebugRenderer {
     pub fn new(device: &mut Device) -> DebugRenderer {
-        let font_program = device.create_program("debug_font", "shared_other", &DESC_FONT).unwrap();
-        let color_program = device.create_program("debug_color", "shared_other", &DESC_COLOR).unwrap();
+        let font_program = device.create_program("debug_font",
+                                                 "",
+                                                 &DESC_FONT).unwrap();
+        let color_program = device.create_program("debug_color",
+                                                  "",
+                                                  &DESC_COLOR).unwrap();
 
         let font_vao = device.create_vao(&DESC_FONT, 32);
         let line_vao = device.create_vao(&DESC_COLOR, 32);
         let tri_vao = device.create_vao(&DESC_COLOR, 32);
 
         let font_texture_id = device.create_texture_ids(1, TextureTarget::Array)[0];
         device.init_texture(font_texture_id,
                             debug_font_data::BMP_WIDTH,
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -41,20 +41,22 @@ const GL_FORMAT_A: gl::GLuint = gl::RED;
 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
 const GL_FORMAT_A: gl::GLuint = gl::ALPHA;
 
 const GL_FORMAT_BGRA_GL: gl::GLuint = gl::BGRA;
 
 const GL_FORMAT_BGRA_GLES: gl::GLuint = gl::BGRA_EXT;
 
 const SHADER_VERSION_GL: &str = "#version 150\n";
-
 const SHADER_VERSION_GLES: &str = "#version 300 es\n";
 
-static SHADER_PREAMBLE: &str = "shared";
+const SHADER_KIND_VERTEX: &str = "#define WR_VERTEX_SHADER\n";
+const SHADER_KIND_FRAGMENT: &str = "#define WR_FRAGMENT_SHADER\n";
+const SHADER_IMPORT: &str = "#include ";
+const SHADER_LINE_MARKER: &str = "#line 1\n";
 
 #[repr(u32)]
 pub enum DepthFunction {
     Less = gl::LESS,
     LessEqual = gl::LEQUAL,
 }
 
 #[derive(Copy, Clone, Debug, PartialEq)]
@@ -125,32 +127,101 @@ fn get_shader_version(gl: &gl::Gl) -> &'
             SHADER_VERSION_GL
         }
         gl::GlType::Gles => {
             SHADER_VERSION_GLES
         }
     }
 }
 
-fn get_optional_shader_source(shader_name: &str, base_path: &Option<PathBuf>) -> Option<String> {
+// Get a shader string by name, from the built in resources or
+// an override path, if supplied.
+fn get_shader_source(shader_name: &str, base_path: &Option<PathBuf>) -> Option<String> {
     if let Some(ref base) = *base_path {
         let shader_path = base.join(&format!("{}.glsl", shader_name));
         if shader_path.exists() {
             let mut source = String::new();
             File::open(&shader_path).unwrap().read_to_string(&mut source).unwrap();
             return Some(source);
         }
     }
 
-    shader_source::SHADERS.get(shader_name).and_then(|s| Some((*s).to_owned()))
+    shader_source::SHADERS.get(shader_name).map(|s| s.to_string())
+}
+
+// Parse a shader string for imports. Imports are recursively processed, and
+// their contents are written to the output in place of the #include line.
+fn parse_shader_source(source: String, base_path: &Option<PathBuf>, output: &mut String) {
+    for line in source.lines() {
+        if line.starts_with(SHADER_IMPORT) {
+            let imports = line[SHADER_IMPORT.len()..].split(",");
+
+            // For each import, get the source, and recurse.
+            for import in imports {
+                if let Some(include) = get_shader_source(import, base_path) {
+                    parse_shader_source(include, base_path, output);
+                }
+            }
+        } else {
+            output.push_str(line);
+            output.push_str("\n");
+        }
+    }
 }
 
-fn get_shader_source(shader_name: &str, base_path: &Option<PathBuf>) -> String {
-    get_optional_shader_source(shader_name, base_path)
-        .expect(&format!("Couldn't get required shader: {}", shader_name))
+pub fn build_shader_strings(gl_version_string: &str,
+                            features: &str,
+                            base_filename: &str,
+                            override_path: &Option<PathBuf>) -> (String, String) {
+    // Build the vertex and fragment source strings passed to the shader compiler.
+    let mut vs_source = String::new();
+    let mut fs_source = String::new();
+
+    // GLSL requires that the version number comes first.
+    vs_source.push_str(gl_version_string);
+    fs_source.push_str(gl_version_string);
+
+    // Define a constant depending on whether we are compiling VS or FS.
+    vs_source.push_str(SHADER_KIND_VERTEX);
+    fs_source.push_str(SHADER_KIND_FRAGMENT);
+
+    // Add any defines that were passed by the caller.
+    vs_source.push_str(features);
+    fs_source.push_str(features);
+
+    // Parse the main .glsl file, including any imports
+    // and append them to the list of sources.
+    let mut shared_result = String::new();
+    if let Some(shared_source) = get_shader_source(base_filename, override_path) {
+        parse_shader_source(shared_source,
+            override_path,
+            &mut shared_result);
+    }
+
+    vs_source.push_str(SHADER_LINE_MARKER);
+    vs_source.push_str(&shared_result);
+    fs_source.push_str(SHADER_LINE_MARKER);
+    fs_source.push_str(&shared_result);
+
+    // Append legacy (.vs and .fs) files if they exist.
+    // TODO(gw): Once all shaders are ported to just use the
+    //           .glsl file, we can remove this code.
+    let vs_name = format!("{}.vs", base_filename);
+    if let Some(old_vs_source) = get_shader_source(&vs_name, override_path) {
+        vs_source.push_str(SHADER_LINE_MARKER);
+        vs_source.push_str(&old_vs_source);
+    }
+
+    let fs_name = format!("{}.fs", base_filename);
+    if let Some(old_fs_source) = get_shader_source(&fs_name, override_path) {
+        fs_source.push_str(SHADER_LINE_MARKER);
+        fs_source.push_str(&old_fs_source);
+    }
+
+    (vs_source, fs_source)
 }
 
 pub trait FileWatcherHandler : Send {
     fn file_changed(&self, path: PathBuf);
 }
 
 impl VertexAttributeKind {
     fn size_in_bytes(&self) -> u32 {
@@ -323,53 +394,16 @@ impl Drop for Texture {
         self.gl.delete_textures(&[self.id]);
     }
 }
 
 pub struct Program {
     id: gl::GLuint,
     u_transform: gl::GLint,
     u_device_pixel_ratio: gl::GLint,
-    name: String,
-    vs_source: String,
-    fs_source: String,
-    prefix: Option<String>,
-    vs_id: Option<gl::GLuint>,
-    fs_id: Option<gl::GLuint>,
-}
-
-impl Program {
-    fn attach_and_bind_shaders(&mut self,
-                               vs_id: gl::GLuint,
-                               fs_id: gl::GLuint,
-                               descriptor: &VertexDescriptor,
-                               gl: &gl::Gl) -> Result<(), ShaderError> {
-        gl.attach_shader(self.id, vs_id);
-        gl.attach_shader(self.id, fs_id);
-
-        for (i, attr) in descriptor.vertex_attributes
-                                   .iter()
-                                   .chain(descriptor.instance_attributes.iter())
-                                   .enumerate() {
-            gl.bind_attrib_location(self.id,
-                                    i as gl::GLuint,
-                                    attr.name);
-        }
-
-        gl.link_program(self.id);
-        if gl.get_program_iv(self.id, gl::LINK_STATUS) == (0 as gl::GLint) {
-            let error_log = gl.get_program_info_log(self.id);
-            println!("Failed to link shader program: {:?}\n{}", self.name, error_log);
-            gl.detach_shader(self.id, vs_id);
-            gl.detach_shader(self.id, fs_id);
-            return Err(ShaderError::Link(self.name.clone(), error_log));
-        }
-
-        Ok(())
-    }
 }
 
 impl Drop for Program {
     fn drop(&mut self) {
         debug_assert!(thread::panicking() || self.id == 0, "renderer::deinit not called");
     }
 }
 
@@ -709,31 +743,27 @@ pub struct Device {
 
     // debug
     inside_frame: bool,
 
     // resources
     resource_override_path: Option<PathBuf>,
     textures: FastHashMap<TextureId, Texture>,
 
-    // misc.
-    shader_preamble: String,
-
     max_texture_size: u32,
 
     // Frame counter. This is used to map between CPU
     // frames and GPU frames.
     frame_id: FrameId,
 }
 
 impl Device {
     pub fn new(gl: Rc<gl::Gl>,
                resource_override_path: Option<PathBuf>,
                _file_changed_handler: Box<FileWatcherHandler>) -> Device {
-        let shader_preamble = get_shader_source(SHADER_PREAMBLE, &resource_override_path);
         let max_texture_size = gl.get_integer_v(gl::MAX_TEXTURE_SIZE) as u32;
 
         Device {
             gl,
             resource_override_path,
             // This is initialized to 1 by default, but it is set
             // every frame by the call to begin_frame().
             device_pixel_ratio: 1.0,
@@ -749,18 +779,16 @@ impl Device {
             bound_pbo: PBOId(0),
             bound_read_fbo: FBOId(0),
             bound_draw_fbo: FBOId(0),
             default_read_fbo: 0,
             default_draw_fbo: 0,
 
             textures: FastHashMap::default(),
 
-            shader_preamble,
-
             max_texture_size,
             frame_id: FrameId(0),
         }
     }
 
     pub fn gl(&self) -> &gl::Gl {
         &*self.gl
     }
@@ -774,33 +802,22 @@ impl Device {
     }
 
     pub fn get_capabilities(&self) -> &Capabilities {
         &self.capabilities
     }
 
     pub fn compile_shader(gl: &gl::Gl,
                           name: &str,
-                          source_str: &str,
                           shader_type: gl::GLenum,
-                          shader_preamble: &[String])
+                          source: String)
                           -> Result<gl::GLuint, ShaderError> {
         debug!("compile {:?}", name);
-
-        let mut s = String::new();
-        s.push_str(get_shader_version(gl));
-        for prefix in shader_preamble {
-            s.push_str(prefix);
-        }
-        s.push_str(source_str);
-
         let id = gl.create_shader(shader_type);
-        let mut source = Vec::new();
-        source.extend_from_slice(s.as_bytes());
-        gl.shader_source(id, &[&source[..]]);
+        gl.shader_source(id, &[source.as_bytes()]);
         gl.compile_shader(id);
         let log = gl.get_shader_info_log(id);
         if gl.get_shader_iv(id, gl::COMPILE_STATUS) == (0 as gl::GLint) {
             println!("Failed to compile shader: {:?}\n{}", name, log);
             Err(ShaderError::Compilation(name.to_string(), log))
         } else {
             if !log.is_empty() {
                 println!("Warnings detected on shader: {:?}\n{}", name, log);
@@ -1194,138 +1211,100 @@ impl Device {
         }
 
         texture.format = ImageFormat::Invalid;
         texture.width = 0;
         texture.height = 0;
         texture.layer_count = 0;
     }
 
-    pub fn create_program(&mut self,
-                          base_filename: &str,
-                          include_filename: &str,
-                          descriptor: &VertexDescriptor) -> Result<Program, ShaderError> {
-        self.create_program_with_prefix(base_filename,
-                                        &[include_filename],
-                                        None,
-                                        descriptor)
-    }
-
     pub fn delete_program(&mut self, mut program: Program) {
         self.gl.delete_program(program.id);
         program.id = 0;
     }
 
-    pub fn create_program_with_prefix(&mut self,
-                                      base_filename: &str,
-                                      include_filenames: &[&str],
-                                      prefix: Option<String>,
-                                      descriptor: &VertexDescriptor) -> Result<Program, ShaderError> {
-        debug_assert!(self.inside_frame);
-
-        let pid = self.gl.create_program();
-
-        let mut vs_name = String::from(base_filename);
-        vs_name.push_str(".vs");
-        let mut fs_name = String::from(base_filename);
-        fs_name.push_str(".fs");
-
-        let mut include = format!("// Base shader: {}\n", base_filename);
-        for inc_filename in include_filenames {
-            let src = get_shader_source(inc_filename, &self.resource_override_path);
-            include.push_str(&src);
-        }
-
-        if let Some(shared_src) = get_optional_shader_source(base_filename, &self.resource_override_path) {
-            include.push_str(&shared_src);
-        }
-
-        let mut program = Program {
-            name: base_filename.to_owned(),
-            id: pid,
-            u_transform: -1,
-            u_device_pixel_ratio: -1,
-            vs_source: get_shader_source(&vs_name, &self.resource_override_path),
-            fs_source: get_shader_source(&fs_name, &self.resource_override_path),
-            prefix,
-            vs_id: None,
-            fs_id: None,
-        };
-
-        try!{ self.load_program(&mut program, include, descriptor) };
-
-        Ok(program)
-    }
-
-    fn load_program(&mut self,
-                    program: &mut Program,
-                    include: String,
-                    descriptor: &VertexDescriptor) -> Result<(), ShaderError> {
+    pub fn create_program(&mut self,
+                          base_filename: &str,
+                          features: &str,
+                          descriptor: &VertexDescriptor) -> Result<Program, ShaderError> {
         debug_assert!(self.inside_frame);
 
-        let mut vs_preamble = Vec::new();
-        let mut fs_preamble = Vec::new();
+        let gl_version_string = get_shader_version(&*self.gl);
+
+        let (vs_source, fs_source) = build_shader_strings(gl_version_string,
+                                                          features,
+                                                          base_filename,
+                                                          &self.resource_override_path);
+
+        // Compile the vertex shader
+        let vs_id = match Device::compile_shader(&*self.gl,
+                                                 base_filename,
+                                                 gl::VERTEX_SHADER,
+                                                 vs_source) {
+            Ok(vs_id) => vs_id,
+            Err(err) => return Err(err),
+        };
 
-        vs_preamble.push("#define WR_VERTEX_SHADER\n".to_owned());
-        fs_preamble.push("#define WR_FRAGMENT_SHADER\n".to_owned());
+        // Compile the fragment shader
+        let fs_id = match Device::compile_shader(&*self.gl,
+                                                 base_filename,
+                                                 gl::FRAGMENT_SHADER,
+                                                 fs_source) {
+            Ok(fs_id) => fs_id,
+            Err(err) => {
+                self.gl.delete_shader(vs_id);
+                return Err(err);
+            }
+        };
 
-        if let Some(ref prefix) = program.prefix {
-            vs_preamble.push(prefix.clone());
-            fs_preamble.push(prefix.clone());
+        // Create program and attach shaders
+        let pid = self.gl.create_program();
+        self.gl.attach_shader(pid, vs_id);
+        self.gl.attach_shader(pid, fs_id);
+
+        // Bind vertex attributes
+        for (i, attr) in descriptor.vertex_attributes
+                                   .iter()
+                                   .chain(descriptor.instance_attributes.iter())
+                                   .enumerate() {
+            self.gl.bind_attrib_location(pid,
+                                         i as gl::GLuint,
+                                         attr.name);
         }
 
-        vs_preamble.push(self.shader_preamble.to_owned());
-        fs_preamble.push(self.shader_preamble.to_owned());
-
-        vs_preamble.push(include.clone());
-        fs_preamble.push(include);
+        // Link!
+        self.gl.link_program(pid);
 
-        // todo(gw): store shader ids so they can be freed!
-        let vs_id = try!{ Device::compile_shader(&*self.gl,
-                                                 &program.name,
-                                                 &program.vs_source,
-                                                 gl::VERTEX_SHADER,
-                                                 &vs_preamble) };
-        let fs_id = try!{ Device::compile_shader(&*self.gl,
-                                                 &program.name,
-                                                 &program.fs_source,
-                                                 gl::FRAGMENT_SHADER,
-                                                 &fs_preamble) };
+        // GL recommends detaching and deleting shaders once the link
+        // is complete (whether successful or not). This allows the driver
+        // to free any memory associated with the parsing and compilation.
+        self.gl.detach_shader(pid, vs_id);
+        self.gl.detach_shader(pid, fs_id);
+        self.gl.delete_shader(vs_id);
+        self.gl.delete_shader(fs_id);
 
-        if let Some(vs_id) = program.vs_id {
-            self.gl.detach_shader(program.id, vs_id);
+        if self.gl.get_program_iv(pid, gl::LINK_STATUS) == (0 as gl::GLint) {
+            let error_log = self.gl.get_program_info_log(pid);
+            println!("Failed to link shader program: {:?}\n{}", base_filename, error_log);
+            self.gl.delete_program(pid);
+            return Err(ShaderError::Link(base_filename.to_string(), error_log));
         }
 
-        if let Some(fs_id) = program.fs_id {
-            self.gl.detach_shader(program.id, fs_id);
-        }
+        let u_transform = self.gl.get_uniform_location(pid, "uTransform");
+        let u_device_pixel_ratio = self.gl.get_uniform_location(pid, "uDevicePixelRatio");
 
-        if let Err(bind_error) = program.attach_and_bind_shaders(vs_id, fs_id, descriptor, &*self.gl) {
-            if let (Some(vs_id), Some(fs_id)) = (program.vs_id, program.fs_id) {
-                try! { program.attach_and_bind_shaders(vs_id, fs_id, descriptor, &*self.gl) };
-            } else {
-               return Err(bind_error);
-            }
-        } else {
-            if let Some(vs_id) = program.vs_id {
-                self.gl.delete_shader(vs_id);
-            }
+        let program = Program {
+            id: pid,
+            u_transform,
+            u_device_pixel_ratio,
+        };
 
-            if let Some(fs_id) = program.fs_id {
-                self.gl.delete_shader(fs_id);
-            }
+        self.bind_program(&program);
 
-            program.vs_id = Some(vs_id);
-            program.fs_id = Some(fs_id);
-        }
-
-        program.u_transform = self.gl.get_uniform_location(program.id, "uTransform");
-        program.u_device_pixel_ratio = self.gl.get_uniform_location(program.id, "uDevicePixelRatio");
-
-        self.bind_program(program);
+        // TODO(gw): Abstract these to not be part of the device code!
         let u_color_0 = self.gl.get_uniform_location(program.id, "sColor0");
         if u_color_0 != -1 {
             self.gl.uniform_1i(u_color_0, TextureSampler::Color0 as i32);
         }
         let u_color1 = self.gl.get_uniform_location(program.id, "sColor1");
         if u_color1 != -1 {
             self.gl.uniform_1i(u_color1, TextureSampler::Color1 as i32);
         }
@@ -1356,17 +1335,17 @@ impl Device {
             self.gl.uniform_1i(u_tasks, TextureSampler::RenderTasks as i32);
         }
 
         let u_resource_cache = self.gl.get_uniform_location(program.id, "sResourceCache");
         if u_resource_cache != -1 {
             self.gl.uniform_1i(u_resource_cache, TextureSampler::ResourceCache as i32);
         }
 
-        Ok(())
+        Ok(program)
     }
 
     pub fn get_uniform_location(&self, program: &Program, name: &str) -> UniformLocation {
         UniformLocation(self.gl.get_uniform_location(program.id, name))
     }
 
     pub fn set_uniform_2f(&self, uniform: UniformLocation, x: f32, y: f32) {
         debug_assert!(self.inside_frame);
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -16,27 +16,27 @@ use internal_types::{FastHashMap, Hardwa
 use mask_cache::{ClipMode, ClipRegion, ClipSource, MaskCacheInfo};
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, LinePrimitive, PrimitiveKind};
 use prim_store::{PrimitiveContainer, PrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu, TextRunMode};
 use prim_store::{RectanglePrimitive, TextRunPrimitiveCpu, TextShadowPrimitiveCpu};
 use prim_store::{BoxShadowPrimitiveCpu, TexelRect, YuvImagePrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
-use render_task::{AlphaRenderItem, ClipWorkItem, MaskCacheKey, RenderTask, RenderTaskIndex};
-use render_task::{RenderTaskId, RenderTaskLocation};
+use render_task::{AlphaRenderItem, ClipWorkItem, RenderTask};
+use render_task::{RenderTaskTree, RenderTaskId, RenderTaskLocation};
 use resource_cache::ResourceCache;
 use clip_scroll_node::{ClipInfo, ClipScrollNode, NodeType};
 use clip_scroll_tree::ClipScrollTree;
 use std::{cmp, f32, i32, mem, usize};
 use euclid::{SideOffsets2D, vec2, vec3};
 use tiling::{ContextIsolation, StackingContextIndex};
 use tiling::{ClipScrollGroup, ClipScrollGroupIndex, CompositeOps, DisplayListMap, Frame};
 use tiling::{PackedLayer, PackedLayerIndex, PrimitiveFlags, PrimitiveRunCmd, RenderPass};
-use tiling::{RenderTargetContext, RenderTaskCollection, ScrollbarPrimitive, StackingContext};
+use tiling::{RenderTargetContext, ScrollbarPrimitive, StackingContext};
 use util::{self, pack_as_float, subtract_rect, recycle_vec};
 use util::{MatrixHelpers, RectHelpers};
 
 #[derive(Debug, Clone)]
 struct ImageBorderSegment {
     geom_rect: LayerRect,
     sub_rect: TexelRect,
     stretch_size: LayerSize,
@@ -1271,25 +1271,27 @@ impl FrameBuilder {
     /// Compute the contribution (bounding rectangles, and resources) of layers and their
     /// primitives in screen space.
     fn build_layer_screen_rects_and_cull_layers(&mut self,
                                                 screen_rect: &DeviceIntRect,
                                                 clip_scroll_tree: &mut ClipScrollTree,
                                                 display_lists: &DisplayListMap,
                                                 resource_cache: &mut ResourceCache,
                                                 gpu_cache: &mut GpuCache,
+                                                render_tasks: &mut RenderTaskTree, // forwarded as-is to create_and_run below
                                                 profile_counters: &mut FrameProfileCounters,
                                                 device_pixel_ratio: f32) {
         profile_scope!("cull");
         LayerRectCalculationAndCullingPass::create_and_run(self,
                                                            screen_rect,
                                                            clip_scroll_tree,
                                                            display_lists,
                                                            resource_cache,
                                                            gpu_cache,
+                                                           render_tasks, // this function does not touch the tree itself
                                                            profile_counters,
                                                            device_pixel_ratio);
     }
 
     fn update_scroll_bars(&mut self,
                           clip_scroll_tree: &ClipScrollTree,
                           gpu_cache: &mut GpuCache) {
         let distance_from_edge = 8.0;
@@ -1333,37 +1335,34 @@ impl FrameBuilder {
             //           has been broken for a long time, so I've removed it
             //           for now. We can re-add that code once the clips
             //           data is moved over to the GPU cache!
         }
     }
 
     fn build_render_task(&mut self,
                          clip_scroll_tree: &ClipScrollTree,
-                         gpu_cache: &mut GpuCache)
-                         -> (RenderTask, usize) {
+                         gpu_cache: &mut GpuCache,
+                         render_tasks: &mut RenderTaskTree) // finished tasks are added to this tree
+                         -> RenderTaskId { // id produced by the final render_tasks.add() call
         profile_scope!("build_render_task");
 
         let mut next_z = 0;
-        let mut next_task_index = RenderTaskIndex(0);
-
         let mut sc_stack: Vec<StackingContextIndex> = Vec::new();
-        let mut current_task = RenderTask::new_alpha_batch(next_task_index,
-                                                           DeviceIntPoint::zero(),
+        let mut current_task = RenderTask::new_alpha_batch(DeviceIntPoint::zero(),
                                                            RenderTaskLocation::Fixed);
-        next_task_index.0 += 1;
         // A stack of the alpha batcher tasks. We create them on the way down,
         // and then actually populate with items and dependencies on the way up.
         let mut alpha_task_stack = Vec::new();
         // A map of "preserve-3d" contexts. We are baking these into render targets
         // and only compositing once we are out of "preserve-3d" hierarchy.
         // The stacking contexts that fall into this category are
         //  - ones with `ContextIsolation::Items`, for their actual items to be backed
         //  - immediate children of `ContextIsolation::Items`
-        let mut preserve_3d_map: FastHashMap<StackingContextIndex, RenderTask> = FastHashMap::default();
+        let mut preserve_3d_map: FastHashMap<StackingContextIndex, RenderTaskId> = FastHashMap::default();
         // The plane splitter stack, using a simple BSP tree.
         let mut splitter_stack = Vec::new();
 
         debug!("build_render_task()");
 
         for cmd in &self.cmds {
             match *cmd {
                 PrimitiveRunCmd::PushStackingContext(stacking_context_index) => {
@@ -1378,44 +1377,41 @@ impl FrameBuilder {
 
                     debug!("\tpush {:?} {:?}", stacking_context_index, stacking_context.isolation);
 
                     let stacking_context_rect = &stacking_context.screen_bounds;
                     let composite_count = stacking_context.composite_ops.count();
 
                     if stacking_context.isolation == ContextIsolation::Full && composite_count == 0 {
                         alpha_task_stack.push(current_task);
-                        current_task = RenderTask::new_dynamic_alpha_batch(next_task_index, stacking_context_rect);
-                        next_task_index.0 += 1;
+                        current_task = RenderTask::new_dynamic_alpha_batch(stacking_context_rect);
                     }
 
                     if parent_isolation == Some(ContextIsolation::Items) ||
                        stacking_context.isolation == ContextIsolation::Items {
                         if parent_isolation != Some(ContextIsolation::Items) {
                             splitter_stack.push(BspSplitter::new());
                         }
                         alpha_task_stack.push(current_task);
-                        current_task = RenderTask::new_dynamic_alpha_batch(next_task_index, stacking_context_rect);
-                        next_task_index.0 += 1;
+                        current_task = RenderTask::new_dynamic_alpha_batch(stacking_context_rect);
                         //Note: technically, we shouldn't make a new alpha task for "preserve-3d" contexts
                         // that have no child items (only other stacking contexts). However, we don't know if
                         // there are any items at this time (in `PushStackingContext`).
                         //Note: the reason we add the polygon for splitting during `Push*` as opposed to `Pop*`
                         // is because we need to preserve the order of drawing for planes that match together.
                         let frame_node = clip_scroll_tree.nodes.get(&stacking_context.reference_frame_id).unwrap();
                         let sc_polygon = make_polygon(stacking_context, frame_node, stacking_context_index.0);
                         debug!("\tsplitter[{}]: add {:?} -> {:?} with bounds {:?}", splitter_stack.len(),
                             stacking_context_index, sc_polygon, stacking_context.isolated_items_bounds);
                         splitter_stack.last_mut().unwrap().add(sc_polygon);
                     }
 
                     for _ in 0..composite_count {
                         alpha_task_stack.push(current_task);
-                        current_task = RenderTask::new_dynamic_alpha_batch(next_task_index, stacking_context_rect);
-                        next_task_index.0 += 1;
+                        current_task = RenderTask::new_dynamic_alpha_batch(stacking_context_rect);
                     }
                 }
                 PrimitiveRunCmd::PopStackingContext => {
                     let stacking_context_index = sc_stack.pop().unwrap();
                     let stacking_context = &self.stacking_context_store[stacking_context_index.0];
                     let composite_count = stacking_context.composite_ops.count();
 
                     if !stacking_context.is_visible {
@@ -1423,86 +1419,90 @@ impl FrameBuilder {
                     }
 
                     debug!("\tpop {:?}", stacking_context_index);
                     let parent_isolation = sc_stack.last()
                                                    .map(|index| self.stacking_context_store[index.0].isolation);
 
                     if stacking_context.isolation == ContextIsolation::Full && composite_count == 0 {
                         let mut prev_task = alpha_task_stack.pop().unwrap();
+                        let current_task_id = render_tasks.add(current_task); // hand the finished task to the tree, keep its id
                         let item = AlphaRenderItem::HardwareComposite(stacking_context_index,
-                                                                      current_task.id,
+                                                                      current_task_id,
                                                                       HardwareCompositeOp::PremultipliedAlpha,
                                                                       next_z);
                         next_z += 1;
-                        prev_task.as_alpha_batch().items.push(item);
-                        prev_task.children.push(current_task);
+                        prev_task.as_alpha_batch_mut().items.push(item);
+                        prev_task.children.push(current_task_id); // children are recorded by id
                         current_task = prev_task;
                     }
 
                     for filter in &stacking_context.composite_ops.filters {
                         let mut prev_task = alpha_task_stack.pop().unwrap();
+                        let current_task_id = render_tasks.add(current_task); // one task is added per filter
                         let item = AlphaRenderItem::Blend(stacking_context_index,
-                                                          current_task.id,
+                                                          current_task_id,
                                                           *filter,
                                                           next_z);
                         next_z += 1;
-                        prev_task.as_alpha_batch().items.push(item);
-                        prev_task.children.push(current_task);
+                        prev_task.as_alpha_batch_mut().items.push(item);
+                        prev_task.children.push(current_task_id);
                         current_task = prev_task;
                     }
 
                     if let Some(mix_blend_mode) = stacking_context.composite_ops.mix_blend_mode {
-                        let readback_task =
-                            RenderTask::new_readback(stacking_context_index,
-                                                     stacking_context.screen_bounds);
+                        let backdrop_task =
+                            RenderTask::new_readback(stacking_context.screen_bounds);
+                        let source_task_id = render_tasks.add(current_task); // source = the task built for this context
+                        let backdrop_task_id = render_tasks.add(backdrop_task); // backdrop = the readback task created above
 
                         let mut prev_task = alpha_task_stack.pop().unwrap();
                         let item = AlphaRenderItem::Composite(stacking_context_index,
-                                                              readback_task.id,
-                                                              current_task.id,
+                                                              source_task_id,
+                                                              backdrop_task_id,
                                                               mix_blend_mode,
                                                               next_z);
                         next_z += 1;
-                        prev_task.as_alpha_batch().items.push(item);
-                        prev_task.children.push(current_task);
-                        prev_task.children.push(readback_task);
+                        prev_task.as_alpha_batch_mut().items.push(item);
+                        prev_task.children.push(source_task_id);
+                        prev_task.children.push(backdrop_task_id);
                         current_task = prev_task;
                     }
 
                     if parent_isolation == Some(ContextIsolation::Items) ||
                        stacking_context.isolation == ContextIsolation::Items {
                         //Note: we don't register the dependent tasks here. It's only done
                         // when we are out of the `preserve-3d` branch (see the code below),
                         // since this is only where the parent task is known.
-                        preserve_3d_map.insert(stacking_context_index, current_task);
+                        let current_task_id = render_tasks.add(current_task); // the map below stores only this id
+                        preserve_3d_map.insert(stacking_context_index, current_task_id);
                         current_task = alpha_task_stack.pop().unwrap();
                     }
 
                     if parent_isolation != Some(ContextIsolation::Items) &&
                        stacking_context.isolation == ContextIsolation::Items {
-                        debug!("\tsplitter[{}]: flush {:?}", splitter_stack.len(), current_task.id);
+                        debug!("\tsplitter[{}]: flush", splitter_stack.len());
                         let mut splitter = splitter_stack.pop().unwrap();
                         // Flush the accumulated plane splits onto the task tree.
                         // Notice how this is done before splitting in order to avoid duplicate tasks.
                         current_task.children.extend(preserve_3d_map.values().cloned());
                         // Z axis is directed at the screen, `sort` is ascending, and we need back-to-front order.
                         for poly in splitter.sort(vec3(0.0, 0.0, 1.0)) {
                             let sc_index = StackingContextIndex(poly.anchor);
-                            let task_id = preserve_3d_map[&sc_index].id;
+                            let task_id = preserve_3d_map[&sc_index]; // the map value is already a RenderTaskId
                             debug!("\t\tproduce {:?} -> {:?} for {:?}", sc_index, poly, task_id);
                             let pp = &poly.points;
                             let gpu_blocks = [
                                 [pp[0].x, pp[0].y, pp[0].z, pp[1].x].into(),
                                 [pp[1].y, pp[1].z, pp[2].x, pp[2].y].into(),
                                 [pp[2].z, pp[3].x, pp[3].y, pp[3].z].into(),
                             ];
                             let handle = gpu_cache.push_per_frame_blocks(&gpu_blocks);
                             let item = AlphaRenderItem::SplitComposite(sc_index, task_id, handle, next_z);
-                            current_task.as_alpha_batch().items.push(item);
+                            current_task.as_alpha_batch_mut().items.push(item);
                         }
                         preserve_3d_map.clear();
                         next_z += 1;
                     }
                 }
                 PrimitiveRunCmd::PrimitiveRun(first_prim_index, prim_count, clip_and_scroll) => {
                     let stacking_context_index = *sc_stack.last().unwrap();
                     if !self.stacking_context_store[stacking_context_index.0].is_visible {
@@ -1510,45 +1510,44 @@ impl FrameBuilder {
                     }
 
                     let group_index = *self.clip_scroll_group_indices.get(&clip_and_scroll).unwrap();
                     if self.clip_scroll_group_store[group_index.0].screen_bounding_rect.is_none() {
                         debug!("\tcs-group {:?} screen rect is None", group_index);
                         continue
                     }
 
-                    debug!("\trun of {} items into {:?}", prim_count, current_task.id);
+                    debug!("\trun of {} items", prim_count);
 
                     for i in 0..prim_count {
                         let prim_index = PrimitiveIndex(first_prim_index.0 + i);
 
                         if self.prim_store.cpu_bounding_rects[prim_index.0].is_some() {
                             let prim_metadata = self.prim_store.get_metadata(prim_index);
 
                             // Add any dynamic render tasks needed to render this primitive
-                            if let Some(ref render_task) = prim_metadata.render_task {
-                                current_task.children.push(render_task.clone());
+                            if let Some(render_task_id) = prim_metadata.render_task_id {
+                                current_task.children.push(render_task_id); // pushes the id; the old code cloned the task
                             }
-                            if let Some(ref clip_task) = prim_metadata.clip_task {
-                                current_task.children.push(clip_task.clone());
+                            if let Some(clip_task_id) = prim_metadata.clip_task_id {
+                                current_task.children.push(clip_task_id);
                             }
 
                             let item = AlphaRenderItem::Primitive(Some(group_index), prim_index, next_z);
-                            current_task.as_alpha_batch().items.push(item);
+                            current_task.as_alpha_batch_mut().items.push(item);
                             next_z += 1;
                         }
                     }
                 }
             }
         }
 
         debug_assert!(alpha_task_stack.is_empty());
         debug_assert!(preserve_3d_map.is_empty());
-        debug_assert_eq!(current_task.id, RenderTaskId::Static(RenderTaskIndex(0)));
-        (current_task, next_task_index.0)
+        render_tasks.add(current_task) // the remaining task is added last; its id is the return value
     }
 
     pub fn build(&mut self,
                  resource_cache: &mut ResourceCache,
                  gpu_cache: &mut GpuCache,
                  frame_id: FrameId,
                  clip_scroll_tree: &mut ClipScrollTree,
                  display_lists: &DisplayListMap,
@@ -1573,45 +1572,46 @@ impl FrameBuilder {
         // has to be at least as large as the framebuffer size. This ensures that it will
         // always be able to allocate the worst case render task (such as a clip mask that
         // covers the entire screen).
         let cache_size = DeviceUintSize::new(cmp::max(1024, screen_rect.size.width as u32),
                                              cmp::max(1024, screen_rect.size.height as u32));
 
         self.update_scroll_bars(clip_scroll_tree, gpu_cache);
 
+        let mut render_tasks = RenderTaskTree::new(); // a fresh tree for each build() call
+
         self.build_layer_screen_rects_and_cull_layers(&screen_rect,
                                                       clip_scroll_tree,
                                                       display_lists,
                                                       resource_cache,
                                                       gpu_cache,
+                                                      &mut render_tasks,
                                                       &mut profile_counters,
                                                       device_pixel_ratio);
 
-        let (main_render_task, static_render_task_count) = self.build_render_task(clip_scroll_tree, gpu_cache);
-        let mut render_tasks = RenderTaskCollection::new(static_render_task_count);
+        let main_render_task_id = self.build_render_task(clip_scroll_tree, gpu_cache, &mut render_tasks);
 
         let mut required_pass_count = 0;
-        main_render_task.max_depth(0, &mut required_pass_count);
+        render_tasks.max_depth(main_render_task_id, 0, &mut required_pass_count); // writes required_pass_count, read by the loop below
 
         resource_cache.block_until_all_resources_added(gpu_cache, texture_cache_profile);
 
         let mut deferred_resolves = vec![];
 
         let mut passes = Vec::new();
 
         // Do the allocations now, assigning each tile's tasks to a render
         // pass and target as required.
         for index in 0..required_pass_count {
-            passes.push(RenderPass::new(index as isize,
-                                        index == required_pass_count-1,
+            passes.push(RenderPass::new(index == required_pass_count-1,
                                         cache_size));
         }
 
-        main_render_task.assign_to_passes(passes.len() - 1, &mut passes);
+        render_tasks.assign_to_passes(main_render_task_id, passes.len() - 1, &mut passes);
 
         for pass in &mut passes {
             let ctx = RenderTargetContext {
                 device_pixel_ratio,
                 stacking_context_store: &self.stacking_context_store,
                 clip_scroll_group_store: &self.clip_scroll_group_store,
                 prim_store: &self.prim_store,
                 resource_cache,
@@ -1621,27 +1621,29 @@ impl FrameBuilder {
 
             profile_counters.passes.inc();
             profile_counters.color_targets.add(pass.color_targets.target_count());
             profile_counters.alpha_targets.add(pass.alpha_targets.target_count());
         }
 
         let gpu_cache_updates = gpu_cache.end_frame(gpu_cache_profile);
 
+        render_tasks.build(); // NOTE(review): presumably finalizes the tree's per-task data - confirm in render_task.rs
+
         resource_cache.end_frame();
 
         Frame {
             device_pixel_ratio,
             background_color: self.background_color,
             window_size: self.screen_size,
             profile_counters,
             passes,
             cache_size,
             layer_texture_data: self.packed_layers.clone(),
-            render_task_data: render_tasks.render_task_data,
+            render_tasks, // the whole tree is moved into the Frame, not just its data
             deferred_resolves,
             gpu_cache_updates: Some(gpu_cache_updates),
         }
     }
 
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -1655,16 +1657,17 @@ struct LayerRectCalculationAndCullingPas
     screen_rect: &'a DeviceIntRect,
     clip_scroll_tree: &'a mut ClipScrollTree,
     display_lists: &'a DisplayListMap,
     resource_cache: &'a mut ResourceCache,
     gpu_cache: &'a mut GpuCache,
     profile_counters: &'a mut FrameProfileCounters,
     device_pixel_ratio: f32,
     stacking_context_stack: Vec<StackingContextIndex>,
+    render_tasks: &'a mut RenderTaskTree,
 
     /// A cached clip info stack, which should handle the most common situation,
     /// which is that we are using the same clip info stack that we were using
     /// previously.
     current_clip_stack: Vec<ClipWorkItem>,
 
     /// Information about the cached clip stack, which is used to avoid having
     /// to recalculate it for every primitive.
@@ -1673,31 +1676,33 @@ struct LayerRectCalculationAndCullingPas
 
 impl<'a> LayerRectCalculationAndCullingPass<'a> {
     fn create_and_run(frame_builder: &'a mut FrameBuilder,
                       screen_rect: &'a DeviceIntRect,
                       clip_scroll_tree: &'a mut ClipScrollTree,
                       display_lists: &'a DisplayListMap,
                       resource_cache: &'a mut ResourceCache,
                       gpu_cache: &'a mut GpuCache,
+                      render_tasks: &'a mut RenderTaskTree, // stored on the pass before run() is called
                       profile_counters: &'a mut FrameProfileCounters,
                       device_pixel_ratio: f32) {
 
         let mut pass = LayerRectCalculationAndCullingPass {
             frame_builder,
             screen_rect,
             clip_scroll_tree,
             display_lists,
             resource_cache,
             gpu_cache,
             profile_counters,
             device_pixel_ratio,
             stacking_context_stack: Vec::new(),
             current_clip_stack: Vec::new(),
             current_clip_info: None,
+            render_tasks, // a borrowed tree; the pass does not own it
         };
         pass.run();
     }
 
     fn run(&mut self) {
         self.recalculate_clip_scroll_nodes();
         self.recalculate_clip_scroll_groups();
 
@@ -1972,54 +1977,57 @@ impl<'a> LayerRectCalculationAndCullingP
             debug!("\t\t{:?} bound is {:?}", prim_index, prim_screen_rect);
 
             let prim_metadata = prim_store.prepare_prim_for_render(prim_index,
                                                                    self.resource_cache,
                                                                    self.gpu_cache,
                                                                    &packed_layer.transform,
                                                                    self.device_pixel_ratio,
                                                                    display_list,
-                                                                   TextRunMode::Normal);
+                                                                   TextRunMode::Normal,
+                                                                   &mut self.render_tasks);
 
             stacking_context.screen_bounds = stacking_context.screen_bounds.union(&prim_screen_rect);
             stacking_context.isolated_items_bounds = stacking_context.isolated_items_bounds.union(&prim_local_rect);
 
             // Try to create a mask if we may need to.
             if !self.current_clip_stack.is_empty() || prim_metadata.clip_cache_info.is_some() {
                 // If the primitive doesn't have a specific clip, key the task ID off the
                 // stacking context. This means that two primitives which are only clipped
                 // by the stacking context stack can share clip masks during render task
                 // assignment to targets.
-                let (mask_key, mask_rect, extra) = match prim_metadata.clip_cache_info {
+                let (cache_key, mask_rect, extra) = match prim_metadata.clip_cache_info {
                     Some(ref info) => {
                         // Take into account the actual clip info of the primitive, and
                         // mutate the current bounds accordingly.
                         let mask_rect = match info.bounds.outer {
                             Some(ref outer) => {
                                 match prim_screen_rect.intersection(&outer.device_rect) {
                                     Some(rect) => rect,
                                     None => continue,
                                 }
                             }
                             _ => prim_screen_rect,
                         };
-                        (MaskCacheKey::Primitive(prim_index),
+                        (None,
                          mask_rect,
                          Some((packed_layer_index, info.strip_aligned())))
                     }
                     None => {
-                        //Note: can't use `prim_bounding_rect` since
-                        // the primitive ID is not a part of the task key
-                        (MaskCacheKey::ClipNode(clip_and_scroll.clip_node_id()),
+                        (Some(clip_and_scroll.clip_node_id()),
                          clip_bounds,
                          None)
                     }
                 };
-                prim_metadata.clip_task = RenderTask::new_mask(mask_rect,
-                                                               mask_key,
-                                                               &self.current_clip_stack,
-                                                               extra)
+                let clip_task = RenderTask::new_mask(cache_key,
+                                                     mask_rect,
+                                                     &self.current_clip_stack,
+                                                     extra);
+                let render_tasks = &mut self.render_tasks;
+                prim_metadata.clip_task_id = clip_task.map(|clip_task| {
+                    render_tasks.add(clip_task)
+                });
             }
 
             self.profile_counters.visible_primitives.inc();
         }
     }
 }
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -136,8 +136,11 @@ extern crate plane_split;
 
 #[cfg(any(target_os="macos", target_os="windows"))]
 extern crate gamma_lut;
 
 pub use renderer::{ExternalImage, ExternalImageSource, ExternalImageHandler};
 pub use renderer::{GraphicsApi, GraphicsApiInfo, ReadPixelsFormat, Renderer, RendererOptions};
 
 pub use webrender_api as api;
+
+#[doc(hidden)]
+pub use device::build_shader_strings;
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -8,17 +8,17 @@ use api::{ImageKey, ImageRendering, Item
 use api::{GlyphKey, LayerToWorldTransform, TileOffset, YuvColorSpace, YuvFormat};
 use api::{device_length, FontInstance, LayerVector2D, LineOrientation, LineStyle, SubpixelDirection};
 use app_units::Au;
 use border::BorderCornerInstance;
 use euclid::{Size2D};
 use gpu_cache::{GpuCacheAddress, GpuBlockData, GpuCache, GpuCacheHandle, GpuDataRequest, ToGpuBlocks};
 use mask_cache::{ClipMode, ClipRegion, ClipSource, MaskCacheInfo};
 use renderer::MAX_VERTEX_TEXTURE_WIDTH;
-use render_task::{RenderTask, RenderTaskLocation};
+use render_task::{RenderTask, RenderTaskId, RenderTaskTree};
 use resource_cache::{ImageProperties, ResourceCache};
 use std::{mem, usize};
 use util::{pack_as_float, TransformedRect, recycle_vec};
 
 
 pub const CLIP_DATA_GPU_BLOCKS: usize = 10;
 
 #[derive(Debug, Copy, Clone)]
@@ -114,22 +114,16 @@ pub enum PrimitiveKind {
     AlignedGradient,
     AngleGradient,
     RadialGradient,
     BoxShadow,
     TextShadow,
     Line,
 }
 
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
-pub enum PrimitiveCacheKey {
-    BoxShadow(BoxShadowPrimitiveCacheKey),
-    TextShadow(PrimitiveIndex),
-}
-
 impl GpuCacheHandle {
     pub fn as_int(&self, gpu_cache: &GpuCache) -> i32 {
         let address = gpu_cache.get_address(self);
 
         // TODO(gw): Temporarily encode GPU Cache addresses as a single int.
         //           In the future, we can change the PrimitiveInstance struct
         //           to use 2x u16 for the vertex attribute instead of an i32.
         address.v as i32 * MAX_VERTEX_TEXTURE_WIDTH as i32 + address.u as i32
@@ -148,29 +142,29 @@ pub struct PrimitiveMetadata {
     // An optional render task that is a dependency of
     // drawing this primitive. For instance, box shadows
     // use this to draw a portion of the box shadow to
     // a render target to reduce the number of pixels
     // that the box-shadow shader needs to run on. For
     // text-shadow, this creates a render task chain
     // that implements a 2-pass separable blur on a
     // text run.
-    pub render_task: Option<RenderTask>,
-    pub clip_task: Option<RenderTask>,
+    pub render_task_id: Option<RenderTaskId>,
+    pub clip_task_id: Option<RenderTaskId>,
 
     // TODO(gw): In the future, we should just pull these
     //           directly from the DL item, instead of
     //           storing them here.
     pub local_rect: LayerRect,
     pub local_clip_rect: LayerRect,
 }
 
 impl PrimitiveMetadata {
     pub fn needs_clipping(&self) -> bool {
-        self.clip_task.is_some()
+        self.clip_task_id.is_some()
     }
 }
 
 #[derive(Debug, Clone)]
 #[repr(C)]
 pub struct RectanglePrimitive {
     pub color: ColorF,
 }
@@ -837,216 +831,191 @@ impl PrimitiveStore {
             PrimitiveContainer::Rectangle(rect) => {
                 let metadata = PrimitiveMetadata {
                     opacity: PrimitiveOpacity::from_alpha(rect.color.a),
                     clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::Rectangle,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_rectangles.len()),
                     gpu_location: GpuCacheHandle::new(),
-                    render_task: None,
-                    clip_task: None,
+                    render_task_id: None,
+                    clip_task_id: None,
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_rectangles.push(rect);
 
                 metadata
             }
             PrimitiveContainer::Line(line) => {
                 let metadata = PrimitiveMetadata {
                     opacity: PrimitiveOpacity::translucent(),
                     clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::Line,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_lines.len()),
                     gpu_location: GpuCacheHandle::new(),
-                    render_task: None,
-                    clip_task: None,
+                    render_task_id: None,
+                    clip_task_id: None,
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_lines.push(line);
                 metadata
             }
             PrimitiveContainer::TextRun(text_cpu) => {
                 let metadata = PrimitiveMetadata {
                     opacity: PrimitiveOpacity::translucent(),
                     clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::TextRun,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_text_runs.len()),
                     gpu_location: GpuCacheHandle::new(),
-                    render_task: None,
-                    clip_task: None,
+                    render_task_id: None,
+                    clip_task_id: None,
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_text_runs.push(text_cpu);
                 metadata
             }
             PrimitiveContainer::TextShadow(text_shadow) => {
                 let metadata = PrimitiveMetadata {
                     opacity: PrimitiveOpacity::translucent(),
                     clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::TextShadow,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_text_shadows.len()),
                     gpu_location: GpuCacheHandle::new(),
-                    render_task: None,
-                    clip_task: None,
+                    render_task_id: None,
+                    clip_task_id: None,
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_text_shadows.push(text_shadow);
                 metadata
             }
             PrimitiveContainer::Image(image_cpu) => {
                 let metadata = PrimitiveMetadata {
                     opacity: PrimitiveOpacity::translucent(),
                     clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::Image,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_images.len()),
                     gpu_location: GpuCacheHandle::new(),
-                    render_task: None,
-                    clip_task: None,
+                    render_task_id: None,
+                    clip_task_id: None,
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_images.push(image_cpu);
                 metadata
             }
             PrimitiveContainer::YuvImage(image_cpu) => {
                 let metadata = PrimitiveMetadata {
                     opacity: PrimitiveOpacity::opaque(),
                     clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::YuvImage,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_yuv_images.len()),
                     gpu_location: GpuCacheHandle::new(),
-                    render_task: None,
-                    clip_task: None,
+                    render_task_id: None,
+                    clip_task_id: None,
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_yuv_images.push(image_cpu);
                 metadata
             }
             PrimitiveContainer::Border(border_cpu) => {
                 let metadata = PrimitiveMetadata {
                     opacity: PrimitiveOpacity::translucent(),
                     clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::Border,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_borders.len()),
                     gpu_location: GpuCacheHandle::new(),
-                    render_task: None,
-                    clip_task: None,
+                    render_task_id: None,
+                    clip_task_id: None,
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_borders.push(border_cpu);
                 metadata
             }
             PrimitiveContainer::AlignedGradient(gradient_cpu) => {
                 let metadata = PrimitiveMetadata {
                     opacity: PrimitiveOpacity::translucent(),
                     clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::AlignedGradient,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_gradients.len()),
                     gpu_location: GpuCacheHandle::new(),
-                    render_task: None,
-                    clip_task: None,
+                    render_task_id: None,
+                    clip_task_id: None,
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_gradients.push(gradient_cpu);
                 metadata
             }
             PrimitiveContainer::AngleGradient(gradient_cpu) => {
                 let metadata = PrimitiveMetadata {
                     // TODO: calculate if the gradient is actually opaque
                     opacity: PrimitiveOpacity::translucent(),
                     clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::AngleGradient,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_gradients.len()),
                     gpu_location: GpuCacheHandle::new(),
-                    render_task: None,
-                    clip_task: None,
+                    render_task_id: None,
+                    clip_task_id: None,
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_gradients.push(gradient_cpu);
                 metadata
             }
             PrimitiveContainer::RadialGradient(radial_gradient_cpu) => {
                 let metadata = PrimitiveMetadata {
                     // TODO: calculate if the gradient is actually opaque
                     opacity: PrimitiveOpacity::translucent(),
                     clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::RadialGradient,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_radial_gradients.len()),
                     gpu_location: GpuCacheHandle::new(),
-                    render_task: None,
-                    clip_task: None,
+                    render_task_id: None,
+                    clip_task_id: None,
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_radial_gradients.push(radial_gradient_cpu);
                 metadata
             }
             PrimitiveContainer::BoxShadow(box_shadow) => {
-                let cache_key = PrimitiveCacheKey::BoxShadow(BoxShadowPrimitiveCacheKey {
-                    blur_radius: Au::from_f32_px(box_shadow.blur_radius),
-                    border_radius: Au::from_f32_px(box_shadow.border_radius),
-                    inverted: box_shadow.inverted != 0.0,
-                    shadow_rect_size: Size2D::new(Au::from_f32_px(box_shadow.bs_rect.size.width),
-                                                  Au::from_f32_px(box_shadow.bs_rect.size.height)),
-                });
-
-                // The actual cache size is calculated during prepare_prim_for_render().
-                // This is necessary since the size may change depending on the device
-                // pixel ratio (for example, during zoom or moving the window to a
-                // monitor with a different device pixel ratio).
-                let cache_size = DeviceIntSize::zero();
-
-                // Create a render task for this box shadow primitive. This renders a small
-                // portion of the box shadow to a render target. That portion is then
-                // stretched over the actual primitive rect by the box shadow primitive
-                // shader, to reduce the number of pixels that the expensive box
-                // shadow shader needs to run on.
-                // TODO(gw): In the future, we can probably merge the box shadow
-                // primitive (stretch) shader with the generic cached primitive shader.
-                let render_task = RenderTask::new_prim_cache(cache_key,
-                                                             cache_size,
-                                                             PrimitiveIndex(prim_index));
-
                 let metadata = PrimitiveMetadata {
                     opacity: PrimitiveOpacity::translucent(),
                     clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::BoxShadow,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_box_shadows.len()),
                     gpu_location: GpuCacheHandle::new(),
-                    render_task: Some(render_task),
-                    clip_task: None,
+                    render_task_id: None,
+                    clip_task_id: None,
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_box_shadows.push(box_shadow);
                 metadata
             }
         };
@@ -1089,17 +1058,18 @@ impl PrimitiveStore {
     /// Returns true if the bounding box needs to be updated.
     pub fn prepare_prim_for_render(&mut self,
                                    prim_index: PrimitiveIndex,
                                    resource_cache: &mut ResourceCache,
                                    gpu_cache: &mut GpuCache,
                                    layer_transform: &LayerToWorldTransform,
                                    device_pixel_ratio: f32,
                                    display_list: &BuiltDisplayList,
-                                   text_run_mode: TextRunMode)
+                                   text_run_mode: TextRunMode,
+                                   render_tasks: &mut RenderTaskTree)
                                    -> &mut PrimitiveMetadata {
         let (prim_kind, cpu_prim_index) = {
             let metadata = &self.cpu_metadata[prim_index.0];
             (metadata.prim_kind, metadata.cpu_prim_index)
         };
 
         // Recurse into any sub primitives and prepare them for rendering first.
         // TODO(gw): This code is a bit hacky to work around the borrow checker.
@@ -1109,17 +1079,18 @@ impl PrimitiveStore {
         if prim_kind == PrimitiveKind::TextShadow {
             for sub_prim_index in self.cpu_text_shadows[cpu_prim_index.0].primitives.clone() {
                 self.prepare_prim_for_render(sub_prim_index,
                                              resource_cache,
                                              gpu_cache,
                                              layer_transform,
                                              device_pixel_ratio,
                                              display_list,
-                                             TextRunMode::Shadow);
+                                             TextRunMode::Shadow,
+                                             render_tasks);
             }
         }
 
         let metadata = &mut self.cpu_metadata[prim_index.0];
 
         if let Some(ref mut clip_info) = metadata.clip_cache_info {
             clip_info.update(&metadata.clips, layer_transform, gpu_cache, device_pixel_ratio);
 
@@ -1143,40 +1114,58 @@ impl PrimitiveStore {
             PrimitiveKind::BoxShadow => {
                 // TODO(gw): Account for zoom factor!
                 // Here, we calculate the size of the patch required in order
                 // to create the box shadow corner. First, scale it by the
                 // device pixel ratio since the cache shader expects vertices
                 // in device space. The shader adds a 1-pixel border around
                 // the patch, in order to prevent bilinear filter artifacts as
                 // the patch is clamped / mirrored across the box shadow rect.
-                let box_shadow_cpu = &self.cpu_box_shadows[cpu_prim_index.0];
-                let edge_size = box_shadow_cpu.edge_size.ceil() * device_pixel_ratio;
+                let box_shadow = &self.cpu_box_shadows[cpu_prim_index.0];
+                let edge_size = box_shadow.edge_size.ceil() * device_pixel_ratio;
                 let edge_size = edge_size as i32 + 2;   // Account for bilinear filtering
                 let cache_size = DeviceIntSize::new(edge_size, edge_size);
-                let location = RenderTaskLocation::Dynamic(None, cache_size);
-                metadata.render_task.as_mut().unwrap().location = location;
+
+                let cache_key = BoxShadowPrimitiveCacheKey {
+                    blur_radius: Au::from_f32_px(box_shadow.blur_radius),
+                    border_radius: Au::from_f32_px(box_shadow.border_radius),
+                    inverted: box_shadow.inverted != 0.0,
+                    shadow_rect_size: Size2D::new(Au::from_f32_px(box_shadow.bs_rect.size.width),
+                                                  Au::from_f32_px(box_shadow.bs_rect.size.height)),
+                };
+
+                // Create a render task for this box shadow primitive. This renders a small
+                // portion of the box shadow to a render target. That portion is then
+                // stretched over the actual primitive rect by the box shadow primitive
+                // shader, to reduce the number of pixels that the expensive box
+                // shadow shader needs to run on.
+                // TODO(gw): In the future, we can probably merge the box shadow
+                // primitive (stretch) shader with the generic cached primitive shader.
+                let render_task = RenderTask::new_box_shadow(cache_key, cache_size, prim_index);
+                let render_task_id = render_tasks.add(render_task);
+
+                metadata.render_task_id = Some(render_task_id);
             }
             PrimitiveKind::TextShadow => {
                 let shadow = &mut self.cpu_text_shadows[cpu_prim_index.0];
 
                 // This is a text-shadow element. Create a render task that will
                 // render the text run to a target, and then apply a gaussian
                 // blur to that text run in order to build the actual primitive
                 // which will be blitted to the framebuffer.
                 let cache_width = (metadata.local_rect.size.width * device_pixel_ratio).ceil() as i32;
                 let cache_height = (metadata.local_rect.size.height * device_pixel_ratio).ceil() as i32;
                 let cache_size = DeviceIntSize::new(cache_width, cache_height);
-                let cache_key = PrimitiveCacheKey::TextShadow(prim_index);
                 let blur_radius = device_length(shadow.shadow.blur_radius,
                                                 device_pixel_ratio);
-                metadata.render_task = Some(RenderTask::new_blur(cache_key,
-                                                                 cache_size,
-                                                                 blur_radius,
-                                                                 prim_index));
+                let render_task = RenderTask::new_blur(cache_size,
+                                                       blur_radius,
+                                                       prim_index,
+                                                       render_tasks);
+                metadata.render_task_id = Some(render_tasks.add(render_task));
             }
             PrimitiveKind::TextRun => {
                 let text = &mut self.cpu_text_runs[cpu_prim_index.0];
                 text.prepare_for_render(resource_cache,
                                         device_pixel_ratio,
                                         display_list,
                                         text_run_mode,
                                         gpu_cache);
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -1,54 +1,117 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use gpu_cache::GpuCacheHandle;
 use internal_types::HardwareCompositeOp;
 use mask_cache::MaskCacheInfo;
-use prim_store::{PrimitiveCacheKey, PrimitiveIndex};
+use prim_store::{BoxShadowPrimitiveCacheKey, PrimitiveIndex};
 use std::{cmp, f32, i32, mem, usize};
 use tiling::{ClipScrollGroupIndex, PackedLayerIndex, RenderPass, RenderTargetIndex};
 use tiling::{RenderTargetKind, StackingContextIndex};
 use api::{ClipId, DeviceIntLength, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::{FilterOp, MixBlendMode};
 
 const FLOATS_PER_RENDER_TASK_INFO: usize = 12;
 
-#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
-pub struct RenderTaskIndex(pub usize);
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] // Handle to a render task: its index in RenderTaskTree::tasks.
+pub struct RenderTaskId(pub u32);       // TODO(gw): Make private when using GPU cache!
+
+#[derive(Debug, Copy, Clone)] // Value returned by RenderTaskTree::get_task_address.
+pub struct RenderTaskAddress(pub u32); // Carries a RenderTaskId's u32: the alias target for Alias tasks, else the task's own id.
+
+#[derive(Debug)] // Owns every render task plus the per-task data produced by build().
+pub struct RenderTaskTree {
+    pub tasks: Vec<RenderTask>, // Indexed by RenderTaskId.0; add() appends here.
+    pub task_data: Vec<RenderTaskData>, // build() pushes one entry per task, in task order.
+}
+
+impl RenderTaskTree { // Tasks live in one flat Vec; parent/child links are RenderTaskId values.
+    pub fn new() -> RenderTaskTree { // Starts with no tasks and no task data.
+        RenderTaskTree {
+            tasks: Vec::new(),
+            task_data: Vec::new(),
+        }
+    }
+
+    pub fn add(&mut self, task: RenderTask) -> RenderTaskId { // Appends `task` and returns its id.
+        let id = RenderTaskId(self.tasks.len() as u32); // The id is the slot the task is about to occupy.
+        self.tasks.push(task);
+        id
+    }
+
+    pub fn max_depth(&self, id: RenderTaskId, depth: usize, max_depth: &mut usize) { // Raises *max_depth to the deepest level reached from `id`.
+        let depth = depth + 1; // `depth` counts the levels above this task, so `id` itself is at depth + 1.
+        *max_depth = cmp::max(*max_depth, depth);
+        let task = &self.tasks[id.0 as usize];
+        for child in &task.children {
+            self.max_depth(*child, depth, max_depth);
+        }
+    }
+
+    pub fn assign_to_passes(&self, id: RenderTaskId, pass_index: usize, passes: &mut Vec<RenderPass>) { // Adds `id` to passes[pass_index], children first.
+        let task = &self.tasks[id.0 as usize];
+
+        for child in &task.children {
+            self.assign_to_passes(*child,
+                                  pass_index - 1, // Child goes one pass earlier. NOTE(review): usize subtraction; assumes pass_index > 0 here - confirm.
+                                  passes);
+        }
+
+        // Sanity check - can be relaxed if needed
+        match task.location {
+            RenderTaskLocation::Fixed => {
+                debug_assert!(pass_index == passes.len() - 1); // Fixed-location tasks are expected only in the last pass.
+            }
+            RenderTaskLocation::Dynamic(..) => {
+                debug_assert!(pass_index < passes.len() - 1); // Dynamic-location tasks are expected in an earlier pass.
+            }
+        }
+
+        let pass = &mut passes[pass_index];
+        pass.add_render_task(id);
+    }
+
+    pub fn get(&self, id: RenderTaskId) -> &RenderTask { // Shared access to a task; indexing panics if `id` is out of range.
+        &self.tasks[id.0 as usize]
+    }
+
+    pub fn get_mut(&mut self, id: RenderTaskId) -> &mut RenderTask { // Mutable access to a task; indexing panics if `id` is out of range.
+        &mut self.tasks[id.0 as usize]
+    }
+
+    pub fn get_task_address(&self, id: RenderTaskId) -> RenderTaskAddress { // Maps a task id to the address value for that task.
+        let task = &self.tasks[id.0 as usize];
+        match task.kind {
+            RenderTaskKind::Alias(alias_id) => {
+                RenderTaskAddress(alias_id.0) // An alias resolves to the id stored in the alias.
+            }
+            _ => {
+                RenderTaskAddress(id.0) // Every other kind uses its own id.
+            }
+        }
+    }
+
+    pub fn build(&mut self) { // Appends one RenderTaskData per task, in task order.
+        for task in &mut self.tasks {
+            self.task_data.push(task.write_task_data()); // NOTE(review): task_data is not cleared first, so a second call would append again - confirm build() runs once.
+        }
+    }
+}
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub enum RenderTaskKey {
-    /// Draw this primitive to a cache target.
-    CachePrimitive(PrimitiveCacheKey),
-    /// Draw the alpha mask for a primitive.
-    CacheMask(MaskCacheKey),
-    /// Apply a vertical blur pass of given radius for this primitive.
-    VerticalBlur(i32, PrimitiveIndex),
-    /// Apply a horizontal blur pass of given radius for this primitive.
-    HorizontalBlur(i32, PrimitiveIndex),
-    /// Allocate a block of space in target for framebuffer copy.
-    CopyFramebuffer(StackingContextIndex),
+    /// Draw this box shadow to a cache target.
+    BoxShadow(BoxShadowPrimitiveCacheKey),
+    /// Draw the alpha mask for a shared clip.
+    CacheMask(ClipId),
 }
 
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
-pub enum MaskCacheKey {
-    Primitive(PrimitiveIndex),
-    ClipNode(ClipId),
-}
-
-#[derive(Debug, Copy, Clone, PartialEq)]
-pub enum RenderTaskId {
-    Static(RenderTaskIndex),
-    Dynamic(RenderTaskKey),
-}
-
-
 #[derive(Debug, Clone)]
 pub enum RenderTaskLocation {
     Fixed,
     Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
 }
 
 #[derive(Debug, Clone)]
 pub enum AlphaRenderItem {
@@ -56,17 +119,17 @@ pub enum AlphaRenderItem {
     Blend(StackingContextIndex, RenderTaskId, FilterOp, i32),
     Composite(StackingContextIndex, RenderTaskId, RenderTaskId, MixBlendMode, i32),
     SplitComposite(StackingContextIndex, RenderTaskId, GpuCacheHandle, i32),
     HardwareComposite(StackingContextIndex, RenderTaskId, HardwareCompositeOp, i32),
 }
 
 #[derive(Debug, Clone)]
 pub struct AlphaRenderTask {
-    screen_origin: DeviceIntPoint,
+    pub screen_origin: DeviceIntPoint,
     pub items: Vec<AlphaRenderItem>,
 }
 
 #[derive(Debug, Copy, Clone)]
 #[repr(C)]
 pub enum MaskSegment {
     // This must match the SEGMENT_ values in clip_shared.glsl!
     All = 0,
@@ -94,97 +157,94 @@ pub struct CacheMaskTask {
     pub geometry_kind: MaskGeometryKind,
 }
 
 #[derive(Debug, Clone)]
 pub struct RenderTaskData {
     pub data: [f32; FLOATS_PER_RENDER_TASK_INFO],
 }
 
-impl RenderTaskData {
-    pub fn empty() -> RenderTaskData {
-        RenderTaskData {
-            data: unsafe { mem::uninitialized() }
-        }
-    }
-}
-
 impl Default for RenderTaskData {
     fn default() -> RenderTaskData {
         RenderTaskData {
             data: unsafe { mem::uninitialized() },
         }
     }
 }
 
 #[derive(Debug, Clone)]
 pub enum RenderTaskKind {
     Alpha(AlphaRenderTask),
     CachePrimitive(PrimitiveIndex),
     CacheMask(CacheMaskTask),
-    VerticalBlur(DeviceIntLength, PrimitiveIndex),
-    HorizontalBlur(DeviceIntLength, PrimitiveIndex),
+    VerticalBlur(DeviceIntLength),
+    HorizontalBlur(DeviceIntLength),
     Readback(DeviceIntRect),
+    Alias(RenderTaskId),
 }
 
-// TODO(gw): Consider storing these in a separate array and having
-//           primitives hold indices - this could avoid cloning
-//           when adding them as child tasks to tiles.
 #[derive(Debug, Clone)]
 pub struct RenderTask {
-    pub id: RenderTaskId,
+    pub cache_key: Option<RenderTaskKey>,
     pub location: RenderTaskLocation,
-    pub children: Vec<RenderTask>,
+    pub children: Vec<RenderTaskId>,
     pub kind: RenderTaskKind,
 }
 
 impl RenderTask {
-    pub fn new_alpha_batch(task_index: RenderTaskIndex,
-                           screen_origin: DeviceIntPoint,
+    pub fn new_alpha_batch(screen_origin: DeviceIntPoint,
                            location: RenderTaskLocation) -> RenderTask {
         RenderTask {
-            id: RenderTaskId::Static(task_index),
+            cache_key: None,
             children: Vec::new(),
             location,
             kind: RenderTaskKind::Alpha(AlphaRenderTask {
                 screen_origin,
                 items: Vec::new(),
             }),
         }
     }
 
-    pub fn new_dynamic_alpha_batch(task_index: RenderTaskIndex,
-                                   rect: &DeviceIntRect) -> RenderTask {
+    pub fn new_dynamic_alpha_batch(rect: &DeviceIntRect) -> RenderTask {
         let location = RenderTaskLocation::Dynamic(None, rect.size);
-        Self::new_alpha_batch(task_index, rect.origin, location)
+        Self::new_alpha_batch(rect.origin, location)
     }
 
-    pub fn new_prim_cache(key: PrimitiveCacheKey,
-                          size: DeviceIntSize,
+    pub fn new_prim_cache(size: DeviceIntSize,
                           prim_index: PrimitiveIndex) -> RenderTask {
         RenderTask {
-            id: RenderTaskId::Dynamic(RenderTaskKey::CachePrimitive(key)),
+            cache_key: None,
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, size),
             kind: RenderTaskKind::CachePrimitive(prim_index),
         }
     }
 
-    pub fn new_readback(key: StackingContextIndex,
-                    screen_rect: DeviceIntRect) -> RenderTask {
+    pub fn new_box_shadow(key: BoxShadowPrimitiveCacheKey,
+                          size: DeviceIntSize,
+                          prim_index: PrimitiveIndex) -> RenderTask {
         RenderTask {
-            id: RenderTaskId::Dynamic(RenderTaskKey::CopyFramebuffer(key)),
+            cache_key: Some(RenderTaskKey::BoxShadow(key)),
+            children: Vec::new(),
+            location: RenderTaskLocation::Dynamic(None, size),
+            kind: RenderTaskKind::CachePrimitive(prim_index),
+        }
+    }
+
+    pub fn new_readback(screen_rect: DeviceIntRect) -> RenderTask {
+        RenderTask {
+            cache_key: None,
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, screen_rect.size),
             kind: RenderTaskKind::Readback(screen_rect),
         }
     }
 
-    pub fn new_mask(task_rect: DeviceIntRect,
-                    mask_key: MaskCacheKey,
+    pub fn new_mask(key: Option<ClipId>,
+                    task_rect: DeviceIntRect,
                     raw_clips: &[ClipWorkItem],
                     extra_clip: Option<ClipWorkItem>)
                     -> Option<RenderTask> {
         // Filter out all the clip instances that don't contribute to the result
         let mut inner_rect = Some(task_rect);
         let clips: Vec<_> = raw_clips.iter()
                                      .chain(extra_clip.iter())
                                      .filter(|&&(_, ref clip_info)| {
@@ -217,17 +277,17 @@ impl RenderTask {
                info.image.is_none() &&
                info.complex_clip_range.get_count() == 1 &&
                info.layer_clip_range.get_count() == 0 {
                 geometry_kind = MaskGeometryKind::CornersOnly;
             }
         }
 
         Some(RenderTask {
-            id: RenderTaskId::Dynamic(RenderTaskKey::CacheMask(mask_key)),
+            cache_key: key.map(RenderTaskKey::CacheMask),
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, task_rect.size),
             kind: RenderTaskKind::CacheMask(CacheMaskTask {
                 actual_rect: task_rect,
                 inner_rect: inner_rect.unwrap_or(DeviceIntRect::zero()),
                 clips,
                 geometry_kind,
             }),
@@ -244,71 +304,85 @@ impl RenderTask {
     //           |
     //    VerticalBlurTask: Apply the separable vertical blur to the primitive.
     //           ^
     //           |
     //    HorizontalBlurTask: Apply the separable horizontal blur to the vertical blur.
     //           |
     //           +---- This is stored as the input task to the primitive shader.
     //
-    pub fn new_blur(key: PrimitiveCacheKey,
-                    size: DeviceIntSize,
+    pub fn new_blur(size: DeviceIntSize,
                     blur_radius: DeviceIntLength,
-                    prim_index: PrimitiveIndex) -> RenderTask {
-        let prim_cache_task = RenderTask::new_prim_cache(key,
-                                                         size,
+                    prim_index: PrimitiveIndex,
+                    render_tasks: &mut RenderTaskTree) -> RenderTask {
+        let prim_cache_task = RenderTask::new_prim_cache(size,
                                                          prim_index);
+        // Register the primitive cache task in the tree so that the
+        // vertical blur can refer to it by id.
+        let prim_cache_task_id = render_tasks.add(prim_cache_task);
 
         // The blur target exceeds the primitive size by twice the blur
         // radius in each dimension.
         let blur_target_size = size + DeviceIntSize::new(2 * blur_radius.0,
                                                          2 * blur_radius.0);
 
         let blur_task_v = RenderTask {
-            id: RenderTaskId::Dynamic(RenderTaskKey::VerticalBlur(blur_radius.0, prim_index)),
-            children: vec![prim_cache_task],
+            cache_key: None,
+            children: vec![prim_cache_task_id],
             location: RenderTaskLocation::Dynamic(None, blur_target_size),
-            kind: RenderTaskKind::VerticalBlur(blur_radius, prim_index),
+            kind: RenderTaskKind::VerticalBlur(blur_radius),
         };
 
+        let blur_task_v_id = render_tasks.add(blur_task_v);
+
         let blur_task_h = RenderTask {
-            id: RenderTaskId::Dynamic(RenderTaskKey::HorizontalBlur(blur_radius.0, prim_index)),
-            children: vec![blur_task_v],
+            cache_key: None,
+            children: vec![blur_task_v_id],
             location: RenderTaskLocation::Dynamic(None, blur_target_size),
-            kind: RenderTaskKind::HorizontalBlur(blur_radius, prim_index),
+            kind: RenderTaskKind::HorizontalBlur(blur_radius),
         };
 
+        // Unlike its children, the horizontal blur task is returned
+        // without being added to `render_tasks`.
         blur_task_h
     }
 
-    pub fn as_alpha_batch<'a>(&'a mut self) -> &'a mut AlphaRenderTask {
+    pub fn as_alpha_batch_mut<'a>(&'a mut self) -> &'a mut AlphaRenderTask {
+        // Hands out the mutable `AlphaRenderTask` payload when this task
+        // is `RenderTaskKind::Alpha`; every other kind is treated as a
+        // caller bug and hits `unreachable!()`.
         match self.kind {
             RenderTaskKind::Alpha(ref mut task) => task,
             RenderTaskKind::CachePrimitive(..) |
             RenderTaskKind::CacheMask(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
-            RenderTaskKind::HorizontalBlur(..) => unreachable!(),
+            RenderTaskKind::HorizontalBlur(..) |
+            RenderTaskKind::Alias(..) => unreachable!(),
+        }
+    }
+
+    /// Returns a shared reference to the `AlphaRenderTask` payload.
+    ///
+    /// # Panics
+    ///
+    /// Hits `unreachable!()` if this task's kind is anything other than
+    /// `RenderTaskKind::Alpha`.
+    pub fn as_alpha_batch<'a>(&'a self) -> &'a AlphaRenderTask {
+        match self.kind {
+            RenderTaskKind::Alpha(ref task) => task,
+            RenderTaskKind::CachePrimitive(..) |
+            RenderTaskKind::CacheMask(..) |
+            RenderTaskKind::VerticalBlur(..) |
+            RenderTaskKind::Readback(..) |
+            RenderTaskKind::HorizontalBlur(..) |
+            RenderTaskKind::Alias(..) => unreachable!(),
         }
     }
 
     // Write (up to) 12 floats of data specific to the type
     // of render task that is provided to the GPU shaders
     // via a vertex texture.
     pub fn write_task_data(&self) -> RenderTaskData {
-        let (target_rect, target_index) = self.get_target_rect();
-
         // NOTE: The ordering and layout of these structures are
         //       required to match the GPU structures declared
         //       in prim_shared.glsl. The composite readback path
         //       in submit_batch() in renderer.rs now queries
         //       get_target_rect() instead of indexing this data.
         // TODO(gw): Maybe there's a way to make this stuff a bit
         //           more type-safe. Although, it will always need
         //           to be kept in sync with the GLSL code anyway.
 
         match self.kind {
             RenderTaskKind::Alpha(ref task) => {
+                let (target_rect, target_index) = self.get_target_rect();
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         target_rect.size.width as f32,
                         target_rect.size.height as f32,
                         task.screen_origin.x as f32,
                         task.screen_origin.y as f32,
@@ -317,16 +391,17 @@ impl RenderTask {
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                     ],
                 }
             }
             RenderTaskKind::CachePrimitive(..) => {
+                let (target_rect, target_index) = self.get_target_rect();
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         target_rect.size.width as f32,
                         target_rect.size.height as f32,
                         target_index.0 as f32,
                         0.0,
@@ -335,16 +410,17 @@ impl RenderTask {
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                     ],
                 }
             }
             RenderTaskKind::CacheMask(ref task) => {
+                let (target_rect, target_index) = self.get_target_rect();
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         (target_rect.origin.x + target_rect.size.width) as f32,
                         (target_rect.origin.y + target_rect.size.height) as f32,
                         task.actual_rect.origin.x as f32,
                         task.actual_rect.origin.y as f32,
@@ -352,18 +428,19 @@ impl RenderTask {
                         0.0,
                         task.inner_rect.origin.x as f32,
                         task.inner_rect.origin.y as f32,
                         (task.inner_rect.origin.x + task.inner_rect.size.width) as f32,
                         (task.inner_rect.origin.y + task.inner_rect.size.height) as f32,
                     ],
                 }
             }
-            RenderTaskKind::VerticalBlur(blur_radius, _) |
-            RenderTaskKind::HorizontalBlur(blur_radius, _) => {
+            RenderTaskKind::VerticalBlur(blur_radius) |
+            RenderTaskKind::HorizontalBlur(blur_radius) => {
+                let (target_rect, target_index) = self.get_target_rect();
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         target_rect.size.width as f32,
                         target_rect.size.height as f32,
                         target_index.0 as f32,
                         blur_radius.0 as f32,
@@ -372,16 +449,17 @@ impl RenderTask {
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                     ]
                 }
             }
             RenderTaskKind::Readback(..) => {
+                let (target_rect, target_index) = self.get_target_rect();
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         target_rect.size.width as f32,
                         target_rect.size.height as f32,
                         target_index.0 as f32,
                         0.0,
@@ -389,62 +467,52 @@ impl RenderTask {
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                     ]
                 }
             }
+            RenderTaskKind::Alias(..) => {
+                RenderTaskData {
+                    data: [0.0; 12],
+                }
+            }
         }
     }
 
-    fn get_target_rect(&self) -> (DeviceIntRect, RenderTargetIndex) {
+    pub fn get_target_rect(&self) -> (DeviceIntRect, RenderTargetIndex) {
+        // Returns the task's rect within its render target together with
+        // the index of that target. `Fixed` tasks report a zero rect on
+        // target 0; `Dynamic` tasks must already have an origin and target
+        // index assigned, otherwise the `expect` below panics.
         match self.location {
             RenderTaskLocation::Fixed => {
                 (DeviceIntRect::zero(), RenderTargetIndex(0))
             },
             RenderTaskLocation::Dynamic(origin_and_target_index, size) => {
                 let (origin, target_index) = origin_and_target_index.expect("Should have been allocated by now!");
                 (DeviceIntRect::new(origin, size), target_index)
             }
         }
     }
 
-    pub fn assign_to_passes(mut self, pass_index: usize, passes: &mut Vec<RenderPass>) {
-        for child in self.children.drain(..) {
-            child.assign_to_passes(pass_index - 1,
-                                   passes);
-        }
-
-        // Sanity check - can be relaxed if needed
-        match self.location {
-            RenderTaskLocation::Fixed => {
-                debug_assert!(pass_index == passes.len() - 1);
-            }
-            RenderTaskLocation::Dynamic(..) => {
-                debug_assert!(pass_index < passes.len() - 1);
-            }
-        }
-
-        let pass = &mut passes[pass_index];
-        pass.add_render_task(self);
-    }
-
-    pub fn max_depth(&self, depth: usize, max_depth: &mut usize) {
-        let depth = depth + 1;
-        *max_depth = cmp::max(*max_depth, depth);
-        for child in &self.children {
-            child.max_depth(depth, max_depth);
-        }
-    }
-
     /// Returns which kind of render target this task draws into, based
     /// on its `kind`: `CacheMask` tasks use an alpha target, while the
     /// other drawable kinds listed below use a color target.
     pub fn target_kind(&self) -> RenderTargetKind {
         match self.kind {
             RenderTaskKind::Alpha(..) |
             RenderTaskKind::CachePrimitive(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::HorizontalBlur(..) => RenderTargetKind::Color,
             RenderTaskKind::CacheMask(..) => RenderTargetKind::Alpha,
+            RenderTaskKind::Alias(..) => {
+                panic!("BUG: target_kind() called on invalidated task");
+            }
         }
     }
+
+    /// Replaces this task's kind with `RenderTaskKind::Alias(id)`.
+    ///
+    /// Debug builds assert that the task has a cache key and that it has
+    /// no children (see the TODO below).
+    pub fn set_alias(&mut self, id: RenderTaskId) {
+        debug_assert!(self.cache_key.is_some());
+        // TODO(gw): We can easily handle invalidation of tasks that
+        //           contain children in the future. Since we don't
+        //           have any cases of that yet, just assert to simplify
+        //           the current implementation.
+        debug_assert!(self.children.is_empty());
+        self.kind = RenderTaskKind::Alias(id);
+    }
 }
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -21,32 +21,32 @@ use gpu_cache::{GpuBlockData, GpuCacheUp
 use internal_types::{FastHashMap, CacheTextureId, RendererFrame, ResultMsg, TextureUpdateOp};
 use internal_types::{TextureUpdateList, RenderTargetMode, TextureUpdateSource};
 use internal_types::{ORTHO_NEAR_PLANE, ORTHO_FAR_PLANE, SourceTexture};
 use internal_types::{BatchTextures, TextureSampler};
 use profiler::{Profiler, BackendProfileCounters};
 use profiler::{GpuProfileTag, RendererProfileTimers, RendererProfileCounters};
 use record::ApiRecordingReceiver;
 use render_backend::RenderBackend;
-use render_task::RenderTaskData;
+use render_task::RenderTaskTree;
 use std;
 use std::cmp;
 use std::collections::VecDeque;
 use std::f32;
 use std::marker::PhantomData;
 use std::mem;
 use std::path::PathBuf;
 use std::rc::Rc;
 use std::sync::{Arc, Mutex};
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
 use rayon::ThreadPool;
 use rayon::Configuration as ThreadPoolConfig;
-use tiling::{AlphaBatchKind, BlurCommand, CompositePrimitiveInstance, Frame, PrimitiveBatch, RenderTarget};
+use tiling::{AlphaBatchKind, BlurCommand, Frame, PrimitiveBatch, RenderTarget};
 use tiling::{AlphaRenderTarget, CacheClipInstance, PrimitiveInstance, ColorRenderTarget, RenderTargetKind};
 use time::precise_time_ns;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use util::TransformedRectKind;
 use api::{ColorF, Epoch, PipelineId, RenderApiSender, RenderNotifier};
 use api::{ExternalImageId, ExternalImageType, ImageFormat};
 use api::{DeviceIntRect, DeviceUintRect, DeviceIntPoint, DeviceIntSize, DeviceUintSize};
 use api::{BlobImageRenderer, channel, FontRenderMode};
@@ -722,39 +722,35 @@ fn create_prim_shader(name: &'static str
                               MAX_VERTEX_TEXTURE_WIDTH);
 
     for feature in features {
         prefix.push_str(&format!("#define WR_FEATURE_{}\n", feature));
     }
 
     debug!("PrimShader {}", name);
 
-    let includes = &["prim_shared"];
-
     let vertex_descriptor = match vertex_format {
         VertexFormat::PrimitiveInstances => DESC_PRIM_INSTANCES,
         VertexFormat::Blur => DESC_BLUR,
         VertexFormat::Clip => DESC_CLIP,
     };
 
-    device.create_program_with_prefix(name,
-                                      includes,
-                                      Some(prefix),
-                                      &vertex_descriptor)
+    device.create_program(name,
+                          &prefix,
+                          &vertex_descriptor)
 }
 
 /// Creates the clip shader program `name`.
 ///
 /// Builds a prefix of `#define`s (`WR_MAX_VERTEX_TEXTURE_WIDTH` and
 /// `WR_FEATURE_TRANSFORM`) and asks `device` to create the program with
 /// the `DESC_CLIP` vertex descriptor.
 fn create_clip_shader(name: &'static str, device: &mut Device) -> Result<Program, ShaderError> {
     let prefix = format!("#define WR_MAX_VERTEX_TEXTURE_WIDTH {}\n
-                          #define WR_FEATURE_TRANSFORM",
+                          #define WR_FEATURE_TRANSFORM\n",
                           MAX_VERTEX_TEXTURE_WIDTH);
 
     debug!("ClipShader {}", name);
 
-    let includes = &["prim_shared", "clip_shared"];
-    device.create_program_with_prefix(name, includes, Some(prefix), &DESC_CLIP)
+    device.create_program(name, &prefix, &DESC_CLIP)
 }
 
 /// Vertex data textures holding a frame's layer data and render task
 /// data; `init_frame` fills them and binds them to their samplers.
 struct GpuDataTextures {
     layer_texture: VertexDataTexture,
     render_task_texture: VertexDataTexture,
 }
 
 impl GpuDataTextures {
@@ -762,17 +758,17 @@ impl GpuDataTextures {
         GpuDataTextures {
             layer_texture: VertexDataTexture::new(device),
             render_task_texture: VertexDataTexture::new(device),
         }
     }
 
     /// Initializes both data textures from `frame`'s layer data and
     /// render task data, then binds them to the `Layers` and
     /// `RenderTasks` texture samplers.
     fn init_frame(&mut self, device: &mut Device, frame: &mut Frame) {
         self.layer_texture.init(device, &mut frame.layer_texture_data);
-        self.render_task_texture.init(device, &mut frame.render_task_data);
+        self.render_task_texture.init(device, &mut frame.render_tasks.task_data);
 
         device.bind_texture(TextureSampler::Layers, self.layer_texture.id);
         device.bind_texture(TextureSampler::RenderTasks, self.render_task_texture.id);
     }
 }
 
 #[derive(Clone, Debug, PartialEq)]
 pub enum ReadPixelsFormat {
@@ -1598,31 +1594,16 @@ impl Renderer {
 
     /// Returns `true` when there is a current frame and its
     /// `layers_bouncing_back` collection is non-empty; returns `false`
     /// when there is no current frame.
     pub fn layers_are_bouncing_back(&self) -> bool {
         match self.current_frame {
             None => false,
             Some(ref current_frame) => !current_frame.layers_bouncing_back.is_empty(),
         }
     }
 
-/*
-    fn update_shaders(&mut self) {
-        let update_uniforms = !self.pending_shader_updates.is_empty();
-
-        for path in self.pending_shader_updates.drain(..) {
-            panic!("todo");
-            //self.device.refresh_shader(path);
-        }
-
-        if update_uniforms {
-            self.update_uniform_locations();
-        }
-    }
-*/
-
     /// Drains every pending GPU cache update list into the GPU cache
     /// texture, then processes the deferred resolves for `frame` and
     /// flushes the texture. The work is wrapped in a "gpu cache update"
     /// GPU marker.
     fn update_gpu_cache(&mut self, frame: &mut Frame) {
         let _gm = GpuMarker::new(self.device.rc_gl(), "gpu cache update");
         for update_list in self.pending_gpu_cache_updates.drain(..) {
             self.gpu_cache_texture.update(&mut self.device, &update_list);
         }
         self.update_deferred_resolves(frame);
         self.gpu_cache_texture.flush(&mut self.device);
     }
@@ -1737,31 +1718,31 @@ impl Renderer {
         }
 
         self.profile_counters.vertices.add(6 * data.len());
     }
 
     fn submit_batch(&mut self,
                     batch: &PrimitiveBatch,
                     projection: &Transform3D<f32>,
-                    render_task_data: &[RenderTaskData],
+                    render_tasks: &RenderTaskTree,
                     render_target: Option<(TextureId, i32)>,
                     target_dimensions: DeviceUintSize) {
         let transform_kind = batch.key.flags.transform_kind();
         let needs_clipping = batch.key.flags.needs_clipping();
         debug_assert!(!needs_clipping ||
                       match batch.key.blend_mode {
                           BlendMode::Alpha |
                           BlendMode::PremultipliedAlpha |
                           BlendMode::Subpixel(..) => true,
                           BlendMode::None => false,
                       });
 
         let marker = match batch.key.kind {
-            AlphaBatchKind::Composite => {
+            AlphaBatchKind::Composite { .. } => {
                 self.ps_composite.bind(&mut self.device, projection);
                 GPU_TAG_PRIM_COMPOSITE
             }
             AlphaBatchKind::HardwareComposite => {
                 self.ps_hw_composite.bind(&mut self.device, projection);
                 GPU_TAG_PRIM_HW_COMPOSITE
             }
             AlphaBatchKind::SplitComposite => {
@@ -1840,87 +1821,88 @@ impl Renderer {
             }
             AlphaBatchKind::CacheImage => {
                 self.ps_cache_image.bind(&mut self.device, transform_kind, projection);
                 GPU_TAG_PRIM_CACHE_IMAGE
             }
         };
 
         // Handle special case readback for composites.
-        if batch.key.kind == AlphaBatchKind::Composite {
-            // composites can't be grouped together because
-            // they may overlap and affect each other.
-            debug_assert!(batch.instances.len() == 1);
-            let instance = CompositePrimitiveInstance::from(&batch.instances[0]);
-            let cache_texture = self.texture_resolver.resolve(&SourceTexture::CacheRGBA8);
+        match batch.key.kind {
+            AlphaBatchKind::Composite { task_id, source_id, backdrop_id } => {
+                // composites can't be grouped together because
+                // they may overlap and affect each other.
+                debug_assert!(batch.instances.len() == 1);
+                let cache_texture = self.texture_resolver.resolve(&SourceTexture::CacheRGBA8);
 
-            // TODO(gw): This code branch is all a bit hacky. We rely
-            // on pulling specific values from the render target data
-            // and also cloning the single primitive instance to be
-            // able to pass to draw_instanced_batch(). We should
-            // think about a cleaner way to achieve this!
+                // Before submitting the composite batch, do the
+                // framebuffer readbacks that are needed for each
+                // composite operation in this batch.
+                let cache_texture_dimensions = self.device.get_texture_dimensions(cache_texture);
 
-            // Before submitting the composite batch, do the
-            // framebuffer readbacks that are needed for each
-            // composite operation in this batch.
-            let cache_texture_dimensions = self.device.get_texture_dimensions(cache_texture);
+                let source = render_tasks.get(source_id);
+                let backdrop = render_tasks.get(task_id);
+                let readback = render_tasks.get(backdrop_id);
 
-            let backdrop = &render_task_data[instance.task_index.0 as usize];
-            let readback = &render_task_data[instance.backdrop_task_index.0 as usize];
-            let source = &render_task_data[instance.src_task_index.0 as usize];
+                let (readback_rect, readback_layer) = readback.get_target_rect();
+                let (backdrop_rect, _) = backdrop.get_target_rect();
+                let backdrop_screen_origin = backdrop.as_alpha_batch().screen_origin;
+                let source_screen_origin = source.as_alpha_batch().screen_origin;
 
-            // Bind the FBO to blit the backdrop to.
-            // Called per-instance in case the layer (and therefore FBO)
-            // changes. The device will skip the GL call if the requested
-            // target is already bound.
-            let cache_draw_target = (cache_texture, readback.data[4] as i32);
-            self.device.bind_draw_target(Some(cache_draw_target), Some(cache_texture_dimensions));
+                // Bind the FBO to blit the backdrop to.
+                // Called per-instance in case the layer (and therefore FBO)
+                // changes. The device will skip the GL call if the requested
+                // target is already bound.
+                let cache_draw_target = (cache_texture, readback_layer.0 as i32);
+                self.device.bind_draw_target(Some(cache_draw_target), Some(cache_texture_dimensions));
 
-            let src_x = backdrop.data[0] - backdrop.data[4] + source.data[4];
-            let src_y = backdrop.data[1] - backdrop.data[5] + source.data[5];
+                let src_x = backdrop_rect.origin.x - backdrop_screen_origin.x + source_screen_origin.x;
+                let src_y = backdrop_rect.origin.y - backdrop_screen_origin.y + source_screen_origin.y;
 
-            let dest_x = readback.data[0];
-            let dest_y = readback.data[1];
+                let dest_x = readback_rect.origin.x;
+                let dest_y = readback_rect.origin.y;
 
-            let width = readback.data[2];
-            let height = readback.data[3];
+                let width = readback_rect.size.width;
+                let height = readback_rect.size.height;
 
-            let mut src = DeviceIntRect::new(DeviceIntPoint::new(src_x as i32, src_y as i32),
-                                             DeviceIntSize::new(width as i32, height as i32));
-            let mut dest = DeviceIntRect::new(DeviceIntPoint::new(dest_x as i32, dest_y as i32),
-                                              DeviceIntSize::new(width as i32, height as i32));
+                let mut src = DeviceIntRect::new(DeviceIntPoint::new(src_x as i32, src_y as i32),
+                                                 DeviceIntSize::new(width as i32, height as i32));
+                let mut dest = DeviceIntRect::new(DeviceIntPoint::new(dest_x as i32, dest_y as i32),
+                                                  DeviceIntSize::new(width as i32, height as i32));
 
-            // Need to invert the y coordinates and flip the image vertically when
-            // reading back from the framebuffer.
-            if render_target.is_none() {
-                src.origin.y = target_dimensions.height as i32 - src.size.height - src.origin.y;
-                dest.origin.y += dest.size.height;
-                dest.size.height = -dest.size.height;
+                // Need to invert the y coordinates and flip the image vertically when
+                // reading back from the framebuffer.
+                if render_target.is_none() {
+                    src.origin.y = target_dimensions.height as i32 - src.size.height - src.origin.y;
+                    dest.origin.y += dest.size.height;
+                    dest.size.height = -dest.size.height;
+                }
+
+                self.device.blit_render_target(render_target,
+                                               Some(src),
+                                               dest);
+
+                // Restore draw target to current pass render target + layer.
+                self.device.bind_draw_target(render_target, Some(target_dimensions));
             }
-
-            self.device.blit_render_target(render_target,
-                                           Some(src),
-                                           dest);
-
-            // Restore draw target to current pass render target + layer.
-            self.device.bind_draw_target(render_target, Some(target_dimensions));
+            _ => {}
         }
 
         let _gm = self.gpu_profile.add_marker(marker);
         self.draw_instanced_batch(&batch.instances,
                                   VertexArrayKind::Primitive,
                                   &batch.key.textures);
     }
 
     fn draw_color_target(&mut self,
                          render_target: Option<(TextureId, i32)>,
                          target: &ColorRenderTarget,
                          target_size: DeviceUintSize,
                          clear_color: Option<[f32; 4]>,
-                         render_task_data: &[RenderTaskData],
+                         render_tasks: &RenderTaskTree,
                          projection: &Transform3D<f32>) {
         {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_TARGET);
             self.device.bind_draw_target(render_target, Some(target_size));
             self.device.disable_depth();
             self.device.enable_depth_write();
             self.device.set_blend(false);
             self.device.set_blend_mode_alpha();
@@ -2021,17 +2003,17 @@ impl Renderer {
             // z-buffer efficiency!
             for batch in target.alpha_batcher
                                .batch_list
                                .opaque_batches
                                .iter()
                                .rev() {
                 self.submit_batch(batch,
                                   &projection,
-                                  render_task_data,
+                                  render_tasks,
                                   render_target,
                                   target_size);
             }
 
             self.device.disable_depth_write();
 
             for batch in &target.alpha_batcher.batch_list.alpha_batches {
                 if batch.key.blend_mode != prev_blend_mode {
@@ -2052,17 +2034,17 @@ impl Renderer {
                             self.device.set_blend_mode_subpixel(color);
                         }
                     }
                     prev_blend_mode = batch.key.blend_mode;
                 }
 
                 self.submit_batch(batch,
                                   &projection,
-                                  render_task_data,
+                                  render_tasks,
                                   render_target,
                                   target_size);
             }
 
             self.device.disable_depth();
             self.device.set_blend(false);
         }
     }
@@ -2339,17 +2321,17 @@ impl Renderer {
                 for (target_index, target) in pass.color_targets.targets.iter().enumerate() {
                     let render_target = pass.color_texture_id.map(|texture_id| {
                         (texture_id, target_index as i32)
                     });
                     self.draw_color_target(render_target,
                                            target,
                                            *size,
                                            clear_color,
-                                           &frame.render_task_data,
+                                           &frame.render_tasks,
                                            &projection);
 
                 }
 
                 self.texture_resolver.set_cache_textures(pass.alpha_texture_id, pass.color_texture_id);
 
                 // Return the texture IDs to the pool for next frame.
                 if let Some(texture_id) = pass.color_texture_id.take() {
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -3,38 +3,37 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use border::{BorderCornerInstance, BorderCornerSide};
 use device::TextureId;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle, GpuCacheUpdateList};
 use internal_types::BatchTextures;
 use internal_types::{FastHashMap, SourceTexture};
 use mask_cache::MaskCacheInfo;
-use prim_store::{CLIP_DATA_GPU_BLOCKS, DeferredResolve, PrimitiveCacheKey};
+use prim_store::{CLIP_DATA_GPU_BLOCKS, DeferredResolve};
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
 use profiler::FrameProfileCounters;
-use render_task::{AlphaRenderItem, MaskGeometryKind, MaskSegment, RenderTask, RenderTaskData};
-use render_task::{RenderTaskId, RenderTaskIndex, RenderTaskKey, RenderTaskKind};
-use render_task::RenderTaskLocation;
+use render_task::{AlphaRenderItem, MaskGeometryKind, MaskSegment};
+use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKey, RenderTaskKind};
+use render_task::{RenderTaskLocation, RenderTaskTree};
 use renderer::BlendMode;
 use renderer::ImageBufferKind;
 use resource_cache::ResourceCache;
-use std::{f32, i32, mem, usize};
+use std::{f32, i32, usize};
 use texture_allocator::GuillotineAllocator;
 use util::{TransformedRect, TransformedRectKind};
 use api::{BuiltDisplayList, ClipAndScrollInfo, ClipId, ColorF, DeviceIntPoint, ImageKey};
 use api::{DeviceIntRect, DeviceIntSize, DeviceUintPoint, DeviceUintSize, FontInstance};
 use api::{ExternalImageType, FilterOp, FontRenderMode, ImageRendering, LayerRect};
 use api::{LayerToWorldTransform, MixBlendMode, PipelineId, PropertyBinding, TransformStyle};
 use api::{TileOffset, WorldToLayerTransform, YuvColorSpace, YuvFormat, LayerVector2D};
 
 // Special sentinel value recognized by the shader. It is considered to be
 // a dummy task that doesn't mask out anything.
-const OPAQUE_TASK_INDEX: RenderTaskIndex = RenderTaskIndex(i32::MAX as usize);
-
+const OPAQUE_TASK_ADDRESS: RenderTaskAddress = RenderTaskAddress(i32::MAX as u32);
 
 pub type DisplayListMap = FastHashMap<PipelineId, BuiltDisplayList>;
 
 /// Helper methods used when building alpha batches.
 trait AlphaBatchHelpers {
     /// Chooses the `BlendMode` for a primitive, given whether it needs
     /// blending and its metadata.
     fn get_blend_mode(&self,
                       needs_blending: bool,
                       metadata: &PrimitiveMetadata) -> BlendMode;
 }
@@ -93,86 +92,20 @@ pub enum PrimitiveFlags {
 /// Index of a render target, as returned by
 /// `RenderTask::get_target_rect()`.
 #[derive(Debug, Copy, Clone)]
 pub struct RenderTargetIndex(pub usize);
 
 /// Newtype index identifying a render pass; hashable and comparable so
 /// it can be used as (part of) a map key.
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub struct RenderPassIndex(isize);
 
 /// Pairs a render task with a device-space rect.
 // NOTE(review): presumably `rect` is the area allocated to a dynamically
 // located task within its target; confirm against the users of this type.
 #[derive(Debug)]
 struct DynamicTaskInfo {
-    index: RenderTaskIndex,
+    task_id: RenderTaskId,
     rect: DeviceIntRect,
 }
 
-#[derive(Debug)]
-pub struct RenderTaskCollection {
-    pub render_task_data: Vec<RenderTaskData>,
-    dynamic_tasks: FastHashMap<(RenderTaskKey, RenderPassIndex), DynamicTaskInfo>,
-}
-
-impl RenderTaskCollection {
-    pub fn new(static_render_task_count: usize) -> RenderTaskCollection {
-        RenderTaskCollection {
-            render_task_data: vec![RenderTaskData::empty(); static_render_task_count],
-            dynamic_tasks: FastHashMap::default(),
-        }
-    }
-
-    fn add(&mut self, task: &RenderTask, pass: RenderPassIndex) -> RenderTaskIndex {
-        match task.id {
-            RenderTaskId::Static(index) => {
-                self.render_task_data[index.0] = task.write_task_data();
-                index
-            }
-            RenderTaskId::Dynamic(key) => {
-                let index = RenderTaskIndex(self.render_task_data.len());
-                let key = (key, pass);
-                debug_assert!(!self.dynamic_tasks.contains_key(&key));
-                self.dynamic_tasks.insert(key, DynamicTaskInfo {
-                    index,
-                    rect: match task.location {
-                        RenderTaskLocation::Fixed => panic!("Dynamic tasks should not have fixed locations!"),
-                        RenderTaskLocation::Dynamic(Some((origin, _)), size) => DeviceIntRect::new(origin, size),
-                        RenderTaskLocation::Dynamic(None, _) => panic!("Expect the task to be already allocated here"),
-                    },
-                });
-                self.render_task_data.push(task.write_task_data());
-                index
-            }
-        }
-    }
-
-    fn get_dynamic_allocation(&self, pass_index: RenderPassIndex, key: RenderTaskKey) -> Option<&DeviceIntRect> {
-        let key = (key, pass_index);
-        self.dynamic_tasks.get(&key)
-                          .map(|task| &task.rect)
-    }
-
-    fn get_static_task_index(&self, id: &RenderTaskId) -> RenderTaskIndex {
-        match id {
-            &RenderTaskId::Static(index) => index,
-            &RenderTaskId::Dynamic(..) => panic!("This is a bug - expected a static render task!"),
-        }
-    }
-
-    fn get_task_index(&self, id: &RenderTaskId, pass_index: RenderPassIndex) -> RenderTaskIndex {
-        match id {
-            &RenderTaskId::Static(index) => index,
-            &RenderTaskId::Dynamic(key) => {
-                self.dynamic_tasks[&(key, pass_index)].index
-            }
-        }
-    }
-}
-
-struct AlphaBatchTask {
-    task_id: RenderTaskId,
-    items: Vec<AlphaRenderItem>,
-}
-
 /// Primitive batches for a target, kept in two separate lists: batches
 /// that are drawn with blending (`alpha_batches`) and batches that are
 /// not (`opaque_batches`).
 pub struct BatchList {
     pub alpha_batches: Vec<PrimitiveBatch>,
     pub opaque_batches: Vec<PrimitiveBatch>,
 }
 
 impl BatchList {
     fn new() -> BatchList {
         BatchList {
@@ -202,31 +135,34 @@ impl BatchList {
         };
 
         let mut selected_batch_index = None;
 
         // Composites always get added to their own batch.
         // This is because the result of a composite can affect
         // the input to the next composite. Perhaps we can
         // optimize this in the future.
-        if key.kind != AlphaBatchKind::Composite {
-            'outer: for (batch_index, batch) in batches.iter()
-                                                       .enumerate()
-                                                       .rev()
-                                                       .take(10) {
-                if batch.key.is_compatible_with(key) {
-                    selected_batch_index = Some(batch_index);
-                    break;
-                }
+        match key.kind {
+            AlphaBatchKind::Composite { .. } => {}
+            _ => {
+                'outer: for (batch_index, batch) in batches.iter()
+                                                           .enumerate()
+                                                           .rev()
+                                                           .take(10) {
+                    if batch.key.is_compatible_with(key) {
+                        selected_batch_index = Some(batch_index);
+                        break;
+                    }
 
-                // check for intersections
-                if check_intersections {
-                    for item_rect in &batch.item_rects {
-                        if item_rect.intersects(item_bounding_rect) {
-                            break 'outer;
+                    // check for intersections
+                    if check_intersections {
+                        for item_rect in &batch.item_rects {
+                            if item_rect.intersects(item_bounding_rect) {
+                                break 'outer;
+                            }
                         }
                     }
                 }
             }
         }
 
         if selected_batch_index.is_none() {
             let new_batch = PrimitiveBatch::new(key.clone());
@@ -251,36 +187,36 @@ impl BatchList {
             batch.instances.reverse();
         }
     }
 }
 
 /// Encapsulates the logic of building batches for items that are blended.
 pub struct AlphaBatcher {
     pub batch_list: BatchList,
-    tasks: Vec<AlphaBatchTask>,
+    tasks: Vec<RenderTaskId>,
 }
 
 impl AlphaRenderItem {
     fn add_to_batch(&self,
                     batch_list: &mut BatchList,
                     ctx: &RenderTargetContext,
                     gpu_cache: &mut GpuCache,
-                    render_tasks: &RenderTaskCollection,
-                    child_pass_index: RenderPassIndex,
-                    task_index: RenderTaskIndex,
+                    render_tasks: &RenderTaskTree,
+                    task_id: RenderTaskId,
+                    task_address: RenderTaskAddress,
                     deferred_resolves: &mut Vec<DeferredResolve>) {
         match *self {
             AlphaRenderItem::Blend(stacking_context_index, src_id, filter, z) => {
                 let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
                 let key = AlphaBatchKey::new(AlphaBatchKind::Blend,
                                              AlphaBatchKeyFlags::empty(),
                                              BlendMode::PremultipliedAlpha,
                                              BatchTextures::no_texture());
-                let src_task_index = render_tasks.get_static_task_index(&src_id);
+                let src_task_address = render_tasks.get_task_address(src_id);
 
                 let (filter_mode, amount) = match filter {
                     // TODO: Implement blur filter #1351
                     FilterOp::Blur(..) => (0, 0.0),
                     FilterOp::Contrast(amount) => (1, amount),
                     FilterOp::Grayscale(amount) => (2, amount),
                     FilterOp::HueRotate(angle) => (3, angle),
                     FilterOp::Invert(amount) => (4, amount),
@@ -289,60 +225,60 @@ impl AlphaRenderItem {
                     FilterOp::Brightness(amount) => (7, amount),
                     FilterOp::Opacity(PropertyBinding::Value(amount)) => (8, amount),
                     FilterOp::Opacity(_) => unreachable!(),
                 };
 
                 let amount = (amount * 65535.0).round() as i32;
                 let batch = batch_list.get_suitable_batch(&key, &stacking_context.screen_bounds);
 
-                let instance = CompositePrimitiveInstance::new(task_index,
-                                                               src_task_index,
-                                                               RenderTaskIndex(0),
+                let instance = CompositePrimitiveInstance::new(task_address,
+                                                               src_task_address,
+                                                               RenderTaskAddress(0),
                                                                filter_mode,
                                                                amount,
                                                                z);
 
                 batch.add_instance(PrimitiveInstance::from(instance));
             }
             AlphaRenderItem::HardwareComposite(stacking_context_index, src_id, composite_op, z) => {
                 let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
-                let src_task_index = render_tasks.get_static_task_index(&src_id);
+                let src_task_address = render_tasks.get_task_address(src_id);
                 let key = AlphaBatchKey::new(AlphaBatchKind::HardwareComposite,
                                              AlphaBatchKeyFlags::empty(),
                                              composite_op.to_blend_mode(),
                                              BatchTextures::no_texture());
                 let batch = batch_list.get_suitable_batch(&key, &stacking_context.screen_bounds);
 
-                let instance = CompositePrimitiveInstance::new(task_index,
-                                                               src_task_index,
-                                                               RenderTaskIndex(0),
+                let instance = CompositePrimitiveInstance::new(task_address,
+                                                               src_task_address,
+                                                               RenderTaskAddress(0),
                                                                0,
                                                                0,
                                                                z);
 
                 batch.add_instance(PrimitiveInstance::from(instance));
             }
             AlphaRenderItem::Composite(stacking_context_index,
+                                       source_id,
                                        backdrop_id,
-                                       src_id,
                                        mode,
                                        z) => {
                 let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
-                let key = AlphaBatchKey::new(AlphaBatchKind::Composite,
+                let key = AlphaBatchKey::new(AlphaBatchKind::Composite { task_id, source_id, backdrop_id },
                                              AlphaBatchKeyFlags::empty(),
                                              BlendMode::Alpha,
                                              BatchTextures::no_texture());
                 let batch = batch_list.get_suitable_batch(&key, &stacking_context.screen_bounds);
-                let backdrop_task = render_tasks.get_task_index(&backdrop_id, child_pass_index);
-                let src_task_index = render_tasks.get_static_task_index(&src_id);
+                let backdrop_task_address = render_tasks.get_task_address(backdrop_id);
+                let source_task_address = render_tasks.get_task_address(source_id);
 
-                let instance = CompositePrimitiveInstance::new(task_index,
-                                                               src_task_index,
-                                                               backdrop_task,
+                let instance = CompositePrimitiveInstance::new(task_address,
+                                                               source_task_address,
+                                                               backdrop_task_address,
                                                                mode as u32 as i32,
                                                                0,
                                                                z);
 
                 batch.add_instance(PrimitiveInstance::from(instance));
             }
             AlphaRenderItem::Primitive(clip_scroll_group_index_opt, prim_index, z) => {
                 let prim_metadata = ctx.prim_store.get_metadata(prim_index);
@@ -358,35 +294,30 @@ impl AlphaRenderItem {
                 let mut flags = AlphaBatchKeyFlags::empty();
                 if needs_clipping {
                     flags |= NEEDS_CLIPPING;
                 }
                 if transform_kind == TransformedRectKind::AxisAligned {
                     flags |= AXIS_ALIGNED;
                 }
                 let item_bounding_rect = ctx.prim_store.cpu_bounding_rects[prim_index.0].as_ref().unwrap();
-                let clip_task_index = match prim_metadata.clip_task {
-                    Some(ref clip_task) => {
-                        render_tasks.get_task_index(&clip_task.id, child_pass_index)
-                    }
-                    None => {
-                        OPAQUE_TASK_INDEX
-                    }
-                };
+                let clip_task_address = prim_metadata.clip_task_id.map_or(OPAQUE_TASK_ADDRESS, |id| {
+                    render_tasks.get_task_address(id)
+                });
                 let needs_blending = !prim_metadata.opacity.is_opaque ||
                                      needs_clipping ||
                                      transform_kind == TransformedRectKind::Complex;
                 let blend_mode = ctx.prim_store.get_blend_mode(needs_blending, prim_metadata);
 
                 let prim_cache_address = prim_metadata.gpu_location
                                                       .as_int(gpu_cache);
 
                 let base_instance = SimplePrimitiveInstance::new(prim_cache_address,
-                                                                 task_index,
-                                                                 clip_task_index,
+                                                                 task_address,
+                                                                 clip_task_address,
                                                                  packed_layer_index,
                                                                  z);
 
                 let no_textures = BatchTextures::no_texture();
 
                 match prim_metadata.prim_kind {
                     PrimitiveKind::Border => {
                         let border_cpu = &ctx.prim_store.cpu_borders[prim_metadata.cpu_prim_index.0];
@@ -497,23 +428,22 @@ impl AlphaRenderItem {
 
                             let key = AlphaBatchKey::new(AlphaBatchKind::TextRun, flags, blend_mode, textures);
                             let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
 
                             batch.add_instances(&instances);
                         }
                     }
                     PrimitiveKind::TextShadow => {
-                        let cache_task_id = prim_metadata.render_task.as_ref().expect("no render task!").id;
-                        let cache_task_index = render_tasks.get_task_index(&cache_task_id,
-                                                                           child_pass_index);
+                        let cache_task_id = prim_metadata.render_task_id.expect("no render task!");
+                        let cache_task_address = render_tasks.get_task_address(cache_task_id);
                         let textures = BatchTextures::render_target_cache();
                         let key = AlphaBatchKey::new(AlphaBatchKind::CacheImage, flags, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
-                        batch.add_instance(base_instance.build(0, cache_task_index.0 as i32, 0));
+                        batch.add_instance(base_instance.build(0, cache_task_address.0 as i32, 0));
                     }
                     PrimitiveKind::AlignedGradient => {
                         let gradient_cpu = &ctx.prim_store.cpu_gradients[prim_metadata.cpu_prim_index.0];
                         let key = AlphaBatchKey::new(AlphaBatchKind::AlignedGradient, flags, blend_mode, no_textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
                         for part_index in 0..(gradient_cpu.stops_count - 1) {
                             batch.add_instance(base_instance.build(part_index as i32, 0, 0));
                         }
@@ -588,43 +518,42 @@ impl AlphaRenderItem {
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
 
                         batch.add_instance(base_instance.build(uv_rect_addresses[0],
                                                                uv_rect_addresses[1],
                                                                uv_rect_addresses[2]));
                     }
                     PrimitiveKind::BoxShadow => {
                         let box_shadow = &ctx.prim_store.cpu_box_shadows[prim_metadata.cpu_prim_index.0];
-                        let cache_task_id = &prim_metadata.render_task.as_ref().unwrap().id;
-                        let cache_task_index = render_tasks.get_task_index(cache_task_id,
-                                                                           child_pass_index);
+                        let cache_task_id = prim_metadata.render_task_id.unwrap();
+                        let cache_task_address = render_tasks.get_task_address(cache_task_id);
 
                         let key = AlphaBatchKey::new(AlphaBatchKind::BoxShadow, flags, blend_mode, no_textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
 
                         for rect_index in 0..box_shadow.rects.len() {
                             batch.add_instance(base_instance.build(rect_index as i32,
-                                                                   cache_task_index.0 as i32, 0));
+                                                                   cache_task_address.0 as i32, 0));
                         }
                     }
                 }
             }
             AlphaRenderItem::SplitComposite(sc_index, task_id, gpu_handle, z) => {
                 let key = AlphaBatchKey::new(AlphaBatchKind::SplitComposite,
                                              AlphaBatchKeyFlags::empty(),
                                              BlendMode::PremultipliedAlpha,
                                              BatchTextures::no_texture());
                 let stacking_context = &ctx.stacking_context_store[sc_index.0];
                 let batch = batch_list.get_suitable_batch(&key, &stacking_context.screen_bounds);
-                let source_task = render_tasks.get_task_index(&task_id, child_pass_index);
+                let source_task_address = render_tasks.get_task_address(task_id);
                 let gpu_address = gpu_handle.as_int(gpu_cache);
 
-                let instance = CompositePrimitiveInstance::new(task_index,
-                                                               source_task,
-                                                               RenderTaskIndex(0),
+                let instance = CompositePrimitiveInstance::new(task_address,
+                                                               source_task_address,
+                                                               RenderTaskAddress(0),
                                                                gpu_address,
                                                                0,
                                                                z);
 
                 batch.add_instance(PrimitiveInstance::from(instance));
             }
         }
     }
@@ -633,36 +562,37 @@ impl AlphaRenderItem {
 impl AlphaBatcher {
     fn new() -> AlphaBatcher {
         AlphaBatcher {
             tasks: Vec::new(),
             batch_list: BatchList::new(),
         }
     }
 
-    fn add_task(&mut self, task: AlphaBatchTask) {
-        self.tasks.push(task);
+    fn add_task(&mut self, task_id: RenderTaskId) {
+        self.tasks.push(task_id);
     }
 
     fn build(&mut self,
              ctx: &RenderTargetContext,
              gpu_cache: &mut GpuCache,
-             render_tasks: &RenderTaskCollection,
-             child_pass_index: RenderPassIndex,
+             render_tasks: &RenderTaskTree,
              deferred_resolves: &mut Vec<DeferredResolve>) {
-        for task in &self.tasks {
-            let task_index = render_tasks.get_static_task_index(&task.task_id);
+        for task_id in &self.tasks {
+            let task_id = *task_id;
+            let task = render_tasks.get(task_id).as_alpha_batch();
+            let task_address = render_tasks.get_task_address(task_id);
 
             for item in &task.items {
                 item.add_to_batch(&mut self.batch_list,
                                   ctx,
                                   gpu_cache,
                                   render_tasks,
-                                  child_pass_index,
-                                  task_index,
+                                  task_id,
+                                  task_address,
                                   deferred_resolves);
             }
         }
 
         self.batch_list.finalize();
     }
 
     pub fn is_empty(&self) -> bool {
@@ -688,25 +618,25 @@ impl ClipBatcher {
             rectangles: Vec::new(),
             images: FastHashMap::default(),
             border_clears: Vec::new(),
             borders: Vec::new(),
         }
     }
 
     fn add<'a>(&mut self,
-               task_index: RenderTaskIndex,
+               task_address: RenderTaskAddress,
                clips: &[(PackedLayerIndex, MaskCacheInfo)],
                resource_cache: &ResourceCache,
                gpu_cache: &GpuCache,
                geometry_kind: MaskGeometryKind) {
 
         for &(packed_layer_index, ref info) in clips.iter() {
             let instance = CacheClipInstance {
-                render_task_index: task_index.0 as i32,
+                render_task_address: task_address.0 as i32,
                 layer_index: packed_layer_index.0 as i32,
                 segment: 0,
                 clip_data_address: GpuCacheAddress::invalid(),
                 resource_address: GpuCacheAddress::invalid(),
             };
 
             if !info.complex_clip_range.is_empty() {
                 let base_gpu_address = gpu_cache.get_address(&info.complex_clip_range.location);
@@ -841,25 +771,23 @@ impl TextureAllocator {
 }
 
 pub trait RenderTarget {
     fn new(size: DeviceUintSize) -> Self;
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint>;
     fn build(&mut self,
              _ctx: &RenderTargetContext,
              _gpu_cache: &mut GpuCache,
-             _render_tasks: &mut RenderTaskCollection,
-             _child_pass_index: RenderPassIndex,
+             _render_tasks: &mut RenderTaskTree,
              _deferred_resolves: &mut Vec<DeferredResolve>) {}
     fn add_task(&mut self,
-                task: RenderTask,
+                task_id: RenderTaskId,
                 ctx: &RenderTargetContext,
                 gpu_cache: &GpuCache,
-                render_tasks: &RenderTaskCollection,
-                pass_index: RenderPassIndex);
+                render_tasks: &RenderTaskTree);
     fn used_rect(&self) -> DeviceIntRect;
 }
 
 #[derive(Debug, Copy, Clone)]
 pub enum RenderTargetKind {
     Color,   // RGBA32
     Alpha,   // R8
 }
@@ -884,32 +812,29 @@ impl<T: RenderTarget> RenderTargetList<T
 
     pub fn target_count(&self) -> usize {
         self.targets.len()
     }
 
     fn build(&mut self,
              ctx: &RenderTargetContext,
              gpu_cache: &mut GpuCache,
-             render_tasks: &mut RenderTaskCollection,
-             pass_index: RenderPassIndex,
+             render_tasks: &mut RenderTaskTree,
              deferred_resolves: &mut Vec<DeferredResolve>) {
         for target in &mut self.targets {
-            let child_pass_index = RenderPassIndex(pass_index.0 - 1);
-            target.build(ctx, gpu_cache, render_tasks, child_pass_index, deferred_resolves);
+            target.build(ctx, gpu_cache, render_tasks, deferred_resolves);
         }
     }
 
     fn add_task(&mut self,
-                task: RenderTask,
+                task_id: RenderTaskId,
                 ctx: &RenderTargetContext,
                 gpu_cache: &GpuCache,
-                render_tasks: &mut RenderTaskCollection,
-                pass_index: RenderPassIndex) {
-        self.targets.last_mut().unwrap().add_task(task, ctx, gpu_cache, render_tasks, pass_index);
+                render_tasks: &mut RenderTaskTree) {
+        self.targets.last_mut().unwrap().add_task(task_id, ctx, gpu_cache, render_tasks);
     }
 
     fn allocate(&mut self, alloc_size: DeviceUintSize) -> (DeviceUintPoint, RenderTargetIndex) {
         let existing_origin = self.targets
                                   .last_mut()
                                   .and_then(|target| target.allocate(alloc_size));
 
         let origin = match existing_origin {
@@ -970,93 +895,84 @@ impl RenderTarget for ColorRenderTarget 
 
     fn used_rect(&self) -> DeviceIntRect {
         self.allocator.used_rect
     }
 
     fn build(&mut self,
              ctx: &RenderTargetContext,
              gpu_cache: &mut GpuCache,
-             render_tasks: &mut RenderTaskCollection,
-             child_pass_index: RenderPassIndex,
+             render_tasks: &mut RenderTaskTree,
              deferred_resolves: &mut Vec<DeferredResolve>) {
         self.alpha_batcher.build(ctx,
                                  gpu_cache,
                                  render_tasks,
-                                 child_pass_index,
                                  deferred_resolves);
     }
 
     fn add_task(&mut self,
-                task: RenderTask,
+                task_id: RenderTaskId,
                 ctx: &RenderTargetContext,
                 gpu_cache: &GpuCache,
-                render_tasks: &RenderTaskCollection,
-                pass_index: RenderPassIndex) {
+                render_tasks: &RenderTaskTree) {
+        let task = render_tasks.get(task_id);
+
         match task.kind {
-            RenderTaskKind::Alpha(mut info) => {
-                self.alpha_batcher.add_task(AlphaBatchTask {
-                    task_id: task.id,
-                    items: mem::replace(&mut info.items, Vec::new()),
-                });
+            RenderTaskKind::Alias(..) => {
+                panic!("BUG: add_task() called on invalidated task");
             }
-            RenderTaskKind::VerticalBlur(_, prim_index) => {
+            RenderTaskKind::Alpha(..) => {
+                self.alpha_batcher.add_task(task_id);
+            }
+            RenderTaskKind::VerticalBlur(..) => {
                 // Find the child render task that we are applying
                 // a vertical blur on.
-                // TODO(gw): Consider a simpler way for render tasks to find
-                //           their child tasks than having to construct the
-                //           correct id here.
-                let child_pass_index = RenderPassIndex(pass_index.0 - 1);
-                let task_key = RenderTaskKey::CachePrimitive(PrimitiveCacheKey::TextShadow(prim_index));
-                let src_id = RenderTaskId::Dynamic(task_key);
                 self.vertical_blurs.push(BlurCommand {
-                    task_id: render_tasks.get_task_index(&task.id, pass_index).0 as i32,
-                    src_task_id: render_tasks.get_task_index(&src_id, child_pass_index).0 as i32,
+                    task_id: task_id.0 as i32,
+                    src_task_id: task.children[0].0 as i32,
                     blur_direction: BlurDirection::Vertical as i32,
                 });
             }
-            RenderTaskKind::HorizontalBlur(blur_radius, prim_index) => {
+            RenderTaskKind::HorizontalBlur(..) => {
                 // Find the child render task that we are applying
                 // a horizontal blur on.
-                let child_pass_index = RenderPassIndex(pass_index.0 - 1);
-                let src_id = RenderTaskId::Dynamic(RenderTaskKey::VerticalBlur(blur_radius.0, prim_index));
                 self.horizontal_blurs.push(BlurCommand {
-                    task_id: render_tasks.get_task_index(&task.id, pass_index).0 as i32,
-                    src_task_id: render_tasks.get_task_index(&src_id, child_pass_index).0 as i32,
+                    task_id: task_id.0 as i32,
+                    src_task_id: task.children[0].0 as i32,
                     blur_direction: BlurDirection::Horizontal as i32,
                 });
             }
             RenderTaskKind::CachePrimitive(prim_index) => {
                 let prim_metadata = ctx.prim_store.get_metadata(prim_index);
 
                 let prim_address = prim_metadata.gpu_location.as_int(gpu_cache);
 
                 match prim_metadata.prim_kind {
                     PrimitiveKind::BoxShadow => {
                         let instance = SimplePrimitiveInstance::new(prim_address,
-                                                                    render_tasks.get_task_index(&task.id, pass_index),
-                                                                    RenderTaskIndex(0),
+                                                                    render_tasks.get_task_address(task_id),
+                                                                    RenderTaskAddress(0),
                                                                     PackedLayerIndex(0),
                                                                     0);     // z is disabled for rendering cache primitives
                         self.box_shadow_cache_prims.push(instance.build(0, 0, 0));
                     }
                     PrimitiveKind::TextShadow => {
                         let prim = &ctx.prim_store.cpu_text_shadows[prim_metadata.cpu_prim_index.0];
 
                         // todo(gw): avoid / recycle this allocation...
                         let mut instances = Vec::new();
 
-                        let task_index = render_tasks.get_task_index(&task.id, pass_index);
+                        let task_index = render_tasks.get_task_address(task_id);
 
                         for sub_prim_index in &prim.primitives {
                             let sub_metadata = ctx.prim_store.get_metadata(*sub_prim_index);
                             let sub_prim_address = sub_metadata.gpu_location.as_int(gpu_cache);
                             let instance = SimplePrimitiveInstance::new(sub_prim_address,
                                                                         task_index,
-                                                                        RenderTaskIndex(0),
+                                                                        RenderTaskAddress(0),
                                                                         PackedLayerIndex(0),
                                                                         0);     // z is disabled for rendering cache primitives
 
                             match sub_metadata.prim_kind {
                                 PrimitiveKind::TextRun => {
                                     // Add instances that reference the text run GPU location. Also supply
                                     // the parent text-shadow prim address as a user data field, allowing
                                     // the shader to fetch the text-shadow parameters.
@@ -1135,91 +1051,74 @@ impl RenderTarget for AlphaRenderTarget 
         }
     }
 
     fn used_rect(&self) -> DeviceIntRect {
         self.allocator.used_rect
     }
 
     fn add_task(&mut self,
-                task: RenderTask,
+                task_id: RenderTaskId,
                 ctx: &RenderTargetContext,
                 gpu_cache: &GpuCache,
-                render_tasks: &RenderTaskCollection,
-                pass_index: RenderPassIndex) {
+                render_tasks: &RenderTaskTree) {
+        let task = render_tasks.get(task_id);
         match task.kind {
+            RenderTaskKind::Alias(..) => {
+                panic!("BUG: add_task() called on invalidated task");
+            }
             RenderTaskKind::Alpha(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::HorizontalBlur(..) |
             RenderTaskKind::CachePrimitive(..) |
             RenderTaskKind::Readback(..) => {
                 panic!("Should not be added to alpha target!");
             }
             RenderTaskKind::CacheMask(ref task_info) => {
-                let task_index = render_tasks.get_task_index(&task.id, pass_index);
-                self.clip_batcher.add(task_index,
+                let task_address = render_tasks.get_task_address(task_id);
+                self.clip_batcher.add(task_address,
                                       &task_info.clips,
                                       &ctx.resource_cache,
                                       gpu_cache,
                                       task_info.geometry_kind);
             }
         }
     }
 }
 
 /// A render pass represents a set of rendering operations that don't depend on one
 /// another.
 ///
 /// A render pass can have several render targets if there wasn't enough space in one
 /// target to do all of the rendering for that pass.
 pub struct RenderPass {
-    pass_index: RenderPassIndex,
     pub is_framebuffer: bool,
-    tasks: Vec<RenderTask>,
+    tasks: Vec<RenderTaskId>,
     pub color_targets: RenderTargetList<ColorRenderTarget>,
     pub alpha_targets: RenderTargetList<AlphaRenderTarget>,
     pub color_texture_id: Option<TextureId>,
     pub alpha_texture_id: Option<TextureId>,
+    dynamic_tasks: FastHashMap<RenderTaskKey, DynamicTaskInfo>,
 }
 
 impl RenderPass {
-    pub fn new(pass_index: isize, is_framebuffer: bool, size: DeviceUintSize) -> RenderPass {
+    pub fn new(is_framebuffer: bool, size: DeviceUintSize) -> RenderPass {
         RenderPass {
-            pass_index: RenderPassIndex(pass_index),
             is_framebuffer,
             color_targets: RenderTargetList::new(size, is_framebuffer),
             alpha_targets: RenderTargetList::new(size, false),
             tasks: vec![],
             color_texture_id: None,
             alpha_texture_id: None,
+            dynamic_tasks: FastHashMap::default(),
         }
     }
 
-    pub fn add_render_task(&mut self, task: RenderTask) {
-        self.tasks.push(task);
-    }
-
-    fn add_task(&mut self,
-                task: RenderTask,
-                ctx: &RenderTargetContext,
-                gpu_cache: &GpuCache,
-                render_tasks: &mut RenderTaskCollection) {
-        match task.target_kind() {
-            RenderTargetKind::Color => self.color_targets.add_task(task, ctx, gpu_cache, render_tasks, self.pass_index),
-            RenderTargetKind::Alpha => self.alpha_targets.add_task(task, ctx, gpu_cache, render_tasks, self.pass_index),
-        }
-    }
-
-    fn allocate_target(&mut self,
-                       kind: RenderTargetKind,
-                       alloc_size: DeviceUintSize) -> (DeviceUintPoint, RenderTargetIndex) {
-        match kind {
-            RenderTargetKind::Color => self.color_targets.allocate(alloc_size),
-            RenderTargetKind::Alpha => self.alpha_targets.allocate(alloc_size),
-        }
+    pub fn add_render_task(&mut self, task_id: RenderTaskId) {
+        self.tasks.push(task_id);
     }
 
     pub fn needs_render_target_kind(&self, kind: RenderTargetKind) -> bool {
         if self.is_framebuffer {
             false
         } else {
             self.required_target_count(kind) > 0
         }
@@ -1231,67 +1130,90 @@ impl RenderPass {
             RenderTargetKind::Color => self.color_targets.target_count(),
             RenderTargetKind::Alpha => self.alpha_targets.target_count(),
         }
     }
 
     pub fn build(&mut self,
                  ctx: &RenderTargetContext,
                  gpu_cache: &mut GpuCache,
-                 render_tasks: &mut RenderTaskCollection,
+                 render_tasks: &mut RenderTaskTree,
                  deferred_resolves: &mut Vec<DeferredResolve>) {
         profile_scope!("RenderPass::build");
 
         // Step through each task, adding to batches as appropriate.
-        let tasks = mem::replace(&mut self.tasks, Vec::new());
-        for mut task in tasks {
-            let target_kind = task.target_kind();
+        for task_id in &self.tasks {
+            let task_id = *task_id;
+
+            let target_kind = {
+                let task = render_tasks.get_mut(task_id);
+                let target_kind = task.target_kind();
 
-            // Find a target to assign this task to, or create a new
-            // one if required.
-            match task.location {
-                RenderTaskLocation::Fixed => {}
-                RenderTaskLocation::Dynamic(ref mut origin, ref size) => {
-                    // See if this task is a duplicate.
-                    // If so, just skip adding it!
-                    match task.id {
-                        RenderTaskId::Static(..) => {}
-                        RenderTaskId::Dynamic(key) => {
-                            // Look up cache primitive key in the render
-                            // task data array. If a matching key exists
-                            // (that is in this pass) there is no need
-                            // to draw it again!
-                            if let Some(rect) = render_tasks.get_dynamic_allocation(self.pass_index, key) {
-                                debug_assert_eq!(rect.size, *size);
+                // Find a target to assign this task to, or create a new
+                // one if required.
+                match task.location {
+                    RenderTaskLocation::Fixed => {}
+                    RenderTaskLocation::Dynamic(_, size) => {
+                        if let Some(cache_key) = task.cache_key {
+                            // See if this task is a duplicate.
+                            // If so, just skip adding it!
+                            if let Some(task_info) = self.dynamic_tasks.get(&cache_key) {
+                                task.set_alias(task_info.task_id);
+                                debug_assert_eq!(task_info.rect.size, size);
                                 continue;
                             }
                         }
-                    }
 
-                    let alloc_size = DeviceUintSize::new(size.width as u32, size.height as u32);
-                    let (alloc_origin, target_index) = self.allocate_target(target_kind, alloc_size);
+                        let alloc_size = DeviceUintSize::new(size.width as u32, size.height as u32);
+                        let (alloc_origin, target_index) = match target_kind {
+                            RenderTargetKind::Color => self.color_targets.allocate(alloc_size),
+                            RenderTargetKind::Alpha => self.alpha_targets.allocate(alloc_size),
+                        };
+
+                        let origin = Some((DeviceIntPoint::new(alloc_origin.x as i32,
+                                                               alloc_origin.y as i32),
+                                           target_index));
+                        task.location = RenderTaskLocation::Dynamic(origin, size);
 
-                    *origin = Some((DeviceIntPoint::new(alloc_origin.x as i32,
-                                                        alloc_origin.y as i32),
-                                    target_index));
+                        // If this task is cacheable / sharable, store it in the task hash
+                        // for this pass.
+                        if let Some(cache_key) = task.cache_key {
+                            self.dynamic_tasks.insert(cache_key, DynamicTaskInfo {
+                                task_id,
+                                rect: match task.location {
+                                    RenderTaskLocation::Fixed => panic!("Dynamic tasks should not have fixed locations!"),
+                                    RenderTaskLocation::Dynamic(Some((origin, _)), size) => DeviceIntRect::new(origin, size),
+                                    RenderTaskLocation::Dynamic(None, _) => panic!("Expect the task to be already allocated here"),
+                                },
+                            });
+                        }
+                    }
                 }
-            }
+
+                target_kind
+            };
 
-            render_tasks.add(&task, self.pass_index);
-            self.add_task(task, ctx, gpu_cache, render_tasks);
+            match target_kind {
+                RenderTargetKind::Color => self.color_targets.add_task(task_id, ctx, gpu_cache, render_tasks),
+                RenderTargetKind::Alpha => self.alpha_targets.add_task(task_id, ctx, gpu_cache, render_tasks),
+            }
         }
 
-        self.color_targets.build(ctx, gpu_cache, render_tasks, self.pass_index, deferred_resolves);
-        self.alpha_targets.build(ctx, gpu_cache, render_tasks, self.pass_index, deferred_resolves);
+        self.color_targets.build(ctx, gpu_cache, render_tasks, deferred_resolves);
+        self.alpha_targets.build(ctx, gpu_cache, render_tasks, deferred_resolves);
     }
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum AlphaBatchKind {
-    Composite,
+    Composite {
+        task_id: RenderTaskId,
+        source_id: RenderTaskId,
+        backdrop_id: RenderTaskId,
+    },
     HardwareComposite,
     SplitComposite,
     Blend,
     Rectangle,
     TextRun,
     Image(ImageBufferKind),
     YuvImage(ImageBufferKind, YuvFormat, YuvColorSpace),
     AlignedGradient,
@@ -1377,17 +1299,17 @@ pub struct BlurCommand {
 }
 
 /// A clipping primitive drawn into the clipping mask.
 /// Could be an image or a rectangle, which defines the
 /// way `address` is treated.
 #[derive(Clone, Copy, Debug)]
 #[repr(C)]
 pub struct CacheClipInstance {
-    render_task_index: i32,
+    render_task_address: i32,
     layer_index: i32,
     segment: i32,
     clip_data_address: GpuCacheAddress,
     resource_address: GpuCacheAddress,
 }
 
 // 32 bytes per instance should be enough for anyone!
 #[derive(Debug, Clone)]
@@ -1406,18 +1328,18 @@ struct SimplePrimitiveInstance {
     pub task_index: i32,
     pub clip_task_index: i32,
     pub layer_index: i32,
     pub z_sort_index: i32,
 }
 
 impl SimplePrimitiveInstance {
     fn new(specific_prim_address: i32,
-           task_index: RenderTaskIndex,
-           clip_task_index: RenderTaskIndex,
+           task_index: RenderTaskAddress,
+           clip_task_index: RenderTaskAddress,
            layer_index: PackedLayerIndex,
            z_sort_index: i32) -> SimplePrimitiveInstance {
         SimplePrimitiveInstance {
             specific_prim_address,
             task_index: task_index.0 as i32,
             clip_task_index: clip_task_index.0 as i32,
             layer_index: layer_index.0 as i32,
             z_sort_index,
@@ -1436,72 +1358,59 @@ impl SimplePrimitiveInstance {
                 data1,
                 data2,
             ]
         }
     }
 }
 
 pub struct CompositePrimitiveInstance {
-    pub task_index: RenderTaskIndex,
-    pub src_task_index: RenderTaskIndex,
-    pub backdrop_task_index: RenderTaskIndex,
+    pub task_address: RenderTaskAddress,
+    pub src_task_address: RenderTaskAddress,
+    pub backdrop_task_address: RenderTaskAddress,
     pub data0: i32,
     pub data1: i32,
     pub z: i32,
 }
 
 impl CompositePrimitiveInstance {
-    fn new(task_index: RenderTaskIndex,
-           src_task_index: RenderTaskIndex,
-           backdrop_task_index: RenderTaskIndex,
+    fn new(task_address: RenderTaskAddress,
+           src_task_address: RenderTaskAddress,
+           backdrop_task_address: RenderTaskAddress,
            data0: i32,
            data1: i32,
            z: i32) -> CompositePrimitiveInstance {
         CompositePrimitiveInstance {
-            task_index,
-            src_task_index,
-            backdrop_task_index,
+            task_address,
+            src_task_address,
+            backdrop_task_address,
             data0,
             data1,
             z,
         }
     }
 }
 
 impl From<CompositePrimitiveInstance> for PrimitiveInstance {
     fn from(instance: CompositePrimitiveInstance) -> PrimitiveInstance {
         PrimitiveInstance {
             data: [
-                instance.task_index.0 as i32,
-                instance.src_task_index.0 as i32,
-                instance.backdrop_task_index.0 as i32,
+                instance.task_address.0 as i32,
+                instance.src_task_address.0 as i32,
+                instance.backdrop_task_address.0 as i32,
                 instance.z,
                 instance.data0,
                 instance.data1,
                 0,
                 0,
             ]
         }
     }
 }
 
-impl<'a> From<&'a PrimitiveInstance> for CompositePrimitiveInstance {
-    fn from(instance: &'a PrimitiveInstance) -> CompositePrimitiveInstance {
-        CompositePrimitiveInstance {
-            task_index: RenderTaskIndex(instance.data[0] as usize),
-            src_task_index: RenderTaskIndex(instance.data[1] as usize),
-            backdrop_task_index: RenderTaskIndex(instance.data[2] as usize),
-            z: instance.data[3],
-            data0: instance.data[4],
-            data1: instance.data[5],
-        }
-    }
-}
-
 #[derive(Debug)]
 pub struct PrimitiveBatch {
     pub key: AlphaBatchKey,
     pub instances: Vec<PrimitiveInstance>,
     pub item_rects: Vec<DeviceIntRect>,
 }
 
 impl PrimitiveBatch {
@@ -1703,17 +1612,18 @@ pub struct Frame {
     pub window_size: DeviceUintSize,
     pub background_color: Option<ColorF>,
     pub device_pixel_ratio: f32,
     pub cache_size: DeviceUintSize,
     pub passes: Vec<RenderPass>,
     pub profile_counters: FrameProfileCounters,
 
     pub layer_texture_data: Vec<PackedLayer>,
-    pub render_task_data: Vec<RenderTaskData>,
+
+    pub render_tasks: RenderTaskTree,
 
     // List of updates that need to be pushed to the
     // gpu resource cache.
     pub gpu_cache_updates: Option<GpuCacheUpdateList>,
 
     // List of textures that we don't know about yet
     // from the backend thread. The render thread
     // will use a callback to resolve these and
--- a/gfx/webrender/tests/angle_shader_validation.rs
+++ b/gfx/webrender/tests/angle_shader_validation.rs
@@ -1,85 +1,141 @@
 extern crate angle;
-#[macro_use]
-extern crate lazy_static;
 extern crate webrender;
 
 use angle::hl::{BuiltInResources, Output, ShaderSpec, ShaderValidator};
 
-include!(concat!(env!("OUT_DIR"), "/shaders.rs"));
-
-
 // from glslang
 const FRAGMENT_SHADER: u32 = 0x8B30;
 const VERTEX_SHADER: u32 = 0x8B31;
 
+struct Shader {
+    name: &'static str,
+    features: &'static [&'static str],
+}
+
+const SHADER_PREFIX: &str = "#define WR_MAX_VERTEX_TEXTURE_WIDTH 1024\n";
+
+const CLIP_FEATURES: &[&str] = &["TRANSFORM"];
+const CACHE_FEATURES: &[&str] = &[""];
+const PRIM_FEATURES: &[&str] = &["", "TRANSFORM"];
+
+const SHADERS: &[Shader] = &[
+    // Clip mask shaders
+    Shader {
+        name: "cs_clip_rectangle", features: CLIP_FEATURES,
+    },
+    Shader {
+        name: "cs_clip_image", features: CLIP_FEATURES,
+    },
+    Shader {
+        name: "cs_clip_border", features: CLIP_FEATURES,
+    },
+
+    // Cache shaders
+    Shader {
+        name: "cs_blur", features: CACHE_FEATURES,
+    },
+    Shader {
+        name: "cs_text_run", features: CACHE_FEATURES,
+    },
+    Shader {
+        name: "cs_box_shadow", features: CACHE_FEATURES,
+    },
+
+    // Prim shaders
+    Shader {
+        name: "ps_line", features: &["", "TRANSFORM", "CACHE"],
+    },
+    Shader {
+        name: "ps_border_corner", features: PRIM_FEATURES,
+    },
+    Shader {
+        name: "ps_border_edge", features: PRIM_FEATURES,
+    },
+    Shader {
+        name: "ps_box_shadow", features: PRIM_FEATURES,
+    },
+    Shader {
+        name: "ps_gradient", features: PRIM_FEATURES,
+    },
+    Shader {
+        name: "ps_angle_gradient", features: PRIM_FEATURES,
+    },
+    Shader {
+        name: "ps_radial_gradient", features: PRIM_FEATURES,
+    },
+    Shader {
+        name: "ps_cache_image", features: PRIM_FEATURES,
+    },
+    Shader {
+        name: "ps_blend", features: PRIM_FEATURES,
+    },
+    Shader {
+        name: "ps_composite", features: PRIM_FEATURES,
+    },
+    Shader {
+        name: "ps_hardware_composite", features: PRIM_FEATURES,
+    },
+    Shader {
+        name: "ps_split_composite", features: PRIM_FEATURES,
+    },
+    Shader {
+        name: "ps_image", features: PRIM_FEATURES,
+    },
+    Shader {
+        name: "ps_yuv_image", features: PRIM_FEATURES,
+    },
+    Shader {
+        name: "ps_text_run", features: &["", "TRANSFORM", "SUBPIXEL_AA_FEATURE"],
+    },
+    Shader {
+        name: "ps_rectangle", features: &["", "TRANSFORM", "CLIP_FEATURE", "TRANSFORM,CLIP_FEATURE"],
+    },
+];
+
+const VERSION_STRING: &str = "#version 300 es\n";
 
 #[test]
 fn validate_shaders() {
     angle::hl::initialize().unwrap();
 
-    let shared_src = SHADERS.get("shared").unwrap();
-    let prim_shared_src = SHADERS.get("prim_shared").unwrap();
-    let clip_shared_src = SHADERS.get("clip_shared").unwrap();
-
-    for (filename, file_source) in SHADERS.iter() {
-        let is_prim = filename.starts_with("ps_");
-        let is_clip = filename.starts_with("cs_");
-        let is_vert = filename.ends_with(".vs");
-        let is_frag = filename.ends_with(".fs");
-        if !(is_prim ^ is_clip) || !(is_vert ^ is_frag) {
-            continue;
-        }
-
+    let resources = BuiltInResources::default();
+    let vs_validator = ShaderValidator::new(VERTEX_SHADER,
+                                            ShaderSpec::Gles3,
+                                            Output::Essl,
+                                            &resources).unwrap();
 
-        let base_filename = filename.splitn(2, '.').next().unwrap();
-        let mut shader_prefix = format!("#version 300 es\n
-            // Base shader: {}\n
-            #define WR_MAX_VERTEX_TEXTURE_WIDTH {}\n",
-            base_filename, webrender::renderer::MAX_VERTEX_TEXTURE_WIDTH);
-
-        if is_vert {
-            shader_prefix.push_str("#define WR_VERTEX_SHADER\n");
-        } else {
-            shader_prefix.push_str("#define WR_FRAGMENT_SHADER\n");
-        }
-
-        let mut build_configs = vec!["#define WR_FEATURE_TRANSFORM\n"];
-        if is_prim {
-            // the transform feature may be disabled for the prim shaders
-            build_configs.push("// WR_FEATURE_TRANSFORM disabled\n");
-        }
+    let fs_validator = ShaderValidator::new(FRAGMENT_SHADER,
+                                            ShaderSpec::Gles3,
+                                            Output::Essl,
+                                            &resources).unwrap();
 
-        for config_prefix in build_configs {
-            let mut shader_source = String::new();
-            shader_source.push_str(shader_prefix.as_str());
-            shader_source.push_str(config_prefix);
-            shader_source.push_str(shared_src);
-            shader_source.push_str(prim_shared_src);
-            if is_clip {
-                shader_source.push_str(clip_shared_src);
-            }
-            if let Some(optional_src) = SHADERS.get(base_filename) {
-                shader_source.push_str(optional_src);
-            }
-            shader_source.push_str(file_source);
-
+    for shader in SHADERS { // validate every listed shader ...
+        for config in shader.features { // ... once per feature configuration
+            let mut features = String::new();
+            features.push_str(SHADER_PREFIX);
 
-            let gl_type = if is_vert { VERTEX_SHADER } else { FRAGMENT_SHADER };
-            let resources = BuiltInResources::default();
-            let validator = ShaderValidator::new(gl_type,
-                                                 ShaderSpec::Gles3,
-                                                 Output::Essl,
-                                                 &resources).unwrap();
+            for feature in config.split(",") { // "A,B" enables both A and B
+                features.push_str(&format!("#define WR_FEATURE_{}\n", feature));
+            } // each #define must end its own line, or the next directive is swallowed
 
-            match validator.compile_and_translate(&[&shader_source]) {
-                Ok(_) => {
-                    println!("Shader translated succesfully: {}", filename);
-                },
-                Err(_) => {
-                    panic!("Shader compilation failed: {}\n{}",
-                        filename, validator.info_log());
-                },
-            }
+            let (vs, fs) = webrender::build_shader_strings(VERSION_STRING,
+                                                           &features,
+                                                           shader.name,
+                                                           &None);
+
+            validate(&vs_validator, shader.name, vs);
+            validate(&fs_validator, shader.name, fs);
         }
     }
 }
+
+fn validate(validator: &ShaderValidator, name: &str, source: String) { // checks one shader stage
+    match validator.compile_and_translate(&[&source]) {
+        Ok(_) => {
+            println!("Shader translated successfully: {}", name);
+        },
+        Err(_) => { // fail the test, printing the validator's info log
+            panic!("Shader compilation failed: {}\n{}", name, validator.info_log());
+        },
+    }
+}
\ No newline at end of file