Bug 1444946 - Update webrender to commit 486ee5f3aefb0172c2c5703e19f833e63eb295b9. r?jrmuizel draft
author Kartikaya Gupta <kgupta@mozilla.com>
Fri, 16 Mar 2018 09:02:26 -0400
changeset 768524 187d1f3fd2f77cbc18dfeb873dfeee039bb0ee6f
parent 768523 47e1787284fbfad3d32eb7081ffdda58d2b086de
child 768525 47887422f37194bf8f001b40dc907ed25174cc28
push id 102894
push user kgupta@mozilla.com
push date Fri, 16 Mar 2018 13:08:01 +0000
reviewers jrmuizel
bugs 1444946
milestone 61.0a1
Bug 1444946 - Update webrender to commit 486ee5f3aefb0172c2c5703e19f833e63eb295b9. r?jrmuizel MozReview-Commit-ID: EwPVlYte5Wo
gfx/webrender/examples/alpha_perf.rs
gfx/webrender/examples/animation.rs
gfx/webrender/res/brush_blend.glsl
gfx/webrender/res/brush_image.glsl
gfx/webrender/res/brush_mix_blend.glsl
gfx/webrender/res/cs_blur.glsl
gfx/webrender/res/cs_clip_box_shadow.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/src/batch.rs
gfx/webrender/src/box_shadow.rs
gfx/webrender/src/clip.rs
gfx/webrender/src/debug_render.rs
gfx/webrender/src/debug_server.rs
gfx/webrender/src/device.rs
gfx/webrender/src/display_list_flattener.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/gpu_types.rs
gfx/webrender/src/hit_test.rs
gfx/webrender/src/image.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/shade.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender_api/src/color.rs
gfx/webrender_api/src/display_item.rs
gfx/webrender_api/src/display_list.rs
gfx/webrender_bindings/revision.txt
gfx/wrench/src/cgfont_to_data.rs
gfx/wrench/src/rawtest.rs
gfx/wrench/src/yaml_frame_reader.rs
gfx/wrench/src/yaml_frame_writer.rs
--- a/gfx/webrender/examples/alpha_perf.rs
+++ b/gfx/webrender/examples/alpha_perf.rs
@@ -24,20 +24,17 @@ impl Example for App {
         _api: &RenderApi,
         builder: &mut DisplayListBuilder,
         _resources: &mut ResourceUpdates,
         _framebuffer_size: DeviceUintSize,
         _pipeline_id: PipelineId,
         _document_id: DocumentId,
     ) {
         let bounds = (0, 0).to(1920, 1080);
-        let info = LayoutPrimitiveInfo {
-            local_clip: LocalClip::Rect(bounds),
-            .. LayoutPrimitiveInfo::new(bounds)
-        };
+        let info = LayoutPrimitiveInfo::new(bounds);
 
         builder.push_stacking_context(
             &info,
             ScrollPolicy::Scrollable,
             None,
             TransformStyle::Flat,
             None,
             MixBlendMode::Normal,
--- a/gfx/webrender/examples/animation.rs
+++ b/gfx/webrender/examples/animation.rs
@@ -36,43 +36,45 @@ impl Example for App {
         builder: &mut DisplayListBuilder,
         _resources: &mut ResourceUpdates,
         _framebuffer_size: DeviceUintSize,
         _pipeline_id: PipelineId,
         _document_id: DocumentId,
     ) {
         // Create a 200x200 stacking context with an animated transform property.
         let bounds = (0, 0).to(200, 200);
-        let complex_clip = ComplexClipRegion {
-            rect: bounds,
-            radii: BorderRadius::uniform(50.0),
-            mode: ClipMode::Clip,
-        };
-        let info = LayoutPrimitiveInfo {
-            local_clip: LocalClip::RoundedRect(bounds, complex_clip),
-            .. LayoutPrimitiveInfo::new(bounds)
-        };
 
         let filters = vec![
             FilterOp::Opacity(PropertyBinding::Binding(self.opacity_key), self.opacity),
         ];
 
+        let info = LayoutPrimitiveInfo::new(bounds);
         builder.push_stacking_context(
             &info,
             ScrollPolicy::Scrollable,
             Some(PropertyBinding::Binding(self.property_key)),
             TransformStyle::Flat,
             None,
             MixBlendMode::Normal,
             filters,
         );
 
+        let complex_clip = ComplexClipRegion {
+            rect: bounds,
+            radii: BorderRadius::uniform(50.0),
+            mode: ClipMode::Clip,
+        };
+        let clip_id = builder.define_clip(bounds, vec![complex_clip], None);
+        builder.push_clip_id(clip_id);
+
         // Fill it with a white rect
         builder.push_rect(&info, ColorF::new(0.0, 1.0, 0.0, 1.0));
 
+        builder.pop_clip_id();
+
         builder.pop_stacking_context();
     }
 
     fn on_event(&mut self, win_event: glutin::WindowEvent, api: &RenderApi, document_id: DocumentId) -> bool {
         match win_event {
             glutin::WindowEvent::KeyboardInput {
                 input: glutin::KeyboardInput {
                     state: glutin::ElementState::Pressed,
--- a/gfx/webrender/res/brush_blend.glsl
+++ b/gfx/webrender/res/brush_blend.glsl
@@ -1,23 +1,23 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#define VECS_PER_SPECIFIC_BRUSH 1
+#define VECS_PER_SPECIFIC_BRUSH 2
 #define FORCE_NO_PERSPECTIVE
 
 #include shared,prim_shared,brush
 
 varying vec3 vUv;
 
 flat varying float vAmount;
 flat varying int vOp;
-flat varying mat4 vColorMat;
-flat varying vec4 vColorOffset;
+flat varying mat3 vColorMat;
+flat varying vec3 vColorOffset;
 flat varying vec4 vUvClipBounds;
 
 #ifdef WR_VERTEX_SHADER
 
 void brush_vs(
     VertexInfo vi,
     int prim_address,
     RectWithSize local_rect,
@@ -30,130 +30,131 @@ void brush_vs(
               src_task.common_data.task_rect.p0 -
               src_task.content_origin;
     vUv = vec3(uv / texture_size, src_task.common_data.texture_layer_index);
 
     vec2 uv0 = src_task.common_data.task_rect.p0;
     vec2 uv1 = uv0 + src_task.common_data.task_rect.size;
     vUvClipBounds = vec4(uv0, uv1) / texture_size.xyxy;
 
-    vOp = user_data.y;
-
     float lumR = 0.2126;
     float lumG = 0.7152;
     float lumB = 0.0722;
     float oneMinusLumR = 1.0 - lumR;
     float oneMinusLumG = 1.0 - lumG;
     float oneMinusLumB = 1.0 - lumB;
 
-    vec4 amount = fetch_from_resource_cache_1(prim_address);
-    vAmount = amount.x;
+    float amount = float(user_data.z) / 65536.0;
+    float invAmount = 1.0 - amount;
+
+    vOp = user_data.y;
+    vAmount = amount;
 
     switch (vOp) {
         case 2: {
             // Grayscale
-            vColorMat = mat4(vec4(lumR + oneMinusLumR * amount.y, lumR - lumR * amount.y, lumR - lumR * amount.y, 0.0),
-                             vec4(lumG - lumG * amount.y, lumG + oneMinusLumG * amount.y, lumG - lumG * amount.y, 0.0),
-                             vec4(lumB - lumB * amount.y, lumB - lumB * amount.y, lumB + oneMinusLumB * amount.y, 0.0),
-                             vec4(0.0, 0.0, 0.0, 1.0));
-            vColorOffset = vec4(0.0);
+            vColorMat = mat3(
+                vec3(lumR + oneMinusLumR * invAmount, lumR - lumR * invAmount, lumR - lumR * invAmount),
+                vec3(lumG - lumG * invAmount, lumG + oneMinusLumG * invAmount, lumG - lumG * invAmount),
+                vec3(lumB - lumB * invAmount, lumB - lumB * invAmount, lumB + oneMinusLumB * invAmount)
+            );
+            vColorOffset = vec3(0.0);
             break;
         }
         case 3: {
             // HueRotate
-            float c = cos(amount.x);
-            float s = sin(amount.x);
-            vColorMat = mat4(vec4(lumR + oneMinusLumR * c - lumR * s, lumR - lumR * c + 0.143 * s, lumR - lumR * c - oneMinusLumR * s, 0.0),
-                            vec4(lumG - lumG * c - lumG * s, lumG + oneMinusLumG * c + 0.140 * s, lumG - lumG * c + lumG * s, 0.0),
-                            vec4(lumB - lumB * c + oneMinusLumB * s, lumB - lumB * c - 0.283 * s, lumB + oneMinusLumB * c + lumB * s, 0.0),
-                            vec4(0.0, 0.0, 0.0, 1.0));
-            vColorOffset = vec4(0.0);
+            float c = cos(amount);
+            float s = sin(amount);
+            vColorMat = mat3(
+                vec3(lumR + oneMinusLumR * c - lumR * s, lumR - lumR * c + 0.143 * s, lumR - lumR * c - oneMinusLumR * s),
+                vec3(lumG - lumG * c - lumG * s, lumG + oneMinusLumG * c + 0.140 * s, lumG - lumG * c + lumG * s),
+                vec3(lumB - lumB * c + oneMinusLumB * s, lumB - lumB * c - 0.283 * s, lumB + oneMinusLumB * c + lumB * s)
+            );
+            vColorOffset = vec3(0.0);
             break;
         }
         case 5: {
             // Saturate
-            vColorMat = mat4(vec4(amount.y * lumR + amount.x, amount.y * lumR, amount.y * lumR, 0.0),
-                             vec4(amount.y * lumG, amount.y * lumG + amount.x, amount.y * lumG, 0.0),
-                             vec4(amount.y * lumB, amount.y * lumB, amount.y * lumB + amount.x, 0.0),
-                             vec4(0.0, 0.0, 0.0, 1.0));
-            vColorOffset = vec4(0.0);
+            vColorMat = mat3(
+                vec3(invAmount * lumR + amount, invAmount * lumR, invAmount * lumR),
+                vec3(invAmount * lumG, invAmount * lumG + amount, invAmount * lumG),
+                vec3(invAmount * lumB, invAmount * lumB, invAmount * lumB + amount)
+            );
+            vColorOffset = vec3(0.0);
             break;
         }
         case 6: {
             // Sepia
-            vColorMat = mat4(vec4(0.393 + 0.607 * amount.y, 0.349 - 0.349 * amount.y, 0.272 - 0.272 * amount.y, 0.0),
-                             vec4(0.769 - 0.769 * amount.y, 0.686 + 0.314 * amount.y, 0.534 - 0.534 * amount.y, 0.0),
-                             vec4(0.189 - 0.189 * amount.y, 0.168 - 0.168 * amount.y, 0.131 + 0.869 * amount.y, 0.0),
-                             vec4(0.0, 0.0, 0.0, 1.0));
-            vColorOffset = vec4(0.0);
+            vColorMat = mat3(
+                vec3(0.393 + 0.607 * invAmount, 0.349 - 0.349 * invAmount, 0.272 - 0.272 * invAmount),
+                vec3(0.769 - 0.769 * invAmount, 0.686 + 0.314 * invAmount, 0.534 - 0.534 * invAmount),
+                vec3(0.189 - 0.189 * invAmount, 0.168 - 0.168 * invAmount, 0.131 + 0.869 * invAmount)
+            );
+            vColorOffset = vec3(0.0);
             break;
         }
         case 10: {
             // Color Matrix
-            vec4 mat_data[4] = fetch_from_resource_cache_4(user_data.z);
+            vec4 mat_data[3] = fetch_from_resource_cache_3(user_data.z);
             vec4 offset_data = fetch_from_resource_cache_1(user_data.z + 4);
-            vColorMat = mat4(mat_data[0], mat_data[1], mat_data[2], mat_data[3]);
-            vColorOffset = offset_data;
+            vColorMat = mat3(mat_data[0].xyz, mat_data[1].xyz, mat_data[2].xyz);
+            vColorOffset = offset_data.rgb;
             break;
         }
         default: break;
     }
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
-vec4 Contrast(vec4 Cs, float amount) {
-    return vec4(Cs.rgb * amount - 0.5 * amount + 0.5, Cs.a);
+vec3 Contrast(vec3 Cs, float amount) {
+    return Cs.rgb * amount - 0.5 * amount + 0.5;
 }
 
-vec4 Invert(vec4 Cs, float amount) {
-    return vec4(mix(Cs.rgb, vec3(1.0) - Cs.rgb, amount), Cs.a);
+vec3 Invert(vec3 Cs, float amount) {
+    return mix(Cs.rgb, vec3(1.0) - Cs.rgb, amount);
 }
 
-vec4 Brightness(vec4 Cs, float amount) {
+vec3 Brightness(vec3 Cs, float amount) {
     // Apply the brightness factor.
     // Resulting color needs to be clamped to output range
     // since we are pre-multiplying alpha in the shader.
-    return vec4(clamp(Cs.rgb * amount, vec3(0.0), vec3(1.0)), Cs.a);
-}
-
-vec4 Opacity(vec4 Cs, float amount) {
-    return vec4(Cs.rgb, Cs.a * amount);
+    return clamp(Cs.rgb * amount, vec3(0.0), vec3(1.0));
 }
 
 vec4 brush_fs() {
     vec4 Cs = texture(sColor0, vUv);
 
+    if (Cs.a == 0.0) {
+        return vec4(0.0); // could also `discard`
+    }
+
     // Un-premultiply the input.
-    Cs.rgb /= Cs.a;
-
-    vec4 color;
+    float alpha = Cs.a;
+    vec3 color = Cs.rgb / Cs.a;
 
     switch (vOp) {
         case 0:
-            color = Cs;
             break;
         case 1:
-            color = Contrast(Cs, vAmount);
+            color = Contrast(color, vAmount);
             break;
         case 4:
-            color = Invert(Cs, vAmount);
+            color = Invert(color, vAmount);
             break;
         case 7:
-            color = Brightness(Cs, vAmount);
+            color = Brightness(color, vAmount);
             break;
-        case 8:
-            color = Opacity(Cs, vAmount);
+        case 8: // Opacity
+            alpha *= vAmount;
             break;
         default:
-            color = vColorMat * Cs + vColorOffset;
+            color = vColorMat * color + vColorOffset;
     }
 
     // Fail-safe to ensure that we don't sample outside the rendered
     // portion of a blend source.
-    color.a *= point_inside_rect(vUv.xy, vUvClipBounds.xy, vUvClipBounds.zw);
+    alpha *= point_inside_rect(vUv.xy, vUvClipBounds.xy, vUvClipBounds.zw);
 
     // Pre-multiply the alpha into the output value.
-    color.rgb *= color.a;
-
-    return color;
+    return alpha * vec4(color, 1.0);
 }
 #endif
--- a/gfx/webrender/res/brush_image.glsl
+++ b/gfx/webrender/res/brush_image.glsl
@@ -1,36 +1,49 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#define VECS_PER_SPECIFIC_BRUSH 1
+#define VECS_PER_SPECIFIC_BRUSH 2
 
 #include shared,prim_shared,brush
 
 #ifdef WR_FEATURE_ALPHA_PASS
 varying vec2 vLocalPos;
 #endif
 
 varying vec3 vUv;
 flat varying vec4 vUvBounds;
-flat varying vec4 vColor;
 
 #ifdef WR_FEATURE_ALPHA_PASS
 flat varying vec2 vSelect;
+flat varying vec4 vUvClipBounds;
+flat varying vec4 vColor;
 #endif
 
 #ifdef WR_VERTEX_SHADER
 
 #ifdef WR_FEATURE_ALPHA_PASS
     #define IMAGE_SOURCE_COLOR              0
     #define IMAGE_SOURCE_ALPHA              1
     #define IMAGE_SOURCE_MASK_FROM_COLOR    2
 #endif
 
+struct ImageBrush {
+    RectWithSize rendered_task_rect;
+    vec4 color;
+};
+
+ImageBrush fetch_image_primitive(int address) {
+    vec4[2] data = fetch_from_resource_cache_2(address);
+    RectWithSize rendered_task_rect = RectWithSize(data[0].xy, data[0].zw);
+    ImageBrush brush = ImageBrush(rendered_task_rect, data[1]);
+    return brush;
+}
+
 void brush_vs(
     VertexInfo vi,
     int prim_address,
     RectWithSize local_rect,
     ivec3 user_data,
     PictureTask pic_task
 ) {
     // If this is in WR_FEATURE_TEXTURE_RECT mode, the rect and size use
@@ -41,29 +54,62 @@ void brush_vs(
     vec2 texture_size = vec2(textureSize(sColor0, 0));
 #endif
 
     ImageResource res = fetch_image_resource(user_data.x);
     vec2 uv0 = res.uv_rect.p0;
     vec2 uv1 = res.uv_rect.p1;
 
     vUv.z = res.layer;
-    vColor = res.color;
-
-    vec2 f = (vi.local_pos - local_rect.p0) / local_rect.size;
-    vUv.xy = mix(uv0, uv1, f);
-    vUv.xy /= texture_size;
 
     // Handle case where the UV coords are inverted (e.g. from an
     // external image).
+    vec2 min_uv = min(uv0, uv1);
+    vec2 max_uv = max(uv0, uv1);
+
     vUvBounds = vec4(
-        min(uv0, uv1) + vec2(0.5),
-        max(uv0, uv1) - vec2(0.5)
+        min_uv + vec2(0.5),
+        max_uv - vec2(0.5)
     ) / texture_size.xyxy;
 
+    vec2 f;
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    ImageBrush image = fetch_image_primitive(prim_address);
+    vColor = image.color;
+
+    // Derive the texture coordinates for this image, based on
+    // whether the source image is a local-space or screen-space
+    // image.
+    switch (user_data.z) {
+        case RASTER_SCREEN:
+            f = (vi.snapped_device_pos - image.rendered_task_rect.p0) / image.rendered_task_rect.size;
+
+            vUvClipBounds = vec4(
+                min_uv,
+                max_uv
+            ) / texture_size.xyxy;
+            break;
+        case RASTER_LOCAL:
+        default: {
+            f = (vi.local_pos - local_rect.p0) / local_rect.size;
+
+            // Set the clip bounds to a value that won't have any
+            // effect for local space images.
+            vUvClipBounds = vec4(0.0, 0.0, 1.0, 1.0);
+            break;
+        }
+    }
+#else
+    f = (vi.local_pos - local_rect.p0) / local_rect.size;
+#endif
+
+    vUv.xy = mix(uv0, uv1, f);
+    vUv.xy /= texture_size;
+
 #ifdef WR_FEATURE_ALPHA_PASS
     switch (user_data.y) {
         case IMAGE_SOURCE_COLOR:
             vSelect = vec2(0.0, 0.0);
             break;
         case IMAGE_SOURCE_ALPHA:
             vSelect = vec2(0.0, 1.0);
             break;
@@ -81,15 +127,19 @@ void brush_vs(
 vec4 brush_fs() {
     vec2 uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
 
     vec4 texel = TEX_SAMPLE(sColor0, vec3(uv, vUv.z));
 
 #ifdef WR_FEATURE_ALPHA_PASS
     vec4 mask = mix(texel.rrrr, texel.aaaa, vSelect.x);
     vec4 color = mix(texel, vColor * mask, vSelect.y) * init_transform_fs(vLocalPos);
+
+    // Fail-safe to ensure that we don't sample outside the rendered
+    // portion of a picture source.
+    color.a *= point_inside_rect(vUv.xy, vUvClipBounds.xy, vUvClipBounds.zw);
 #else
     vec4 color = texel;
 #endif
 
     return color;
 }
 #endif
--- a/gfx/webrender/res/brush_mix_blend.glsl
+++ b/gfx/webrender/res/brush_mix_blend.glsl
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#define VECS_PER_SPECIFIC_BRUSH 1
+#define VECS_PER_SPECIFIC_BRUSH 2
 
 #include shared,prim_shared,brush
 
 varying vec3 vSrcUv;
 varying vec3 vBackdropUv;
 flat varying int vOp;
 
 #ifdef WR_VERTEX_SHADER
@@ -32,22 +32,16 @@ void brush_vs(
     vec2 backdrop_uv = vi.snapped_device_pos +
                        backdrop_task.task_rect.p0 -
                        src_task.content_origin;
     vBackdropUv = vec3(backdrop_uv / texture_size, backdrop_task.texture_layer_index);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
-float gauss(float x, float sigma) {
-    if (sigma == 0.0)
-        return 1.0;
-    return (1.0 / sqrt(6.283185307179586 * sigma * sigma)) * exp(-(x * x) / (2.0 * sigma * sigma));
-}
-
 vec3 Multiply(vec3 Cb, vec3 Cs) {
     return Cb * Cs;
 }
 
 vec3 Screen(vec3 Cb, vec3 Cs) {
     return Cb + Cs - (Cb * Cs);
 }
 
--- a/gfx/webrender/res/cs_blur.glsl
+++ b/gfx/webrender/res/cs_blur.glsl
@@ -19,26 +19,24 @@ flat varying int vBlurRadius;
 
 in int aBlurRenderTaskAddress;
 in int aBlurSourceTaskAddress;
 in int aBlurDirection;
 
 struct BlurTask {
     RenderTaskCommonData common_data;
     float blur_radius;
-    vec4 color;
 };
 
 BlurTask fetch_blur_task(int address) {
     RenderTaskData task_data = fetch_render_task_data(address);
 
     BlurTask task = BlurTask(
         task_data.common_data,
-        task_data.data1.x,
-        task_data.data2
+        task_data.data1.x
     );
 
     return task;
 }
 
 void main(void) {
     BlurTask blur_task = fetch_blur_task(aBlurRenderTaskAddress);
     RenderTaskCommonData src_task = fetch_render_task_common_data(aBlurSourceTaskAddress);
--- a/gfx/webrender/res/cs_clip_box_shadow.glsl
+++ b/gfx/webrender/res/cs_clip_box_shadow.glsl
@@ -6,29 +6,34 @@
 
 varying vec3 vPos;
 varying vec2 vUv;
 flat varying vec4 vUvBounds;
 flat varying float vLayer;
 flat varying vec4 vEdge;
 flat varying vec4 vUvBounds_NoClamp;
 flat varying float vClipMode;
+flat varying int vStretchMode;
+
+#define MODE_STRETCH        0
+#define MODE_SIMPLE         1
 
 #ifdef WR_VERTEX_SHADER
 
 struct BoxShadowData {
     vec2 src_rect_size;
     float clip_mode;
+    int stretch_mode;
     RectWithSize dest_rect;
 };
 
 BoxShadowData fetch_data(ivec2 address) {
     vec4 data[2] = fetch_from_resource_cache_2_direct(address);
     RectWithSize dest_rect = RectWithSize(data[1].xy, data[1].zw);
-    BoxShadowData bs_data = BoxShadowData(data[0].xy, data[0].z, dest_rect);
+    BoxShadowData bs_data = BoxShadowData(data[0].xy, data[0].z, int(data[0].w), dest_rect);
     return bs_data;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
     ClipScrollNode scroll_node = fetch_clip_scroll_node(cmi.scroll_node_id);
     BoxShadowData bs_data = fetch_data(cmi.clip_data_address);
@@ -36,40 +41,63 @@ void main(void) {
 
     ClipVertexInfo vi = write_clip_tile_vertex(bs_data.dest_rect,
                                                scroll_node,
                                                area);
 
     vLayer = res.layer;
     vPos = vi.local_pos;
     vClipMode = bs_data.clip_mode;
+    vStretchMode = bs_data.stretch_mode;
 
     vec2 uv0 = res.uv_rect.p0;
     vec2 uv1 = res.uv_rect.p1;
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 local_pos = vPos.xy / vPos.z;
 
-    vEdge.xy = vec2(0.5);
-    vEdge.zw = (bs_data.dest_rect.size / bs_data.src_rect_size) - vec2(0.5);
-    vUv = (local_pos - bs_data.dest_rect.p0) / bs_data.src_rect_size;
+    switch (bs_data.stretch_mode) {
+        case MODE_STRETCH: {
+            vEdge.xy = vec2(0.5);
+            vEdge.zw = (bs_data.dest_rect.size / bs_data.src_rect_size) - vec2(0.5);
+            vUv = (local_pos - bs_data.dest_rect.p0) / bs_data.src_rect_size;
+            break;
+        }
+        case MODE_SIMPLE:
+        default: {
+            vec2 f = (local_pos - bs_data.dest_rect.p0) / bs_data.dest_rect.size;
+            vUv = mix(uv0, uv1, f) / texture_size;
+            break;
+        }
+    }
 
     vUvBounds = vec4(uv0 + vec2(0.5), uv1 - vec2(0.5)) / texture_size.xyxy;
     vUvBounds_NoClamp = vec4(uv0, uv1) / texture_size.xyxy;
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     vec2 local_pos = vPos.xy / vPos.z;
+    vec2 uv;
 
-    vec2 uv = clamp(vUv.xy, vec2(0.0), vEdge.xy);
-    uv += max(vec2(0.0), vUv.xy - vEdge.zw);
+    switch (vStretchMode) {
+        case MODE_STRETCH: {
+            uv = clamp(vUv.xy, vec2(0.0), vEdge.xy);
+            uv += max(vec2(0.0), vUv.xy - vEdge.zw);
+            uv = mix(vUvBounds_NoClamp.xy, vUvBounds_NoClamp.zw, uv);
+            break;
+        }
+        case MODE_SIMPLE:
+        default: {
+            uv = vUv.xy;
+            break;
+        }
+    }
 
-    uv = mix(vUvBounds_NoClamp.xy, vUvBounds_NoClamp.zw, uv);
     uv = clamp(uv, vUvBounds.xy, vUvBounds.zw);
 
     float in_shadow_rect = point_inside_rect(
         local_pos,
         vLocalBounds.xy,
         vLocalBounds.zw
     );
 
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -11,16 +11,19 @@
 #define LINE_STYLE_DOTTED       1
 #define LINE_STYLE_DASHED       2
 #define LINE_STYLE_WAVY         3
 
 #define SUBPX_DIR_NONE        0
 #define SUBPX_DIR_HORIZONTAL  1
 #define SUBPX_DIR_VERTICAL    2
 
+#define RASTER_LOCAL            0
+#define RASTER_SCREEN           1
+
 #define EPSILON     0.0001
 
 uniform sampler2DArray sCacheA8;
 uniform sampler2DArray sCacheRGBA8;
 
 // An A8 target for standalone tasks that is available to all passes.
 uniform sampler2DArray sSharedCacheA8;
 
@@ -680,30 +683,29 @@ GlyphResource fetch_glyph_resource(int a
     vec4 data[2] = fetch_from_resource_cache_2(address);
     return GlyphResource(data[0], data[1].x, data[1].yz, data[1].w);
 }
 
 struct ImageResource {
     RectWithEndpoint uv_rect;
     float layer;
     vec3 user_data;
-    vec4 color;
 };
 
 ImageResource fetch_image_resource(int address) {
     //Note: number of blocks has to match `renderer::BLOCKS_PER_UV_RECT`
-    vec4 data[3] = fetch_from_resource_cache_3(address);
+    vec4 data[2] = fetch_from_resource_cache_2(address);
     RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
-    return ImageResource(uv_rect, data[1].x, data[1].yzw, data[2]);
+    return ImageResource(uv_rect, data[1].x, data[1].yzw);
 }
 
 ImageResource fetch_image_resource_direct(ivec2 address) {
-    vec4 data[3] = fetch_from_resource_cache_3_direct(address);
+    vec4 data[2] = fetch_from_resource_cache_2_direct(address);
     RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
-    return ImageResource(uv_rect, data[1].x, data[1].yzw, data[2]);
+    return ImageResource(uv_rect, data[1].x, data[1].yzw);
 }
 
 struct TextRun {
     vec4 color;
     vec4 bg_color;
     vec2 offset;
 };
 
--- a/gfx/webrender/src/batch.rs
+++ b/gfx/webrender/src/batch.rs
@@ -8,23 +8,23 @@ use api::{DeviceIntPoint, LayerPoint, Su
 use api::{LayerToWorldTransform, WorldPixel};
 use border::{BorderCornerInstance, BorderCornerSide, BorderEdgeKind};
 use clip::{ClipSource, ClipStore, ClipWorkItem};
 use clip_scroll_tree::{CoordinateSystemId};
 use euclid::{TypedTransform3D, vec3};
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheAddress};
 use gpu_types::{BrushFlags, BrushInstance, ClipChainRectIndex};
-use gpu_types::{ClipMaskInstance, ClipScrollNodeIndex};
+use gpu_types::{ClipMaskInstance, ClipScrollNodeIndex, RasterizationSpace};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
 use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
 use picture::{ContentOrigin, PictureCompositeMode, PictureKind, PicturePrimitive};
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{CachedGradient, ImageSource, PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
-use prim_store::{BrushPrimitive, BrushKind, DeferredResolve, EdgeAaSegmentMask, PrimitiveRun};
+use prim_store::{BrushPrimitive, BrushKind, DeferredResolve, EdgeAaSegmentMask, PictureIndex, PrimitiveRun};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKind, RenderTaskTree};
 use renderer::{BlendMode, ImageBufferKind};
 use renderer::BLOCKS_PER_UV_RECT;
 use resource_cache::{CacheItem, GlyphFetchResult, ImageRequest, ResourceCache};
 use std::{usize, f32, i32};
 use tiling::{RenderTargetContext};
 use util::{MatrixHelpers, TransformedRectKind};
 
@@ -368,17 +368,22 @@ impl AlphaBatchContainer {
             text_run_cache_prims: FastHashMap::default(),
             opaque_batches: Vec::new(),
             alpha_batches: Vec::new(),
             target_rect,
         }
     }
 
     fn merge(&mut self, builder: AlphaBatchBuilder) {
-        self.text_run_cache_prims.extend(builder.text_run_cache_prims);
+        for (key, value) in builder.text_run_cache_prims {
+            self.text_run_cache_prims
+                .entry(key)
+                .or_insert(vec![])
+                .extend(value);
+        }
 
         for other_batch in builder.batch_list.opaque_batch_list.batches {
             let batch_index = self.opaque_batches.iter().position(|batch| {
                 batch.key.is_compatible_with(&other_batch.key)
             });
 
             match batch_index {
                 Some(batch_index) => {
@@ -513,17 +518,18 @@ impl AlphaBatchBuilder {
             ];
             let gpu_handle = gpu_cache.push_per_frame_blocks(&gpu_blocks);
             let key = BatchKey::new(
                 BatchKind::SplitComposite,
                 BlendMode::PremultipliedAlpha,
                 BatchTextures::no_texture(),
             );
             let pic_metadata = &ctx.prim_store.cpu_metadata[prim_index.0];
-            let pic = &ctx.prim_store.cpu_pictures[pic_metadata.cpu_prim_index.0];
+            let brush = &ctx.prim_store.cpu_brushes[pic_metadata.cpu_prim_index.0];
+            let pic = &ctx.prim_store.pictures[brush.get_picture_index().0];
             let batch = self.batch_list.get_suitable_batch(key, &pic_metadata.screen_rect.as_ref().expect("bug").clipped);
 
             let render_task_id = pic.surface.expect("BUG: unexpected surface in splitting");
             let source_task_address = render_tasks.get_task_address(render_task_id);
             let gpu_address = gpu_handle.as_int(gpu_cache);
 
             let instance = CompositePrimitiveInstance::new(
                 task_address,
@@ -666,40 +672,375 @@ impl AlphaBatchBuilder {
             specified_blend_mode
         } else {
             BlendMode::None
         };
 
         match prim_metadata.prim_kind {
             PrimitiveKind::Brush => {
                 let brush = &ctx.prim_store.cpu_brushes[prim_metadata.cpu_prim_index.0];
-                if let Some((batch_kind, textures, user_data)) = brush.get_batch_params(
-                    ctx.resource_cache,
-                    gpu_cache,
-                    deferred_resolves,
-                    &ctx.cached_gradients,
-                ) {
-                    self.add_brush_to_batch(
-                        brush,
-                        prim_metadata,
-                        batch_kind,
-                        specified_blend_mode,
-                        non_segmented_blend_mode,
-                        textures,
-                        clip_chain_rect_index,
-                        clip_task_address,
-                        &task_relative_bounding_rect,
-                        prim_cache_address,
-                        scroll_id,
-                        task_address,
-                        transform_kind,
-                        z,
-                        render_tasks,
-                        user_data,
-                    );
+
+                match brush.kind {
+                    BrushKind::Picture { pic_index } => {
+                        let picture =
+                            &ctx.prim_store.pictures[pic_index.0];
+
+                        match picture.surface {
+                            Some(cache_task_id) => {
+                                let cache_task_address = render_tasks.get_task_address(cache_task_id);
+                                let textures = BatchTextures::render_target_cache();
+
+                                match picture.kind {
+                                    PictureKind::TextShadow { .. } => {
+                                        let kind = BatchKind::Brush(
+                                            BrushBatchKind::Image(ImageBufferKind::Texture2DArray)
+                                        );
+                                        let key = BatchKey::new(kind, non_segmented_blend_mode, textures);
+                                        let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
+
+                                        let uv_rect_address = render_tasks[cache_task_id]
+                                            .get_texture_handle()
+                                            .as_int(gpu_cache);
+
+                                        let instance = BrushInstance {
+                                            picture_address: task_address,
+                                            prim_address: prim_cache_address,
+                                            clip_chain_rect_index,
+                                            scroll_id,
+                                            clip_task_address,
+                                            z,
+                                            segment_index: 0,
+                                            edge_flags: EdgeAaSegmentMask::empty(),
+                                            brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
+                                            user_data: [
+                                                uv_rect_address,
+                                                BrushImageSourceKind::Color as i32,
+                                                RasterizationSpace::Local as i32,
+                                            ],
+                                        };
+                                        batch.push(PrimitiveInstance::from(instance));
+                                    }
+                                    PictureKind::Image {
+                                        composite_mode,
+                                        secondary_render_task_id,
+                                        is_in_3d_context,
+                                        reference_frame_index,
+                                        real_local_rect,
+                                        ref extra_gpu_data_handle,
+                                        ..
+                                    } => {
+                                        // If this picture is participating in a 3D rendering context,
+                                        // then don't add it to any batches here. Instead, create a polygon
+                                        // for it and add it to the current plane splitter.
+                                        if is_in_3d_context {
+                                            // Push into parent plane splitter.
+
+                                            let real_xf = &ctx.clip_scroll_tree
+                                                .nodes[reference_frame_index.0]
+                                                .world_content_transform
+                                                .into();
+                                            let polygon = make_polygon(
+                                                real_local_rect,
+                                                &real_xf,
+                                                prim_index.0,
+                                            );
+
+                                            splitter.add(polygon);
+
+                                            return;
+                                        }
+
+                                        // Depending on the composite mode of the picture, we generate the
+                                        // old style Composite primitive instances. In the future, we'll
+                                        // remove these and pass them through the brush batching pipeline.
+                                        // This will allow us to unify some of the shaders, apply clip masks
+                                        // when compositing pictures, and also correctly apply pixel snapping
+                                        // to picture compositing operations.
+                                        let source_id = cache_task_id;
+
+                                        match composite_mode.expect("bug: only composites here") {
+                                            PictureCompositeMode::Filter(filter) => {
+                                                match filter {
+                                                    FilterOp::Blur(..) => {
+                                                        let kind = BatchKind::Brush(
+                                                            BrushBatchKind::Image(ImageBufferKind::Texture2DArray)
+                                                        );
+                                                        let key = BatchKey::new(kind, non_segmented_blend_mode, textures);
+                                                        let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
+
+                                                        let uv_rect_address = render_tasks[cache_task_id]
+                                                            .get_texture_handle()
+                                                            .as_int(gpu_cache);
+
+                                                        let instance = BrushInstance {
+                                                            picture_address: task_address,
+                                                            prim_address: prim_cache_address,
+                                                            clip_chain_rect_index,
+                                                            scroll_id,
+                                                            clip_task_address,
+                                                            z,
+                                                            segment_index: 0,
+                                                            edge_flags: EdgeAaSegmentMask::empty(),
+                                                            brush_flags: BrushFlags::empty(),
+                                                            user_data: [
+                                                                uv_rect_address,
+                                                                BrushImageSourceKind::Color as i32,
+                                                                RasterizationSpace::Screen as i32,
+                                                            ],
+                                                        };
+                                                        batch.push(PrimitiveInstance::from(instance));
+                                                    }
+                                                    FilterOp::DropShadow(offset, _, _) => {
+                                                        let kind = BatchKind::Brush(
+                                                            BrushBatchKind::Image(ImageBufferKind::Texture2DArray),
+                                                        );
+                                                        let key = BatchKey::new(kind, non_segmented_blend_mode, textures);
+
+                                                        let uv_rect_address = render_tasks[cache_task_id]
+                                                            .get_texture_handle()
+                                                            .as_int(gpu_cache);
+
+                                                        let instance = BrushInstance {
+                                                            picture_address: task_address,
+                                                            prim_address: prim_cache_address,
+                                                            clip_chain_rect_index,
+                                                            scroll_id,
+                                                            clip_task_address,
+                                                            z,
+                                                            segment_index: 0,
+                                                            edge_flags: EdgeAaSegmentMask::empty(),
+                                                            brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
+                                                            user_data: [
+                                                                uv_rect_address,
+                                                                BrushImageSourceKind::ColorAlphaMask as i32,
+                                                                // TODO(gw): This is totally wrong, but the drop-shadow code itself
+                                                                //           is completely wrong, and doesn't work correctly with
+                                                                //           transformed Picture sources. I'm leaving this as is for
+                                                                //           now, and will fix drop-shadows properly, as a follow up.
+                                                                RasterizationSpace::Local as i32,
+                                                            ],
+                                                        };
+
+                                                        {
+                                                            let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
+                                                            batch.push(PrimitiveInstance::from(instance));
+                                                        }
+
+                                                        let secondary_id = secondary_render_task_id.expect("no secondary!?");
+                                                        let saved_index = render_tasks[secondary_id].saved_index.expect("no saved index!?");
+                                                        debug_assert_ne!(saved_index, SavedTargetIndex::PENDING);
+                                                        let secondary_task_address = render_tasks.get_task_address(secondary_id);
+                                                        let secondary_textures = BatchTextures {
+                                                            colors: [
+                                                                SourceTexture::RenderTaskCache(saved_index),
+                                                                SourceTexture::Invalid,
+                                                                SourceTexture::Invalid,
+                                                            ],
+                                                        };
+                                                        let key = BatchKey::new(
+                                                            BatchKind::HardwareComposite,
+                                                            BlendMode::PremultipliedAlpha,
+                                                            secondary_textures,
+                                                        );
+                                                        let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
+                                                        let content_rect = prim_metadata.local_rect.translate(&-offset);
+                                                        let rect =
+                                                            (content_rect * LayerToWorldScale::new(1.0) * ctx.device_pixel_scale).round()
+                                                                                                                                 .to_i32();
+
+                                                        let instance = CompositePrimitiveInstance::new(
+                                                            task_address,
+                                                            secondary_task_address,
+                                                            RenderTaskAddress(0),
+                                                            rect.origin.x,
+                                                            rect.origin.y,
+                                                            z,
+                                                            rect.size.width,
+                                                            rect.size.height,
+                                                        );
+
+                                                        batch.push(PrimitiveInstance::from(instance));
+                                                    }
+                                                    _ => {
+                                                        let key = BatchKey::new(
+                                                            BatchKind::Brush(BrushBatchKind::Blend),
+                                                            BlendMode::PremultipliedAlpha,
+                                                            BatchTextures::render_target_cache(),
+                                                        );
+
+                                                        let filter_mode = match filter {
+                                                            FilterOp::Blur(..) => 0,
+                                                            FilterOp::Contrast(..) => 1,
+                                                            FilterOp::Grayscale(..) => 2,
+                                                            FilterOp::HueRotate(..) => 3,
+                                                            FilterOp::Invert(..) => 4,
+                                                            FilterOp::Saturate(..) => 5,
+                                                            FilterOp::Sepia(..) => 6,
+                                                            FilterOp::Brightness(..) => 7,
+                                                            FilterOp::Opacity(..) => 8,
+                                                            FilterOp::DropShadow(..) => 9,
+                                                            FilterOp::ColorMatrix(..) => 10,
+                                                        };
+
+                                                        let user_data = match filter {
+                                                            FilterOp::Contrast(amount) |
+                                                            FilterOp::Grayscale(amount) |
+                                                            FilterOp::Invert(amount) |
+                                                            FilterOp::Saturate(amount) |
+                                                            FilterOp::Sepia(amount) |
+                                                            FilterOp::Brightness(amount) |
+                                                            FilterOp::Opacity(_, amount) => {
+                                                                (amount * 65536.0) as i32
+                                                            }
+                                                            FilterOp::HueRotate(angle) => {
+                                                                (0.01745329251 * angle * 65536.0) as i32
+                                                            }
+                                                            // Go through different paths
+                                                            FilterOp::Blur(..) |
+                                                            FilterOp::DropShadow(..) => {
+                                                                unreachable!();
+                                                            }
+                                                            FilterOp::ColorMatrix(_) => {
+                                                                extra_gpu_data_handle.as_int(gpu_cache)
+                                                            }
+                                                        };
+
+                                                        let instance = BrushInstance {
+                                                            picture_address: task_address,
+                                                            prim_address: prim_cache_address,
+                                                            clip_chain_rect_index,
+                                                            scroll_id,
+                                                            clip_task_address,
+                                                            z,
+                                                            segment_index: 0,
+                                                            edge_flags: EdgeAaSegmentMask::empty(),
+                                                            brush_flags: BrushFlags::empty(),
+                                                            user_data: [
+                                                                cache_task_address.0 as i32,
+                                                                filter_mode,
+                                                                user_data,
+                                                            ],
+                                                        };
+
+                                                        let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
+                                                        batch.push(PrimitiveInstance::from(instance));
+                                                    }
+                                                }
+                                            }
+                                            PictureCompositeMode::MixBlend(mode) => {
+                                                let backdrop_id = secondary_render_task_id.expect("no backdrop!?");
+
+                                                let key = BatchKey::new(
+                                                    BatchKind::Brush(
+                                                        BrushBatchKind::MixBlend {
+                                                            task_id,
+                                                            source_id,
+                                                            backdrop_id,
+                                                        },
+                                                    ),
+                                                    BlendMode::PremultipliedAlpha,
+                                                    BatchTextures::no_texture(),
+                                                );
+                                                let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
+                                                let backdrop_task_address = render_tasks.get_task_address(backdrop_id);
+                                                let source_task_address = render_tasks.get_task_address(source_id);
+
+                                                let instance = BrushInstance {
+                                                    picture_address: task_address,
+                                                    prim_address: prim_cache_address,
+                                                    clip_chain_rect_index,
+                                                    scroll_id,
+                                                    clip_task_address,
+                                                    z,
+                                                    segment_index: 0,
+                                                    edge_flags: EdgeAaSegmentMask::empty(),
+                                                    brush_flags: BrushFlags::empty(),
+                                                    user_data: [
+                                                        mode as u32 as i32,
+                                                        backdrop_task_address.0 as i32,
+                                                        source_task_address.0 as i32,
+                                                    ],
+                                                };
+
+                                                batch.push(PrimitiveInstance::from(instance));
+                                            }
+                                            PictureCompositeMode::Blit => {
+                                                let kind = BatchKind::Brush(
+                                                    BrushBatchKind::Image(ImageBufferKind::Texture2DArray)
+                                                );
+                                                let key = BatchKey::new(kind, non_segmented_blend_mode, textures);
+                                                let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
+
+                                                let uv_rect_address = render_tasks[cache_task_id]
+                                                    .get_texture_handle()
+                                                    .as_int(gpu_cache);
+
+                                                let instance = BrushInstance {
+                                                    picture_address: task_address,
+                                                    prim_address: prim_cache_address,
+                                                    clip_chain_rect_index,
+                                                    scroll_id,
+                                                    clip_task_address,
+                                                    z,
+                                                    segment_index: 0,
+                                                    edge_flags: EdgeAaSegmentMask::empty(),
+                                                    brush_flags: BrushFlags::empty(),
+                                                    user_data: [
+                                                        uv_rect_address,
+                                                        BrushImageSourceKind::Color as i32,
+                                                        RasterizationSpace::Screen as i32,
+                                                    ],
+                                                };
+                                                batch.push(PrimitiveInstance::from(instance));
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                            None => {
+                                // If this picture is being drawn into an existing target (i.e. with
+                                // no composition operation), recurse and add to the current batch list.
+                                self.add_pic_to_batch(
+                                    picture,
+                                    task_id,
+                                    ctx,
+                                    gpu_cache,
+                                    render_tasks,
+                                    deferred_resolves,
+                                );
+                            }
+                        }
+                    }
+                    _ => {
+                        if let Some((batch_kind, textures, user_data)) = brush.get_batch_params(
+                                ctx.resource_cache,
+                                gpu_cache,
+                                deferred_resolves,
+                                &ctx.cached_gradients,
+                        ) {
+                            self.add_brush_to_batch(
+                                brush,
+                                prim_metadata,
+                                batch_kind,
+                                specified_blend_mode,
+                                non_segmented_blend_mode,
+                                textures,
+                                clip_chain_rect_index,
+                                clip_task_address,
+                                &task_relative_bounding_rect,
+                                prim_cache_address,
+                                scroll_id,
+                                task_address,
+                                transform_kind,
+                                z,
+                                render_tasks,
+                                user_data,
+                            );
+                        }
+                    }
                 }
             }
             PrimitiveKind::Border => {
                 let border_cpu =
                     &ctx.prim_store.cpu_borders[prim_metadata.cpu_prim_index.0];
                 // TODO(gw): Select correct blend mode for edges and corners!!
                 let corner_kind = BatchKind::Transformable(
                     transform_kind,
@@ -880,307 +1221,16 @@ impl AlphaBatchBuilder {
                                 glyph.index_in_text_run,
                                 glyph.uv_rect_address.as_int(),
                                 subpx_dir as u32 as i32,
                             ));
                         }
                     },
                 );
             }
-            PrimitiveKind::Picture => {
-                let picture =
-                    &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
-
-                match picture.surface {
-                    Some(cache_task_id) => {
-                        let cache_task_address = render_tasks.get_task_address(cache_task_id);
-                        let textures = BatchTextures::render_target_cache();
-
-                        match picture.kind {
-                            PictureKind::TextShadow { .. } => {
-                                let kind = BatchKind::Brush(
-                                    BrushBatchKind::Image(ImageBufferKind::Texture2DArray)
-                                );
-                                let key = BatchKey::new(kind, non_segmented_blend_mode, textures);
-                                let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
-
-                                let uv_rect_address = render_tasks[cache_task_id]
-                                    .get_texture_handle()
-                                    .as_int(gpu_cache);
-
-                                let instance = BrushInstance {
-                                    picture_address: task_address,
-                                    prim_address: prim_cache_address,
-                                    clip_chain_rect_index,
-                                    scroll_id,
-                                    clip_task_address,
-                                    z,
-                                    segment_index: 0,
-                                    edge_flags: EdgeAaSegmentMask::empty(),
-                                    brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
-                                    user_data: [
-                                        uv_rect_address,
-                                        BrushImageSourceKind::Color as i32,
-                                        0,
-                                    ],
-                                };
-                                batch.push(PrimitiveInstance::from(instance));
-                            }
-                            PictureKind::Image {
-                                composite_mode,
-                                secondary_render_task_id,
-                                is_in_3d_context,
-                                reference_frame_index,
-                                real_local_rect,
-                                ref extra_gpu_data_handle,
-                                ..
-                            } => {
-                                // If this picture is participating in a 3D rendering context,
-                                // then don't add it to any batches here. Instead, create a polygon
-                                // for it and add it to the current plane splitter.
-                                if is_in_3d_context {
-                                    // Push into parent plane splitter.
-
-                                    let real_xf = &ctx.clip_scroll_tree
-                                        .nodes[reference_frame_index.0]
-                                        .world_content_transform
-                                        .into();
-                                    let polygon = make_polygon(
-                                        real_local_rect,
-                                        &real_xf,
-                                        prim_index.0,
-                                    );
-
-                                    splitter.add(polygon);
-
-                                    return;
-                                }
-
-                                // Depending on the composite mode of the picture, we generate the
-                                // old style Composite primitive instances. In the future, we'll
-                                // remove these and pass them through the brush batching pipeline.
-                                // This will allow us to unify some of the shaders, apply clip masks
-                                // when compositing pictures, and also correctly apply pixel snapping
-                                // to picture compositing operations.
-                                let source_id = cache_task_id;
-
-                                match composite_mode.expect("bug: only composites here") {
-                                    PictureCompositeMode::Filter(filter) => {
-                                        match filter {
-                                            FilterOp::Blur(..) => {
-                                                let src_task_address = render_tasks.get_task_address(source_id);
-                                                let key = BatchKey::new(
-                                                    BatchKind::HardwareComposite,
-                                                    BlendMode::PremultipliedAlpha,
-                                                    BatchTextures::render_target_cache(),
-                                                );
-                                                let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
-                                                let item_bounding_rect = prim_metadata.screen_rect.expect("bug!!").clipped;
-                                                let instance = CompositePrimitiveInstance::new(
-                                                    task_address,
-                                                    src_task_address,
-                                                    RenderTaskAddress(0),
-                                                    item_bounding_rect.origin.x,
-                                                    item_bounding_rect.origin.y,
-                                                    z,
-                                                    item_bounding_rect.size.width,
-                                                    item_bounding_rect.size.height,
-                                                );
-
-                                                batch.push(PrimitiveInstance::from(instance));
-                                            }
-                                            FilterOp::DropShadow(offset, _, _) => {
-                                                let kind = BatchKind::Brush(
-                                                    BrushBatchKind::Image(ImageBufferKind::Texture2DArray),
-                                                );
-                                                let key = BatchKey::new(kind, non_segmented_blend_mode, textures);
-
-                                                let uv_rect_address = render_tasks[cache_task_id]
-                                                    .get_texture_handle()
-                                                    .as_int(gpu_cache);
-
-                                                let instance = BrushInstance {
-                                                    picture_address: task_address,
-                                                    prim_address: prim_cache_address,
-                                                    clip_chain_rect_index,
-                                                    scroll_id,
-                                                    clip_task_address,
-                                                    z,
-                                                    segment_index: 0,
-                                                    edge_flags: EdgeAaSegmentMask::empty(),
-                                                    brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
-                                                    user_data: [
-                                                        uv_rect_address,
-                                                        BrushImageSourceKind::ColorAlphaMask as i32,
-                                                        0,
-                                                    ],
-                                                };
-
-                                                {
-                                                    let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
-                                                    batch.push(PrimitiveInstance::from(instance));
-                                                }
-
-                                                let secondary_id = secondary_render_task_id.expect("no secondary!?");
-                                                let saved_index = render_tasks[secondary_id].saved_index.expect("no saved index!?");
-                                                debug_assert_ne!(saved_index, SavedTargetIndex::PENDING);
-                                                let secondary_task_address = render_tasks.get_task_address(secondary_id);
-                                                let secondary_textures = BatchTextures {
-                                                    colors: [
-                                                        SourceTexture::RenderTaskCache(saved_index),
-                                                        SourceTexture::Invalid,
-                                                        SourceTexture::Invalid,
-                                                    ],
-                                                };
-                                                let key = BatchKey::new(
-                                                    BatchKind::HardwareComposite,
-                                                    BlendMode::PremultipliedAlpha,
-                                                    secondary_textures,
-                                                );
-                                                let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
-                                                let content_rect = prim_metadata.local_rect.translate(&-offset);
-                                                let rect =
-                                                    (content_rect * LayerToWorldScale::new(1.0) * ctx.device_pixel_scale).round()
-                                                                                                                         .to_i32();
-
-                                                let instance = CompositePrimitiveInstance::new(
-                                                    task_address,
-                                                    secondary_task_address,
-                                                    RenderTaskAddress(0),
-                                                    rect.origin.x,
-                                                    rect.origin.y,
-                                                    z,
-                                                    rect.size.width,
-                                                    rect.size.height,
-                                                );
-
-                                                batch.push(PrimitiveInstance::from(instance));
-                                            }
-                                            _ => {
-                                                let key = BatchKey::new(
-                                                    BatchKind::Brush(BrushBatchKind::Blend),
-                                                    BlendMode::PremultipliedAlpha,
-                                                    BatchTextures::render_target_cache(),
-                                                );
-
-                                                let (filter_mode, extra_cache_address) = match filter {
-                                                    FilterOp::Blur(..) => (0, 0),
-                                                    FilterOp::Contrast(..) => (1, 0),
-                                                    FilterOp::Grayscale(..) => (2, 0),
-                                                    FilterOp::HueRotate(..) => (3, 0),
-                                                    FilterOp::Invert(..) => (4, 0),
-                                                    FilterOp::Saturate(..) => (5, 0),
-                                                    FilterOp::Sepia(..) => (6, 0),
-                                                    FilterOp::Brightness(..) => (7, 0),
-                                                    FilterOp::Opacity(..) => (8, 0),
-                                                    FilterOp::DropShadow(..) => (9, 0),
-                                                    FilterOp::ColorMatrix(..) => {
-                                                        (10, extra_gpu_data_handle.as_int(gpu_cache))
-                                                    }
-                                                };
-
-                                                let instance = BrushInstance {
-                                                    picture_address: task_address,
-                                                    prim_address: prim_cache_address,
-                                                    clip_chain_rect_index,
-                                                    scroll_id,
-                                                    clip_task_address,
-                                                    z,
-                                                    segment_index: 0,
-                                                    edge_flags: EdgeAaSegmentMask::empty(),
-                                                    brush_flags: BrushFlags::empty(),
-                                                    user_data: [
-                                                        cache_task_address.0 as i32,
-                                                        filter_mode,
-                                                        extra_cache_address,
-                                                    ],
-                                                };
-
-                                                let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
-                                                batch.push(PrimitiveInstance::from(instance));
-                                            }
-                                        }
-                                    }
-                                    PictureCompositeMode::MixBlend(mode) => {
-                                        let backdrop_id = secondary_render_task_id.expect("no backdrop!?");
-
-                                        let key = BatchKey::new(
-                                            BatchKind::Brush(
-                                                BrushBatchKind::MixBlend {
-                                                    task_id,
-                                                    source_id,
-                                                    backdrop_id,
-                                                },
-                                            ),
-                                            BlendMode::PremultipliedAlpha,
-                                            BatchTextures::no_texture(),
-                                        );
-                                        let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
-                                        let backdrop_task_address = render_tasks.get_task_address(backdrop_id);
-                                        let source_task_address = render_tasks.get_task_address(source_id);
-
-                                        let instance = BrushInstance {
-                                            picture_address: task_address,
-                                            prim_address: prim_cache_address,
-                                            clip_chain_rect_index,
-                                            scroll_id,
-                                            clip_task_address,
-                                            z,
-                                            segment_index: 0,
-                                            edge_flags: EdgeAaSegmentMask::empty(),
-                                            brush_flags: BrushFlags::empty(),
-                                            user_data: [
-                                                mode as u32 as i32,
-                                                backdrop_task_address.0 as i32,
-                                                source_task_address.0 as i32,
-                                            ],
-                                        };
-
-                                        batch.push(PrimitiveInstance::from(instance));
-                                    }
-                                    PictureCompositeMode::Blit => {
-                                        let src_task_address = render_tasks.get_task_address(source_id);
-                                        let key = BatchKey::new(
-                                            BatchKind::HardwareComposite,
-                                            BlendMode::PremultipliedAlpha,
-                                            BatchTextures::render_target_cache(),
-                                        );
-                                        let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
-                                        let item_bounding_rect = prim_metadata.screen_rect.expect("bug!!").clipped;
-                                        let instance = CompositePrimitiveInstance::new(
-                                            task_address,
-                                            src_task_address,
-                                            RenderTaskAddress(0),
-                                            item_bounding_rect.origin.x,
-                                            item_bounding_rect.origin.y,
-                                            z,
-                                            item_bounding_rect.size.width,
-                                            item_bounding_rect.size.height,
-                                        );
-
-                                        batch.push(PrimitiveInstance::from(instance));
-                                    }
-                                }
-                            }
-                        }
-                    }
-                    None => {
-                        // If this picture is being drawn into an existing target (i.e. with
-                        // no composition operation), recurse and add to the current batch list.
-                        self.add_pic_to_batch(
-                            picture,
-                            task_id,
-                            ctx,
-                            gpu_cache,
-                            render_tasks,
-                            deferred_resolves,
-                        );
-                    }
-                }
-            }
         }
     }
 
     fn add_brush_to_batch(
         &mut self,
         brush: &BrushPrimitive,
         prim_metadata: &PrimitiveMetadata,
         batch_kind: BrushBatchKind,
@@ -1270,16 +1320,27 @@ impl AlphaBatchBuilder {
                 let batch = self.batch_list.get_suitable_batch(batch_key, task_relative_bounding_rect);
                 batch.push(PrimitiveInstance::from(base_instance));
             }
         }
     }
 }
 
 impl BrushPrimitive {
+    /// Returns the `pic_index` held by a `BrushKind::Picture` brush.
+    ///
+    /// Panics when `self.kind` is any other variant.
+    pub fn get_picture_index(&self) -> PictureIndex {
+        if let BrushKind::Picture { pic_index } = self.kind {
+            pic_index
+        } else {
+            panic!("bug: not a picture brush!!");
+        }
+    }
+
     fn get_batch_params(
         &self,
         resource_cache: &ResourceCache,
         gpu_cache: &mut GpuCache,
         deferred_resolves: &mut Vec<DeferredResolve>,
         cached_gradients: &[CachedGradient],
     ) -> Option<(BrushBatchKind, BatchTextures, [i32; 3])> {
         match self.kind {
@@ -1304,22 +1365,22 @@ impl BrushPrimitive {
                     let textures = BatchTextures::color(cache_item.texture_id);
 
                     Some((
                         BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id)),
                         textures,
                         [
                             cache_item.uv_rect_handle.as_int(gpu_cache),
                             BrushImageSourceKind::Color as i32,
-                            0,
+                            RasterizationSpace::Local as i32,
                         ],
                     ))
                 }
             }
-            BrushKind::Picture => {
+            BrushKind::Picture { .. } => {
                 panic!("bug: get_batch_key is handled at higher level for pictures");
             }
             BrushKind::Solid { .. } => {
                 Some((
                     BrushBatchKind::Solid,
                     BatchTextures::no_texture(),
                     [0; 3],
                 ))
@@ -1419,25 +1480,21 @@ trait AlphaBatchHelpers {
         metadata: &PrimitiveMetadata,
     ) -> BlendMode;
 }
 
 impl AlphaBatchHelpers for PrimitiveStore {
     fn get_blend_mode(&self, metadata: &PrimitiveMetadata) -> BlendMode {
         match metadata.prim_kind {
             // Can only resolve the TextRun's blend mode once glyphs are fetched.
+            PrimitiveKind::Border |
             PrimitiveKind::TextRun => {
                 BlendMode::PremultipliedAlpha
             }
 
-            PrimitiveKind::Border |
-            PrimitiveKind::Picture => {
-                BlendMode::PremultipliedAlpha
-            }
-
             PrimitiveKind::Brush => {
                 let brush = &self.cpu_brushes[metadata.cpu_prim_index.0];
                 match brush.kind {
                     BrushKind::Clear => {
                         BlendMode::PremultipliedDestOut
                     }
                     BrushKind::Image { alpha_type, .. } => {
                         match alpha_type {
@@ -1445,17 +1502,17 @@ impl AlphaBatchHelpers for PrimitiveStor
                             AlphaType::Alpha => BlendMode::Alpha,
                         }
                     }
                     BrushKind::Solid { .. } |
                     BrushKind::Line { .. } |
                     BrushKind::YuvImage { .. } |
                     BrushKind::RadialGradient { .. } |
                     BrushKind::LinearGradient { .. } |
-                    BrushKind::Picture => {
+                    BrushKind::Picture { .. } => {
                         BlendMode::PremultipliedAlpha
                     }
                 }
             }
             PrimitiveKind::Image => {
                 let image_cpu = &self.cpu_images[metadata.cpu_prim_index.0];
                 match image_cpu.alpha_type {
                     AlphaType::PremultipliedAlpha => BlendMode::PremultipliedAlpha,
--- a/gfx/webrender/src/box_shadow.rs
+++ b/gfx/webrender/src/box_shadow.rs
@@ -1,29 +1,30 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{BorderRadius, BoxShadowClipMode, ClipMode, ColorF, ComplexClipRegion};
-use api::{LayerPrimitiveInfo, LayerRect, LayerSize, LayerVector2D, LayoutSize, LocalClip};
-use api::{DeviceIntSize};
+use api::{BorderRadius, BoxShadowClipMode, ClipMode, ColorF, DeviceIntSize, LayerPrimitiveInfo};
+use api::{LayerRect, LayerSize, LayerVector2D, LayoutSize};
 use clip::ClipSource;
 use display_list_flattener::DisplayListFlattener;
 use gpu_cache::GpuCacheHandle;
+use gpu_types::BoxShadowStretchMode;
 use prim_store::{BrushKind, BrushPrimitive, PrimitiveContainer};
 use prim_store::ScrollNodeAndClipChain;
 use resource_cache::CacheItem;
 use util::RectHelpers;
 
 #[derive(Debug)]
 pub struct BoxShadowClipSource {
     // Parameters that define the shadow and are constant.
     pub shadow_radius: BorderRadius,
     pub blur_radius: f32,
     pub clip_mode: BoxShadowClipMode,
+    pub stretch_mode: BoxShadowStretchMode,
 
     // The current cache key (in device-pixels), and handles
     // to the cached clip region and blurred texture.
     pub cache_key: Option<(DeviceIntSize, BoxShadowCacheKey)>,
     pub cache_item: CacheItem,
     pub clip_data_handle: GpuCacheHandle,
 
     // Local-space size of the required render task size.
@@ -108,69 +109,49 @@ impl<'a> DisplayListFlattener<'a> {
             // Trivial reject of box-shadows that are not visible.
             if box_offset.x == 0.0 &&
                box_offset.y == 0.0 &&
                spread_amount == 0.0 {
                 return;
             }
 
             let mut clips = Vec::with_capacity(2);
-            clips.push(ClipSource::Rectangle(*prim_info.local_clip.clip_rect()));
-
-            let fast_info = match clip_mode {
+            let (final_prim_rect, clip_radius) = match clip_mode {
                 BoxShadowClipMode::Outset => {
                     if !shadow_rect.is_well_formed_and_nonempty() {
                         return;
                     }
 
                     // TODO(gw): Add a fast path for ClipOut + zero border radius!
                     clips.push(ClipSource::new_rounded_rect(
                         prim_info.rect,
                         border_radius,
                         ClipMode::ClipOut
                     ));
 
-                    LayerPrimitiveInfo::with_clip(
-                        shadow_rect,
-                        LocalClip::RoundedRect(
-                            shadow_rect,
-                            ComplexClipRegion::new(
-                                shadow_rect,
-                                shadow_radius,
-                                ClipMode::Clip,
-                            ),
-                        ),
-                    )
+                    (shadow_rect, shadow_radius)
                 }
                 BoxShadowClipMode::Inset => {
                     if shadow_rect.is_well_formed_and_nonempty() {
                         clips.push(ClipSource::new_rounded_rect(
                             shadow_rect,
                             shadow_radius,
                             ClipMode::ClipOut
                         ));
                     }
 
-                    LayerPrimitiveInfo::with_clip(
-                        prim_info.rect,
-                        LocalClip::RoundedRect(
-                            prim_info.rect,
-                            ComplexClipRegion::new(
-                                prim_info.rect,
-                                border_radius,
-                                ClipMode::Clip
-                            ),
-                        ),
-                    )
+                    (prim_info.rect, border_radius)
                 }
             };
 
+            clips.push(ClipSource::new_rounded_rect(final_prim_rect, clip_radius, ClipMode::Clip));
+
             self.add_primitive(
                 clip_and_scroll,
-                &fast_info,
+                &LayerPrimitiveInfo::with_clip_rect(final_prim_rect, prim_info.clip_rect),
                 clips,
                 PrimitiveContainer::Brush(
                     BrushPrimitive::new(BrushKind::Solid {
                             color: *color,
                         },
                         None,
                     )
                 ),
@@ -217,20 +198,17 @@ impl<'a> DisplayListFlattener<'a> {
                         return;
                     }
 
                     // Add the box-shadow clip source.
                     extra_clips.push(shadow_clip_source);
 
                     // Outset shadows are expanded by the shadow
                     // region from the original primitive.
-                    LayerPrimitiveInfo::with_clip_rect(
-                        dest_rect,
-                        *prim_info.local_clip.clip_rect()
-                    )
+                    LayerPrimitiveInfo::with_clip_rect(dest_rect, prim_info.clip_rect)
                 }
                 BoxShadowClipMode::Inset => {
                     // If the inner shadow rect contains the prim
                     // rect, no pixels will be shadowed.
                     if border_radius.is_zero() &&
                        shadow_rect.inflate(-blur_radius, -blur_radius).contains_rect(&prim_info.rect) {
                         return;
                     }
--- a/gfx/webrender/src/clip.rs
+++ b/gfx/webrender/src/clip.rs
@@ -6,17 +6,17 @@ use api::{BorderRadius, ClipMode, Comple
 use api::{ImageRendering, LayerRect, LayerSize, LayoutPoint, LayoutVector2D, LocalClip};
 use api::{BoxShadowClipMode, LayerPoint, LayerToWorldScale};
 use border::{BorderCornerClipSource, ensure_no_corner_overlap};
 use box_shadow::{BLUR_SAMPLE_SCALE, BoxShadowClipSource, BoxShadowCacheKey};
 use clip_scroll_tree::{ClipChainIndex, CoordinateSystemId};
 use ellipse::Ellipse;
 use freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle, ToGpuBlocks};
-use gpu_types::ClipScrollNodeIndex;
+use gpu_types::{BoxShadowStretchMode, ClipScrollNodeIndex};
 use prim_store::{ClipData, ImageMaskData};
 use render_task::to_cache_size;
 use resource_cache::{CacheItem, ImageRequest, ResourceCache};
 use util::{LayerToWorldFastTransform, MaxRect, calculate_screen_bounding_rect};
 use util::extract_inner_rect_safe;
 use std::sync::Arc;
 
 pub type ClipStore = FreeList<ClipSources>;
@@ -176,31 +176,36 @@ impl ClipSource {
                 min_shadow_rect_size.height + fract_size.height,
             ),
         );
 
         // If the width or height ends up being bigger than the original
         // primitive shadow rect, just blur the entire rect and draw that
         // as a simple blit. This is necessary for correctness, since the
         // blur of one corner may affect the blur in another corner.
-        minimal_shadow_rect.size.width = minimal_shadow_rect.size.width.min(shadow_rect.size.width);
-        minimal_shadow_rect.size.height = minimal_shadow_rect.size.height.min(shadow_rect.size.height);
+        let mut stretch_mode = BoxShadowStretchMode::Stretch;
+        if shadow_rect.size.width < minimal_shadow_rect.size.width ||
+           shadow_rect.size.height < minimal_shadow_rect.size.height {
+            minimal_shadow_rect.size = shadow_rect.size;
+            stretch_mode = BoxShadowStretchMode::Simple;
+        }
 
         // Expand the shadow rect by enough room for the blur to take effect.
         let shadow_rect_alloc_size = LayerSize::new(
             2.0 * blur_region + minimal_shadow_rect.size.width.ceil(),
             2.0 * blur_region + minimal_shadow_rect.size.height.ceil(),
         );
 
         ClipSource::BoxShadow(BoxShadowClipSource {
             shadow_rect_alloc_size,
             shadow_radius,
             prim_shadow_rect,
             blur_radius,
             clip_mode,
+            stretch_mode,
             cache_item: CacheItem::invalid(),
             cache_key: None,
             clip_data_handle: GpuCacheHandle::new(),
             minimal_shadow_rect,
         })
     }
 }
 
@@ -307,17 +312,17 @@ impl ClipSources {
                         let data = ImageMaskData { local_rect: mask.rect };
                         data.write_gpu_blocks(request);
                     }
                     ClipSource::BoxShadow(ref info) => {
                         request.push([
                             info.shadow_rect_alloc_size.width,
                             info.shadow_rect_alloc_size.height,
                             info.clip_mode as i32 as f32,
-                            0.0,
+                            info.stretch_mode as i32 as f32,
                         ]);
                         request.push(info.prim_shadow_rect);
                     }
                     ClipSource::Rectangle(rect) => {
                         let data = ClipData::uniform(rect, 0.0, ClipMode::Clip);
                         data.write(&mut request);
                     }
                     ClipSource::RoundedRectangle(ref rect, ref radius, mode) => {
@@ -416,42 +421,21 @@ impl From<LayerRect> for Geometry {
     fn from(local_rect: LayerRect) -> Self {
         Geometry {
             local_rect,
             device_rect: DeviceIntRect::zero(),
         }
     }
 }
 
-pub trait Contains {
-    fn contains(&self, point: &LayoutPoint) -> bool;
-}
-
-impl Contains for LocalClip {
-    fn contains(&self, point: &LayoutPoint) -> bool {
-        if !self.clip_rect().contains(point) {
-            return false;
-        }
-        match self {
-            &LocalClip::Rect(..) => true,
-            &LocalClip::RoundedRect(_, complex_clip) => complex_clip.contains(point),
-        }
-    }
-}
-
-impl Contains for ComplexClipRegion {
-    fn contains(&self, point: &LayoutPoint) -> bool {
-        rounded_rectangle_contains_point(point, &self.rect, &self.radii)
-    }
-}
-
-pub fn rounded_rectangle_contains_point(point: &LayoutPoint,
-                                        rect: &LayerRect,
-                                        radii: &BorderRadius)
-                                        -> bool {
+pub fn rounded_rectangle_contains_point(
+    point: &LayoutPoint,
+    rect: &LayerRect,
+    radii: &BorderRadius
+) -> bool {
     if !rect.contains(point) {
         return false;
     }
 
     let top_left_center = rect.origin + radii.top_left.to_vector();
     if top_left_center.x > point.x && top_left_center.y > point.y &&
        !Ellipse::new(radii.top_left).contains(*point - top_left_center.to_vector()) {
         return false;
--- a/gfx/webrender/src/debug_render.rs
+++ b/gfx/webrender/src/debug_render.rs
@@ -277,44 +277,44 @@ impl DebugRenderer {
                 0.0,
                 ORTHO_NEAR_PLANE,
                 ORTHO_FAR_PLANE,
             );
 
             // Triangles
             if !self.tri_vertices.is_empty() {
                 device.bind_program(&self.color_program);
-                device.set_uniforms(&self.color_program, &projection, 0);
+                device.set_uniforms(&self.color_program, &projection);
                 device.bind_vao(&self.tri_vao);
                 device.update_vao_indices(&self.tri_vao, &self.tri_indices, VertexUsageHint::Dynamic);
                 device.update_vao_main_vertices(
                     &self.tri_vao,
                     &self.tri_vertices,
                     VertexUsageHint::Dynamic,
                 );
                 device.draw_triangles_u32(0, self.tri_indices.len() as i32);
             }
 
             // Lines
             if !self.line_vertices.is_empty() {
                 device.bind_program(&self.color_program);
-                device.set_uniforms(&self.color_program, &projection, 0);
+                device.set_uniforms(&self.color_program, &projection);
                 device.bind_vao(&self.line_vao);
                 device.update_vao_main_vertices(
                     &self.line_vao,
                     &self.line_vertices,
                     VertexUsageHint::Dynamic,
                 );
                 device.draw_nonindexed_lines(0, self.line_vertices.len() as i32);
             }
 
             // Glyph
             if !self.font_indices.is_empty() {
                 device.bind_program(&self.font_program);
-                device.set_uniforms(&self.font_program, &projection, 0);
+                device.set_uniforms(&self.font_program, &projection);
                 device.bind_texture(DebugSampler::Font, &self.font_texture);
                 device.bind_vao(&self.font_vao);
                 device.update_vao_indices(&self.font_vao, &self.font_indices, VertexUsageHint::Dynamic);
                 device.update_vao_main_vertices(
                     &self.font_vao,
                     &self.font_vertices,
                     VertexUsageHint::Dynamic,
                 );
--- a/gfx/webrender/src/debug_server.rs
+++ b/gfx/webrender/src/debug_server.rs
@@ -5,17 +5,17 @@
 use api::{ApiMsg, DebugCommand};
 use api::channel::MsgSender;
 use print_tree::PrintTreePrinter;
 use std::sync::mpsc::{channel, Receiver};
 use std::sync::mpsc::Sender;
 use std::thread;
 use ws;
 use base64::encode;
-use image;
+use image_loader;
 
 // Messages that are sent from the render backend to the renderer
 // debug command queue. These are sent in a separate queue so
 // that none of these types are exposed to the RenderApi interfaces.
 // We can't use select!() as it's not stable...
 enum DebugMsg {
     AddSender(ws::Sender),
     RemoveSender(ws::util::Token),
@@ -274,18 +274,18 @@ pub struct Screenshot {
     kind: &'static str,
     data: String
 }
 
 impl Screenshot {
     pub fn new(width: u32, height: u32, data: Vec<u8>) -> Self {
         let mut output = Vec::with_capacity((width * height) as usize);
         {
-            let encoder = image::png::PNGEncoder::new(&mut output);
-            encoder.encode(&data, width, height, image::ColorType::RGBA(8)).unwrap();
+            let encoder = image_loader::png::PNGEncoder::new(&mut output);
+            encoder.encode(&data, width, height, image_loader::ColorType::RGBA(8)).unwrap();
         }
 
         let data = encode(&output);
         Screenshot {
             kind: "screenshot",
             data
         }
     }
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -628,19 +628,17 @@ impl VertexUsageHint {
         }
     }
 }
 
 #[derive(Copy, Clone, Debug)]
 pub struct UniformLocation(gl::GLint);
 
 impl UniformLocation {
-    pub fn invalid() -> UniformLocation {
-        UniformLocation(-1)
-    }
+    pub const INVALID: Self = UniformLocation(-1);
 }
 
 pub struct Capabilities {
     pub supports_multisampling: bool,
 }
 
 #[derive(Clone, Debug)]
 pub enum ShaderError {
@@ -651,16 +649,17 @@ pub enum ShaderError {
 pub struct Device {
     gl: Rc<gl::Gl>,
     // device state
     bound_textures: [gl::GLuint; 16],
     bound_program: gl::GLuint,
     bound_vao: gl::GLuint,
     bound_read_fbo: FBOId,
     bound_draw_fbo: FBOId,
+    program_mode_id: UniformLocation,
     default_read_fbo: gl::GLuint,
     default_draw_fbo: gl::GLuint,
 
     device_pixel_ratio: f32,
     upload_method: UploadMethod,
 
     // HW or API capabilties
     capabilities: Capabilities,
@@ -713,16 +712,17 @@ impl Device {
                 supports_multisampling: false, //TODO
             },
 
             bound_textures: [0; 16],
             bound_program: 0,
             bound_vao: 0,
             bound_read_fbo: FBOId(0),
             bound_draw_fbo: FBOId(0),
+            program_mode_id: UniformLocation::INVALID,
             default_read_fbo: 0,
             default_draw_fbo: 0,
 
             max_texture_size,
             renderer_name,
             cached_programs,
             frame_id: FrameId(0),
             extensions,
@@ -796,16 +796,17 @@ impl Device {
         for i in 0 .. self.bound_textures.len() {
             self.bound_textures[i] = 0;
             self.gl.active_texture(gl::TEXTURE0 + i as gl::GLuint);
             self.gl.bind_texture(gl::TEXTURE_2D, 0);
         }
 
         // Shader state
         self.bound_program = 0;
+        self.program_mode_id = UniformLocation::INVALID;
         self.gl.use_program(0);
 
         // Vertex state
         self.bound_vao = 0;
         self.gl.bind_vertex_array(0);
 
         // FBO state
         self.bound_read_fbo = FBOId(self.default_read_fbo);
@@ -921,16 +922,17 @@ impl Device {
     }
 
     pub fn bind_program(&mut self, program: &Program) {
         debug_assert!(self.inside_frame);
 
         if self.bound_program != program.id {
             self.gl.use_program(program.id);
             self.bound_program = program.id;
+            self.program_mode_id = UniformLocation(program.u_mode);
         }
     }
 
     //TODO: remove once the Angle workaround is no longer needed
     pub fn reset_angle_sampler_metadata(&mut self, texture: &Texture) {
         self.bind_texture(DEFAULT_TEXTURE, texture);
         self.gl.tex_parameter_f(
             texture.target,
@@ -1464,35 +1466,31 @@ impl Device {
             }
         }
     }
 
     pub fn get_uniform_location(&self, program: &Program, name: &str) -> UniformLocation {
         UniformLocation(self.gl.get_uniform_location(program.id, name))
     }
 
-    pub fn set_uniform_2f(&self, uniform: UniformLocation, x: f32, y: f32) {
-        debug_assert!(self.inside_frame);
-        let UniformLocation(location) = uniform;
-        self.gl.uniform_2f(location, x, y);
-    }
-
     pub fn set_uniforms(
         &self,
         program: &Program,
         transform: &Transform3D<f32>,
-        mode: i32,
     ) {
         debug_assert!(self.inside_frame);
         self.gl
             .uniform_matrix_4fv(program.u_transform, false, &transform.to_row_major_array());
         self.gl
             .uniform_1f(program.u_device_pixel_ratio, self.device_pixel_ratio);
-        self.gl
-            .uniform_1i(program.u_mode, mode);
+    }
+
+    pub fn switch_mode(&self, mode: i32) {
+        debug_assert!(self.inside_frame);
+        self.gl.uniform_1i(self.program_mode_id.0, mode);
     }
 
     pub fn create_pbo(&mut self) -> PBO {
         let id = self.gl.gen_buffers(1)[0];
         PBO { id }
     }
 
     pub fn delete_pbo(&mut self, mut pbo: PBO) {
--- a/gfx/webrender/src/display_list_flattener.rs
+++ b/gfx/webrender/src/display_list_flattener.rs
@@ -1,38 +1,38 @@
 
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{AlphaType, BorderDetails, BorderDisplayItem, BuiltDisplayListIter};
-use api::{ClipAndScrollInfo, ClipId, ColorF, ComplexClipRegion, DeviceIntPoint, DeviceIntRect};
-use api::{DeviceIntSize, DevicePixelScale, DeviceUintRect, DeviceUintSize};
-use api::{DisplayItemRef, Epoch, ExtendMode, ExternalScrollId, FilterOp};
-use api::{FontInstanceKey, FontRenderMode, GlyphInstance, GlyphOptions, GradientStop};
-use api::{IframeDisplayItem, ImageDisplayItem, ImageKey, ImageRendering, ItemRange, LayerPoint};
-use api::{LayerPrimitiveInfo, LayerRect, LayerSize, LayerVector2D, LayoutSize, LayoutTransform};
-use api::{LayoutVector2D, LineOrientation, LineStyle, LocalClip, PipelineId};
-use api::{PropertyBinding, RepeatMode, ScrollFrameDisplayItem, ScrollPolicy, ScrollSensitivity};
-use api::{Shadow, SpecificDisplayItem, StackingContext, StickyFrameDisplayItem, TexelRect};
-use api::{TileOffset, TransformStyle, YuvColorSpace, YuvData};
+use api::{AlphaType, BorderDetails, BorderDisplayItem, BuiltDisplayListIter, ClipAndScrollInfo};
+use api::{ClipId, ColorF, ComplexClipRegion, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
+use api::{DevicePixelScale, DeviceUintRect, DisplayItemRef, Epoch, ExtendMode, ExternalScrollId};
+use api::{FilterOp, FontInstanceKey, FontRenderMode, GlyphInstance, GlyphOptions, GradientStop};
+use api::{IframeDisplayItem, ImageKey, ImageRendering, ItemRange, LayerPoint, LayerPrimitiveInfo};
+use api::{LayerRect, LayerSize, LayerVector2D, LayoutRect, LayoutSize, LayoutTransform};
+use api::{LayoutVector2D, LineOrientation, LineStyle, LocalClip, PipelineId, PropertyBinding};
+use api::{RepeatMode, ScrollFrameDisplayItem, ScrollPolicy, ScrollSensitivity, Shadow};
+use api::{SpecificDisplayItem, StackingContext, StickyFrameDisplayItem, TexelRect, TileOffset};
+use api::{TransformStyle, YuvColorSpace, YuvData};
 use app_units::Au;
 use border::ImageBorderSegment;
 use clip::{ClipRegion, ClipSource, ClipSources, ClipStore};
 use clip_scroll_node::{ClipScrollNode, NodeType, StickyFrameInfo};
 use clip_scroll_tree::{ClipChainIndex, ClipScrollNodeIndex, ClipScrollTree};
-use euclid::{SideOffsets2D, rect, vec2};
+use euclid::{SideOffsets2D, vec2};
 use frame_builder::{FrameBuilder, FrameBuilderConfig};
 use glyph_rasterizer::FontInstance;
 use hit_test::{HitTestingItem, HitTestingRun};
+use image::{decompose_image, TiledImageInfo};
 use internal_types::{FastHashMap, FastHashSet};
-use picture::{PictureCompositeMode, PictureKind, PicturePrimitive};
+use picture::{PictureCompositeMode, PictureKind};
 use prim_store::{BrushKind, BrushPrimitive, BrushSegmentDescriptor, CachedGradient};
 use prim_store::{CachedGradientIndex, ImageCacheKey, ImagePrimitiveCpu, ImageSource};
-use prim_store::{PrimitiveContainer, PrimitiveIndex, PrimitiveKind, PrimitiveStore};
+use prim_store::{PictureIndex, PrimitiveContainer, PrimitiveIndex, PrimitiveKind, PrimitiveStore};
 use prim_store::{ScrollNodeAndClipChain, TextRunPrimitiveCpu};
 use render_backend::{DocumentView};
 use resource_cache::{FontInstanceMap, ImageRequest, TiledImageMap};
 use scene::{Scene, ScenePipeline, StackingContextHelpers};
 use scene_builder::{BuiltScene, SceneRequest};
 use std::{f32, mem, usize};
 use tiling::{CompositeOps, ScrollbarPrimitive};
 use util::{MaxRect, RectHelpers, recycle_vec};
@@ -179,17 +179,17 @@ pub struct DisplayListFlattener<'a> {
     /// A stack of scroll nodes used during display list processing to properly
     /// parent new scroll nodes.
     reference_frame_stack: Vec<(ClipId, ClipScrollNodeIndex)>,
 
     /// A stack of stacking context properties.
     sc_stack: Vec<FlattenedStackingContext>,
 
     /// A stack of the current pictures.
-    picture_stack: Vec<PrimitiveIndex>,
+    picture_stack: Vec<PictureIndex>,
 
     /// A list of scrollbar primitives.
     pub scrollbar_prims: Vec<ScrollbarPrimitive>,
 
     /// The store of primitives.
     pub prim_store: PrimitiveStore,
 
     /// Information about all primitives involved in hit testing.
@@ -436,28 +436,26 @@ impl<'a> DisplayListFlattener<'a> {
         item: &DisplayItemRef,
         info: &ScrollFrameDisplayItem,
         pipeline_id: PipelineId,
         clip_and_scroll_ids: &ClipAndScrollInfo,
         reference_frame_relative_offset: &LayerVector2D,
     ) {
         let complex_clips = self.get_complex_clips(pipeline_id, item.complex_clip().0);
         let clip_region = ClipRegion::create_for_clip_node(
-            *item.local_clip().clip_rect(),
+            *item.clip_rect(),
             complex_clips,
             info.image_mask,
             &reference_frame_relative_offset,
         );
         // Just use clip rectangle as the frame rect for this scroll frame.
         // This is useful when calculating scroll extents for the
         // ClipScrollNode::scroll(..) API as well as for properly setting sticky
         // positioning offsets.
-        let frame_rect = item.local_clip()
-            .clip_rect()
-            .translate(&reference_frame_relative_offset);
+        let frame_rect = item.clip_rect().translate(&reference_frame_relative_offset);
         let content_rect = item.rect().translate(&reference_frame_relative_offset);
 
         debug_assert!(info.clip_id != info.scroll_frame_id);
 
         self.add_clip_node(info.clip_id, clip_and_scroll_ids.scroll_node_id, clip_region);
 
         self.add_scroll_frame(
             info.scroll_frame_id,
@@ -578,17 +576,17 @@ impl<'a> DisplayListFlattener<'a> {
         };
 
         self.id_to_index_mapper.initialize_for_pipeline(pipeline);
 
         self.add_clip_node(
             info.clip_id,
             clip_and_scroll_ids.scroll_node_id,
             ClipRegion::create_for_clip_node_with_local_clip(
-                &item.local_clip(),
+                &LocalClip::from(*item.clip_rect()),
                 &reference_frame_relative_offset
             ),
         );
 
         let epoch = self.scene.pipeline_epochs[&iframe_pipeline_id];
         self.pipeline_epochs.push((iframe_pipeline_id, epoch));
 
         let bounds = item.rect();
@@ -633,22 +631,39 @@ impl<'a> DisplayListFlattener<'a> {
 
         let prim_info = item.get_layer_primitive_info(&reference_frame_relative_offset);
         match *item.item() {
             SpecificDisplayItem::Image(ref info) => {
                 match self.tiled_image_map.get(&info.image_key).cloned() {
                     Some(tiling) => {
                         // The image resource is tiled. We have to generate an image primitive
                         // for each tile.
-                        self.decompose_image(
-                            clip_and_scroll,
-                            &prim_info,
-                            info,
-                            tiling.image_size,
-                            tiling.tile_size as u32,
+                        decompose_image(
+                            &TiledImageInfo {
+                                rect: prim_info.rect,
+                                tile_spacing: info.tile_spacing,
+                                stretch_size: info.stretch_size,
+                                device_image_size: tiling.image_size,
+                                device_tile_size: tiling.tile_size as u32,
+                            },
+                            &mut|tile| {
+                                let mut prim_info = prim_info.clone();
+                                prim_info.rect = tile.rect;
+                                self.add_image(
+                                    clip_and_scroll,
+                                    &prim_info,
+                                    tile.stretch_size,
+                                    info.tile_spacing,
+                                    None,
+                                    info.image_key,
+                                    info.image_rendering,
+                                    info.alpha_type,
+                                    Some(tile.tile_offset),
+                                );
+                            }
                         );
                     }
                     None => {
                         self.add_image(
                             clip_and_scroll,
                             &prim_info,
                             info.stretch_size,
                             info.tile_spacing,
@@ -781,17 +796,17 @@ impl<'a> DisplayListFlattener<'a> {
                     info,
                     &clip_and_scroll_ids,
                     &reference_frame_relative_offset
                 );
             }
             SpecificDisplayItem::Clip(ref info) => {
                 let complex_clips = self.get_complex_clips(pipeline_id, item.complex_clip().0);
                 let clip_region = ClipRegion::create_for_clip_node(
-                    *item.local_clip().clip_rect(),
+                    *item.clip_rect(),
                     complex_clips,
                     info.image_mask,
                     &reference_frame_relative_offset,
                 );
                 self.add_clip_node(info.id, clip_and_scroll_ids.scroll_node_id, clip_region);
             }
             SpecificDisplayItem::ClipChain(ref info) => {
                 let items = self.get_clip_chain_items(pipeline_id, item.clip_chain_items())
@@ -838,348 +853,36 @@ impl<'a> DisplayListFlattener<'a> {
             }
             SpecificDisplayItem::PopAllShadows => {
                 self.pop_all_shadows();
             }
         }
         None
     }
 
-    /// Decomposes an image display item that is repeated into an image per individual repetition.
-    /// We need to do this when we are unable to perform the repetition in the shader,
-    /// for example if the image is tiled.
-    ///
-    /// In all of the "decompose" methods below, we independently handle horizontal and vertical
-    /// decomposition. This lets us generate the minimum amount of primitives by, for  example,
-    /// decompositing the repetition horizontally while repeating vertically in the shader (for
-    /// an image where the width is too bug but the height is not).
-    ///
-    /// decompose_image and decompose_image_row handle image repetitions while decompose_tiled_image
-    /// takes care of the decomposition required by the internal tiling of the image.
-    fn decompose_image(
-        &mut self,
-        clip_and_scroll: ScrollNodeAndClipChain,
-        prim_info: &LayerPrimitiveInfo,
-        info: &ImageDisplayItem,
-        image_size: DeviceUintSize,
-        tile_size: u32,
-    ) {
-        let no_vertical_tiling = image_size.height <= tile_size;
-        let no_vertical_spacing = info.tile_spacing.height == 0.0;
-        let item_rect = prim_info.rect;
-        if no_vertical_tiling && no_vertical_spacing {
-            self.decompose_image_row(
-                clip_and_scroll,
-                prim_info,
-                info,
-                image_size,
-                tile_size,
-            );
-            return;
-        }
-
-        // Decompose each vertical repetition into rows.
-        let layout_stride = info.stretch_size.height + info.tile_spacing.height;
-        let num_repetitions = (item_rect.size.height / layout_stride).ceil() as u32;
-        for i in 0 .. num_repetitions {
-            if let Some(row_rect) = rect(
-                item_rect.origin.x,
-                item_rect.origin.y + (i as f32) * layout_stride,
-                item_rect.size.width,
-                info.stretch_size.height,
-            ).intersection(&item_rect)
-            {
-                let mut prim_info = prim_info.clone();
-                prim_info.rect = row_rect;
-                self.decompose_image_row(
-                    clip_and_scroll,
-                    &prim_info,
-                    info,
-                    image_size,
-                    tile_size,
-                );
-            }
-        }
-    }
-
-    fn decompose_image_row(
-        &mut self,
-        clip_and_scroll: ScrollNodeAndClipChain,
-        prim_info: &LayerPrimitiveInfo,
-        info: &ImageDisplayItem,
-        image_size: DeviceUintSize,
-        tile_size: u32,
-    ) {
-        let no_horizontal_tiling = image_size.width <= tile_size;
-        let no_horizontal_spacing = info.tile_spacing.width == 0.0;
-        if no_horizontal_tiling && no_horizontal_spacing {
-            self.decompose_tiled_image(
-                clip_and_scroll,
-                prim_info,
-                info,
-                image_size,
-                tile_size,
-            );
-            return;
-        }
-
-        // Decompose each horizontal repetition.
-        let item_rect = prim_info.rect;
-        let layout_stride = info.stretch_size.width + info.tile_spacing.width;
-        let num_repetitions = (item_rect.size.width / layout_stride).ceil() as u32;
-        for i in 0 .. num_repetitions {
-            if let Some(decomposed_rect) = rect(
-                item_rect.origin.x + (i as f32) * layout_stride,
-                item_rect.origin.y,
-                info.stretch_size.width,
-                item_rect.size.height,
-            ).intersection(&item_rect)
-            {
-                let mut prim_info = prim_info.clone();
-                prim_info.rect = decomposed_rect;
-                self.decompose_tiled_image(
-                    clip_and_scroll,
-                    &prim_info,
-                    info,
-                    image_size,
-                    tile_size,
-                );
-            }
-        }
-    }
-
-    fn decompose_tiled_image(
-        &mut self,
-        clip_and_scroll: ScrollNodeAndClipChain,
-        prim_info: &LayerPrimitiveInfo,
-        info: &ImageDisplayItem,
-        image_size: DeviceUintSize,
-        tile_size: u32,
-    ) {
-        // The image resource is tiled. We have to generate an image primitive
-        // for each tile.
-        // We need to do this because the image is broken up into smaller tiles in the texture
-        // cache and the image shader is not able to work with this type of sparse representation.
-
-        // The tiling logic works as follows:
-        //
-        //  ###################-+  -+
-        //  #    |    |    |//# |   | image size
-        //  #    |    |    |//# |   |
-        //  #----+----+----+--#-+   |  -+
-        //  #    |    |    |//# |   |   | regular tile size
-        //  #    |    |    |//# |   |   |
-        //  #----+----+----+--#-+   |  -+-+
-        //  #////|////|////|//# |   |     | "leftover" height
-        //  ################### |  -+  ---+
-        //  #----+----+----+----+
-        //
-        // In the ascii diagram above, a large image is plit into tiles of almost regular size.
-        // The tiles on the right and bottom edges (hatched in the diagram) are smaller than
-        // the regular tiles and are handled separately in the code see leftover_width/height.
-        // each generated image primitive corresponds to a tile in the texture cache, with the
-        // assumption that the smaller tiles with leftover sizes are sized to fit their own
-        // irregular size in the texture cache.
-        //
-        // For the case where we don't tile along an axis, we can still perform the repetition in
-        // the shader (for this particular axis), and it is worth special-casing for this to avoid
-        // generating many primitives.
-        // This can happen with very tall and thin images used as a repeating background.
-        // Apparently web authors do that...
-
-        let item_rect = prim_info.rect;
-        let needs_repeat_x = info.stretch_size.width < item_rect.size.width;
-        let needs_repeat_y = info.stretch_size.height < item_rect.size.height;
-
-        let tiled_in_x = image_size.width > tile_size;
-        let tiled_in_y = image_size.height > tile_size;
-
-        // If we don't actually tile in this dimension, repeating can be done in the shader.
-        let shader_repeat_x = needs_repeat_x && !tiled_in_x;
-        let shader_repeat_y = needs_repeat_y && !tiled_in_y;
-
-        let tile_size_f32 = tile_size as f32;
-
-        // Note: this rounds down so it excludes the partially filled tiles on the right and
-        // bottom edges (we handle them separately below).
-        let num_tiles_x = (image_size.width / tile_size) as u16;
-        let num_tiles_y = (image_size.height / tile_size) as u16;
-
-        // Ratio between (image space) tile size and image size.
-        let img_dw = tile_size_f32 / (image_size.width as f32);
-        let img_dh = tile_size_f32 / (image_size.height as f32);
-
-        // Strected size of the tile in layout space.
-        let stretched_tile_size = LayerSize::new(
-            img_dw * info.stretch_size.width,
-            img_dh * info.stretch_size.height,
-        );
-
-        // The size in pixels of the tiles on the right and bottom edges, smaller
-        // than the regular tile size if the image is not a multiple of the tile size.
-        // Zero means the image size is a multiple of the tile size.
-        let leftover =
-            DeviceUintSize::new(image_size.width % tile_size, image_size.height % tile_size);
-
-        for ty in 0 .. num_tiles_y {
-            for tx in 0 .. num_tiles_x {
-                self.add_tile_primitive(
-                    clip_and_scroll,
-                    prim_info,
-                    info,
-                    TileOffset::new(tx, ty),
-                    stretched_tile_size,
-                    1.0,
-                    1.0,
-                    shader_repeat_x,
-                    shader_repeat_y,
-                );
-            }
-            if leftover.width != 0 {
-                // Tiles on the right edge that are smaller than the tile size.
-                self.add_tile_primitive(
-                    clip_and_scroll,
-                    prim_info,
-                    info,
-                    TileOffset::new(num_tiles_x, ty),
-                    stretched_tile_size,
-                    (leftover.width as f32) / tile_size_f32,
-                    1.0,
-                    shader_repeat_x,
-                    shader_repeat_y,
-                );
-            }
-        }
-
-        if leftover.height != 0 {
-            for tx in 0 .. num_tiles_x {
-                // Tiles on the bottom edge that are smaller than the tile size.
-                self.add_tile_primitive(
-                    clip_and_scroll,
-                    prim_info,
-                    info,
-                    TileOffset::new(tx, num_tiles_y),
-                    stretched_tile_size,
-                    1.0,
-                    (leftover.height as f32) / tile_size_f32,
-                    shader_repeat_x,
-                    shader_repeat_y,
-                );
-            }
-
-            if leftover.width != 0 {
-                // Finally, the bottom-right tile with a "leftover" size.
-                self.add_tile_primitive(
-                    clip_and_scroll,
-                    prim_info,
-                    info,
-                    TileOffset::new(num_tiles_x, num_tiles_y),
-                    stretched_tile_size,
-                    (leftover.width as f32) / tile_size_f32,
-                    (leftover.height as f32) / tile_size_f32,
-                    shader_repeat_x,
-                    shader_repeat_y,
-                );
-            }
-        }
-    }
-
-    fn add_tile_primitive(
-        &mut self,
-        clip_and_scroll: ScrollNodeAndClipChain,
-        prim_info: &LayerPrimitiveInfo,
-        info: &ImageDisplayItem,
-        tile_offset: TileOffset,
-        stretched_tile_size: LayerSize,
-        tile_ratio_width: f32,
-        tile_ratio_height: f32,
-        shader_repeat_x: bool,
-        shader_repeat_y: bool,
-    ) {
-        // If the the image is tiled along a given axis, we can't have the shader compute
-        // the image repetition pattern. In this case we base the primitive's rectangle size
-        // on the stretched tile size which effectively cancels the repetion (and repetition
-        // has to be emulated by generating more primitives).
-        // If the image is not tiled along this axis, we can perform the repetition in the
-        // shader. in this case we use the item's size in the primitive (on that particular
-        // axis).
-        // See the shader_repeat_x/y code below.
-
-        let stretched_size = LayerSize::new(
-            stretched_tile_size.width * tile_ratio_width,
-            stretched_tile_size.height * tile_ratio_height,
-        );
-
-        let mut prim_rect = LayerRect::new(
-            prim_info.rect.origin +
-                LayerVector2D::new(
-                    tile_offset.x as f32 * stretched_tile_size.width,
-                    tile_offset.y as f32 * stretched_tile_size.height,
-                ),
-            stretched_size,
-        );
-
-        if shader_repeat_x {
-            assert_eq!(tile_offset.x, 0);
-            prim_rect.size.width = prim_info.rect.size.width;
-        }
-
-        if shader_repeat_y {
-            assert_eq!(tile_offset.y, 0);
-            prim_rect.size.height = prim_info.rect.size.height;
-        }
-
-        // Fix up the primitive's rect if it overflows the original item rect.
-        if let Some(prim_rect) = prim_rect.intersection(&prim_info.rect) {
-            let mut prim_info = prim_info.clone();
-            prim_info.rect = prim_rect;
-            self.add_image(
-                clip_and_scroll,
-                &prim_info,
-                stretched_size,
-                info.tile_spacing,
-                None,
-                info.image_key,
-                info.image_rendering,
-                info.alpha_type,
-                Some(tile_offset),
-            );
-        }
-    }
-
     /// Create a primitive and add it to the prim store. This method doesn't
     /// add the primitive to the draw list, so can be used for creating
     /// sub-primitives.
     pub fn create_primitive(
         &mut self,
         info: &LayerPrimitiveInfo,
-        mut clip_sources: Vec<ClipSource>,
+        clip_sources: Vec<ClipSource>,
         container: PrimitiveContainer,
     ) -> PrimitiveIndex {
-        if let &LocalClip::RoundedRect(main, region) = &info.local_clip {
-            clip_sources.push(ClipSource::Rectangle(main));
-
-            clip_sources.push(ClipSource::new_rounded_rect(
-                region.rect,
-                region.radii,
-                region.mode,
-            ));
-        }
-
         let stacking_context = self.sc_stack.last().expect("bug: no stacking context!");
 
         let clip_sources = if clip_sources.is_empty() {
             None
         } else {
             Some(self.clip_store.insert(ClipSources::new(clip_sources)))
         };
 
         let prim_index = self.prim_store.add_primitive(
             &info.rect,
-            &info.local_clip.clip_rect(),
+            &info.clip_rect,
             info.is_backface_visible && stacking_context.is_backface_visible,
             clip_sources,
             info.tag,
             container,
         );
 
         prim_index
     }
@@ -1209,25 +912,19 @@ impl<'a> DisplayListFlattener<'a> {
 
     /// Add an already created primitive to the draw lists.
     pub fn add_primitive_to_draw_list(
         &mut self,
         prim_index: PrimitiveIndex,
         clip_and_scroll: ScrollNodeAndClipChain,
     ) {
         // Add primitive to the top-most Picture on the stack.
-        // TODO(gw): Let's consider removing the extra indirection
-        //           needed to get a specific primitive index...
-        let pic_prim_index = self.picture_stack.last().unwrap();
-        let metadata = &self.prim_store.cpu_metadata[pic_prim_index.0];
-        let pic = &mut self.prim_store.cpu_pictures[metadata.cpu_prim_index.0];
-        pic.add_primitive(
-            prim_index,
-            clip_and_scroll
-        );
+        let pic_index = self.picture_stack.last().unwrap();
+        let pic = &mut self.prim_store.pictures[pic_index.0];
+        pic.add_primitive(prim_index, clip_and_scroll);
     }
 
     /// Convenience interface that creates a primitive entry and adds it
     /// to the draw list.
     pub fn add_primitive(
         &mut self,
         clip_and_scroll: ScrollNodeAndClipChain,
         info: &LayerPrimitiveInfo,
@@ -1260,53 +957,39 @@ impl<'a> DisplayListFlattener<'a> {
         // primitives, we can apply any kind of clip mask
         // to them, as for a normal primitive. This is needed
         // to correctly handle some CSS cases (see #1957).
         let max_clip = LayerRect::max_rect();
 
         // If there is no root picture, create one for the main framebuffer.
         if self.sc_stack.is_empty() {
             // Should be no pictures at all if the stack is empty...
-            debug_assert!(self.prim_store.cpu_pictures.is_empty());
+            debug_assert!(self.prim_store.pictures.is_empty());
             debug_assert_eq!(transform_style, TransformStyle::Flat);
 
             // This picture stores primitive runs for items on the
             // main framebuffer.
-            let pic = PicturePrimitive::new_image(
+            let pic_index = self.prim_store.add_image_picture(
                 None,
                 false,
                 pipeline_id,
                 current_reference_frame_index,
                 None,
             );
 
-            // Add root picture primitive. The provided layer rect
-            // is zero, because we don't yet know the size of the
-            // picture. Instead, this is calculated recursively
-            // when we cull primitives.
-            let prim_index = self.prim_store.add_primitive(
-                &LayerRect::zero(),
-                &max_clip,
-                true,
-                None,
-                None,
-                PrimitiveContainer::Picture(pic),
-            );
-
-            self.picture_stack.push(prim_index);
+            self.picture_stack.push(pic_index);
         } else if composite_ops.mix_blend_mode.is_some() && self.sc_stack.len() > 2 {
             // If we have a mix-blend-mode, and we aren't the primary framebuffer,
             // the stacking context needs to be isolated to blend correctly as per
             // the CSS spec.
             // TODO(gw): The way we detect not being the primary framebuffer (len > 2)
             //           is hacky and depends on how we create a root stacking context
             //           during flattening.
-            let current_pic_prim_index = self.picture_stack.last().unwrap();
-            let pic_cpu_prim_index = self.prim_store.cpu_metadata[current_pic_prim_index.0].cpu_prim_index;
-            let parent_pic = &mut self.prim_store.cpu_pictures[pic_cpu_prim_index.0];
+            let parent_pic_index = self.picture_stack.last().unwrap();
+            let parent_pic = &mut self.prim_store.pictures[parent_pic_index.0];
 
             match parent_pic.kind {
                 PictureKind::Image { ref mut composite_mode, .. } => {
                     // If not already isolated for some other reason,
                     // make this picture as isolated.
                     if composite_mode.is_none() {
                         *composite_mode = Some(PictureCompositeMode::Blit);
                     }
@@ -1337,126 +1020,130 @@ impl<'a> DisplayListFlattener<'a> {
 
         // If this is participating in a 3d context *and* the
         // parent was not a 3d context, then this must be the
         // element that establishes a new 3d context.
         let establishes_3d_context =
             participating_in_3d_context &&
             parent_transform_style == TransformStyle::Flat;
 
-        let rendering_context_3d_prim_index = if establishes_3d_context {
+        let rendering_context_3d_pic_index = if establishes_3d_context {
             // If establishing a 3d context, we need to add a picture
             // that will be the container for all the planes and any
             // un-transformed content.
-            let container = PicturePrimitive::new_image(
+            let container_index = self.prim_store.add_image_picture(
                 None,
                 false,
                 pipeline_id,
                 current_reference_frame_index,
                 None,
             );
 
+            let prim = BrushPrimitive::new_picture(container_index);
+
             let prim_index = self.prim_store.add_primitive(
                 &LayerRect::zero(),
                 &max_clip,
                 is_backface_visible,
                 None,
                 None,
-                PrimitiveContainer::Picture(container),
+                PrimitiveContainer::Brush(prim),
             );
 
-            let parent_pic_prim_index = *self.picture_stack.last().unwrap();
-            let pic_prim_index = self.prim_store.cpu_metadata[parent_pic_prim_index.0].cpu_prim_index;
-            let pic = &mut self.prim_store.cpu_pictures[pic_prim_index.0];
+            let parent_pic_index = *self.picture_stack.last().unwrap();
+
+            let pic = &mut self.prim_store.pictures[parent_pic_index.0];
             pic.add_primitive(
                 prim_index,
                 clip_and_scroll,
             );
 
-            self.picture_stack.push(prim_index);
+            self.picture_stack.push(container_index);
 
-            Some(prim_index)
+            Some(container_index)
         } else {
             None
         };
 
-        let mut parent_pic_prim_index = if !establishes_3d_context && participating_in_3d_context {
+        let mut parent_pic_index = if !establishes_3d_context && participating_in_3d_context {
             // If we're in a 3D context, we will parent the picture
             // to the first stacking context we find that is a
             // 3D rendering context container. This follows the spec
             // by hoisting these items out into the same 3D context
             // for plane splitting.
             self.sc_stack
                 .iter()
                 .rev()
-                .find(|sc| sc.rendering_context_3d_prim_index.is_some())
-                .map(|sc| sc.rendering_context_3d_prim_index.unwrap())
+                .find(|sc| sc.rendering_context_3d_pic_index.is_some())
+                .map(|sc| sc.rendering_context_3d_pic_index.unwrap())
                 .unwrap()
         } else {
             *self.picture_stack.last().unwrap()
         };
 
         // For each filter, create a new image with that composite mode.
         for filter in composite_ops.filters.iter().rev() {
-            let src_prim = PicturePrimitive::new_image(
+            let src_pic_index = self.prim_store.add_image_picture(
                 Some(PictureCompositeMode::Filter(*filter)),
                 false,
                 pipeline_id,
                 current_reference_frame_index,
                 None,
             );
 
+            let src_prim = BrushPrimitive::new_picture(src_pic_index);
+
             let src_prim_index = self.prim_store.add_primitive(
                 &LayerRect::zero(),
                 &max_clip,
                 is_backface_visible,
                 None,
                 None,
-                PrimitiveContainer::Picture(src_prim),
+                PrimitiveContainer::Brush(src_prim),
             );
 
-            let pic_prim_index = self.prim_store.cpu_metadata[parent_pic_prim_index.0].cpu_prim_index;
-            parent_pic_prim_index = src_prim_index;
-            let pic = &mut self.prim_store.cpu_pictures[pic_prim_index.0];
-            pic.add_primitive(
+            let parent_pic = &mut self.prim_store.pictures[parent_pic_index.0];
+            parent_pic_index = src_pic_index;
+            parent_pic.add_primitive(
                 src_prim_index,
                 clip_and_scroll,
             );
 
-            self.picture_stack.push(src_prim_index);
+            self.picture_stack.push(src_pic_index);
         }
 
         // Same for mix-blend-mode.
         if let Some(mix_blend_mode) = composite_ops.mix_blend_mode {
-            let src_prim = PicturePrimitive::new_image(
+            let src_pic_index = self.prim_store.add_image_picture(
                 Some(PictureCompositeMode::MixBlend(mix_blend_mode)),
                 false,
                 pipeline_id,
                 current_reference_frame_index,
                 None,
             );
 
+            let src_prim = BrushPrimitive::new_picture(src_pic_index);
+
             let src_prim_index = self.prim_store.add_primitive(
                 &LayerRect::zero(),
                 &max_clip,
                 is_backface_visible,
                 None,
                 None,
-                PrimitiveContainer::Picture(src_prim),
+                PrimitiveContainer::Brush(src_prim),
             );
 
-            let pic_prim_index = self.prim_store.cpu_metadata[parent_pic_prim_index.0].cpu_prim_index;
-            parent_pic_prim_index = src_prim_index;
-            let pic = &mut self.prim_store.cpu_pictures[pic_prim_index.0];
-            pic.add_primitive(
+            let parent_pic = &mut self.prim_store.pictures[parent_pic_index.0];
+            parent_pic_index = src_pic_index;
+            parent_pic.add_primitive(
                 src_prim_index,
                 clip_and_scroll,
             );
 
-            self.picture_stack.push(src_prim_index);
+            self.picture_stack.push(src_pic_index);
         }
 
         // By default, this picture will be collapsed into
         // the owning target.
         let mut composite_mode = None;
         let mut frame_output_pipeline_id = None;
 
         // If this stacking context if the root of a pipeline, and the caller
@@ -1473,73 +1160,73 @@ impl<'a> DisplayListFlattener<'a> {
             //           there is a large optimization opportunity here.
             //           During culling, we can check if there is actually
             //           perspective present, and skip the plane splitting
             //           completely when that is not the case.
             composite_mode = Some(PictureCompositeMode::Blit);
         }
 
         // Add picture for this actual stacking context contents to render into.
-        let sc_prim = PicturePrimitive::new_image(
+        let pic_index = self.prim_store.add_image_picture(
             composite_mode,
             participating_in_3d_context,
             pipeline_id,
             current_reference_frame_index,
             frame_output_pipeline_id,
         );
 
+        // Create a brush primitive that draws this picture.
+        let sc_prim = BrushPrimitive::new_picture(pic_index);
+
+        // Add the brush to the parent picture.
         let sc_prim_index = self.prim_store.add_primitive(
             &LayerRect::zero(),
             &max_clip,
             is_backface_visible,
             None,
             None,
-            PrimitiveContainer::Picture(sc_prim),
+            PrimitiveContainer::Brush(sc_prim),
         );
 
-        let pic_prim_index = self.prim_store.cpu_metadata[parent_pic_prim_index.0].cpu_prim_index;
-        let sc_pic = &mut self.prim_store.cpu_pictures[pic_prim_index.0];
-        sc_pic.add_primitive(
-            sc_prim_index,
-            clip_and_scroll,
-        );
+        let parent_pic = &mut self.prim_store.pictures[parent_pic_index.0];
+        parent_pic.add_primitive(sc_prim_index, clip_and_scroll);
 
         // Add this as the top-most picture for primitives to be added to.
-        self.picture_stack.push(sc_prim_index);
+        self.picture_stack.push(pic_index);
 
         // TODO(gw): This is super conservative. We can expand on this a lot
         //           once all the picture code is in place and landed.
         let allow_subpixel_aa = composite_ops.count() == 0 &&
                                 transform_style == TransformStyle::Flat;
 
         // Push the SC onto the stack, so we know how to handle things in
         // pop_stacking_context.
         let sc = FlattenedStackingContext {
             composite_ops,
             is_backface_visible,
             pipeline_id,
             allow_subpixel_aa,
             transform_style,
-            rendering_context_3d_prim_index,
+            rendering_context_3d_pic_index,
         };
 
         self.sc_stack.push(sc);
     }
 
     pub fn pop_stacking_context(&mut self) {
         let sc = self.sc_stack.pop().unwrap();
 
         // Always pop at least the main picture for this stacking context.
         let mut pop_count = 1;
 
         // Remove the picture for any filter/mix-blend-mode effects.
         pop_count += sc.composite_ops.count();
 
         // Remove the 3d context container if created
-        if sc.rendering_context_3d_prim_index.is_some() {
+        if sc.rendering_context_3d_pic_index.is_some() {
             pop_count += 1;
         }
 
         for _ in 0 .. pop_count {
             self.picture_stack.pop().expect("bug: mismatched picture stack");
         }
 
         // By the time the stacking context stack is empty, we should
@@ -1689,26 +1376,28 @@ impl<'a> DisplayListFlattener<'a> {
 
     pub fn push_shadow(
         &mut self,
         shadow: Shadow,
         clip_and_scroll: ScrollNodeAndClipChain,
         info: &LayerPrimitiveInfo,
     ) {
         let pipeline_id = self.sc_stack.last().unwrap().pipeline_id;
-        let prim = PicturePrimitive::new_text_shadow(shadow, pipeline_id);
+        let pic_index = self.prim_store.add_shadow_picture(shadow, pipeline_id);
+
+        let prim = BrushPrimitive::new_picture(pic_index);
 
         // Create an empty shadow primitive. Insert it into
         // the draw lists immediately so that it will be drawn
         // before any visual text elements that are added as
         // part of this shadow context.
         let prim_index = self.create_primitive(
             info,
             Vec::new(),
-            PrimitiveContainer::Picture(prim),
+            PrimitiveContainer::Brush(prim),
         );
 
         let pending = vec![(prim_index, clip_and_scroll)];
         self.shadow_prim_stack.push((prim_index, pending));
     }
 
     pub fn pop_all_shadows(&mut self) {
         assert!(self.shadow_prim_stack.len() > 0, "popped shadows, but none were present");
@@ -1828,17 +1517,18 @@ impl<'a> DisplayListFlattener<'a> {
                 orientation,
             },
             None,
         );
 
         let mut fast_shadow_prims = Vec::new();
         for (idx, &(shadow_prim_index, _)) in self.shadow_prim_stack.iter().enumerate() {
             let shadow_metadata = &self.prim_store.cpu_metadata[shadow_prim_index.0];
-            let picture = &self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
+            let brush = &self.prim_store.cpu_brushes[shadow_metadata.cpu_prim_index.0];
+            let picture = &self.prim_store.pictures[brush.get_picture_index().0];
             match picture.kind {
                 PictureKind::TextShadow { offset, color, blur_radius, .. } if blur_radius == 0.0 => {
                     fast_shadow_prims.push((idx, offset, color));
                 }
                 _ => {}
             }
         }
 
@@ -1849,18 +1539,17 @@ impl<'a> DisplayListFlattener<'a> {
                     color: shadow_color.premultiplied(),
                     style,
                     orientation,
                 },
                 None,
             );
             let mut info = info.clone();
             info.rect = info.rect.translate(&shadow_offset);
-            info.local_clip =
-              LocalClip::from(info.local_clip.clip_rect().translate(&shadow_offset));
+            info.clip_rect = info.clip_rect.translate(&shadow_offset);
             let prim_index = self.create_primitive(
                 &info,
                 Vec::new(),
                 PrimitiveContainer::Brush(line),
             );
             self.shadow_prim_stack[idx].1.push((prim_index, clip_and_scroll));
         }
 
@@ -1876,19 +1565,19 @@ impl<'a> DisplayListFlattener<'a> {
                 self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
             } else {
                 self.pending_shadow_contents.push((prim_index, clip_and_scroll, *info));
             }
         }
 
         for &(shadow_prim_index, _) in &self.shadow_prim_stack {
             let shadow_metadata = &mut self.prim_store.cpu_metadata[shadow_prim_index.0];
-            debug_assert_eq!(shadow_metadata.prim_kind, PrimitiveKind::Picture);
-            let picture =
-                &mut self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
+            debug_assert_eq!(shadow_metadata.prim_kind, PrimitiveKind::Brush);
+            let brush = &self.prim_store.cpu_brushes[shadow_metadata.cpu_prim_index.0];
+            let picture = &mut self.prim_store.pictures[brush.get_picture_index().0];
 
             match picture.kind {
                 // Only run real blurs here (fast path zero blurs are handled above).
                 PictureKind::TextShadow { blur_radius, .. } if blur_radius > 0.0 => {
                     picture.add_primitive(
                         prim_index,
                         clip_and_scroll,
                     );
@@ -2426,35 +2115,35 @@ impl<'a> DisplayListFlattener<'a> {
         // possible. For any text shadows that have zero blur, create a normal text
         // primitive with the shadow's color and offset. These need to be added
         // *before* the visual text primitive in order to get the correct paint
         // order. Store them in a Vec first to work around borrowck issues.
         // TODO(gw): Refactor to avoid having to store them in a Vec first.
         let mut fast_shadow_prims = Vec::new();
         for (idx, &(shadow_prim_index, _)) in self.shadow_prim_stack.iter().enumerate() {
             let shadow_metadata = &self.prim_store.cpu_metadata[shadow_prim_index.0];
-            let picture_prim = &self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
+            let brush = &self.prim_store.cpu_brushes[shadow_metadata.cpu_prim_index.0];
+            let picture_prim = &self.prim_store.pictures[brush.get_picture_index().0];
             match picture_prim.kind {
                 PictureKind::TextShadow { offset, color, blur_radius, .. } if blur_radius == 0.0 => {
                     let mut text_prim = prim.clone();
                     text_prim.font.color = color.into();
                     text_prim.shadow = true;
                     text_prim.offset += offset;
                     fast_shadow_prims.push((idx, text_prim));
                 }
                 _ => {}
             }
         }
 
         for (idx, text_prim) in fast_shadow_prims {
             let rect = info.rect;
             let mut info = info.clone();
             info.rect = rect.translate(&text_prim.offset);
-            info.local_clip =
-              LocalClip::from(info.local_clip.clip_rect().translate(&text_prim.offset));
+            info.clip_rect = info.clip_rect.translate(&text_prim.offset);
             let prim_index = self.create_primitive(
                 &info,
                 Vec::new(),
                 PrimitiveContainer::TextRun(text_prim),
             );
             self.shadow_prim_stack[idx].1.push((prim_index, clip_and_scroll));
         }
 
@@ -2480,19 +2169,19 @@ impl<'a> DisplayListFlattener<'a> {
         // primitives. Although we're adding the indices *after* the visual
         // primitive here, they will still draw before the visual text, since
         // the shadow primitive itself has been added to the draw cmd
         // list *before* the visual element, during push_shadow. We need
         // the primitive index of the visual element here before we can add
         // the indices as sub-primitives to the shadow primitives.
         for &(shadow_prim_index, _) in &self.shadow_prim_stack {
             let shadow_metadata = &mut self.prim_store.cpu_metadata[shadow_prim_index.0];
-            debug_assert_eq!(shadow_metadata.prim_kind, PrimitiveKind::Picture);
-            let picture =
-                &mut self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
+            debug_assert_eq!(shadow_metadata.prim_kind, PrimitiveKind::Brush);
+            let brush = &self.prim_store.cpu_brushes[shadow_metadata.cpu_prim_index.0];
+            let picture = &mut self.prim_store.pictures[brush.get_picture_index().0];
 
             match picture.kind {
                 // Only run real blurs here (fast path zero blurs are handled above).
                 PictureKind::TextShadow { blur_radius, .. } if blur_radius > 0.0 => {
                     picture.add_primitive(
                         prim_index,
                         clip_and_scroll,
                     );
@@ -2662,31 +2351,33 @@ impl PrimitiveInfoTiler for LayerPrimiti
 
         if tile_repeat.width <= 0.0 ||
            tile_repeat.height <= 0.0 {
             return prims;
         }
 
         if tile_repeat.width < self.rect.size.width ||
            tile_repeat.height < self.rect.size.height {
-            let local_clip = self.local_clip.clip_by(&self.rect);
+            let clip_rect = self.clip_rect
+                .intersection(&self.rect)
+                .unwrap_or_else(LayoutRect::zero);
             let rect_p0 = self.rect.origin;
             let rect_p1 = self.rect.bottom_right();
 
             let mut y0 = rect_p0.y;
             while y0 < rect_p1.y {
                 let mut x0 = rect_p0.x;
 
                 while x0 < rect_p1.x {
                     prims.push(LayerPrimitiveInfo {
                         rect: LayerRect::new(
                             LayerPoint::new(x0, y0),
                             tile_size,
                         ),
-                        local_clip,
+                        clip_rect,
                         is_backface_visible: self.is_backface_visible,
                         tag: self.tag,
                     });
 
                     // Mostly a safety against a crazy number of primitives
                     // being generated. If we exceed that amount, just bail
                     // out and only draw the maximum amount.
                     if prims.len() > max_prims {
@@ -2722,15 +2413,15 @@ struct FlattenedStackingContext {
     /// This is a temporary hack while we don't support subpixel AA
     /// on transparent stacking contexts.
     allow_subpixel_aa: bool,
 
     /// CSS transform-style property.
     transform_style: TransformStyle,
 
     /// If Some(..), this stacking context establishes a new
-    /// 3d rendering context, and the value is the primitive
+    /// 3d rendering context, and the value is the picture
     // index of the 3d context container.
-    rendering_context_3d_prim_index: Option<PrimitiveIndex>,
+    rendering_context_3d_pic_index: Option<PictureIndex>,
 }
 
 #[derive(Debug)]
 pub struct ScrollbarInfo(pub ClipScrollNodeIndex, pub LayerRect);
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -163,17 +163,17 @@ impl FrameBuilder {
         profile_counters: &mut FrameProfileCounters,
         device_pixel_scale: DevicePixelScale,
         scene_properties: &SceneProperties,
         local_clip_rects: &mut Vec<LayerRect>,
         node_data: &[ClipScrollNodeData],
     ) -> Option<RenderTaskId> {
         profile_scope!("cull");
 
-        if self.prim_store.cpu_pictures.is_empty() {
+        if self.prim_store.pictures.is_empty() {
             return None
         }
 
         // The root picture is always the first one added.
         let root_clip_scroll_node =
             &clip_scroll_tree.nodes[clip_scroll_tree.root_reference_frame_index().0];
 
         let display_list = &pipelines
@@ -198,34 +198,34 @@ impl FrameBuilder {
             resource_cache,
             gpu_cache,
             cached_gradients: &mut self.cached_gradients,
         };
 
         let pic_context = PictureContext {
             pipeline_id: root_clip_scroll_node.pipeline_id,
             perform_culling: true,
-            prim_runs: mem::replace(&mut self.prim_store.cpu_pictures[0].runs, Vec::new()),
+            prim_runs: mem::replace(&mut self.prim_store.pictures[0].runs, Vec::new()),
             original_reference_frame_index: None,
             display_list,
             draw_text_transformed: true,
             inv_world_transform: None,
         };
 
         let mut pic_state = PictureState::new();
 
         self.prim_store.reset_prim_visibility();
         self.prim_store.prepare_prim_runs(
             &pic_context,
             &mut pic_state,
             &frame_context,
             &mut frame_state,
         );
 
-        let pic = &mut self.prim_store.cpu_pictures[0];
+        let pic = &mut self.prim_store.pictures[0];
         pic.runs = pic_context.prim_runs;
 
         let root_render_task = RenderTask::new_picture(
             RenderTaskLocation::Fixed(frame_context.screen_rect),
             PrimitiveIndex(0),
             RenderTargetKind::Color,
             ContentOrigin::Screen(DeviceIntPoint::zero()),
             PremultipliedColorF::TRANSPARENT,
@@ -293,17 +293,17 @@ impl FrameBuilder {
             .total_primitives
             .set(self.prim_store.prim_count());
 
         resource_cache.begin_frame(frame_id);
         gpu_cache.begin_frame();
 
         let mut node_data = Vec::with_capacity(clip_scroll_tree.nodes.len());
         let total_prim_runs =
-            self.prim_store.cpu_pictures.iter().fold(1, |count, ref pic| count + pic.runs.len());
+            self.prim_store.pictures.iter().fold(1, |count, ref pic| count + pic.runs.len());
         let mut clip_chain_local_clip_rects = Vec::with_capacity(total_prim_runs);
         clip_chain_local_clip_rects.push(LayerRect::max_rect());
 
         clip_scroll_tree.update_tree(
             &self.screen_rect.to_i32(),
             device_pixel_scale,
             &mut self.clip_store,
             resource_cache,
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -1,28 +1,37 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{DevicePoint, LayerToWorldTransform, PremultipliedColorF, WorldToLayerTransform};
+use api::{DevicePoint, LayerToWorldTransform, WorldToLayerTransform};
 use gpu_cache::{GpuCacheAddress, GpuDataRequest};
 use prim_store::EdgeAaSegmentMask;
 use render_task::RenderTaskAddress;
 
 // Contains type that must exactly match the same structures declared in GLSL.
 
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[repr(C)]
 pub enum RasterizationSpace {
     Local = 0,
     Screen = 1,
 }
 
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+pub enum BoxShadowStretchMode {
+    Stretch = 0,
+    Simple = 1,
+}
+
 #[repr(i32)]
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum BlurDirection {
     Horizontal = 0,
     Vertical,
 }
@@ -249,28 +258,26 @@ pub enum PictureType {
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[repr(C)]
 pub struct ImageSource {
     pub p0: DevicePoint,
     pub p1: DevicePoint,
     pub texture_layer: f32,
     pub user_data: [f32; 3],
-    pub color: PremultipliedColorF,
 }
 
 impl ImageSource {
     pub fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
         request.push([
             self.p0.x,
             self.p0.y,
             self.p1.x,
             self.p1.y,
         ]);
         request.push([
             self.texture_layer,
             self.user_data[0],
             self.user_data[1],
             self.user_data[2],
         ]);
-        request.push(self.color);
     }
 }
--- a/gfx/webrender/src/hit_test.rs
+++ b/gfx/webrender/src/hit_test.rs
@@ -1,15 +1,15 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, ClipMode, HitTestFlags, HitTestItem, HitTestResult, ItemTag, LayerPoint};
-use api::{LayerPrimitiveInfo, LayerRect, LocalClip, PipelineId, WorldPoint};
-use clip::{ClipSource, ClipStore, Contains, rounded_rectangle_contains_point};
+use api::{LayerPrimitiveInfo, LayerRect, PipelineId, WorldPoint};
+use clip::{ClipSource, ClipStore, rounded_rectangle_contains_point};
 use clip_scroll_node::{ClipScrollNode, NodeType};
 use clip_scroll_tree::{ClipChainIndex, ClipScrollNodeIndex, ClipScrollTree};
 use internal_types::FastHashMap;
 use prim_store::ScrollNodeAndClipChain;
 use util::LayerToWorldFastTransform;
 
 /// A copy of important clip scroll node data to use during hit testing. This a copy of
 /// data from the ClipScrollTree that will persist as a new frame is under construction,
@@ -50,25 +50,25 @@ impl HitTestClipChainDescriptor {
             clips: Vec::new(),
         }
     }
 }
 
 #[derive(Clone)]
 pub struct HitTestingItem {
     rect: LayerRect,
-    clip: LocalClip,
+    clip_rect: LayerRect,
     tag: ItemTag,
 }
 
 impl HitTestingItem {
     pub fn new(tag: ItemTag, info: &LayerPrimitiveInfo) -> HitTestingItem {
         HitTestingItem {
             rect: info.rect,
-            clip: info.local_clip,
+            clip_rect: info.clip_rect,
             tag: tag,
         }
     }
 }
 
 #[derive(Clone)]
 pub struct HitTestingRun(pub Vec<HitTestingItem>, pub ScrollNodeAndClipChain);
 
@@ -234,17 +234,18 @@ impl HitTester {
             let transform = scroll_node.world_content_transform;
             let point_in_layer = match transform.inverse() {
                 Some(inverted) => inverted.transform_point2d(&point),
                 None => continue,
             };
 
             let mut clipped_in = false;
             for item in items.iter().rev() {
-                if !item.rect.contains(&point_in_layer) || !item.clip.contains(&point_in_layer) {
+                if !item.rect.contains(&point_in_layer) ||
+                    !item.clip_rect.contains(&point_in_layer) {
                     continue;
                 }
 
                 let clip_chain_index = clip_and_scroll.clip_chain_index;
                 clipped_in |=
                     self.is_point_clipped_in_for_clip_chain(point, clip_chain_index, &mut test);
                 if !clipped_in {
                     break;
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/src/image.rs
@@ -0,0 +1,280 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{TileOffset, LayerRect, LayerSize, LayerVector2D, DeviceUintSize};
+use euclid::rect;
+
+pub struct DecomposedTile {
+    pub rect: LayerRect,
+    pub stretch_size: LayerSize,
+    pub tile_offset: TileOffset,
+}
+
+pub struct TiledImageInfo {
+    /// The bounds of the item in layout space.
+    pub rect: LayerRect,
+    /// The space between each repeated pattern in layout space.
+    pub tile_spacing: LayerSize,
+    /// The size in layout space of each repetition of the image.
+    pub stretch_size: LayerSize,
+
+    /// The size the image occupies in the cache in device space.
+    pub device_image_size: DeviceUintSize,
+    /// The size of the tiles in the cache in device pixels.
+    pub device_tile_size: u32,
+}
+
+/// Decomposes an image that is repeated into an image per individual repetition.
+/// We need to do this when we are unable to perform the repetition in the shader,
+/// for example if the image is tiled.
+///
+/// In all of the "decompose" methods below, we independently handle horizontal and vertical
+/// decomposition. This lets us generate the minimum amount of primitives by, for example,
+/// decomposing the repetition horizontally while repeating vertically in the shader (for
+/// an image where the width is too big but the height is not).
+///
+/// decompose_image and decompose_row handle image repetitions while decompose_cache_tiles
+/// takes care of the decomposition required by the internal tiling of the image in the cache.
+///
+/// Note that the term tiling is overloaded: There is the tiling we get from repeating images
+/// in layout space, and the tiling that we do in the texture cache (to avoid hitting texture
+/// size limits). The latter is referred to as "device" tiling here to disambiguate.
+pub fn decompose_image(info: &TiledImageInfo, callback: &mut FnMut(&DecomposedTile)) {
+
+    let no_vertical_tiling = info.device_image_size.height <= info.device_tile_size;
+    let no_vertical_spacing = info.tile_spacing.height == 0.0;
+
+    if no_vertical_tiling && no_vertical_spacing {
+        decompose_row(&info.rect, info, callback);
+        return;
+    }
+
+    // Decompose each vertical repetition into rows.
+    let layout_stride = info.stretch_size.height + info.tile_spacing.height;
+    let num_repetitions = (info.rect.size.height / layout_stride).ceil() as u32;
+
+    for i in 0 .. num_repetitions {
+        let row_rect = rect(
+            info.rect.origin.x,
+            info.rect.origin.y + (i as f32) * layout_stride,
+            info.rect.size.width,
+            info.stretch_size.height,
+        ).intersection(&info.rect);
+
+        if let Some(row_rect) = row_rect {
+            decompose_row(&row_rect, info, callback);
+        }
+    }
+}
+
+
+fn decompose_row(item_rect: &LayerRect, info: &TiledImageInfo, callback: &mut FnMut(&DecomposedTile)) {
+
+    let no_horizontal_tiling = info.device_image_size.width <= info.device_tile_size;
+    let no_horizontal_spacing = info.tile_spacing.width == 0.0;
+
+    if no_horizontal_tiling && no_horizontal_spacing {
+        decompose_cache_tiles(item_rect, info, callback);
+        return;
+    }
+
+    // Decompose each horizontal repetition.
+    let layout_stride = info.stretch_size.width + info.tile_spacing.width;
+    let num_repetitions = (item_rect.size.width / layout_stride).ceil() as u32;
+
+    for i in 0 .. num_repetitions {
+        let decomposed_rect = rect(
+            item_rect.origin.x + (i as f32) * layout_stride,
+            item_rect.origin.y,
+            info.stretch_size.width,
+            item_rect.size.height,
+        ).intersection(&item_rect);
+
+        if let Some(decomposed_rect) = decomposed_rect {
+            decompose_cache_tiles(&decomposed_rect, info, callback);
+        }
+    }
+}
+
+fn decompose_cache_tiles(
+    item_rect: &LayerRect,
+    info: &TiledImageInfo,
+    callback: &mut FnMut(&DecomposedTile),
+) {
+    // The image resource is tiled. We have to generate an image primitive
+    // for each tile.
+    // We need to do this because the image is broken up into smaller tiles in the texture
+    // cache and the image shader is not able to work with this type of sparse representation.
+
+    // The tiling logic works as follows:
+    //
+    //  ###################-+  -+
+    //  #    |    |    |//# |   | image size
+    //  #    |    |    |//# |   |
+    //  #----+----+----+--#-+   |  -+
+    //  #    |    |    |//# |   |   | regular tile size
+    //  #    |    |    |//# |   |   |
+    //  #----+----+----+--#-+   |  -+-+
+    //  #////|////|////|//# |   |     | "leftover" height
+    //  ################### |  -+  ---+
+    //  #----+----+----+----+
+    //
+    // In the ascii diagram above, a large image is split into tiles of almost regular size.
+    // The tiles on the right and bottom edges (hatched in the diagram) are smaller than
+    // the regular tiles and are handled separately in the code (see leftover.width/height).
+    // Each generated image primitive corresponds to a tile in the texture cache, with the
+    // assumption that the smaller tiles with leftover sizes are sized to fit their own
+    // irregular size in the texture cache.
+    //
+    // For the case where we don't tile along an axis, we can still perform the repetition in
+    // the shader (for this particular axis), and it is worth special-casing for this to avoid
+    // generating many primitives.
+    // This can happen with very tall and thin images used as a repeating background.
+    // Apparently web authors do that...
+
+    let needs_repeat_x = info.stretch_size.width < item_rect.size.width;
+    let needs_repeat_y = info.stretch_size.height < item_rect.size.height;
+
+    let tiled_in_x = info.device_image_size.width > info.device_tile_size;
+    let tiled_in_y = info.device_image_size.height > info.device_tile_size;
+
+    // If we don't actually tile in this dimension, repeating can be done in the shader.
+    let shader_repeat_x = needs_repeat_x && !tiled_in_x;
+    let shader_repeat_y = needs_repeat_y && !tiled_in_y;
+
+    let tile_size_f32 = info.device_tile_size as f32;
+
+    // Note: this rounds down so it excludes the partially filled tiles on the right and
+    // bottom edges (we handle them separately below).
+    let num_tiles_x = (info.device_image_size.width / info.device_tile_size) as u16;
+    let num_tiles_y = (info.device_image_size.height / info.device_tile_size) as u16;
+
+    // Ratio between (image space) tile size and image size.
+    let img_dw = tile_size_f32 / (info.device_image_size.width as f32);
+    let img_dh = tile_size_f32 / (info.device_image_size.height as f32);
+
+    // Stretched size of the tile in layout space.
+    let stretched_tile_size = LayerSize::new(
+        img_dw * info.stretch_size.width,
+        img_dh * info.stretch_size.height,
+    );
+
+    // The size in pixels of the tiles on the right and bottom edges, smaller
+    // than the regular tile size if the image is not a multiple of the tile size.
+    // Zero means the image size is a multiple of the tile size.
+    let leftover = DeviceUintSize::new(
+        info.device_image_size.width % info.device_tile_size,
+        info.device_image_size.height % info.device_tile_size
+    );
+
+    for ty in 0 .. num_tiles_y {
+        for tx in 0 .. num_tiles_x {
+            add_device_tile(
+                item_rect,
+                stretched_tile_size,
+                TileOffset::new(tx, ty),
+                1.0,
+                1.0,
+                shader_repeat_x,
+                shader_repeat_y,
+                callback,
+            );
+        }
+        if leftover.width != 0 {
+            // Tiles on the right edge that are smaller than the tile size.
+            add_device_tile(
+                item_rect,
+                stretched_tile_size,
+                TileOffset::new(num_tiles_x, ty),
+                (leftover.width as f32) / tile_size_f32,
+                1.0,
+                shader_repeat_x,
+                shader_repeat_y,
+                callback,
+            );
+        }
+    }
+
+    if leftover.height != 0 {
+        for tx in 0 .. num_tiles_x {
+            // Tiles on the bottom edge that are smaller than the tile size.
+            add_device_tile(
+                item_rect,
+                stretched_tile_size,
+                TileOffset::new(tx, num_tiles_y),
+                1.0,
+                (leftover.height as f32) / tile_size_f32,
+                shader_repeat_x,
+                shader_repeat_y,
+                callback,
+            );
+        }
+
+        if leftover.width != 0 {
+            // Finally, the bottom-right tile with a "leftover" size.
+            add_device_tile(
+                item_rect,
+                stretched_tile_size,
+                TileOffset::new(num_tiles_x, num_tiles_y),
+                (leftover.width as f32) / tile_size_f32,
+                (leftover.height as f32) / tile_size_f32,
+                shader_repeat_x,
+                shader_repeat_y,
+                callback,
+            );
+        }
+    }
+}
+
+fn add_device_tile(
+    item_rect: &LayerRect,
+    stretched_tile_size: LayerSize,
+    tile_offset: TileOffset,
+    tile_ratio_width: f32,
+    tile_ratio_height: f32,
+    shader_repeat_x: bool,
+    shader_repeat_y: bool,
+    callback: &mut FnMut(&DecomposedTile),
+) {
+    // If the image is tiled along a given axis, we can't have the shader compute
+    // the image repetition pattern. In this case we base the primitive's rectangle size
+    // on the stretched tile size which effectively cancels the repetition (and repetition
+    // has to be emulated by generating more primitives).
+    // If the image is not tiled along this axis, we can perform the repetition in the
+    // shader. In this case we use the item's size in the primitive (on that particular
+    // axis).
+    // See the shader_repeat_x/y code below.
+
+    let stretch_size = LayerSize::new(
+        stretched_tile_size.width * tile_ratio_width,
+        stretched_tile_size.height * tile_ratio_height,
+    );
+
+    let mut prim_rect = LayerRect::new(
+        item_rect.origin + LayerVector2D::new(
+            tile_offset.x as f32 * stretched_tile_size.width,
+            tile_offset.y as f32 * stretched_tile_size.height,
+        ),
+        stretch_size,
+    );
+
+    if shader_repeat_x {
+        assert_eq!(tile_offset.x, 0);
+        prim_rect.size.width = item_rect.size.width;
+    }
+
+    if shader_repeat_y {
+        assert_eq!(tile_offset.y, 0);
+        prim_rect.size.height = item_rect.size.height;
+    }
+
+    // Fix up the primitive's rect if it overflows the original item rect.
+    if let Some(rect) = prim_rect.intersection(&item_rect) {
+        callback(&DecomposedTile {
+            tile_offset,
+            rect,
+            stretch_size,
+        });
+    }
+}
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -73,30 +73,32 @@ mod freelist;
 #[cfg(any(target_os = "macos", target_os = "windows"))]
 mod gamma_lut;
 mod geometry;
 mod glyph_cache;
 mod glyph_rasterizer;
 mod gpu_cache;
 mod gpu_types;
 mod hit_test;
+mod image;
 mod internal_types;
 mod picture;
 mod prim_store;
 mod print_tree;
 mod profiler;
 mod query;
 mod record;
 mod render_backend;
 mod render_task;
 mod renderer;
 mod resource_cache;
 mod scene;
 mod scene_builder;
 mod segment;
+mod shade;
 mod spring;
 mod texture_allocator;
 mod texture_cache;
 mod tiling;
 mod util;
 
 mod shader_source {
     include!(concat!(env!("OUT_DIR"), "/shaders.rs"));
@@ -152,17 +154,17 @@ extern crate rayon;
 extern crate ron;
 #[cfg(feature = "debugger")]
 extern crate serde_json;
 extern crate smallvec;
 extern crate time;
 #[cfg(feature = "debugger")]
 extern crate ws;
 #[cfg(feature = "debugger")]
-extern crate image;
+extern crate image as image_loader;
 #[cfg(feature = "debugger")]
 extern crate base64;
 #[cfg(all(feature = "capture", feature = "png"))]
 extern crate png;
 
 pub extern crate webrender_api;
 
 #[doc(hidden)]
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -6,18 +6,18 @@ use api::{DeviceIntPoint, DeviceIntRect}
 use api::{LayerPoint, LayerRect, LayerToWorldScale, LayerVector2D};
 use api::{ColorF, FilterOp, MixBlendMode, PipelineId};
 use api::{PremultipliedColorF, Shadow};
 use box_shadow::{BLUR_SAMPLE_SCALE};
 use clip_scroll_tree::ClipScrollNodeIndex;
 use frame_builder::{FrameBuildingContext, FrameBuildingState, PictureState};
 use gpu_cache::{GpuCacheHandle, GpuDataRequest};
 use gpu_types::{PictureType};
-use prim_store::{BrushKind, BrushPrimitive, PrimitiveIndex, PrimitiveRun, PrimitiveRunLocalRect};
-use prim_store::ScrollNodeAndClipChain;
+use prim_store::{PrimitiveIndex, PrimitiveRun, PrimitiveRunLocalRect};
+use prim_store::{PrimitiveMetadata, ScrollNodeAndClipChain};
 use render_task::{ClearMode, RenderTask};
 use render_task::{RenderTaskId, RenderTaskLocation, to_cache_size};
 use scene::{FilterOpHelpers, SceneProperties};
 use tiling::RenderTargetKind;
 
 /*
  A picture represents a dynamically rendered image. It consists of:
 
@@ -80,16 +80,19 @@ pub enum PictureKind {
         // It is only different if this is part of a 3D
         // rendering context.
         reference_frame_index: ClipScrollNodeIndex,
         real_local_rect: LayerRect,
         // An optional cache handle for storing extra data
         // in the GPU cache, depending on the type of
         // picture.
         extra_gpu_data_handle: GpuCacheHandle,
+        // The current screen-space rect of the rendered
+        // portion of this picture.
+        task_rect: DeviceIntRect,
     },
 }
 
 #[derive(Debug)]
 pub struct PicturePrimitive {
     // If this picture is drawn to an intermediate surface,
     // the associated target information.
     pub surface: Option<RenderTaskId>,
@@ -102,44 +105,31 @@ pub struct PicturePrimitive {
 
     // The pipeline that the primitives on this picture belong to.
     pub pipeline_id: PipelineId,
 
     // If true, apply visibility culling to primitives on this
     // picture. For text shadows and box shadows, we want to
     // unconditionally draw them.
     pub cull_children: bool,
-
-    // The brush primitive that will be used to draw this
-    // picture.
-    // TODO(gw): Having a brush primitive embedded here
-    //           makes the code complex in a few places.
-    //           Consider a better way to structure this.
-    //           Maybe embed the PicturePrimitive inside
-    //           the BrushKind enum instead?
-    pub brush: BrushPrimitive,
 }
 
 impl PicturePrimitive {
     pub fn new_text_shadow(shadow: Shadow, pipeline_id: PipelineId) -> Self {
         PicturePrimitive {
             runs: Vec::new(),
             surface: None,
             kind: PictureKind::TextShadow {
                 offset: shadow.offset,
                 color: shadow.color,
                 blur_radius: shadow.blur_radius,
                 content_rect: LayerRect::zero(),
             },
             pipeline_id,
             cull_children: false,
-            brush: BrushPrimitive::new(
-                BrushKind::Picture,
-                None,
-            ),
         }
     }
 
     pub fn resolve_scene_properties(&mut self, properties: &SceneProperties) -> bool {
         match self.kind {
             PictureKind::Image { ref mut composite_mode, .. } => {
                 match composite_mode {
                     &mut Some(PictureCompositeMode::Filter(ref mut filter)) => {
@@ -172,23 +162,20 @@ impl PicturePrimitive {
             kind: PictureKind::Image {
                 secondary_render_task_id: None,
                 composite_mode,
                 is_in_3d_context,
                 frame_output_pipeline_id,
                 reference_frame_index,
                 real_local_rect: LayerRect::zero(),
                 extra_gpu_data_handle: GpuCacheHandle::new(),
+                task_rect: DeviceIntRect::zero(),
             },
             pipeline_id,
             cull_children: true,
-            brush: BrushPrimitive::new(
-                BrushKind::Picture,
-                None,
-            ),
         }
     }
 
     pub fn add_primitive(
         &mut self,
         prim_index: PrimitiveIndex,
         clip_and_scroll: ScrollNodeAndClipChain
     ) {
@@ -243,64 +230,97 @@ impl PicturePrimitive {
                 content_rect.translate(&offset)
             }
         }
     }
 
     pub fn prepare_for_render(
         &mut self,
         prim_index: PrimitiveIndex,
-        prim_screen_rect: &DeviceIntRect,
-        prim_local_rect: &LayerRect,
+        prim_metadata: &mut PrimitiveMetadata,
         pic_state_for_children: PictureState,
         pic_state: &mut PictureState,
         frame_context: &FrameBuildingContext,
         frame_state: &mut FrameBuildingState,
     ) {
         let content_scale = LayerToWorldScale::new(1.0) * frame_context.device_pixel_scale;
+        let prim_screen_rect = prim_metadata
+                                .screen_rect
+                                .as_ref()
+                                .expect("bug: trying to draw an off-screen picture!?");
 
         match self.kind {
             PictureKind::Image {
                 ref mut secondary_render_task_id,
                 ref mut extra_gpu_data_handle,
+                ref mut task_rect,
                 composite_mode,
                 ..
             } => {
-                let content_origin = ContentOrigin::Screen(prim_screen_rect.origin);
-                match composite_mode {
+                let device_rect = match composite_mode {
                     Some(PictureCompositeMode::Filter(FilterOp::Blur(blur_radius))) => {
-                        let picture_task = RenderTask::new_picture(
-                            RenderTaskLocation::Dynamic(None, prim_screen_rect.size),
-                            prim_index,
-                            RenderTargetKind::Color,
-                            content_origin,
-                            PremultipliedColorF::TRANSPARENT,
-                            ClearMode::Transparent,
-                            pic_state_for_children.tasks,
-                            PictureType::Image,
-                        );
+                        // If blur radius is 0, we can skip drawing this to an
+                        // intermediate surface.
+                        if blur_radius == 0.0 {
+                            pic_state.tasks.extend(pic_state_for_children.tasks);
+                            self.surface = None;
+
+                            DeviceIntRect::zero()
+                        } else {
+                            let blur_std_deviation = blur_radius * frame_context.device_pixel_scale.0;
+                            let blur_range = (blur_std_deviation * BLUR_SAMPLE_SCALE).ceil() as i32;
+
+                            // The clipped field is the part of the picture that is visible
+                            // on screen. The unclipped field is the screen-space rect of
+                            // the complete picture, if no screen / clip-chain was applied
+                            // (this includes the extra space for blur region). To ensure
+                            // that we draw a large enough part of the picture to get correct
+                            // blur results, inflate that clipped area by the blur range, and
+                            // then intersect with the total screen rect, to minimize the
+                            // allocation size.
+                            let device_rect = prim_screen_rect
+                                .clipped
+                                .inflate(blur_range, blur_range)
+                                .intersection(&prim_screen_rect.unclipped)
+                                .unwrap();
+
+                            let content_origin = ContentOrigin::Screen(device_rect.origin);
 
-                        let blur_std_deviation = blur_radius * frame_context.device_pixel_scale.0;
-                        let picture_task_id = frame_state.render_tasks.add(picture_task);
+                            let picture_task = RenderTask::new_picture(
+                                RenderTaskLocation::Dynamic(None, device_rect.size),
+                                prim_index,
+                                RenderTargetKind::Color,
+                                content_origin,
+                                PremultipliedColorF::TRANSPARENT,
+                                ClearMode::Transparent,
+                                pic_state_for_children.tasks,
+                                PictureType::Image,
+                            );
+
+                            let picture_task_id = frame_state.render_tasks.add(picture_task);
 
-                        let blur_render_task = RenderTask::new_blur(
-                            blur_std_deviation,
-                            picture_task_id,
-                            frame_state.render_tasks,
-                            RenderTargetKind::Color,
-                            ClearMode::Transparent,
-                            PremultipliedColorF::TRANSPARENT,
-                        );
+                            let blur_render_task = RenderTask::new_blur(
+                                blur_std_deviation,
+                                picture_task_id,
+                                frame_state.render_tasks,
+                                RenderTargetKind::Color,
+                                ClearMode::Transparent,
+                            );
 
-                        let render_task_id = frame_state.render_tasks.add(blur_render_task);
-                        pic_state.tasks.push(render_task_id);
-                        self.surface = Some(render_task_id);
+                            let render_task_id = frame_state.render_tasks.add(blur_render_task);
+                            pic_state.tasks.push(render_task_id);
+                            self.surface = Some(render_task_id);
+
+                            device_rect
+                        }
                     }
-                    Some(PictureCompositeMode::Filter(FilterOp::DropShadow(offset, blur_radius, color))) => {
-                        let rect = (prim_local_rect.translate(&-offset) * content_scale).round().to_i32();
+                    Some(PictureCompositeMode::Filter(FilterOp::DropShadow(offset, blur_radius, _))) => {
+                        // TODO(gw): This is totally wrong and can never work with
+                        //           transformed drop-shadow elements. Fix me!
+                        let rect = (prim_metadata.local_rect.translate(&-offset) * content_scale).round().to_i32();
                         let mut picture_task = RenderTask::new_picture(
                             RenderTaskLocation::Dynamic(None, rect.size),
                             prim_index,
                             RenderTargetKind::Color,
                             ContentOrigin::Screen(rect.origin),
                             PremultipliedColorF::TRANSPARENT,
                             ClearMode::Transparent,
                             pic_state_for_children.tasks,
@@ -312,47 +332,56 @@ impl PicturePrimitive {
                         let picture_task_id = frame_state.render_tasks.add(picture_task);
 
                         let blur_render_task = RenderTask::new_blur(
                             blur_std_deviation.round(),
                             picture_task_id,
                             frame_state.render_tasks,
                             RenderTargetKind::Color,
                             ClearMode::Transparent,
-                            color.premultiplied(),
                         );
 
                         *secondary_render_task_id = Some(picture_task_id);
 
                         let render_task_id = frame_state.render_tasks.add(blur_render_task);
                         pic_state.tasks.push(render_task_id);
                         self.surface = Some(render_task_id);
+
+                        rect
                     }
                     Some(PictureCompositeMode::MixBlend(..)) => {
+                        let content_origin = ContentOrigin::Screen(prim_screen_rect.clipped.origin);
+
                         let picture_task = RenderTask::new_picture(
-                            RenderTaskLocation::Dynamic(None, prim_screen_rect.size),
+                            RenderTaskLocation::Dynamic(None, prim_screen_rect.clipped.size),
                             prim_index,
                             RenderTargetKind::Color,
                             content_origin,
                             PremultipliedColorF::TRANSPARENT,
                             ClearMode::Transparent,
                             pic_state_for_children.tasks,
                             PictureType::Image,
                         );
 
-                        let readback_task_id = frame_state.render_tasks.add(RenderTask::new_readback(*prim_screen_rect));
+                        let readback_task_id = frame_state.render_tasks.add(
+                            RenderTask::new_readback(prim_screen_rect.clipped)
+                        );
 
                         *secondary_render_task_id = Some(readback_task_id);
                         pic_state.tasks.push(readback_task_id);
 
                         let render_task_id = frame_state.render_tasks.add(picture_task);
                         pic_state.tasks.push(render_task_id);
                         self.surface = Some(render_task_id);
+
+                        prim_screen_rect.clipped
                     }
                     Some(PictureCompositeMode::Filter(filter)) => {
+                        let content_origin = ContentOrigin::Screen(prim_screen_rect.clipped.origin);
+
                         // If this filter is not currently going to affect
                         // the picture, just collapse this picture into the
                         // current render task. This most commonly occurs
                         // when opacity == 1.0, but can also occur on other
                         // filters and be a significant performance win.
                         if filter.is_noop() {
                             pic_state.tasks.extend(pic_state_for_children.tasks);
                             self.surface = None;
@@ -362,51 +391,68 @@ impl PicturePrimitive {
                                 if let Some(mut request) = frame_state.gpu_cache.request(extra_gpu_data_handle) {
                                     for i in 0..5 {
                                         request.push([m[i*4], m[i*4+1], m[i*4+2], m[i*4+3]]);
                                     }
                                 }
                             }
 
                             let picture_task = RenderTask::new_picture(
-                                RenderTaskLocation::Dynamic(None, prim_screen_rect.size),
+                                RenderTaskLocation::Dynamic(None, prim_screen_rect.clipped.size),
                                 prim_index,
                                 RenderTargetKind::Color,
                                 content_origin,
                                 PremultipliedColorF::TRANSPARENT,
                                 ClearMode::Transparent,
                                 pic_state_for_children.tasks,
                                 PictureType::Image,
                             );
 
                             let render_task_id = frame_state.render_tasks.add(picture_task);
                             pic_state.tasks.push(render_task_id);
                             self.surface = Some(render_task_id);
                         }
+
+                        prim_screen_rect.clipped
                     }
                     Some(PictureCompositeMode::Blit) => {
+                        let content_origin = ContentOrigin::Screen(prim_screen_rect.clipped.origin);
+
                         let picture_task = RenderTask::new_picture(
-                            RenderTaskLocation::Dynamic(None, prim_screen_rect.size),
+                            RenderTaskLocation::Dynamic(None, prim_screen_rect.clipped.size),
                             prim_index,
                             RenderTargetKind::Color,
                             content_origin,
                             PremultipliedColorF::TRANSPARENT,
                             ClearMode::Transparent,
                             pic_state_for_children.tasks,
                             PictureType::Image,
                         );
 
                         let render_task_id = frame_state.render_tasks.add(picture_task);
                         pic_state.tasks.push(render_task_id);
                         self.surface = Some(render_task_id);
+
+                        prim_screen_rect.clipped
                     }
                     None => {
                         pic_state.tasks.extend(pic_state_for_children.tasks);
                         self.surface = None;
+
+                        DeviceIntRect::zero()
                     }
+                };
+
+                // If scrolling or property animation has resulted in the task
+                // rect being different than last time, invalidate the GPU
+                // cache entry for this picture to ensure that the correct
+                // task rect is provided to the image shader.
+                if *task_rect != device_rect {
+                    frame_state.gpu_cache.invalidate(&prim_metadata.gpu_location);
+                    *task_rect = device_rect;
                 }
             }
             PictureKind::TextShadow { blur_radius, color, content_rect, .. } => {
                 // This is a shadow element. Create a render task that will
                 // render the text run to a target, and then apply a gaussian
                 // blur to that text run in order to build the actual primitive
                 // which will be blitted to the framebuffer.
                 let cache_size = to_cache_size(content_rect.size * content_scale);
@@ -431,57 +477,39 @@ impl PicturePrimitive {
                 let picture_task_id = frame_state.render_tasks.add(picture_task);
 
                 let blur_render_task = RenderTask::new_blur(
                     blur_std_deviation,
                     picture_task_id,
                     frame_state.render_tasks,
                     RenderTargetKind::Color,
                     ClearMode::Transparent,
-                    color.premultiplied(),
                 );
 
                 let render_task_id = frame_state.render_tasks.add(blur_render_task);
                 pic_state.tasks.push(render_task_id);
                 self.surface = Some(render_task_id);
             }
         }
     }
 
     pub fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
-        // TODO(gw): It's unfortunate that we pay a fixed cost
-        //           of 5 GPU blocks / picture, just due to the size
-        //           of the color matrix. There aren't typically very
-        //           many pictures in a scene, but we should consider
-        //           making this more efficient for the common case.
         match self.kind {
             PictureKind::TextShadow { .. } => {
                 request.push([0.0; 4]);
+                request.push(PremultipliedColorF::WHITE);
             }
-            PictureKind::Image { composite_mode, .. } => {
-                match composite_mode {
-                    Some(PictureCompositeMode::Filter(filter)) => {
-                        let amount = match filter {
-                            FilterOp::Contrast(amount) => amount,
-                            FilterOp::Grayscale(amount) => amount,
-                            FilterOp::HueRotate(angle) => 0.01745329251 * angle,
-                            FilterOp::Invert(amount) => amount,
-                            FilterOp::Saturate(amount) => amount,
-                            FilterOp::Sepia(amount) => amount,
-                            FilterOp::Brightness(amount) => amount,
-                            FilterOp::Opacity(_, amount) => amount,
-
-                            // Go through different paths
-                            FilterOp::Blur(..) |
-                            FilterOp::DropShadow(..) |
-                            FilterOp::ColorMatrix(_) => 0.0,
-                        };
-
-                        request.push([amount, 1.0 - amount, 0.0, 0.0]);
+            PictureKind::Image { task_rect, composite_mode, .. } => {
+                let color = match composite_mode {
+                    Some(PictureCompositeMode::Filter(FilterOp::DropShadow(_, _, color))) => {
+                        color.premultiplied()
                     }
                     _ => {
-                        request.push([0.0; 4]);
+                        PremultipliedColorF::WHITE
                     }
-                }
+                };
+
+                request.push(task_rect.to_f32());
+                request.push(color);
             }
         }
     }
 }
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -1,29 +1,29 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AlphaType, BorderRadius, BoxShadowClipMode, BuiltDisplayList, ClipMode, ColorF, ComplexClipRegion};
 use api::{DeviceIntRect, DeviceIntSize, DevicePixelScale, Epoch, ExtendMode, FontRenderMode};
 use api::{GlyphInstance, GlyphKey, GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag};
 use api::{LayerPoint, LayerRect, LayerSize, LayerToWorldTransform, LayerVector2D, LineOrientation};
-use api::{LineStyle, PremultipliedColorF, YuvColorSpace, YuvFormat};
+use api::{LineStyle, PipelineId, PremultipliedColorF, Shadow, YuvColorSpace, YuvFormat};
 use border::{BorderCornerInstance, BorderEdgeKind};
 use clip_scroll_tree::{ClipChainIndex, ClipScrollNodeIndex, CoordinateSystemId};
 use clip_scroll_node::ClipScrollNode;
 use clip::{ClipChain, ClipChainNode, ClipChainNodeIter, ClipChainNodeRef, ClipSource};
 use clip::{ClipSourcesHandle, ClipWorkItem};
 use frame_builder::{FrameBuildingContext, FrameBuildingState, PictureContext, PictureState};
 use frame_builder::PrimitiveRunContext;
 use glyph_rasterizer::{FontInstance, FontTransform};
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
                 ToGpuBlocks};
 use gpu_types::{ClipChainRectIndex};
-use picture::{PictureKind, PicturePrimitive};
+use picture::{PictureCompositeMode, PictureKind, PicturePrimitive};
 use render_task::{BlitSource, RenderTask, RenderTaskCacheKey, RenderTaskCacheKeyKind};
 use render_task::RenderTaskId;
 use renderer::{MAX_VERTEX_TEXTURE_WIDTH};
 use resource_cache::{CacheItem, ImageProperties, ImageRequest, ResourceCache};
 use segment::SegmentBuilder;
 use std::{mem, usize};
 use std::sync::Arc;
 use util::{MatrixHelpers, WorldToLayerFastTransform, calculate_screen_bounding_rect};
@@ -129,22 +129,26 @@ pub struct DeferredResolve {
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 pub struct SpecificPrimitiveIndex(pub usize);
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct PrimitiveIndex(pub usize);
 
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PictureIndex(pub usize);
+
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub enum PrimitiveKind {
     TextRun,
     Image,
     Border,
-    Picture,
     Brush,
 }
 
 impl GpuCacheHandle {
     pub fn as_int(&self, gpu_cache: &GpuCache) -> i32 {
         gpu_cache.get_address(self).as_int()
     }
 }
@@ -195,17 +199,19 @@ pub enum BrushKind {
     },
     Clear,
     Line {
         color: PremultipliedColorF,
         wavy_line_thickness: f32,
         style: LineStyle,
         orientation: LineOrientation,
     },
-    Picture,
+    Picture {
+        pic_index: PictureIndex,
+    },
     Image {
         request: ImageRequest,
         current_epoch: Epoch,
         alpha_type: AlphaType,
     },
     YuvImage {
         yuv_key: [ImageKey; 3],
         format: YuvFormat,
@@ -232,17 +238,17 @@ pub enum BrushKind {
         end_point: LayerPoint,
     }
 }
 
 impl BrushKind {
     fn supports_segments(&self) -> bool {
         match *self {
             BrushKind::Solid { .. } |
-            BrushKind::Picture |
+            BrushKind::Picture { .. } |
             BrushKind::Image { .. } |
             BrushKind::YuvImage { .. } |
             BrushKind::RadialGradient { .. } |
             BrushKind::LinearGradient { .. } => true,
 
             BrushKind::Clear |
             BrushKind::Line { .. } => false,
         }
@@ -313,24 +319,40 @@ impl BrushPrimitive {
         segment_desc: Option<BrushSegmentDescriptor>,
     ) -> BrushPrimitive {
         BrushPrimitive {
             kind,
             segment_desc,
         }
     }
 
-    fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
+    pub fn new_picture(pic_index: PictureIndex) -> BrushPrimitive {
+        BrushPrimitive {
+            kind: BrushKind::Picture {
+                pic_index,
+            },
+            segment_desc: None,
+        }
+    }
+
+    fn write_gpu_blocks(
+        &self,
+        request: &mut GpuDataRequest,
+        pictures: &[PicturePrimitive],
+    ) {
         // has to match VECS_PER_SPECIFIC_BRUSH
         match self.kind {
-            BrushKind::Picture |
+            BrushKind::Picture { pic_index } => {
+                pictures[pic_index.0].write_gpu_blocks(request);
+            }
             BrushKind::YuvImage { .. } => {
             }
             BrushKind::Image { .. } => {
                 request.push([0.0; 4]);
+                request.push(PremultipliedColorF::WHITE);
             }
             BrushKind::Solid { color } => {
                 request.push(color.premultiplied());
             }
             BrushKind::Clear => {
                 // Opaque black with operator dest out
                 request.push(PremultipliedColorF::BLACK);
             }
@@ -890,53 +912,93 @@ impl ClipData {
     }
 }
 
 #[derive(Debug)]
 pub enum PrimitiveContainer {
     TextRun(TextRunPrimitiveCpu),
     Image(ImagePrimitiveCpu),
     Border(BorderPrimitiveCpu),
-    Picture(PicturePrimitive),
     Brush(BrushPrimitive),
 }
 
 pub struct PrimitiveStore {
     /// CPU side information only.
     pub cpu_brushes: Vec<BrushPrimitive>,
     pub cpu_text_runs: Vec<TextRunPrimitiveCpu>,
-    pub cpu_pictures: Vec<PicturePrimitive>,
     pub cpu_images: Vec<ImagePrimitiveCpu>,
     pub cpu_metadata: Vec<PrimitiveMetadata>,
     pub cpu_borders: Vec<BorderPrimitiveCpu>,
+
+    pub pictures: Vec<PicturePrimitive>,
 }
 
 impl PrimitiveStore {
     pub fn new() -> PrimitiveStore {
         PrimitiveStore {
             cpu_metadata: Vec::new(),
             cpu_brushes: Vec::new(),
             cpu_text_runs: Vec::new(),
-            cpu_pictures: Vec::new(),
             cpu_images: Vec::new(),
             cpu_borders: Vec::new(),
+
+            pictures: Vec::new(),
         }
     }
 
     pub fn recycle(self) -> Self {
         PrimitiveStore {
             cpu_metadata: recycle_vec(self.cpu_metadata),
             cpu_brushes: recycle_vec(self.cpu_brushes),
             cpu_text_runs: recycle_vec(self.cpu_text_runs),
-            cpu_pictures: recycle_vec(self.cpu_pictures),
             cpu_images: recycle_vec(self.cpu_images),
             cpu_borders: recycle_vec(self.cpu_borders),
+
+            pictures: recycle_vec(self.pictures),
         }
     }
 
+    pub fn add_image_picture(
+        &mut self,
+        composite_mode: Option<PictureCompositeMode>,
+        is_in_3d_context: bool,
+        pipeline_id: PipelineId,
+        reference_frame_index: ClipScrollNodeIndex,
+        frame_output_pipeline_id: Option<PipelineId>,
+    ) -> PictureIndex {
+        let pic = PicturePrimitive::new_image(
+            composite_mode,
+            is_in_3d_context,
+            pipeline_id,
+            reference_frame_index,
+            frame_output_pipeline_id,
+        );
+
+        let pic_index = PictureIndex(self.pictures.len());
+        self.pictures.push(pic);
+
+        pic_index
+    }
+
+    pub fn add_shadow_picture(
+        &mut self,
+        shadow: Shadow,
+        pipeline_id: PipelineId,
+    ) -> PictureIndex {
+        let pic = PicturePrimitive::new_text_shadow(
+            shadow,
+            pipeline_id,
+        );
+
+        let pic_index = PictureIndex(self.pictures.len());
+        self.pictures.push(pic);
+
+        pic_index
+    }
+
     pub fn add_primitive(
         &mut self,
         local_rect: &LayerRect,
         local_clip_rect: &LayerRect,
         is_backface_visible: bool,
         clip_sources: Option<ClipSourcesHandle>,
         tag: Option<ItemTag>,
         container: PrimitiveContainer,
@@ -963,21 +1025,17 @@ impl PrimitiveStore {
                 let opacity = match brush.kind {
                     BrushKind::Clear => PrimitiveOpacity::translucent(),
                     BrushKind::Solid { ref color } => PrimitiveOpacity::from_alpha(color.a),
                     BrushKind::Line { .. } => PrimitiveOpacity::translucent(),
                     BrushKind::Image { .. } => PrimitiveOpacity::translucent(),
                     BrushKind::YuvImage { .. } => PrimitiveOpacity::opaque(),
                     BrushKind::RadialGradient { .. } => PrimitiveOpacity::translucent(),
                     BrushKind::LinearGradient { .. } => PrimitiveOpacity::translucent(),
-                    BrushKind::Picture => {
-                        // TODO(gw): This is not currently used. In the future
-                        //           we should detect opaque pictures.
-                        unreachable!();
-                    }
+                    BrushKind::Picture { .. } => PrimitiveOpacity::translucent(),
                 };
 
                 let metadata = PrimitiveMetadata {
                     opacity,
                     prim_kind: PrimitiveKind::Brush,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_brushes.len()),
                     ..base_metadata
                 };
@@ -992,27 +1050,16 @@ impl PrimitiveStore {
                     prim_kind: PrimitiveKind::TextRun,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_text_runs.len()),
                     ..base_metadata
                 };
 
                 self.cpu_text_runs.push(text_cpu);
                 metadata
             }
-            PrimitiveContainer::Picture(picture) => {
-                let metadata = PrimitiveMetadata {
-                    opacity: PrimitiveOpacity::translucent(),
-                    prim_kind: PrimitiveKind::Picture,
-                    cpu_prim_index: SpecificPrimitiveIndex(self.cpu_pictures.len()),
-                    ..base_metadata
-                };
-
-                self.cpu_pictures.push(picture);
-                metadata
-            }
             PrimitiveContainer::Image(image_cpu) => {
                 let metadata = PrimitiveMetadata {
                     opacity: PrimitiveOpacity::translucent(),
                     prim_kind: PrimitiveKind::Image,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_images.len()),
                     ..base_metadata
                 };
 
@@ -1053,30 +1100,16 @@ impl PrimitiveStore {
         pic_context: &PictureContext,
         pic_state: &mut PictureState,
         frame_context: &FrameBuildingContext,
         frame_state: &mut FrameBuildingState,
     ) {
         let metadata = &mut self.cpu_metadata[prim_index.0];
         match metadata.prim_kind {
             PrimitiveKind::Border => {}
-            PrimitiveKind::Picture => {
-                self.cpu_pictures[metadata.cpu_prim_index.0]
-                    .prepare_for_render(
-                        prim_index,
-                        &metadata.screen_rect
-                            .expect("bug: trying to draw an off-screen picture!?")
-                            .clipped,
-                        &metadata.local_rect,
-                        pic_state_for_children,
-                        pic_state,
-                        frame_context,
-                        frame_state,
-                    );
-            }
             PrimitiveKind::TextRun => {
                 let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
                 // The transform only makes sense for screen space rasterization
                 let transform = if pic_context.draw_text_transformed {
                     Some(prim_run_context.scroll_node.world_content_transform.into())
                 } else {
                     None
                 };
@@ -1255,20 +1288,30 @@ impl PrimitiveStore {
                                 pic_context.display_list,
                             );
                             gradient_builder.build(
                                 reverse_stops,
                                 &mut request,
                             );
                         }
                     }
+                    BrushKind::Picture { pic_index } => {
+                        self.pictures[pic_index.0]
+                            .prepare_for_render(
+                                prim_index,
+                                metadata,
+                                pic_state_for_children,
+                                pic_state,
+                                frame_context,
+                                frame_state,
+                            );
+                    }
                     BrushKind::Solid { .. } |
                     BrushKind::Clear |
-                    BrushKind::Line { .. } |
-                    BrushKind::Picture { .. } => {}
+                    BrushKind::Line { .. } => {}
                 }
             }
         }
 
         // Mark this GPU resource as required for this frame.
         if let Some(mut request) = frame_state.gpu_cache.request(&mut metadata.gpu_location) {
             // has to match VECS_PER_BRUSH_PRIM
             request.push(metadata.local_rect);
@@ -1282,37 +1325,19 @@ impl PrimitiveStore {
                 PrimitiveKind::Image => {
                     let image = &self.cpu_images[metadata.cpu_prim_index.0];
                     image.write_gpu_blocks(request);
                 }
                 PrimitiveKind::TextRun => {
                     let text = &self.cpu_text_runs[metadata.cpu_prim_index.0];
                     text.write_gpu_blocks(&mut request);
                 }
-                PrimitiveKind::Picture => {
-                    let pic = &self.cpu_pictures[metadata.cpu_prim_index.0];
-                    pic.write_gpu_blocks(&mut request);
-
-                    let brush = &pic.brush;
-                    brush.write_gpu_blocks(&mut request);
-                    match brush.segment_desc {
-                        Some(ref segment_desc) => {
-                            for segment in &segment_desc.segments {
-                                // has to match VECS_PER_SEGMENT
-                                request.write_segment(segment.local_rect);
-                            }
-                        }
-                        None => {
-                            request.write_segment(metadata.local_rect);
-                        }
-                    }
-                }
                 PrimitiveKind::Brush => {
                     let brush = &self.cpu_brushes[metadata.cpu_prim_index.0];
-                    brush.write_gpu_blocks(&mut request);
+                    brush.write_gpu_blocks(&mut request, &self.pictures);
                     match brush.segment_desc {
                         Some(ref segment_desc) => {
                             for segment in &segment_desc.segments {
                                 // has to match VECS_PER_SEGMENT
                                 request.write_segment(segment.local_rect);
                             }
                         }
                         None => {
@@ -1480,19 +1505,16 @@ impl PrimitiveStore {
         frame_context: &FrameBuildingContext,
         frame_state: &mut FrameBuildingState,
     ) -> bool {
         let metadata = &self.cpu_metadata[prim_index.0];
         let brush = match metadata.prim_kind {
             PrimitiveKind::Brush => {
                 &mut self.cpu_brushes[metadata.cpu_prim_index.0]
             }
-            PrimitiveKind::Picture => {
-                &mut self.cpu_pictures[metadata.cpu_prim_index.0].brush
-            }
             _ => {
                 return false;
             }
         };
 
         PrimitiveStore::write_brush_segment_description(
             brush,
             metadata,
@@ -1703,69 +1725,71 @@ impl PrimitiveStore {
             (metadata.prim_kind, metadata.cpu_prim_index)
         };
 
         // If we have dependencies, we need to prepare them first, in order
         // to know the actual rect of this primitive.
         // For example, scrolling may affect the location of an item in
         // local space, which may force us to render this item on a larger
         // picture target, if being composited.
-        if let PrimitiveKind::Picture = prim_kind {
-            let pic_context_for_children = {
-                let pic = &mut self.cpu_pictures[cpu_prim_index.0];
+        if let PrimitiveKind::Brush = prim_kind {
+            if let BrushKind::Picture { pic_index } = self.cpu_brushes[cpu_prim_index.0].kind {
+                let pic_context_for_children = {
+                    let pic = &mut self.pictures[pic_index.0];
+
+                    if !pic.resolve_scene_properties(frame_context.scene_properties) {
+                        return None;
+                    }
 
-                if !pic.resolve_scene_properties(frame_context.scene_properties) {
-                    return None;
-                }
+                    let (draw_text_transformed, original_reference_frame_index) = match pic.kind {
+                        PictureKind::Image { reference_frame_index, composite_mode, .. } => {
+                            may_need_clip_mask = composite_mode.is_some();
+                            (true, Some(reference_frame_index))
+                        }
+                        PictureKind::TextShadow { .. } => {
+                            (false, None)
+                        }
+                    };
 
-                let (draw_text_transformed, original_reference_frame_index) = match pic.kind {
-                    PictureKind::Image { reference_frame_index, composite_mode, .. } => {
-                        may_need_clip_mask = composite_mode.is_some();
-                        (true, Some(reference_frame_index))
-                    }
-                    PictureKind::TextShadow { .. } => {
-                        (false, None)
+                    let display_list = &frame_context
+                        .pipelines
+                        .get(&pic.pipeline_id)
+                        .expect("No display list?")
+                        .display_list;
+
+                    let inv_world_transform = prim_run_context
+                        .scroll_node
+                        .world_content_transform
+                        .inverse();
+
+                    PictureContext {
+                        pipeline_id: pic.pipeline_id,
+                        perform_culling: pic.cull_children,
+                        prim_runs: mem::replace(&mut pic.runs, Vec::new()),
+                        original_reference_frame_index,
+                        display_list,
+                        draw_text_transformed,
+                        inv_world_transform,
                     }
                 };
 
-                let display_list = &frame_context
-                    .pipelines
-                    .get(&pic.pipeline_id)
-                    .expect("No display list?")
-                    .display_list;
-
-                let inv_world_transform = prim_run_context
-                    .scroll_node
-                    .world_content_transform
-                    .inverse();
+                let result = self.prepare_prim_runs(
+                    &pic_context_for_children,
+                    &mut pic_state_for_children,
+                    frame_context,
+                    frame_state,
+                );
 
-                PictureContext {
-                    pipeline_id: pic.pipeline_id,
-                    perform_culling: pic.cull_children,
-                    prim_runs: mem::replace(&mut pic.runs, Vec::new()),
-                    original_reference_frame_index,
-                    display_list,
-                    draw_text_transformed,
-                    inv_world_transform,
-                }
-            };
+                // Restore the dependencies (borrow check dance)
+                let pic = &mut self.pictures[pic_index.0];
+                pic.runs = pic_context_for_children.prim_runs;
 
-            let result = self.prepare_prim_runs(
-                &pic_context_for_children,
-                &mut pic_state_for_children,
-                frame_context,
-                frame_state,
-            );
-
-            // Restore the dependencies (borrow check dance)
-            let pic = &mut self.cpu_pictures[cpu_prim_index.0];
-            pic.runs = pic_context_for_children.prim_runs;
-
-            let metadata = &mut self.cpu_metadata[prim_index.0];
-            metadata.local_rect = pic.update_local_rect(result);
+                let metadata = &mut self.cpu_metadata[prim_index.0];
+                metadata.local_rect = pic.update_local_rect(result);
+            }
         }
 
         let (local_rect, unclipped_device_rect) = {
             let metadata = &mut self.cpu_metadata[prim_index.0];
             if metadata.local_rect.size.width <= 0.0 ||
                metadata.local_rect.size.height <= 0.0 {
                 //warn!("invalid primitive rect {:?}", metadata.local_rect);
                 return None;
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -167,19 +167,28 @@ impl Document {
             dynamic_properties: SceneProperties::new(),
         }
     }
 
     fn can_render(&self) -> bool { self.frame_builder.is_some() }
 
     // TODO: We will probably get rid of this soon and always forward to the scene building thread.
     fn build_scene(&mut self, resource_cache: &mut ResourceCache) {
+        let max_texture_size = resource_cache.max_texture_size();
 
-        if self.view.window_size.width == 0 || self.view.window_size.height == 0 {
-            error!("ERROR: Invalid window dimensions! Please call api.set_window_size()");
+        if self.view.window_size.width == 0 ||
+           self.view.window_size.height == 0 ||
+           self.view.window_size.width > max_texture_size ||
+           self.view.window_size.height > max_texture_size {
+            error!("ERROR: Invalid window dimensions {}x{}. Please call api.set_window_size()",
+                self.view.window_size.width,
+                self.view.window_size.height,
+            );
+
+            return;
         }
 
         let old_builder = self.frame_builder.take().unwrap_or_else(FrameBuilder::empty);
         let root_pipeline_id = match self.pending.scene.root_pipeline_id {
             Some(root_pipeline_id) => root_pipeline_id,
             None => return,
         };
 
@@ -369,16 +378,23 @@ impl DocumentOps {
 
     fn build() -> Self {
         DocumentOps {
             build: true,
             ..DocumentOps::nop()
         }
     }
 
+    fn render() -> Self {
+        DocumentOps {
+            render: true,
+            ..DocumentOps::nop()
+        }
+    }
+
     fn combine(&mut self, other: Self) {
         self.scroll = self.scroll || other.scroll;
         self.build = self.build || other.build;
         self.render = self.render || other.render;
         self.composite = self.composite || other.composite;
     }
 }
 
@@ -663,17 +679,17 @@ impl RenderBackend {
             }
             FrameMsg::GetScrollNodeState(tx) => {
                 profile_scope!("GetScrollNodeState");
                 tx.send(doc.get_scroll_node_state()).unwrap();
                 DocumentOps::nop()
             }
             FrameMsg::UpdateDynamicProperties(property_bindings) => {
                 doc.dynamic_properties.set_properties(property_bindings);
-                DocumentOps::build()
+                DocumentOps::render()
             }
         }
     }
 
     fn next_namespace_id(&self) -> IdNamespace {
         IdNamespace(NEXT_NAMESPACE_ID.fetch_add(1, Ordering::Relaxed) as u32)
     }
 
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -174,17 +174,16 @@ pub struct PictureTask {
 }
 
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct BlurTask {
     pub blur_std_deviation: f32,
     pub target_kind: RenderTargetKind,
-    pub color: PremultipliedColorF,
     pub uv_rect_handle: GpuCacheHandle,
 }
 
 impl BlurTask {
     #[cfg(feature = "debugger")]
     fn print_with<T: PrintTreePrinter>(&self, pt: &mut T) {
         pt.add_item(format!("std deviation: {}", self.blur_std_deviation));
         pt.add_item(format!("target: {:?}", self.target_kind));
@@ -370,17 +369,16 @@ impl RenderTask {
 
                                 // Blur it
                                 let blur_render_task = RenderTask::new_blur(
                                     blur_radius_dp,
                                     mask_task_id,
                                     render_tasks,
                                     RenderTargetKind::Alpha,
                                     ClearMode::Zero,
-                                    PremultipliedColorF::TRANSPARENT,
                                 );
 
                                 let root_task_id = render_tasks.add(blur_render_task);
                                 children.push(root_task_id);
 
                                 (root_task_id, false)
                             }
                         );
@@ -440,17 +438,16 @@ impl RenderTask {
     //           +---- This is stored as the input task to the primitive shader.
     //
     pub fn new_blur(
         blur_std_deviation: f32,
         src_task_id: RenderTaskId,
         render_tasks: &mut RenderTaskTree,
         target_kind: RenderTargetKind,
         clear_mode: ClearMode,
-        color: PremultipliedColorF,
     ) -> Self {
         // Adjust large std deviation value.
         let mut adjusted_blur_std_deviation = blur_std_deviation;
         let blur_target_size = render_tasks[src_task_id].get_dynamic_size();
         let mut adjusted_blur_target_size = blur_target_size;
         let mut downscaling_src_task_id = src_task_id;
         let mut scale_factor = 1.0;
         while adjusted_blur_std_deviation > MAX_BLUR_STD_DEVIATION {
@@ -470,32 +467,30 @@ impl RenderTask {
         }
 
         let blur_task_v = RenderTask {
             children: vec![downscaling_src_task_id],
             location: RenderTaskLocation::Dynamic(None, adjusted_blur_target_size),
             kind: RenderTaskKind::VerticalBlur(BlurTask {
                 blur_std_deviation: adjusted_blur_std_deviation,
                 target_kind,
-                color,
                 uv_rect_handle: GpuCacheHandle::new(),
             }),
             clear_mode,
             saved_index: None,
         };
 
         let blur_task_v_id = render_tasks.add(blur_task_v);
 
         let blur_task_h = RenderTask {
             children: vec![blur_task_v_id],
             location: RenderTaskLocation::Dynamic(None, adjusted_blur_target_size),
             kind: RenderTaskKind::HorizontalBlur(BlurTask {
                 blur_std_deviation: adjusted_blur_std_deviation,
                 target_kind,
-                color,
                 uv_rect_handle: GpuCacheHandle::new(),
             }),
             clear_mode,
             saved_index: None,
         };
 
         blur_task_h
     }
@@ -572,17 +567,17 @@ impl RenderTask {
             RenderTaskKind::VerticalBlur(ref task) |
             RenderTaskKind::HorizontalBlur(ref task) => {
                 (
                     [
                         task.blur_std_deviation,
                         0.0,
                         0.0,
                     ],
-                    task.color.to_array()
+                    [0.0; 4],
                 )
             }
             RenderTaskKind::Readback(..) |
             RenderTaskKind::Scaling(..) |
             RenderTaskKind::Blit(..) => {
                 (
                     [0.0; 3],
                     [0.0; 4],
@@ -722,38 +717,37 @@ impl RenderTask {
     }
 
     pub fn prepare_for_render(
         &mut self,
         gpu_cache: &mut GpuCache,
     ) {
         let (target_rect, target_index) = self.get_target_rect();
 
-        let (cache_handle, color) = match self.kind {
+        let cache_handle = match self.kind {
             RenderTaskKind::HorizontalBlur(ref mut info) |
             RenderTaskKind::VerticalBlur(ref mut info) => {
-                (&mut info.uv_rect_handle, info.color)
+                &mut info.uv_rect_handle
             }
             RenderTaskKind::Picture(ref mut info) => {
-                (&mut info.uv_rect_handle, info.color)
+                &mut info.uv_rect_handle
             }
             RenderTaskKind::Readback(..) |
             RenderTaskKind::Scaling(..) |
             RenderTaskKind::Blit(..) |
             RenderTaskKind::ClipRegion(..) |
             RenderTaskKind::CacheMask(..) => {
                 return;
             }
         };
 
         if let Some(mut request) = gpu_cache.request(cache_handle) {
             let image_source = ImageSource {
                 p0: target_rect.origin.to_f32(),
                 p1: target_rect.bottom_right().to_f32(),
-                color,
                 texture_layer: target_index.0 as f32,
                 user_data: [0.0; 3],
             };
             image_source.write_gpu_blocks(&mut request);
         }
     }
 
     #[cfg(feature = "debugger")]
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -7,35 +7,33 @@
 //! The `webrender::renderer` module provides the interface to webrender, which
 //! is accessible through [`Renderer`][renderer]
 //!
 //! [renderer]: struct.Renderer.html
 
 use api::{BlobImageRenderer, ColorF, ColorU, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize, DocumentId, Epoch, ExternalImageId};
 use api::{ExternalImageType, FontRenderMode, ImageFormat, PipelineId};
-use api::{RenderApiSender, RenderNotifier, TexelRect, TextureTarget, YuvColorSpace, YuvFormat};
-use api::{YUV_COLOR_SPACES, YUV_FORMATS, channel};
+use api::{RenderApiSender, RenderNotifier, TexelRect, TextureTarget};
+use api::{channel};
 #[cfg(not(feature = "debugger"))]
 use api::ApiMsg;
 use api::DebugCommand;
 #[cfg(not(feature = "debugger"))]
 use api::channel::MsgSender;
 use api::channel::PayloadReceiverHelperMethods;
-use batch::{BatchKey, BatchKind, BatchTextures, BrushBatchKind};
-use batch::{TransformBatchKind};
+use batch::{BatchKey, BatchKind, BatchTextures, BrushBatchKind, TransformBatchKind};
 #[cfg(any(feature = "capture", feature = "replay"))]
 use capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage};
 use debug_colors;
 use debug_render::DebugRenderer;
 #[cfg(feature = "debugger")]
 use debug_server::{self, DebugServer};
-use device::{DepthFunction, Device, FrameId, Program, UploadMethod, Texture,
-             VertexDescriptor, PBO};
-use device::{ExternalTexture, FBOId, TextureSlot, VertexAttribute, VertexAttributeKind};
+use device::{DepthFunction, Device, FrameId, Program, UploadMethod, Texture, PBO};
+use device::{ExternalTexture, FBOId, TextureSlot};
 use device::{FileWatcherHandler, ShaderError, TextureFilter,
              VertexUsageHint, VAO, VBO, CustomVAO};
 use device::{ProgramCache, ReadPixelsFormat};
 use euclid::{rect, Transform3D};
 use frame_builder::FrameBuilderConfig;
 use gleam::gl;
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
@@ -48,18 +46,20 @@ use picture::ContentOrigin;
 use prim_store::DeferredResolve;
 use profiler::{BackendProfileCounters, FrameProfileCounters, Profiler};
 use profiler::{GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
 use query::{GpuProfiler, GpuTimer};
 use rayon::{ThreadPool, ThreadPoolBuilder};
 use record::ApiRecordingReceiver;
 use render_backend::RenderBackend;
 use scene_builder::SceneBuilder;
-use render_task::{RenderTaskKind, RenderTaskTree};
+use shade::Shaders;
+use render_task::{RenderTask, RenderTaskKind, RenderTaskTree};
 use resource_cache::ResourceCache;
+
 #[cfg(feature = "debugger")]
 use serde_json;
 use std;
 use std::cmp;
 use std::collections::VecDeque;
 use std::collections::hash_map::Entry;
 use std::f32;
 use std::mem;
@@ -69,17 +69,17 @@ use std::sync::Arc;
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use tiling::{AlphaRenderTarget, ColorRenderTarget};
 use tiling::{BlitJob, BlitJobSource, RenderPass, RenderPassKind, RenderTargetList};
 use tiling::{Frame, RenderTarget, ScalingInfo, TextureCacheRenderTarget};
 use time::precise_time_ns;
-use util::TransformedRectKind;
+
 
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
 /// Enabling this toggle would force the GPU cache scattered texture to
 /// be resized every frame, which enables GPU debuggers to see if this
 /// is performed correctly.
 const GPU_CACHE_RESIZE_TEST: bool = false;
 
 /// Number of GPU blocks per UV rectangle provided for an image.
@@ -190,17 +190,17 @@ impl TransformBatchKind {
                 ImageBufferKind::TextureExternal => "Image (External)",
                 ImageBufferKind::Texture2DArray => "Image (Array)",
             },
             TransformBatchKind::BorderCorner => "BorderCorner",
             TransformBatchKind::BorderEdge => "BorderEdge",
         }
     }
 
-    fn gpu_sampler_tag(&self) -> GpuProfileTag {
+    fn sampler_tag(&self) -> GpuProfileTag {
         match *self {
             TransformBatchKind::TextRun(..) => GPU_TAG_PRIM_TEXT_RUN,
             TransformBatchKind::Image(..) => GPU_TAG_PRIM_IMAGE,
             TransformBatchKind::BorderCorner => GPU_TAG_PRIM_BORDER_CORNER,
             TransformBatchKind::BorderEdge => GPU_TAG_PRIM_BORDER_EDGE,
         }
     }
 }
@@ -222,33 +222,33 @@ impl BatchKind {
                     BrushBatchKind::RadialGradient => "Brush (RadialGradient)",
                     BrushBatchKind::LinearGradient => "Brush (LinearGradient)",
                 }
             }
             BatchKind::Transformable(_, batch_kind) => batch_kind.debug_name(),
         }
     }
 
-    fn gpu_sampler_tag(&self) -> GpuProfileTag {
+    fn sampler_tag(&self) -> GpuProfileTag {
         match *self {
             BatchKind::HardwareComposite => GPU_TAG_PRIM_HW_COMPOSITE,
             BatchKind::SplitComposite => GPU_TAG_PRIM_SPLIT_COMPOSITE,
             BatchKind::Brush(kind) => {
                 match kind {
                     BrushBatchKind::Solid => GPU_TAG_BRUSH_SOLID,
                     BrushBatchKind::Line => GPU_TAG_BRUSH_LINE,
                     BrushBatchKind::Image(..) => GPU_TAG_BRUSH_IMAGE,
                     BrushBatchKind::Blend => GPU_TAG_BRUSH_BLEND,
                     BrushBatchKind::MixBlend { .. } => GPU_TAG_BRUSH_MIXBLEND,
                     BrushBatchKind::YuvImage(..) => GPU_TAG_BRUSH_YUV_IMAGE,
                     BrushBatchKind::RadialGradient => GPU_TAG_BRUSH_RADIAL_GRADIENT,
                     BrushBatchKind::LinearGradient => GPU_TAG_BRUSH_LINEAR_GRADIENT,
                 }
             }
-            BatchKind::Transformable(_, batch_kind) => batch_kind.gpu_sampler_tag(),
+            BatchKind::Transformable(_, batch_kind) => batch_kind.sampler_tag(),
         }
     }
 }
 
 bitflags! {
     #[derive(Default)]
     pub struct DebugFlags: u32 {
         const PROFILER_DBG      = 1 << 0;
@@ -265,55 +265,45 @@ bitflags! {
 fn flag_changed(before: DebugFlags, after: DebugFlags, select: DebugFlags) -> Option<bool> {
     if before & select != after & select {
         Some(after.contains(select))
     } else {
         None
     }
 }
 
-// A generic mode that can be passed to shaders to change
-// behaviour per draw-call.
-type ShaderMode = i32;
-
 #[repr(C)]
 enum TextShaderMode {
     Alpha = 0,
     SubpixelConstantTextColor = 1,
     SubpixelPass0 = 2,
     SubpixelPass1 = 3,
     SubpixelWithBgColorPass0 = 4,
     SubpixelWithBgColorPass1 = 5,
     SubpixelWithBgColorPass2 = 6,
     SubpixelDualSource = 7,
     Bitmap = 8,
     ColorBitmap = 9,
 }
 
-impl Into<ShaderMode> for TextShaderMode {
-    fn into(self) -> i32 {
-        self as i32
-    }
-}
-
 impl From<GlyphFormat> for TextShaderMode {
     fn from(format: GlyphFormat) -> TextShaderMode {
         match format {
             GlyphFormat::Alpha | GlyphFormat::TransformedAlpha => TextShaderMode::Alpha,
             GlyphFormat::Subpixel | GlyphFormat::TransformedSubpixel => {
                 panic!("Subpixel glyph formats must be handled separately.");
             }
             GlyphFormat::Bitmap => TextShaderMode::Bitmap,
             GlyphFormat::ColorBitmap => TextShaderMode::ColorBitmap,
         }
     }
 }
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
-enum TextureSampler {
+pub(crate) enum TextureSampler {
     Color0,
     Color1,
     Color2,
     CacheA8,
     CacheRGBA8,
     ResourceCache,
     ClipScrollNodes,
     RenderTasks,
@@ -357,115 +347,119 @@ impl Into<TextureSlot> for TextureSample
 }
 
 #[derive(Debug, Clone, Copy)]
 #[repr(C)]
 pub struct PackedVertex {
     pub pos: [f32; 2],
 }
 
-const DESC_PRIM_INSTANCES: VertexDescriptor = VertexDescriptor {
-    vertex_attributes: &[
-        VertexAttribute {
-            name: "aPosition",
-            count: 2,
-            kind: VertexAttributeKind::F32,
-        },
-    ],
-    instance_attributes: &[
-        VertexAttribute {
-            name: "aData0",
-            count: 4,
-            kind: VertexAttributeKind::I32,
-        },
-        VertexAttribute {
-            name: "aData1",
-            count: 4,
-            kind: VertexAttributeKind::I32,
-        },
-    ],
-};
-
-const DESC_BLUR: VertexDescriptor = VertexDescriptor {
-    vertex_attributes: &[
-        VertexAttribute {
-            name: "aPosition",
-            count: 2,
-            kind: VertexAttributeKind::F32,
-        },
-    ],
-    instance_attributes: &[
-        VertexAttribute {
-            name: "aBlurRenderTaskAddress",
-            count: 1,
-            kind: VertexAttributeKind::I32,
-        },
-        VertexAttribute {
-            name: "aBlurSourceTaskAddress",
-            count: 1,
-            kind: VertexAttributeKind::I32,
-        },
-        VertexAttribute {
-            name: "aBlurDirection",
-            count: 1,
-            kind: VertexAttributeKind::I32,
-        },
-    ],
-};
-
-const DESC_CLIP: VertexDescriptor = VertexDescriptor {
-    vertex_attributes: &[
-        VertexAttribute {
-            name: "aPosition",
-            count: 2,
-            kind: VertexAttributeKind::F32,
-        },
-    ],
-    instance_attributes: &[
-        VertexAttribute {
-            name: "aClipRenderTaskAddress",
-            count: 1,
-            kind: VertexAttributeKind::I32,
-        },
-        VertexAttribute {
-            name: "aScrollNodeId",
-            count: 1,
-            kind: VertexAttributeKind::I32,
-        },
-        VertexAttribute {
-            name: "aClipSegment",
-            count: 1,
-            kind: VertexAttributeKind::I32,
-        },
-        VertexAttribute {
-            name: "aClipDataResourceAddress",
-            count: 4,
-            kind: VertexAttributeKind::U16,
-        },
-    ],
-};
-
-const DESC_GPU_CACHE_UPDATE: VertexDescriptor = VertexDescriptor {
-    vertex_attributes: &[
-        VertexAttribute {
-            name: "aPosition",
-            count: 2,
-            kind: VertexAttributeKind::U16Norm,
-        },
-        VertexAttribute {
-            name: "aValue",
-            count: 4,
-            kind: VertexAttributeKind::F32,
-        },
-    ],
-    instance_attributes: &[],
-};
+pub(crate) mod desc {
+    use device::{VertexAttribute, VertexAttributeKind, VertexDescriptor};
+
+    pub const PRIM_INSTANCES: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[
+            VertexAttribute {
+                name: "aPosition",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aData0",
+                count: 4,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aData1",
+                count: 4,
+                kind: VertexAttributeKind::I32,
+            },
+        ],
+    };
+
+    pub const BLUR: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[
+            VertexAttribute {
+                name: "aPosition",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aBlurRenderTaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aBlurSourceTaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aBlurDirection",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+        ],
+    };
+
+    pub const CLIP: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[
+            VertexAttribute {
+                name: "aPosition",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aClipRenderTaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aScrollNodeId",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aClipSegment",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aClipDataResourceAddress",
+                count: 4,
+                kind: VertexAttributeKind::U16,
+            },
+        ],
+    };
+
+    pub const GPU_CACHE_UPDATE: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[
+            VertexAttribute {
+                name: "aPosition",
+                count: 2,
+                kind: VertexAttributeKind::U16Norm,
+            },
+            VertexAttribute {
+                name: "aValue",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+        instance_attributes: &[],
+    };
+}
 
 #[derive(Debug, Copy, Clone)]
-enum VertexArrayKind {
+pub(crate) enum VertexArrayKind {
     Primitive,
     Blur,
     Clip,
 }
 
 #[derive(Clone, Debug, PartialEq)]
 pub enum GraphicsApi {
     OpenGL,
@@ -495,51 +489,16 @@ impl From<TextureTarget> for ImageBuffer
             TextureTarget::Default => ImageBufferKind::Texture2D,
             TextureTarget::Rect => ImageBufferKind::TextureRect,
             TextureTarget::Array => ImageBufferKind::Texture2DArray,
             TextureTarget::External => ImageBufferKind::TextureExternal,
         }
     }
 }
 
-pub const IMAGE_BUFFER_KINDS: [ImageBufferKind; 4] = [
-    ImageBufferKind::Texture2D,
-    ImageBufferKind::TextureRect,
-    ImageBufferKind::TextureExternal,
-    ImageBufferKind::Texture2DArray,
-];
-
-impl ImageBufferKind {
-    pub fn get_feature_string(&self) -> &'static str {
-        match *self {
-            ImageBufferKind::Texture2D => "TEXTURE_2D",
-            ImageBufferKind::Texture2DArray => "",
-            ImageBufferKind::TextureRect => "TEXTURE_RECT",
-            ImageBufferKind::TextureExternal => "TEXTURE_EXTERNAL",
-        }
-    }
-
-    pub fn has_platform_support(&self, gl_type: &gl::GlType) -> bool {
-        match *gl_type {
-            gl::GlType::Gles => match *self {
-                ImageBufferKind::Texture2D => true,
-                ImageBufferKind::Texture2DArray => true,
-                ImageBufferKind::TextureRect => true,
-                ImageBufferKind::TextureExternal => true,
-            },
-            gl::GlType::Gl => match *self {
-                ImageBufferKind::Texture2D => true,
-                ImageBufferKind::Texture2DArray => true,
-                ImageBufferKind::TextureRect => true,
-                ImageBufferKind::TextureExternal => false,
-            },
-        }
-    }
-}
-
 #[derive(Debug, Copy, Clone)]
 pub enum RendererKind {
     Native,
     OSMesa,
 }
 
 #[derive(Debug)]
 pub struct GpuProfile {
@@ -842,24 +801,24 @@ struct CacheTexture {
 }
 
 impl CacheTexture {
     fn new(device: &mut Device, use_scatter: bool) -> Result<Self, RendererError> {
         let texture = device.create_texture(TextureTarget::Default, ImageFormat::RGBAF32);
 
         let bus = if use_scatter {
             let program = device
-                .create_program("gpu_cache_update", "", &DESC_GPU_CACHE_UPDATE)?;
+                .create_program("gpu_cache_update", "", &desc::GPU_CACHE_UPDATE)?;
             let buf_position = device.create_vbo();
             let buf_value = device.create_vbo();
             //Note: the vertex attributes have to be supplied in the same order
             // as for program creation, but each assigned to a different stream.
             let vao = device.create_custom_vao(&[
-                buf_position.stream_with(&DESC_GPU_CACHE_UPDATE.vertex_attributes[0..1]),
-                buf_value   .stream_with(&DESC_GPU_CACHE_UPDATE.vertex_attributes[1..2]),
+                buf_position.stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[0..1]),
+                buf_value   .stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[1..2]),
             ]);
             CacheBus::Scatter {
                 program,
                 vao,
                 buf_position,
                 buf_value,
                 count: 0,
             }
@@ -1149,410 +1108,16 @@ impl VertexDataTexture {
     }
 
     fn deinit(self, device: &mut Device) {
         device.delete_pbo(self.pbo);
         device.delete_texture(self.texture);
     }
 }
 
-const TRANSFORM_FEATURE: &str = "TRANSFORM";
-const ALPHA_FEATURE: &str = "ALPHA_PASS";
-
-enum ShaderKind {
-    Primitive,
-    Cache(VertexArrayKind),
-    ClipCache,
-    Brush,
-    Text,
-}
-
-struct LazilyCompiledShader {
-    program: Option<Program>,
-    name: &'static str,
-    kind: ShaderKind,
-    features: Vec<&'static str>,
-}
-
-impl LazilyCompiledShader {
-    fn new(
-        kind: ShaderKind,
-        name: &'static str,
-        features: &[&'static str],
-        device: &mut Device,
-        precache: bool,
-    ) -> Result<LazilyCompiledShader, ShaderError> {
-        let mut shader = LazilyCompiledShader {
-            program: None,
-            name,
-            kind,
-            features: features.to_vec(),
-        };
-
-        if precache {
-            let t0 = precise_time_ns();
-            let program = try!{ shader.get(device) };
-            let t1 = precise_time_ns();
-            device.bind_program(program);
-            device.draw_triangles_u16(0, 3);
-            let t2 = precise_time_ns();
-            debug!("[C: {:.1} ms D: {:.1} ms] Precache {} {:?}",
-                (t1 - t0) as f64 / 1000000.0,
-                (t2 - t1) as f64 / 1000000.0,
-                name,
-                features
-            );
-        }
-
-        Ok(shader)
-    }
-
-    fn bind<M>(
-        &mut self,
-        device: &mut Device,
-        projection: &Transform3D<f32>,
-        mode: M,
-        renderer_errors: &mut Vec<RendererError>,
-    ) where M: Into<ShaderMode> {
-        let program = match self.get(device) {
-            Ok(program) => program,
-            Err(e) => {
-                renderer_errors.push(RendererError::from(e));
-                return;
-            }
-        };
-        device.bind_program(program);
-        device.set_uniforms(program, projection, mode.into());
-    }
-
-    fn get(&mut self, device: &mut Device) -> Result<&Program, ShaderError> {
-        if self.program.is_none() {
-            let program = try!{
-                match self.kind {
-                    ShaderKind::Primitive | ShaderKind::Brush | ShaderKind::Text => {
-                        create_prim_shader(self.name,
-                                           device,
-                                           &self.features,
-                                           VertexArrayKind::Primitive)
-                    }
-                    ShaderKind::Cache(format) => {
-                        create_prim_shader(self.name,
-                                           device,
-                                           &self.features,
-                                           format)
-                    }
-                    ShaderKind::ClipCache => {
-                        create_clip_shader(self.name, device)
-                    }
-                }
-            };
-            self.program = Some(program);
-        }
-
-        Ok(self.program.as_ref().unwrap())
-    }
-
-    fn deinit(self, device: &mut Device) {
-        if let Some(program) = self.program {
-            device.delete_program(program);
-        }
-    }
-}
-
-// A brush shader supports two modes:
-// opaque:
-//   Used for completely opaque primitives,
-//   or inside segments of partially
-//   opaque primitives. Assumes no need
-//   for clip masks, AA etc.
-// alpha:
-//   Used for brush primitives in the alpha
-//   pass. Assumes that AA should be applied
-//   along the primitive edge, and also that
-//   clip mask is present.
-struct BrushShader {
-    opaque: LazilyCompiledShader,
-    alpha: LazilyCompiledShader,
-}
-
-impl BrushShader {
-    fn new(
-        name: &'static str,
-        device: &mut Device,
-        features: &[&'static str],
-        precache: bool,
-    ) -> Result<Self, ShaderError> {
-        let opaque = try!{
-            LazilyCompiledShader::new(ShaderKind::Brush,
-                                      name,
-                                      features,
-                                      device,
-                                      precache)
-        };
-
-        let mut alpha_features = features.to_vec();
-        alpha_features.push(ALPHA_FEATURE);
-
-        let alpha = try!{
-            LazilyCompiledShader::new(ShaderKind::Brush,
-                                      name,
-                                      &alpha_features,
-                                      device,
-                                      precache)
-        };
-
-        Ok(BrushShader { opaque, alpha })
-    }
-
-    fn bind<M>(
-        &mut self,
-        device: &mut Device,
-        blend_mode: BlendMode,
-        projection: &Transform3D<f32>,
-        mode: M,
-        renderer_errors: &mut Vec<RendererError>,
-    ) where M: Into<ShaderMode> {
-        match blend_mode {
-            BlendMode::None => {
-                self.opaque.bind(device, projection, mode, renderer_errors)
-            }
-            BlendMode::Alpha |
-            BlendMode::PremultipliedAlpha |
-            BlendMode::PremultipliedDestOut |
-            BlendMode::SubpixelDualSource |
-            BlendMode::SubpixelConstantTextColor(..) |
-            BlendMode::SubpixelVariableTextColor |
-            BlendMode::SubpixelWithBgColor => {
-                self.alpha.bind(device, projection, mode, renderer_errors)
-            }
-        }
-    }
-
-    fn deinit(self, device: &mut Device) {
-        self.opaque.deinit(device);
-        self.alpha.deinit(device);
-    }
-}
-
-struct PrimitiveShader {
-    simple: LazilyCompiledShader,
-    transform: LazilyCompiledShader,
-}
-
-impl PrimitiveShader {
-    fn new(
-        name: &'static str,
-        device: &mut Device,
-        features: &[&'static str],
-        precache: bool,
-    ) -> Result<Self, ShaderError> {
-        let simple = try!{
-            LazilyCompiledShader::new(ShaderKind::Primitive,
-                                      name,
-                                      features,
-                                      device,
-                                      precache)
-        };
-
-        let mut transform_features = features.to_vec();
-        transform_features.push(TRANSFORM_FEATURE);
-
-        let transform = try!{
-            LazilyCompiledShader::new(ShaderKind::Primitive,
-                                      name,
-                                      &transform_features,
-                                      device,
-                                      precache)
-        };
-
-        Ok(PrimitiveShader { simple, transform })
-    }
-
-    fn bind<M>(
-        &mut self,
-        device: &mut Device,
-        transform_kind: TransformedRectKind,
-        projection: &Transform3D<f32>,
-        mode: M,
-        renderer_errors: &mut Vec<RendererError>,
-    ) where M: Into<ShaderMode> {
-        match transform_kind {
-            TransformedRectKind::AxisAligned => {
-                self.simple.bind(device, projection, mode, renderer_errors)
-            }
-            TransformedRectKind::Complex => {
-                self.transform.bind(device, projection, mode, renderer_errors)
-            }
-        }
-    }
-
-    fn deinit(self, device: &mut Device) {
-        self.simple.deinit(device);
-        self.transform.deinit(device);
-    }
-}
-
-struct TextShader {
-    simple: LazilyCompiledShader,
-    transform: LazilyCompiledShader,
-    glyph_transform: LazilyCompiledShader,
-}
-
-impl TextShader {
-    fn new(
-        name: &'static str,
-        device: &mut Device,
-        features: &[&'static str],
-        precache: bool,
-    ) -> Result<Self, ShaderError> {
-        let simple = try!{
-            LazilyCompiledShader::new(ShaderKind::Text,
-                                      name,
-                                      features,
-                                      device,
-                                      precache)
-        };
-
-        let mut transform_features = features.to_vec();
-        transform_features.push("TRANSFORM");
-
-        let transform = try!{
-            LazilyCompiledShader::new(ShaderKind::Text,
-                                      name,
-                                      &transform_features,
-                                      device,
-                                      precache)
-        };
-
-        let mut glyph_transform_features = features.to_vec();
-        glyph_transform_features.push("GLYPH_TRANSFORM");
-
-        let glyph_transform = try!{
-            LazilyCompiledShader::new(ShaderKind::Text,
-                                      name,
-                                      &glyph_transform_features,
-                                      device,
-                                      precache)
-        };
-
-        Ok(TextShader { simple, transform, glyph_transform })
-    }
-
-    fn bind<M>(
-        &mut self,
-        device: &mut Device,
-        glyph_format: GlyphFormat,
-        transform_kind: TransformedRectKind,
-        projection: &Transform3D<f32>,
-        mode: M,
-        renderer_errors: &mut Vec<RendererError>,
-    ) where M: Into<ShaderMode> {
-        match glyph_format {
-            GlyphFormat::Alpha |
-            GlyphFormat::Subpixel |
-            GlyphFormat::Bitmap |
-            GlyphFormat::ColorBitmap => {
-                match transform_kind {
-                    TransformedRectKind::AxisAligned => {
-                        self.simple.bind(device, projection, mode, renderer_errors)
-                    }
-                    TransformedRectKind::Complex => {
-                        self.transform.bind(device, projection, mode, renderer_errors)
-                    }
-                }
-            }
-            GlyphFormat::TransformedAlpha |
-            GlyphFormat::TransformedSubpixel => {
-                self.glyph_transform.bind(device, projection, mode, renderer_errors)
-            }
-        }
-    }
-
-    fn deinit(self, device: &mut Device) {
-        self.simple.deinit(device);
-        self.transform.deinit(device);
-        self.glyph_transform.deinit(device);
-    }
-}
-
-fn create_prim_shader(
-    name: &'static str,
-    device: &mut Device,
-    features: &[&'static str],
-    vertex_format: VertexArrayKind,
-) -> Result<Program, ShaderError> {
-    let mut prefix = format!(
-        "#define WR_MAX_VERTEX_TEXTURE_WIDTH {}\n",
-        MAX_VERTEX_TEXTURE_WIDTH
-    );
-
-    for feature in features {
-        prefix.push_str(&format!("#define WR_FEATURE_{}\n", feature));
-    }
-
-    debug!("PrimShader {}", name);
-
-    let vertex_descriptor = match vertex_format {
-        VertexArrayKind::Primitive => DESC_PRIM_INSTANCES,
-        VertexArrayKind::Blur => DESC_BLUR,
-        VertexArrayKind::Clip => DESC_CLIP,
-    };
-
-    let program = device.create_program(name, &prefix, &vertex_descriptor);
-
-    if let Ok(ref program) = program {
-        device.bind_shader_samplers(
-            program,
-            &[
-                ("sColor0", TextureSampler::Color0),
-                ("sColor1", TextureSampler::Color1),
-                ("sColor2", TextureSampler::Color2),
-                ("sDither", TextureSampler::Dither),
-                ("sCacheA8", TextureSampler::CacheA8),
-                ("sCacheRGBA8", TextureSampler::CacheRGBA8),
-                ("sClipScrollNodes", TextureSampler::ClipScrollNodes),
-                ("sRenderTasks", TextureSampler::RenderTasks),
-                ("sResourceCache", TextureSampler::ResourceCache),
-                ("sSharedCacheA8", TextureSampler::SharedCacheA8),
-                ("sLocalClipRects", TextureSampler::LocalClipRects),
-            ],
-        );
-    }
-
-    program
-}
-
-fn create_clip_shader(name: &'static str, device: &mut Device) -> Result<Program, ShaderError> {
-    let prefix = format!(
-        "#define WR_MAX_VERTEX_TEXTURE_WIDTH {}\n
-                          #define WR_FEATURE_TRANSFORM\n",
-        MAX_VERTEX_TEXTURE_WIDTH
-    );
-
-    debug!("ClipShader {}", name);
-
-    let program = device.create_program(name, &prefix, &DESC_CLIP);
-
-    if let Ok(ref program) = program {
-        device.bind_shader_samplers(
-            program,
-            &[
-                ("sColor0", TextureSampler::Color0),
-                ("sClipScrollNodes", TextureSampler::ClipScrollNodes),
-                ("sRenderTasks", TextureSampler::RenderTasks),
-                ("sResourceCache", TextureSampler::ResourceCache),
-                ("sSharedCacheA8", TextureSampler::SharedCacheA8),
-                ("sLocalClipRects", TextureSampler::LocalClipRects),
-            ],
-        );
-    }
-
-    program
-}
-
 struct FileWatcher {
     notifier: Box<RenderNotifier>,
     result_tx: Sender<ResultMsg>,
 }
 
 impl FileWatcherHandler for FileWatcher {
     fn file_changed(&self, path: PathBuf) {
         self.result_tx.send(ResultMsg::RefreshShader(path)).ok();
@@ -1579,60 +1144,21 @@ pub struct Renderer {
     result_rx: Receiver<ResultMsg>,
     debug_server: DebugServer,
     device: Device,
     pending_texture_updates: Vec<TextureUpdateList>,
     pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
     pending_shader_updates: Vec<PathBuf>,
     active_documents: Vec<(DocumentId, RenderedDocument)>,
 
-    // These are "cache shaders". These shaders are used to
-    // draw intermediate results to cache targets. The results
-    // of these shaders are then used by the primitive shaders.
-    cs_text_run: LazilyCompiledShader,
-    cs_blur_a8: LazilyCompiledShader,
-    cs_blur_rgba8: LazilyCompiledShader,
-
-    // Brush shaders
-    brush_solid: BrushShader,
-    brush_line: BrushShader,
-    brush_image: Vec<Option<BrushShader>>,
-    brush_blend: BrushShader,
-    brush_mix_blend: BrushShader,
-    brush_yuv_image: Vec<Option<BrushShader>>,
-    brush_radial_gradient: BrushShader,
-    brush_linear_gradient: BrushShader,
-
-    /// These are "cache clip shaders". These shaders are used to
-    /// draw clip instances into the cached clip mask. The results
-    /// of these shaders are also used by the primitive shaders.
-    cs_clip_rectangle: LazilyCompiledShader,
-    cs_clip_box_shadow: LazilyCompiledShader,
-    cs_clip_image: LazilyCompiledShader,
-    cs_clip_border: LazilyCompiledShader,
-
-    // The are "primitive shaders". These shaders draw and blend
-    // final results on screen. They are aware of tile boundaries.
-    // Most draw directly to the framebuffer, but some use inputs
-    // from the cache shaders to draw. Specifically, the box
-    // shadow primitive shader stretches the box shadow cache
-    // output, and the cache_image shader blits the results of
-    // a cache shader (e.g. blur) to the screen.
-    ps_text_run: TextShader,
-    ps_text_run_dual_source: TextShader,
-    ps_image: Vec<Option<PrimitiveShader>>,
-    ps_border_corner: PrimitiveShader,
-    ps_border_edge: PrimitiveShader,
-
-    ps_hw_composite: LazilyCompiledShader,
-    ps_split_composite: LazilyCompiledShader,
+    shaders: Shaders,
 
     max_texture_size: u32,
-
     max_recorded_profiles: usize,
+
     clear_color: Option<ColorF>,
     enable_clear_scissor: bool,
     debug: DebugRenderer,
     debug_flags: DebugFlags,
     backend_profile_counters: BackendProfileCounters,
     profile_counters: RendererProfileCounters,
     profiler: Profiler,
     last_time: u64,
@@ -1728,36 +1254,35 @@ impl Renderer {
     /// };
     /// let (renderer, sender) = Renderer::new(opts);
     /// ```
     /// [rendereroptions]: struct.RendererOptions.html
     pub fn new(
         gl: Rc<gl::Gl>,
         notifier: Box<RenderNotifier>,
         mut options: RendererOptions,
-    ) -> Result<(Renderer, RenderApiSender), RendererError> {
-        let (api_tx, api_rx) = try!{ channel::msg_channel() };
-        let (payload_tx, payload_rx) = try!{ channel::payload_channel() };
+    ) -> Result<(Self, RenderApiSender), RendererError> {
+        let (api_tx, api_rx) = channel::msg_channel()?;
+        let (payload_tx, payload_rx) = channel::payload_channel()?;
         let (result_tx, result_rx) = channel();
         let gl_type = gl.get_type();
-        let dithering_feature = ["DITHERING"];
 
         let debug_server = DebugServer::new(api_tx.clone());
 
         let file_watch_handler = FileWatcher {
             result_tx: result_tx.clone(),
             notifier: notifier.clone(),
         };
 
         let mut device = Device::new(
             gl,
             options.resource_override_path.clone(),
-            options.upload_method,
+            options.upload_method.clone(),
             Box::new(file_watch_handler),
-            options.cached_programs,
+            options.cached_programs.take(),
         );
 
         let ext_dual_source_blending = !options.disable_dual_source_blending &&
             device.supports_extension("GL_ARB_blend_func_extended");
 
         let device_max_size = device.max_texture_size();
         // 512 is the minimum that the texture cache can work with.
         // Broken GL contexts can return a max texture size of zero (See #1260). Better to
@@ -1777,243 +1302,17 @@ impl Renderer {
             ),
             min_texture_size,
         );
 
         register_thread_with_profiler("Compositor".to_owned());
 
         device.begin_frame();
 
-        let cs_text_run = try!{
-            LazilyCompiledShader::new(ShaderKind::Cache(VertexArrayKind::Primitive),
-                                      "cs_text_run",
-                                      &[],
-                                      &mut device,
-                                      options.precache_shaders)
-        };
-
-        let brush_solid = try!{
-            BrushShader::new("brush_solid",
-                             &mut device,
-                             &[],
-                             options.precache_shaders)
-        };
-
-        let brush_line = try!{
-            BrushShader::new("brush_line",
-                             &mut device,
-                             &[],
-                             options.precache_shaders)
-        };
-
-        let brush_blend = try!{
-            BrushShader::new("brush_blend",
-                             &mut device,
-                             &[],
-                             options.precache_shaders)
-        };
-
-        let brush_mix_blend = try!{
-            BrushShader::new("brush_mix_blend",
-                             &mut device,
-                             &[],
-                             options.precache_shaders)
-        };
-
-        let brush_radial_gradient = try!{
-            BrushShader::new("brush_radial_gradient",
-                             &mut device,
-                             if options.enable_dithering {
-                                &dithering_feature
-                             } else {
-                                &[]
-                             },
-                             options.precache_shaders)
-        };
-
-        let brush_linear_gradient = try!{
-            BrushShader::new("brush_linear_gradient",
-                             &mut device,
-                             if options.enable_dithering {
-                                &dithering_feature
-                             } else {
-                                &[]
-                             },
-                             options.precache_shaders)
-        };
-
-        let cs_blur_a8 = try!{
-            LazilyCompiledShader::new(ShaderKind::Cache(VertexArrayKind::Blur),
-                                     "cs_blur",
-                                      &["ALPHA_TARGET"],
-                                      &mut device,
-                                      options.precache_shaders)
-        };
-
-        let cs_blur_rgba8 = try!{
-            LazilyCompiledShader::new(ShaderKind::Cache(VertexArrayKind::Blur),
-                                     "cs_blur",
-                                      &["COLOR_TARGET"],
-                                      &mut device,
-                                      options.precache_shaders)
-        };
-
-        let cs_clip_rectangle = try!{
-            LazilyCompiledShader::new(ShaderKind::ClipCache,
-                                      "cs_clip_rectangle",
-                                      &[],
-                                      &mut device,
-                                      options.precache_shaders)
-        };
-
-        let cs_clip_box_shadow = try!{
-            LazilyCompiledShader::new(ShaderKind::ClipCache,
-                                      "cs_clip_box_shadow",
-                                      &[],
-                                      &mut device,
-                                      options.precache_shaders)
-        };
-
-        let cs_clip_image = try!{
-            LazilyCompiledShader::new(ShaderKind::ClipCache,
-                                      "cs_clip_image",
-                                      &[],
-                                      &mut device,
-                                      options.precache_shaders)
-        };
-
-        let cs_clip_border = try!{
-            LazilyCompiledShader::new(ShaderKind::ClipCache,
-                                      "cs_clip_border",
-                                      &[],
-                                      &mut device,
-                                      options.precache_shaders)
-        };
-
-        let ps_text_run = try!{
-            TextShader::new("ps_text_run",
-                            &mut device,
-                            &[],
-                            options.precache_shaders)
-        };
-
-        let ps_text_run_dual_source = try!{
-            TextShader::new("ps_text_run",
-                            &mut device,
-                            &["DUAL_SOURCE_BLENDING"],
-                            options.precache_shaders)
-        };
-
-        // All image configuration.
-        let mut image_features = Vec::new();
-        let mut ps_image = Vec::new();
-        let mut brush_image = Vec::new();
-        // PrimitiveShader is not clonable. Use push() to initialize the vec.
-        for _ in 0 .. IMAGE_BUFFER_KINDS.len() {
-            ps_image.push(None);
-            brush_image.push(None);
-        }
-        for buffer_kind in 0 .. IMAGE_BUFFER_KINDS.len() {
-            if IMAGE_BUFFER_KINDS[buffer_kind].has_platform_support(&gl_type) {
-                let feature_string = IMAGE_BUFFER_KINDS[buffer_kind].get_feature_string();
-                if feature_string != "" {
-                    image_features.push(feature_string);
-                }
-                let shader = try!{
-                    PrimitiveShader::new("ps_image",
-                                         &mut device,
-                                         &image_features,
-                                         options.precache_shaders)
-                };
-                ps_image[buffer_kind] = Some(shader);
-
-                let shader = try!{
-                    BrushShader::new("brush_image",
-                                     &mut device,
-                                     &image_features,
-                                     options.precache_shaders)
-                };
-                brush_image[buffer_kind] = Some(shader);
-            }
-            image_features.clear();
-        }
-
-        // All yuv_image configuration.
-        let mut yuv_features = Vec::new();
-        let yuv_shader_num = IMAGE_BUFFER_KINDS.len() * YUV_FORMATS.len() * YUV_COLOR_SPACES.len();
-        let mut brush_yuv_image = Vec::new();
-        // PrimitiveShader is not clonable. Use push() to initialize the vec.
-        for _ in 0 .. yuv_shader_num {
-            brush_yuv_image.push(None);
-        }
-        for buffer_kind in 0 .. IMAGE_BUFFER_KINDS.len() {
-            if IMAGE_BUFFER_KINDS[buffer_kind].has_platform_support(&gl_type) {
-                for format_kind in 0 .. YUV_FORMATS.len() {
-                    for color_space_kind in 0 .. YUV_COLOR_SPACES.len() {
-                        let feature_string = IMAGE_BUFFER_KINDS[buffer_kind].get_feature_string();
-                        if feature_string != "" {
-                            yuv_features.push(feature_string);
-                        }
-                        let feature_string = YUV_FORMATS[format_kind].get_feature_string();
-                        if feature_string != "" {
-                            yuv_features.push(feature_string);
-                        }
-                        let feature_string =
-                            YUV_COLOR_SPACES[color_space_kind].get_feature_string();
-                        if feature_string != "" {
-                            yuv_features.push(feature_string);
-                        }
-
-                        let shader = try!{
-                            BrushShader::new("brush_yuv_image",
-                                             &mut device,
-                                             &yuv_features,
-                                             options.precache_shaders)
-                        };
-                        let index = Renderer::get_yuv_shader_index(
-                            IMAGE_BUFFER_KINDS[buffer_kind],
-                            YUV_FORMATS[format_kind],
-                            YUV_COLOR_SPACES[color_space_kind],
-                        );
-                        brush_yuv_image[index] = Some(shader);
-                        yuv_features.clear();
-                    }
-                }
-            }
-        }
-
-        let ps_border_corner = try!{
-            PrimitiveShader::new("ps_border_corner",
-                                 &mut device,
-                                 &[],
-                                 options.precache_shaders)
-        };
-
-        let ps_border_edge = try!{
-            PrimitiveShader::new("ps_border_edge",
-                                 &mut device,
-                                 &[],
-                                 options.precache_shaders)
-        };
-
-        let ps_hw_composite = try!{
-            LazilyCompiledShader::new(ShaderKind::Primitive,
-                                     "ps_hardware_composite",
-                                     &[],
-                                     &mut device,
-                                     options.precache_shaders)
-        };
-
-        let ps_split_composite = try!{
-            LazilyCompiledShader::new(ShaderKind::Primitive,
-                                     "ps_split_composite",
-                                     &[],
-                                     &mut device,
-                                     options.precache_shaders)
-        };
+        let shaders = Shaders::new(&mut device, gl_type, &options)?;
 
         let texture_cache = TextureCache::new(max_device_size);
         let max_texture_size = texture_cache.max_texture_size();
 
         let backend_profile_counters = BackendProfileCounters::new();
 
         let dither_matrix_texture = if options.enable_dithering {
             let dither_matrix: [u8; 64] = [
@@ -2110,23 +1409,23 @@ impl Renderer {
         let quad_indices: [u16; 6] = [0, 1, 2, 2, 1, 3];
         let quad_vertices = [
             PackedVertex { pos: [x0, y0] },
             PackedVertex { pos: [x1, y0] },
             PackedVertex { pos: [x0, y1] },
             PackedVertex { pos: [x1, y1] },
         ];
 
-        let prim_vao = device.create_vao(&DESC_PRIM_INSTANCES);
+        let prim_vao = device.create_vao(&desc::PRIM_INSTANCES);
         device.bind_vao(&prim_vao);
         device.update_vao_indices(&prim_vao, &quad_indices, VertexUsageHint::Static);
         device.update_vao_main_vertices(&prim_vao, &quad_vertices, VertexUsageHint::Static);
 
-        let blur_vao = device.create_vao_with_new_instances(&DESC_BLUR, &prim_vao);
-        let clip_vao = device.create_vao_with_new_instances(&DESC_CLIP, &prim_vao);
+        let blur_vao = device.create_vao_with_new_instances(&desc::BLUR, &prim_vao);
+        let clip_vao = device.create_vao_with_new_instances(&desc::CLIP, &prim_vao);
 
         let texture_cache_upload_pbo = device.create_pbo();
 
         let texture_resolver = SourceTextureResolver::new(&mut device);
 
         let node_data_texture = VertexDataTexture::new(&mut device);
         let local_clip_rects_texture = VertexDataTexture::new(&mut device);
         let render_task_texture = VertexDataTexture::new(&mut device);
@@ -2192,92 +1491,67 @@ impl Renderer {
         let scene_thread_name = format!("WRSceneBuilder#{}", options.renderer_id.unwrap_or(0));
         let resource_cache = ResourceCache::new(
             texture_cache,
             workers,
             blob_image_renderer,
         )?;
 
         let (scene_builder, scene_tx, scene_rx) = SceneBuilder::new(config, api_tx.clone());
-        try! {
-            thread::Builder::new().name(scene_thread_name.clone()).spawn(move || {
-                register_thread_with_profiler(scene_thread_name.clone());
-                if let Some(ref thread_listener) = *thread_listener_for_scene_builder {
-                    thread_listener.thread_started(&scene_thread_name);
-                }
-
-                let mut scene_builder = scene_builder;
-                scene_builder.run();
-
-                if let Some(ref thread_listener) = *thread_listener_for_scene_builder {
-                    thread_listener.thread_stopped(&scene_thread_name);
-                }
-            })
-        };
-
-        try!{
-            thread::Builder::new().name(rb_thread_name.clone()).spawn(move || {
-                register_thread_with_profiler(rb_thread_name.clone());
-                if let Some(ref thread_listener) = *thread_listener_for_render_backend {
-                    thread_listener.thread_started(&rb_thread_name);
-                }
-                let mut backend = RenderBackend::new(
-                    api_rx,
-                    payload_rx_for_backend,
-                    result_tx,
-                    scene_tx,
-                    scene_rx,
-                    device_pixel_ratio,
-                    resource_cache,
-                    backend_notifier,
-                    config,
-                    recorder,
-                    enable_render_on_scroll,
-                );
-                backend.run(backend_profile_counters);
-                if let Some(ref thread_listener) = *thread_listener_for_render_backend {
-                    thread_listener.thread_stopped(&rb_thread_name);
-                }
-            })
-        };
+        thread::Builder::new().name(scene_thread_name.clone()).spawn(move || {
+            register_thread_with_profiler(scene_thread_name.clone());
+            if let Some(ref thread_listener) = *thread_listener_for_scene_builder {
+                thread_listener.thread_started(&scene_thread_name);
+            }
+
+            let mut scene_builder = scene_builder;
+            scene_builder.run();
+
+            if let Some(ref thread_listener) = *thread_listener_for_scene_builder {
+                thread_listener.thread_stopped(&scene_thread_name);
+            }
+        })?;
+
+        thread::Builder::new().name(rb_thread_name.clone()).spawn(move || {
+            register_thread_with_profiler(rb_thread_name.clone());
+            if let Some(ref thread_listener) = *thread_listener_for_render_backend {
+                thread_listener.thread_started(&rb_thread_name);
+            }
+            let mut backend = RenderBackend::new(
+                api_rx,
+                payload_rx_for_backend,
+                result_tx,
+                scene_tx,
+                scene_rx,
+                device_pixel_ratio,
+                resource_cache,
+                backend_notifier,
+                config,
+                recorder,
+                enable_render_on_scroll,
+            );
+            backend.run(backend_profile_counters);
+            if let Some(ref thread_listener) = *thread_listener_for_render_backend {
+                thread_listener.thread_stopped(&rb_thread_name);
+            }
+        })?;
 
         let gpu_profile = GpuProfiler::new(Rc::clone(device.rc_gl()));
         #[cfg(feature = "capture")]
         let read_fbo = device.create_fbo_for_external_texture(0);
 
         let mut renderer = Renderer {
             result_rx,
             debug_server,
             device,
             active_documents: Vec::new(),
             pending_texture_updates: Vec::new(),
             pending_gpu_cache_updates: Vec::new(),
             pending_shader_updates: Vec::new(),
-            cs_text_run,
-            cs_blur_a8,
-            cs_blur_rgba8,
-            brush_solid,
-            brush_line,
-            brush_image,
-            brush_blend,
-            brush_mix_blend,
-            brush_yuv_image,
-            brush_radial_gradient,
-            brush_linear_gradient,
-            cs_clip_rectangle,
-            cs_clip_box_shadow,
-            cs_clip_border,
-            cs_clip_image,
-            ps_text_run,
-            ps_text_run_dual_source,
-            ps_image,
-            ps_border_corner,
-            ps_border_edge,
-            ps_hw_composite,
-            ps_split_composite,
+            shaders,
             debug: debug_renderer,
             debug_flags,
             backend_profile_counters: BackendProfileCounters::new(),
             profile_counters: RendererProfileCounters::new(),
             profiler: Profiler::new(),
             max_texture_size: max_texture_size,
             max_recorded_profiles: options.max_recorded_profiles,
             clear_color: options.clear_color,
@@ -2322,25 +1596,16 @@ impl Renderer {
     pub fn get_graphics_api_info(&self) -> GraphicsApiInfo {
         GraphicsApiInfo {
             kind: GraphicsApi::OpenGL,
             version: self.device.gl().get_string(gl::VERSION),
             renderer: self.device.gl().get_string(gl::RENDERER),
         }
     }
 
-    fn get_yuv_shader_index(
-        buffer_kind: ImageBufferKind,
-        format: YuvFormat,
-        color_space: YuvColorSpace,
-    ) -> usize {
-        ((buffer_kind as usize) * YUV_FORMATS.len() + (format as usize)) * YUV_COLOR_SPACES.len() +
-            (color_space as usize)
-    }
-
     /// Returns the Epoch of the current frame in a pipeline.
     pub fn current_epoch(&self, pipeline_id: PipelineId) -> Option<Epoch> {
         self.pipeline_info.epochs.get(&pipeline_id).cloned()
     }
 
     pub fn flush_pipeline_info(&mut self) -> PipelineInfo {
         mem::replace(&mut self.pipeline_info, PipelineInfo::default())
     }
@@ -3139,232 +2404,120 @@ impl Renderer {
                 self.profile_counters.draw_calls.inc();
                 stats.total_draw_calls += 1;
             }
         }
 
         self.profile_counters.vertices.add(6 * data.len());
     }
 
+    fn handle_readback_composite(
+        &mut self,
+        render_target: Option<(&Texture, i32)>,
+        framebuffer_size: DeviceUintSize,
+        scissor_rect: Option<DeviceIntRect>,
+        source: &RenderTask,
+        backdrop: &RenderTask,
+        readback: &RenderTask,
+    ) {
+        if scissor_rect.is_some() {
+            self.device.disable_scissor();
+        }
+
+        let cache_texture = self.texture_resolver
+            .resolve(&SourceTexture::CacheRGBA8)
+            .unwrap();
+
+        // Before submitting the composite batch, do the
+        // framebuffer readbacks that are needed for each
+        // composite operation in this batch.
+        let (readback_rect, readback_layer) = readback.get_target_rect();
+        let (backdrop_rect, _) = backdrop.get_target_rect();
+        let backdrop_screen_origin = match backdrop.kind {
+            RenderTaskKind::Picture(ref task_info) => match task_info.content_origin {
+                ContentOrigin::Local(_) => panic!("bug: composite from a local-space rasterized picture?"),
+                ContentOrigin::Screen(p) => p,
+            },
+            _ => panic!("bug: composite on non-picture?"),
+        };
+        let source_screen_origin = match source.kind {
+            RenderTaskKind::Picture(ref task_info) => match task_info.content_origin {
+                ContentOrigin::Local(_) => panic!("bug: composite from a local-space rasterized picture?"),
+                ContentOrigin::Screen(p) => p,
+            },
+            _ => panic!("bug: composite on non-picture?"),
+        };
+
+        // Bind the FBO to blit the backdrop to.
+        // Called per-instance in case the layer (and therefore FBO)
+        // changes. The device will skip the GL call if the requested
+        // target is already bound.
+        let cache_draw_target = (cache_texture, readback_layer.0 as i32);
+        self.device.bind_draw_target(Some(cache_draw_target), None);
+
+        let mut src = DeviceIntRect::new(
+            source_screen_origin + (backdrop_rect.origin - backdrop_screen_origin),
+            readback_rect.size,
+        );
+        let mut dest = readback_rect.to_i32();
+
+        // Need to invert the y coordinates and flip the image vertically when
+        // reading back from the framebuffer.
+        if render_target.is_none() {
+            src.origin.y = framebuffer_size.height as i32 - src.size.height - src.origin.y;
+            dest.origin.y += dest.size.height;
+            dest.size.height = -dest.size.height;
+        }
+
+        self.device.bind_read_target(render_target);
+        self.device.blit_render_target(src, dest);
+
+        // Restore draw target to current pass render target + layer.
+        // Note: leaving the viewport unchanged, it's not a part of FBO state
+        self.device.bind_draw_target(render_target, None);
+
+        if scissor_rect.is_some() {
+            self.device.enable_scissor();
+        }
+    }
+
     fn submit_batch(
         &mut self,
         key: &BatchKey,
         instances: &[PrimitiveInstance],
         projection: &Transform3D<f32>,
         render_tasks: &RenderTaskTree,
         render_target: Option<(&Texture, i32)>,
         framebuffer_size: DeviceUintSize,
         stats: &mut RendererStats,
         scissor_rect: Option<DeviceIntRect>,
     ) {
-        match key.kind {
-            BatchKind::HardwareComposite => {
-                self.ps_hw_composite
-                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
-            }
-            BatchKind::SplitComposite => {
-                self.ps_split_composite.bind(
-                    &mut self.device,
-                    projection,
-                    0,
-                    &mut self.renderer_errors,
-                );
-            }
-            BatchKind::Brush(brush_kind) => {
-                match brush_kind {
-                    BrushBatchKind::Solid => {
-                        self.brush_solid.bind(
-                            &mut self.device,
-                            key.blend_mode,
-                            projection,
-                            0,
-                            &mut self.renderer_errors,
-                        );
-                    }
-                    BrushBatchKind::Image(image_buffer_kind) => {
-                        self.brush_image[image_buffer_kind as usize]
-                            .as_mut()
-                            .expect("Unsupported image shader kind")
-                            .bind(
-                                &mut self.device,
-                                key.blend_mode,
-                                projection,
-                                0,
-                                &mut self.renderer_errors,
-                            );
-                    }
-                    BrushBatchKind::Line => {
-                        self.brush_line.bind(
-                            &mut self.device,
-                            key.blend_mode,
-                            projection,
-                            0,
-                            &mut self.renderer_errors,
-                        );
-                    }
-                    BrushBatchKind::Blend => {
-                        self.brush_blend.bind(
-                            &mut self.device,
-                            key.blend_mode,
-                            projection,
-                            0,
-                            &mut self.renderer_errors,
-                        );
-                    }
-                    BrushBatchKind::MixBlend { .. } => {
-                        self.brush_mix_blend.bind(
-                            &mut self.device,
-                            key.blend_mode,
-                            projection,
-                            0,
-                            &mut self.renderer_errors,
-                        );
-                    }
-                    BrushBatchKind::RadialGradient => {
-                        self.brush_radial_gradient.bind(
-                            &mut self.device,
-                            key.blend_mode,
-                            projection,
-                            0,
-                            &mut self.renderer_errors,
-                        );
-                    }
-                    BrushBatchKind::LinearGradient => {
-                        self.brush_linear_gradient.bind(
-                            &mut self.device,
-                            key.blend_mode,
-                            projection,
-                            0,
-                            &mut self.renderer_errors,
-                        );
-                    }
-                    BrushBatchKind::YuvImage(image_buffer_kind, format, color_space) => {
-                        let shader_index =
-                            Renderer::get_yuv_shader_index(image_buffer_kind, format, color_space);
-                        self.brush_yuv_image[shader_index]
-                            .as_mut()
-                            .expect("Unsupported YUV shader kind")
-                            .bind(
-                                &mut self.device,
-                                key.blend_mode,
-                                projection,
-                                0,
-                                &mut self.renderer_errors,
-                            );
-                    }
-                }
-            }
-            BatchKind::Transformable(transform_kind, batch_kind) => match batch_kind {
-                TransformBatchKind::TextRun(..) => {
-                    unreachable!("bug: text batches are special cased");
-                }
-                TransformBatchKind::Image(image_buffer_kind) => {
-                    self.ps_image[image_buffer_kind as usize]
-                        .as_mut()
-                        .expect("Unsupported image shader kind")
-                        .bind(
-                            &mut self.device,
-                            transform_kind,
-                            projection,
-                            0,
-                            &mut self.renderer_errors,
-                        );
-                }
-                TransformBatchKind::BorderCorner => {
-                    self.ps_border_corner.bind(
-                        &mut self.device,
-                        transform_kind,
-                        projection,
-                        0,
-                        &mut self.renderer_errors,
-                    );
-                }
-                TransformBatchKind::BorderEdge => {
-                    self.ps_border_edge.bind(
-                        &mut self.device,
-                        transform_kind,
-                        projection,
-                        0,
-                        &mut self.renderer_errors,
-                    );
-                }
-            },
-        };
+        self.shaders
+            .get(key)
+            .bind(
+                &mut self.device, projection,
+                &mut self.renderer_errors,
+            );
 
         // Handle special case readback for composites.
         if let BatchKind::Brush(BrushBatchKind::MixBlend { task_id, source_id, backdrop_id }) = key.kind {
-            if scissor_rect.is_some() {
-                self.device.disable_scissor();
-            }
-
             // composites can't be grouped together because
             // they may overlap and affect each other.
             debug_assert_eq!(instances.len(), 1);
-            let cache_texture = self.texture_resolver
-                .resolve(&SourceTexture::CacheRGBA8)
-                .unwrap();
-
-            // Before submitting the composite batch, do the
-            // framebuffer readbacks that are needed for each
-            // composite operation in this batch.
-            let source = &render_tasks[source_id];
-            let backdrop = &render_tasks[task_id];
-            let readback = &render_tasks[backdrop_id];
-
-            let (readback_rect, readback_layer) = readback.get_target_rect();
-            let (backdrop_rect, _) = backdrop.get_target_rect();
-            let backdrop_screen_origin = match backdrop.kind {
-                RenderTaskKind::Picture(ref task_info) => match task_info.content_origin {
-                    ContentOrigin::Local(_) => panic!("bug: composite from a local-space rasterized picture?"),
-                    ContentOrigin::Screen(p) => p,
-                },
-                _ => panic!("bug: composite on non-picture?"),
-            };
-            let source_screen_origin = match source.kind {
-                RenderTaskKind::Picture(ref task_info) => match task_info.content_origin {
-                    ContentOrigin::Local(_) => panic!("bug: composite from a local-space rasterized picture?"),
-                    ContentOrigin::Screen(p) => p,
-                },
-                _ => panic!("bug: composite on non-picture?"),
-            };
-
-            // Bind the FBO to blit the backdrop to.
-            // Called per-instance in case the layer (and therefore FBO)
-            // changes. The device will skip the GL call if the requested
-            // target is already bound.
-            let cache_draw_target = (cache_texture, readback_layer.0 as i32);
-            self.device.bind_draw_target(Some(cache_draw_target), None);
-
-            let mut src = DeviceIntRect::new(
-                source_screen_origin + (backdrop_rect.origin - backdrop_screen_origin),
-                readback_rect.size,
+            self.handle_readback_composite(
+                render_target,
+                framebuffer_size,
+                scissor_rect,
+                &render_tasks[source_id],
+                &render_tasks[task_id],
+                &render_tasks[backdrop_id],
             );
-            let mut dest = readback_rect.to_i32();
-
-            // Need to invert the y coordinates and flip the image vertically when
-            // reading back from the framebuffer.
-            if render_target.is_none() {
-                src.origin.y = framebuffer_size.height as i32 - src.size.height - src.origin.y;
-                dest.origin.y += dest.size.height;
-                dest.size.height = -dest.size.height;
-            }
-
-            self.device.bind_read_target(render_target);
-            self.device.blit_render_target(src, dest);
-
-            // Restore draw target to current pass render target + layer.
-            // Note: leaving the viewport unchanged, it's not a part of FBO state
-            self.device.bind_draw_target(render_target, None);
-
-            if scissor_rect.is_some() {
-                self.device.enable_scissor();
-            }
         }
 
-        let _timer = self.gpu_profile.start_timer(key.kind.gpu_sampler_tag());
+        let _timer = self.gpu_profile.start_timer(key.kind.sampler_tag());
         self.draw_instanced_batch(
             instances,
             VertexArrayKind::Primitive,
             &key.textures,
             stats
         );
     }
 
@@ -3509,18 +2662,18 @@ impl Renderer {
         // separable implementation.
         // TODO(gw): In the future, consider having
         //           fast path blur shaders for common
         //           blur radii with fixed weights.
         if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
             let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);
 
             self.device.set_blend(false);
-            self.cs_blur_rgba8
-                .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
+            self.shaders.cs_blur_rgba8
+                .bind(&mut self.device, projection, &mut self.renderer_errors);
 
             if !target.vertical_blurs.is_empty() {
                 self.draw_instanced_batch(
                     &target.vertical_blurs,
                     VertexArrayKind::Blur,
                     &BatchTextures::no_texture(),
                     stats,
                 );
@@ -3545,18 +2698,18 @@ impl Renderer {
         // it removes the overhead of submitting many small glyphs
         // to multiple tiles in the normal text run case.
         for alpha_batch_container in &target.alpha_batch_containers {
             if !alpha_batch_container.text_run_cache_prims.is_empty() {
                 self.device.set_blend(true);
                 self.device.set_blend_mode_premultiplied_alpha();
 
                 let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_TEXT_RUN);
-                self.cs_text_run
-                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
+                self.shaders.cs_text_run
+                    .bind(&mut self.device, projection, &mut self.renderer_errors);
                 for (texture_id, instances) in &alpha_batch_container.text_run_cache_prims {
                     self.draw_instanced_batch(
                         instances,
                         VertexArrayKind::Primitive,
                         &BatchTextures::color(*texture_id),
                         stats,
                     );
                 }
@@ -3627,169 +2780,116 @@ impl Renderer {
                         // effect, to ensure we can apply clip masks correctly.
                         // In the future, there are several optimizations available:
                         // 1) Use dual source blending where available (almost all recent hardware).
                         // 2) Use frame buffer fetch where available (most modern hardware).
                         // 3) Consider the old constant color blend method where no clip is applied.
                         let _timer = self.gpu_profile.start_timer(GPU_TAG_PRIM_TEXT_RUN);
 
                         self.device.set_blend(true);
+                        // bind the proper shader first
+                        match batch.key.blend_mode {
+                            BlendMode::SubpixelDualSource => &mut self.shaders.ps_text_run_dual_source,
+                            _ => &mut self.shaders.ps_text_run,
+                        }
+                            .get(glyph_format, transform_kind)
+                            .bind(
+                                &mut self.device,
+                                projection,
+                                &mut self.renderer_errors,
+                            );
 
                         match batch.key.blend_mode {
                             BlendMode::Alpha => panic!("Attempt to composite non-premultiplied text primitives."),
                             BlendMode::PremultipliedAlpha => {
                                 self.device.set_blend_mode_premultiplied_alpha();
-
-                                self.ps_text_run.bind(
-                                    &mut self.device,
-                                    glyph_format,
-                                    transform_kind,
-                                    projection,
-                                    TextShaderMode::from(glyph_format),
-                                    &mut self.renderer_errors,
-                                );
+                                self.device.switch_mode(TextShaderMode::from(glyph_format) as _);
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
                                     VertexArrayKind::Primitive,
                                     &batch.key.textures,
                                     stats,
                                 );
                             }
                             BlendMode::SubpixelDualSource => {
                                 self.device.set_blend_mode_subpixel_dual_source();
-
-                                self.ps_text_run_dual_source.bind(
-                                    &mut self.device,
-                                    glyph_format,
-                                    transform_kind,
-                                    projection,
-                                    TextShaderMode::SubpixelDualSource,
-                                    &mut self.renderer_errors,
-                                );
+                                self.device.switch_mode(TextShaderMode::SubpixelDualSource as _);
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
                                     VertexArrayKind::Primitive,
                                     &batch.key.textures,
                                     stats,
                                 );
                             }
                             BlendMode::SubpixelConstantTextColor(color) => {
                                 self.device.set_blend_mode_subpixel_constant_text_color(color);
-
-                                self.ps_text_run.bind(
-                                    &mut self.device,
-                                    glyph_format,
-                                    transform_kind,
-                                    projection,
-                                    TextShaderMode::SubpixelConstantTextColor,
-                                    &mut self.renderer_errors,
-                                );
+                                self.device.switch_mode(TextShaderMode::SubpixelConstantTextColor as _);
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
                                     VertexArrayKind::Primitive,
                                     &batch.key.textures,
                                     stats,
                                 );
                             }
                             BlendMode::SubpixelVariableTextColor => {
                                 // Using the two pass component alpha rendering technique:
                                 //
                                 // http://anholt.livejournal.com/32058.html
                                 //
                                 self.device.set_blend_mode_subpixel_pass0();
-
-                                self.ps_text_run.bind(
-                                    &mut self.device,
-                                    glyph_format,
-                                    transform_kind,
-                                    projection,
-                                    TextShaderMode::SubpixelPass0,
-                                    &mut self.renderer_errors,
-                                );
+                                self.device.switch_mode(TextShaderMode::SubpixelPass0 as _);
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
                                     VertexArrayKind::Primitive,
                                     &batch.key.textures,
                                     stats,
                                 );
 
                                 self.device.set_blend_mode_subpixel_pass1();
-
-                                self.ps_text_run.bind(
-                                    &mut self.device,
-                                    glyph_format,
-                                    transform_kind,
-                                    projection,
-                                    TextShaderMode::SubpixelPass1,
-                                    &mut self.renderer_errors,
-                                );
+                                self.device.switch_mode(TextShaderMode::SubpixelPass1 as _);
 
                                 // When drawing the 2nd pass, we know that the VAO, textures etc
                                 // are all set up from the previous draw_instanced_batch call,
                                 // so just issue a draw call here to avoid re-uploading the
                                 // instances and re-binding textures etc.
                                 self.device
                                     .draw_indexed_triangles_instanced_u16(6, batch.instances.len() as i32);
                             }
                             BlendMode::SubpixelWithBgColor => {
                                 // Using the three pass "component alpha with font smoothing
                                 // background color" rendering technique:
                                 //
                                 // /webrender/doc/text-rendering.md
                                 //
                                 self.device.set_blend_mode_subpixel_with_bg_color_pass0();
-
-                                self.ps_text_run.bind(
-                                    &mut self.device,
-                                    glyph_format,
-                                    transform_kind,
-                                    projection,
-                                    TextShaderMode::SubpixelWithBgColorPass0,
-                                    &mut self.renderer_errors,
-                                );
+                                self.device.switch_mode(TextShaderMode::SubpixelWithBgColorPass0 as _);
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
                                     VertexArrayKind::Primitive,
                                     &batch.key.textures,
                                     stats,
                                 );
 
                                 self.device.set_blend_mode_subpixel_with_bg_color_pass1();
-
-                                self.ps_text_run.bind(
-                                    &mut self.device,
-                                    glyph_format,
-                                    transform_kind,
-                                    projection,
-                                    TextShaderMode::SubpixelWithBgColorPass1,
-                                    &mut self.renderer_errors,
-                                );
+                                self.device.switch_mode(TextShaderMode::SubpixelWithBgColorPass1 as _);
 
                                 // When drawing the 2nd and 3rd passes, we know that the VAO, textures etc
                                 // are all set up from the previous draw_instanced_batch call,
                                 // so just issue a draw call here to avoid re-uploading the
                                 // instances and re-binding textures etc.
                                 self.device
                                     .draw_indexed_triangles_instanced_u16(6, batch.instances.len() as i32);
 
                                 self.device.set_blend_mode_subpixel_with_bg_color_pass2();
-
-                                self.ps_text_run.bind(
-                                    &mut self.device,
-                                    glyph_format,
-                                    transform_kind,
-                                    projection,
-                                    TextShaderMode::SubpixelWithBgColorPass2,
-                                    &mut self.renderer_errors,
-                                );
+                                self.device.switch_mode(TextShaderMode::SubpixelWithBgColorPass2 as _);
 
                                 self.device
                                     .draw_indexed_triangles_instanced_u16(6, batch.instances.len() as i32);
                             }
                             BlendMode::PremultipliedDestOut | BlendMode::None => {
                                 unreachable!("bug: bad blend mode for text");
                             }
                         }
@@ -3928,18 +3028,18 @@ impl Renderer {
         // separable implementation.
         // TODO(gw): In the future, consider having
         //           fast path blur shaders for common
         //           blur radii with fixed weights.
         if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
             let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);
 
             self.device.set_blend(false);
-            self.cs_blur_a8
-                .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
+            self.shaders.cs_blur_a8
+                .bind(&mut self.device, projection, &mut self.renderer_errors);
 
             if !target.vertical_blurs.is_empty() {
                 self.draw_instanced_batch(
                     &target.vertical_blurs,
                     VertexArrayKind::Blur,
                     &BatchTextures::no_texture(),
                     stats,
                 );
@@ -3962,18 +3062,18 @@ impl Renderer {
             let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_CLIP);
 
             // If we have border corner clips, the first step is to clear out the
             // area in the clip mask. This allows drawing multiple invididual clip
             // in regions below.
             if !target.clip_batcher.border_clears.is_empty() {
                 let _gm2 = self.gpu_profile.start_marker("clip borders [clear]");
                 self.device.set_blend(false);
-                self.cs_clip_border
-                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
+                self.shaders.cs_clip_border
+                    .bind(&mut self.device, projection, &mut self.renderer_errors);
                 self.draw_instanced_batch(
                     &target.clip_batcher.border_clears,
                     VertexArrayKind::Clip,
                     &BatchTextures::no_texture(),
                     stats,
                 );
             }
 
@@ -3981,37 +3081,36 @@ impl Renderer {
             if !target.clip_batcher.borders.is_empty() {
                 let _gm2 = self.gpu_profile.start_marker("clip borders");
                 // We are masking in parts of the corner (dots or dashes) here.
                 // Blend mode is set to max to allow drawing multiple dots.
                 // The individual dots and dashes in a border never overlap, so using
                 // a max blend mode here is fine.
                 self.device.set_blend(true);
                 self.device.set_blend_mode_max();
-                self.cs_clip_border
-                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
+                self.shaders.cs_clip_border
+                    .bind(&mut self.device, projection, &mut self.renderer_errors);
                 self.draw_instanced_batch(
                     &target.clip_batcher.borders,
                     VertexArrayKind::Clip,
                     &BatchTextures::no_texture(),
                     stats,
                 );
             }
 
             // switch to multiplicative blending
             self.device.set_blend(true);
             self.device.set_blend_mode_multiply();
 
             // draw rounded cornered rectangles
             if !target.clip_batcher.rectangles.is_empty() {
                 let _gm2 = self.gpu_profile.start_marker("clip rectangles");
-                self.cs_clip_rectangle.bind(
+                self.shaders.cs_clip_rectangle.bind(
                     &mut self.device,
                     projection,
-                    0,
                     &mut self.renderer_errors,
                 );
                 self.draw_instanced_batch(
                     &target.clip_batcher.rectangles,
                     VertexArrayKind::Clip,
                     &BatchTextures::no_texture(),
                     stats,
                 );
@@ -4021,18 +3120,18 @@ impl Renderer {
                 let _gm2 = self.gpu_profile.start_marker("box-shadows");
                 let textures = BatchTextures {
                     colors: [
                         mask_texture_id.clone(),
                         SourceTexture::Invalid,
                         SourceTexture::Invalid,
                     ],
                 };
-                self.cs_clip_box_shadow
-                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
+                self.shaders.cs_clip_box_shadow
+                    .bind(&mut self.device, projection, &mut self.renderer_errors);
                 self.draw_instanced_batch(
                     items,
                     VertexArrayKind::Clip,
                     &textures,
                     stats,
                 );
             }
 
@@ -4041,18 +3140,18 @@ impl Renderer {
                 let _gm2 = self.gpu_profile.start_marker("clip images");
                 let textures = BatchTextures {
                     colors: [
                         mask_texture_id.clone(),
                         SourceTexture::Invalid,
                         SourceTexture::Invalid,
                     ],
                 };
-                self.cs_clip_image
-                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
+                self.shaders.cs_clip_image
+                    .bind(&mut self.device, projection, &mut self.renderer_errors);
                 self.draw_instanced_batch(
                     items,
                     VertexArrayKind::Clip,
                     &textures,
                     stats,
                 );
             }
         }
@@ -4092,18 +3191,18 @@ impl Renderer {
 
         // Handle any blits to this texture from child tasks.
         self.handle_blits(&target.blits, render_tasks);
 
         // Draw any blurs for this target.
         if !target.horizontal_blurs.is_empty() {
             let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);
 
-            self.cs_blur_a8
-                .bind(&mut self.device, &projection, 0, &mut self.renderer_errors);
+            self.shaders.cs_blur_a8
+                .bind(&mut self.device, &projection, &mut self.renderer_errors);
 
             self.draw_instanced_batch(
                 &target.horizontal_blurs,
                 VertexArrayKind::Blur,
                 &BatchTextures::no_texture(),
                 stats,
             );
         }
@@ -4661,53 +3760,20 @@ impl Renderer {
         self.local_clip_rects_texture.deinit(&mut self.device);
         self.render_task_texture.deinit(&mut self.device);
         self.device.delete_pbo(self.texture_cache_upload_pbo);
         self.texture_resolver.deinit(&mut self.device);
         self.device.delete_vao(self.prim_vao);
         self.device.delete_vao(self.clip_vao);
         self.device.delete_vao(self.blur_vao);
         self.debug.deinit(&mut self.device);
-        self.cs_text_run.deinit(&mut self.device);
-        self.cs_blur_a8.deinit(&mut self.device);
-        self.cs_blur_rgba8.deinit(&mut self.device);
-        self.brush_solid.deinit(&mut self.device);
-        self.brush_line.deinit(&mut self.device);
-        self.brush_blend.deinit(&mut self.device);
-        self.brush_mix_blend.deinit(&mut self.device);
-        self.brush_radial_gradient.deinit(&mut self.device);
-        self.brush_linear_gradient.deinit(&mut self.device);
-        self.cs_clip_rectangle.deinit(&mut self.device);
-        self.cs_clip_box_shadow.deinit(&mut self.device);
-        self.cs_clip_image.deinit(&mut self.device);
-        self.cs_clip_border.deinit(&mut self.device);
-        self.ps_text_run.deinit(&mut self.device);
-        self.ps_text_run_dual_source.deinit(&mut self.device);
-        for shader in self.brush_image {
-            if let Some(shader) = shader {
-                shader.deinit(&mut self.device);
-            }
-        }
-        for shader in self.ps_image {
-            if let Some(shader) = shader {
-                shader.deinit(&mut self.device);
-            }
-        }
-        for shader in self.brush_yuv_image {
-            if let Some(shader) = shader {
-                shader.deinit(&mut self.device);
-            }
-        }
         for (_, target) in self.output_targets {
             self.device.delete_fbo(target.fbo_id);
         }
-        self.ps_border_corner.deinit(&mut self.device);
-        self.ps_border_edge.deinit(&mut self.device);
-        self.ps_hw_composite.deinit(&mut self.device);
-        self.ps_split_composite.deinit(&mut self.device);
+        self.shaders.deinit(&mut self.device);
         #[cfg(feature = "capture")]
         self.device.delete_fbo(self.read_fbo);
         #[cfg(feature = "replay")]
         for (_, ext) in self.owned_external_images {
             self.device.delete_external_texture(ext);
         }
         self.device.end_frame();
     }
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -387,16 +387,18 @@ impl ResourceCache {
         // it locally for glyph metric requests.
         self.glyph_rasterizer.add_font(font_key, template.clone());
         self.resources.font_templates.insert(font_key, template);
     }
 
     pub fn delete_font_template(&mut self, font_key: FontKey) {
         self.glyph_rasterizer.delete_font(font_key);
         self.resources.font_templates.remove(&font_key);
+        self.cached_glyphs
+            .clear_fonts(|font| font.font_key == font_key);
         if let Some(ref mut r) = self.blob_image_renderer {
             r.delete_font(font_key);
         }
     }
 
     pub fn add_font_instance(
         &mut self,
         instance_key: FontInstanceKey,
@@ -951,23 +953,29 @@ impl ResourceCache {
         }
     }
 
     pub fn clear_namespace(&mut self, namespace: IdNamespace) {
         self.resources
             .image_templates
             .images
             .retain(|key, _| key.0 != namespace);
+        self.cached_images
+            .clear_keys(|request| request.key.0 == namespace);
 
+        self.resources.font_instances
+            .write()
+            .unwrap()
+            .retain(|key, _| key.0 != namespace);
+        for &key in self.resources.font_templates.keys().filter(|key| key.0 == namespace) {
+            self.glyph_rasterizer.delete_font(key);
+        }
         self.resources
             .font_templates
             .retain(|key, _| key.0 != namespace);
-
-        self.cached_images
-            .clear_keys(|request| request.key.0 == namespace);
         self.cached_glyphs
             .clear_fonts(|font| font.font_key.0 == namespace);
     }
 }
 
 // Compute the width and height of a tile depending on its position in the image.
 pub fn compute_tile_size(
     descriptor: &ImageDescriptor,
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/src/shade.rs
@@ -0,0 +1,820 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{
+    YUV_COLOR_SPACES, YUV_FORMATS,
+    YuvColorSpace, YuvFormat,
+};
+use batch::{BatchKey, BatchKind, BrushBatchKind, TransformBatchKind};
+use device::{Device, Program, ShaderError};
+use euclid::{Transform3D};
+use glyph_rasterizer::GlyphFormat;
+use renderer::{
+    desc,
+    MAX_VERTEX_TEXTURE_WIDTH,
+    BlendMode, ImageBufferKind, RendererError, RendererOptions,
+    TextureSampler, VertexArrayKind,
+};
+use util::TransformedRectKind;
+
+use gleam::gl::GlType;
+use time::precise_time_ns;
+
+
+impl ImageBufferKind {
+    // Feature string associated with this buffer kind (empty for `Texture2DArray`).
+    fn get_feature_string(&self) -> &'static str {
+        match *self {
+            ImageBufferKind::Texture2DArray => "",
+            ImageBufferKind::Texture2D => "TEXTURE_2D",
+            ImageBufferKind::TextureRect => "TEXTURE_RECT",
+            ImageBufferKind::TextureExternal => "TEXTURE_EXTERNAL",
+        }
+    }
+
+    // Whether this kind is usable on `gl_type`: only `TextureExternal` on `Gl` is not.
+    fn has_platform_support(&self, gl_type: &GlType) -> bool {
+        match (*self, gl_type) {
+            (ImageBufferKind::TextureExternal, &GlType::Gl) => false,
+            (ImageBufferKind::TextureExternal, &GlType::Gles) | (ImageBufferKind::Texture2D, _) |
+            (ImageBufferKind::Texture2DArray, _) | (ImageBufferKind::TextureRect, _) => true,
+        }
+    }
+}
+
+pub const IMAGE_BUFFER_KINDS: [ImageBufferKind; 4] = [ // one entry per `ImageBufferKind` variant
+    ImageBufferKind::Texture2D,
+    ImageBufferKind::TextureRect,
+    ImageBufferKind::TextureExternal,
+    ImageBufferKind::Texture2DArray,
+];
+
+const TRANSFORM_FEATURE: &str = "TRANSFORM"; // extra feature of `PrimitiveShader::transform`
+const ALPHA_FEATURE: &str = "ALPHA_PASS"; // extra feature of `BrushShader::alpha`
+const DITHERING_FEATURE: &str = "DITHERING";
+
+enum ShaderKind { // selects how `LazilyCompiledShader::get` builds the program
+    Primitive, // `create_prim_shader` with `VertexArrayKind::Primitive`
+    Cache(VertexArrayKind), // `create_prim_shader` with the carried vertex format
+    ClipCache, // `create_clip_shader`
+    Brush, // built the same way as `Primitive`
+    Text, // built the same way as `Primitive`
+}
+
+// A shader program that is compiled lazily: `program` stays `None` until `get` succeeds.
+pub struct LazilyCompiledShader {
+    program: Option<Program>,
+    name: &'static str,
+    kind: ShaderKind,
+    features: Vec<&'static str>,
+}
+
+impl LazilyCompiledShader {
+    // Wraps a shader description. With `precache` set, the program is also compiled,
+    // bound and used for one draw call right away, and both timings are logged.
+    fn new(
+        kind: ShaderKind,
+        name: &'static str,
+        features: &[&'static str],
+        device: &mut Device,
+        precache: bool,
+    ) -> Result<Self, ShaderError> {
+        let mut shader = LazilyCompiledShader {
+            program: None,
+            name,
+            kind,
+            features: features.to_vec(),
+        };
+
+        if precache {
+            let compile_start = precise_time_ns();
+            let program = shader.get(device)?;
+            let compile_end = precise_time_ns();
+            device.bind_program(program);
+            device.draw_triangles_u16(0, 3);
+            let draw_end = precise_time_ns();
+            debug!(
+                "[C: {:.1} ms D: {:.1} ms] Precache {} {:?}",
+                (compile_end - compile_start) as f64 / 1000000.0,
+                (draw_end - compile_end) as f64 / 1000000.0,
+                name,
+                features
+            );
+        }
+
+        Ok(shader)
+    }
+
+    // Binds the program on `device` and sets its uniforms from `projection`. A compile
+    // failure is recorded in `renderer_errors` instead, and nothing gets bound.
+    pub fn bind(
+        &mut self,
+        device: &mut Device,
+        projection: &Transform3D<f32>,
+        renderer_errors: &mut Vec<RendererError>,
+    ) {
+        match self.get(device) {
+            Ok(program) => {
+                device.bind_program(program);
+                device.set_uniforms(program, projection);
+            }
+            Err(err) => renderer_errors.push(RendererError::from(err)),
+        }
+    }
+
+    // Returns the program, compiling and caching it first if that has not happened yet.
+    fn get(&mut self, device: &mut Device) -> Result<&Program, ShaderError> {
+        if self.program.is_none() {
+            // `None` selects the clip shader constructor; the rest only differ in format.
+            let vertex_format = match self.kind {
+                ShaderKind::ClipCache => None,
+                ShaderKind::Cache(format) => Some(format),
+                ShaderKind::Primitive | ShaderKind::Brush | ShaderKind::Text =>
+                    Some(VertexArrayKind::Primitive),
+            };
+            let compiled = match vertex_format {
+                Some(format) => create_prim_shader(self.name, device, &self.features, format),
+                None => create_clip_shader(self.name, device),
+            };
+            self.program = Some(compiled?);
+        }
+
+        Ok(self.program.as_ref().unwrap())
+    }
+
+    // Deletes the program on `device` if one was ever compiled.
+    fn deinit(self, device: &mut Device) {
+        if let Some(compiled) = self.program {
+            device.delete_program(compiled);
+        }
+    }
+}
+
+// A brush shader is compiled in two variants:
+// opaque:
+//   Used for completely opaque primitives,
+//   or inside segments of partially
+//   opaque primitives. Assumes no need
+//   for clip masks, AA etc.
+// alpha:
+//   Used for brush primitives in the alpha
+//   pass. Assumes that AA should be applied
+//   along the primitive edge, and also that
+//   clip mask is present.
+struct BrushShader {
+    opaque: LazilyCompiledShader,
+    alpha: LazilyCompiledShader,
+}
+
+impl BrushShader {
+    // Creates both variants from the same shader `name`; the alpha variant is
+    // given `ALPHA_FEATURE` in addition to the caller's `features`.
+    fn new(
+        name: &'static str,
+        device: &mut Device,
+        features: &[&'static str],
+        precache: bool,
+    ) -> Result<Self, ShaderError> {
+        let alpha_features: Vec<&'static str> = features.iter()
+            .cloned()
+            .chain(Some(ALPHA_FEATURE))
+            .collect();
+
+        // Field initialisers run in the order written, so `opaque` is still created first.
+        Ok(BrushShader {
+            opaque: LazilyCompiledShader::new(
+                ShaderKind::Brush, name, features, device, precache,
+            )?,
+            alpha: LazilyCompiledShader::new(
+                ShaderKind::Brush, name, &alpha_features, device, precache,
+            )?,
+        })
+    }
+
+    // Selects the variant for `blend_mode`: `opaque` for `BlendMode::None`, else `alpha`.
+    fn get(&mut self, blend_mode: BlendMode) -> &mut LazilyCompiledShader {
+        let needs_alpha = match blend_mode {
+            BlendMode::None => false,
+            BlendMode::Alpha |
+            BlendMode::PremultipliedAlpha |
+            BlendMode::PremultipliedDestOut |
+            BlendMode::SubpixelDualSource |
+            BlendMode::SubpixelConstantTextColor(..) |
+            BlendMode::SubpixelVariableTextColor |
+            BlendMode::SubpixelWithBgColor => true,
+        };
+        if needs_alpha { &mut self.alpha } else { &mut self.opaque }
+    }
+
+    // Releases both variants, `opaque` first.
+    fn deinit(self, device: &mut Device) {
+        let BrushShader { opaque, alpha } = self;
+        opaque.deinit(device);
+        alpha.deinit(device);
+    }
+}
+
+// A primitive shader in two variants: `simple` is built from the caller's
+// features only, `transform` additionally enables `TRANSFORM_FEATURE`.
+struct PrimitiveShader {
+    simple: LazilyCompiledShader,
+    transform: LazilyCompiledShader,
+}
+
+impl PrimitiveShader {
+    // Creates both variants of the shader called `name`, `simple` first.
+    fn new(
+        name: &'static str,
+        device: &mut Device,
+        features: &[&'static str],
+        precache: bool,
+    ) -> Result<Self, ShaderError> {
+        let transform_features: Vec<&'static str> = features.iter()
+            .cloned()
+            .chain(Some(TRANSFORM_FEATURE))
+            .collect();
+
+        // Field initialisers run in the order written, so `simple` is created first.
+        Ok(PrimitiveShader {
+            simple: LazilyCompiledShader::new(
+                ShaderKind::Primitive, name, features, device, precache,
+            )?,
+            transform: LazilyCompiledShader::new(
+                ShaderKind::Primitive, name, &transform_features, device, precache,
+            )?,
+        })
+    }
+
+    // Selects `simple` for axis-aligned rects and `transform` for complex ones.
+    fn get(&mut self, transform_kind: TransformedRectKind) -> &mut LazilyCompiledShader {
+        match transform_kind {
+            TransformedRectKind::Complex => &mut self.transform,
+            TransformedRectKind::AxisAligned => &mut self.simple,
+        }
+    }
+
+    // Releases both variants, `simple` first.
+    fn deinit(self, device: &mut Device) {
+        let PrimitiveShader { simple, transform } = self;
+        simple.deinit(device);
+        transform.deinit(device);
+    }
+}
+
+// Text shader in three variants, all built from the same shader name:
+// `simple` uses the caller's features as given, `transform` adds
+// `TRANSFORM_FEATURE` and `glyph_transform` adds "GLYPH_TRANSFORM".
+pub struct TextShader {
+    simple: LazilyCompiledShader,
+    transform: LazilyCompiledShader,
+    glyph_transform: LazilyCompiledShader,
+}
+
+impl TextShader {
+    // Creates the three variants of the shader called `name`, in the order
+    // `simple`, `transform`, `glyph_transform`; the first error is returned.
+    fn new(
+        name: &'static str,
+        device: &mut Device,
+        features: &[&'static str],
+        precache: bool,
+    ) -> Result<Self, ShaderError> {
+        let simple = LazilyCompiledShader::new(
+            ShaderKind::Text, name, features, device, precache,
+        )?;
+
+        // Use the shared constant instead of repeating the literal, so this
+        // stays in step with the feature name `PrimitiveShader` enables.
+        let mut transform_features = features.to_vec();
+        transform_features.push(TRANSFORM_FEATURE);
+        let transform = LazilyCompiledShader::new(
+            ShaderKind::Text, name, &transform_features, device, precache,
+        )?;
+
+        let mut glyph_transform_features = features.to_vec();
+        glyph_transform_features.push("GLYPH_TRANSFORM");
+        let glyph_transform = LazilyCompiledShader::new(
+            ShaderKind::Text, name, &glyph_transform_features, device, precache,
+        )?;
+
+        Ok(TextShader {
+            simple, transform, glyph_transform,
+        })
+    }
+
+    // Selects the variant for a glyph format and rect kind: transformed glyph
+    // formats always use `glyph_transform`; all other formats use `simple` for
+    // axis-aligned rects and `transform` for complex ones.
+    pub fn get(
+        &mut self,
+        glyph_format: GlyphFormat,
+        transform_kind: TransformedRectKind,
+    ) -> &mut LazilyCompiledShader {
+        match glyph_format {
+            GlyphFormat::Alpha |
+            GlyphFormat::Subpixel |
+            GlyphFormat::Bitmap |
+            GlyphFormat::ColorBitmap => match transform_kind {
+                TransformedRectKind::AxisAligned => &mut self.simple,
+                TransformedRectKind::Complex => &mut self.transform,
+            }
+            // `transform_kind` is not consulted for transformed glyph formats.
+            GlyphFormat::TransformedAlpha |
+            GlyphFormat::TransformedSubpixel => &mut self.glyph_transform,
+        }
+    }
+
+    // Releases all three variants.
+    fn deinit(self, device: &mut Device) {
+        self.simple.deinit(device);
+        self.transform.deinit(device);
+        self.glyph_transform.deinit(device);
+    }
+}
+
+/// Creates the program `name` for a primitive-style shader and binds its samplers.
+///
+/// The source prefix defines `WR_MAX_VERTEX_TEXTURE_WIDTH`, followed by one
+/// `WR_FEATURE_<feature>` define per entry of `features`, in order.
+/// `vertex_format` selects the vertex descriptor handed to the device.
+/// A program creation error is returned as-is, without binding any sampler.
+fn create_prim_shader(
+    name: &'static str,
+    device: &mut Device,
+    features: &[&'static str],
+    vertex_format: VertexArrayKind,
+) -> Result<Program, ShaderError> {
+    let mut prefix = format!(
+        "#define WR_MAX_VERTEX_TEXTURE_WIDTH {}\n",
+        MAX_VERTEX_TEXTURE_WIDTH
+    );
+    prefix.extend(
+        features
+            .iter()
+            .map(|feature| format!("#define WR_FEATURE_{}\n", feature)),
+    );
+
+    debug!("PrimShader {}", name);
+
+    let vertex_descriptor = match vertex_format {
+        VertexArrayKind::Clip => desc::CLIP,
+        VertexArrayKind::Blur => desc::BLUR,
+        VertexArrayKind::Primitive => desc::PRIM_INSTANCES,
+    };
+
+    // Bail out before touching samplers if the program could not be created.
+    let program = device.create_program(name, &prefix, &vertex_descriptor)?;
+
+    device.bind_shader_samplers(
+        &program,
+        &[
+            ("sColor0", TextureSampler::Color0),
+            ("sColor1", TextureSampler::Color1),
+            ("sColor2", TextureSampler::Color2),
+            ("sDither", TextureSampler::Dither),
+            ("sCacheA8", TextureSampler::CacheA8),
+            ("sCacheRGBA8", TextureSampler::CacheRGBA8),
+            ("sClipScrollNodes", TextureSampler::ClipScrollNodes),
+            ("sRenderTasks", TextureSampler::RenderTasks),
+            ("sResourceCache", TextureSampler::ResourceCache),
+            ("sSharedCacheA8", TextureSampler::SharedCacheA8),
+            ("sLocalClipRects", TextureSampler::LocalClipRects),
+        ],
+    );
+
+    Ok(program)
+}
+
+/// Creates the program `name` for a clip shader and binds its samplers.
+///
+/// The source prefix defines `WR_MAX_VERTEX_TEXTURE_WIDTH` and
+/// `WR_FEATURE_TRANSFORM`, and the program uses the `desc::CLIP` vertex
+/// descriptor. A program creation error is returned as-is, without binding
+/// any sampler.
+fn create_clip_shader(name: &'static str, device: &mut Device) -> Result<Program, ShaderError> {
+    // The trailing `\` keeps the line break and the source indentation out of
+    // the generated prefix, so the two defines land on consecutive lines with
+    // `#` in the first column.
+    let prefix = format!(
+        "#define WR_MAX_VERTEX_TEXTURE_WIDTH {}\n\
+        #define WR_FEATURE_TRANSFORM\n",
+        MAX_VERTEX_TEXTURE_WIDTH
+    );
+
+    debug!("ClipShader {}", name);
+
+    let program = device.create_program(name, &prefix, &desc::CLIP);
+
+    // Samplers can only be bound when the program was actually created.
+    if let Ok(ref program) = program {
+        device.bind_shader_samplers(
+            program,
+            &[
+                ("sColor0", TextureSampler::Color0),
+                ("sClipScrollNodes", TextureSampler::ClipScrollNodes),
+                ("sRenderTasks", TextureSampler::RenderTasks),
+                ("sResourceCache", TextureSampler::ResourceCache),
+                ("sSharedCacheA8", TextureSampler::SharedCacheA8),
+                ("sLocalClipRects", TextureSampler::LocalClipRects),
+            ],
+        );
+    }
+
+    program
+}
+
+
+pub struct Shaders { // Every shader variant created by Shaders::new(), grouped by role.
+    // These are "cache shaders". These shaders are used to
+    // draw intermediate results to cache targets. The results
+    // of these shaders are then used by the primitive shaders.
+    pub cs_text_run: LazilyCompiledShader,
+    pub cs_blur_a8: LazilyCompiledShader, // "cs_blur" created with the ALPHA_TARGET feature
+    pub cs_blur_rgba8: LazilyCompiledShader, // "cs_blur" created with the COLOR_TARGET feature
+
+    // Brush shaders
+    brush_solid: BrushShader,
+    brush_line: BrushShader,
+    brush_image: Vec<Option<BrushShader>>, // one slot per IMAGE_BUFFER_KINDS entry; None if unsupported
+    brush_blend: BrushShader,
+    brush_mix_blend: BrushShader,
+    brush_yuv_image: Vec<Option<BrushShader>>, // slot chosen by get_yuv_shader_index(); None if unsupported
+    brush_radial_gradient: BrushShader,
+    brush_linear_gradient: BrushShader,
+
+    /// These are "cache clip shaders". These shaders are used to
+    /// draw clip instances into the cached clip mask. The results
+    /// of these shaders are also used by the primitive shaders.
+    pub cs_clip_rectangle: LazilyCompiledShader,
+    pub cs_clip_box_shadow: LazilyCompiledShader,
+    pub cs_clip_image: LazilyCompiledShader,
+    pub cs_clip_border: LazilyCompiledShader,
+
+    // These are "primitive shaders". These shaders draw and blend
+    // final results on screen. They are aware of tile boundaries.
+    // Most draw directly to the framebuffer, but some use inputs
+    // from the cache shaders to draw. Specifically, the box
+    // shadow primitive shader stretches the box shadow cache
+    // output, and the cache_image shader blits the results of
+    // a cache shader (e.g. blur) to the screen.
+    pub ps_text_run: TextShader,
+    pub ps_text_run_dual_source: TextShader, // "ps_text_run" created with DUAL_SOURCE_BLENDING
+    ps_image: Vec<Option<PrimitiveShader>>, // one slot per IMAGE_BUFFER_KINDS entry; None if unsupported
+    ps_border_corner: PrimitiveShader,
+    ps_border_edge: PrimitiveShader,
+
+    ps_hw_composite: LazilyCompiledShader,
+    ps_split_composite: LazilyCompiledShader,
+}
+
+impl Shaders { // Construction, lookup by batch key, and teardown of the shader set.
+    pub fn new(
+        device: &mut Device,
+        gl_type: GlType,
+        options: &RendererOptions,
+    ) -> Result<Self, ShaderError> { // every `?` below returns the first creation error
+        let cs_text_run = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Primitive),
+            "cs_text_run",
+            &[],
+            device,
+            options.precache_shaders,
+        )?;
+
+        let brush_solid = BrushShader::new(
+            "brush_solid",
+            device,
+            &[],
+            options.precache_shaders,
+        )?;
+
+        let brush_line = BrushShader::new(
+            "brush_line",
+            device,
+            &[],
+            options.precache_shaders,
+        )?;
+
+        let brush_blend = BrushShader::new(
+            "brush_blend",
+            device,
+            &[],
+            options.precache_shaders,
+        )?;
+
+        let brush_mix_blend = BrushShader::new(
+            "brush_mix_blend",
+            device,
+            &[],
+            options.precache_shaders,
+        )?;
+
+        let brush_radial_gradient = BrushShader::new(
+            "brush_radial_gradient",
+            device,
+            if options.enable_dithering { // dithering feature only when enabled in the options
+               &[DITHERING_FEATURE]
+            } else {
+               &[]
+            },
+            options.precache_shaders,
+        )?;
+
+        let brush_linear_gradient = BrushShader::new(
+            "brush_linear_gradient",
+            device,
+            if options.enable_dithering { // dithering feature only when enabled in the options
+               &[DITHERING_FEATURE]
+            } else {
+               &[]
+            },
+            options.precache_shaders,
+        )?;
+
+        let cs_blur_a8 = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Blur),
+            "cs_blur", // same shader name as cs_blur_rgba8; only the feature differs
+            &["ALPHA_TARGET"],
+            device,
+            options.precache_shaders,
+        )?;
+
+        let cs_blur_rgba8 = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Blur),
+            "cs_blur", // same shader name as cs_blur_a8; only the feature differs
+            &["COLOR_TARGET"],
+            device,
+            options.precache_shaders,
+        )?;
+
+        let cs_clip_rectangle = LazilyCompiledShader::new(
+            ShaderKind::ClipCache,
+            "cs_clip_rectangle",
+            &[],
+            device,
+            options.precache_shaders,
+        )?;
+
+        let cs_clip_box_shadow = LazilyCompiledShader::new(
+            ShaderKind::ClipCache,
+            "cs_clip_box_shadow",
+            &[],
+            device,
+            options.precache_shaders,
+        )?;
+
+        let cs_clip_image = LazilyCompiledShader::new(
+            ShaderKind::ClipCache,
+            "cs_clip_image",
+            &[],
+            device,
+            options.precache_shaders,
+        )?;
+
+        let cs_clip_border = LazilyCompiledShader::new(
+            ShaderKind::ClipCache,
+            "cs_clip_border",
+            &[],
+            device,
+            options.precache_shaders,
+        )?;
+
+        let ps_text_run = TextShader::new("ps_text_run",
+            device,
+            &[],
+            options.precache_shaders,
+        )?;
+
+        let ps_text_run_dual_source = TextShader::new("ps_text_run", // same shader name, extra feature below
+            device,
+            &["DUAL_SOURCE_BLENDING"],
+            options.precache_shaders,
+        )?;
+
+        // All image configuration: one ps_image and one brush_image shader per supported buffer kind.
+        let mut image_features = Vec::new();
+        let mut ps_image = Vec::new();
+        let mut brush_image = Vec::new();
+        // PrimitiveShader is not Clone, so `vec![None; n]` is unavailable; push() each slot instead.
+        for _ in 0 .. IMAGE_BUFFER_KINDS.len() {
+            ps_image.push(None);
+            brush_image.push(None);
+        }
+        for buffer_kind in 0 .. IMAGE_BUFFER_KINDS.len() {
+            if IMAGE_BUFFER_KINDS[buffer_kind].has_platform_support(&gl_type) { // unsupported kinds keep None
+                let feature_string = IMAGE_BUFFER_KINDS[buffer_kind].get_feature_string();
+                if feature_string != "" {
+                    image_features.push(feature_string);
+                }
+                ps_image[buffer_kind] = Some(PrimitiveShader::new( // NOTE(review): slot is the loop position, while get() indexes by `kind as usize` - assumes IMAGE_BUFFER_KINDS is in discriminant order; confirm
+                    "ps_image",
+                    device,
+                    &image_features,
+                    options.precache_shaders,
+                )?);
+                brush_image[buffer_kind] = Some(BrushShader::new(
+                    "brush_image",
+                    device,
+                    &image_features,
+                    options.precache_shaders,
+                )?);
+            }
+            image_features.clear(); // start the next buffer kind with an empty feature list
+        }
+
+        // All yuv_image configuration: one shader per (buffer kind, format, color space) combination.
+        let mut yuv_features = Vec::new();
+        let yuv_shader_num = IMAGE_BUFFER_KINDS.len() * YUV_FORMATS.len() * YUV_COLOR_SPACES.len();
+        let mut brush_yuv_image = Vec::new();
+        // Pre-fill every slot with None using push(), mirroring the image shader vecs above.
+        for _ in 0 .. yuv_shader_num {
+            brush_yuv_image.push(None);
+        }
+        for buffer_kind in 0 .. IMAGE_BUFFER_KINDS.len() {
+            if IMAGE_BUFFER_KINDS[buffer_kind].has_platform_support(&gl_type) { // unsupported kinds keep None
+                for format_kind in 0 .. YUV_FORMATS.len() {
+                    for color_space_kind in 0 .. YUV_COLOR_SPACES.len() {
+                        let feature_string = IMAGE_BUFFER_KINDS[buffer_kind].get_feature_string();
+                        if feature_string != "" {
+                            yuv_features.push(feature_string);
+                        }
+                        let feature_string = YUV_FORMATS[format_kind].get_feature_string();
+                        if feature_string != "" {
+                            yuv_features.push(feature_string);
+                        }
+                        let feature_string =
+                            YUV_COLOR_SPACES[color_space_kind].get_feature_string();
+                        if feature_string != "" {
+                            yuv_features.push(feature_string);
+                        }
+
+                        let shader = BrushShader::new(
+                            "brush_yuv_image",
+                            device,
+                            &yuv_features,
+                            options.precache_shaders,
+                        )?;
+                        let index = Self::get_yuv_shader_index( // same index function that get() uses for lookup
+                            IMAGE_BUFFER_KINDS[buffer_kind],
+                            YUV_FORMATS[format_kind],
+                            YUV_COLOR_SPACES[color_space_kind],
+                        );
+                        brush_yuv_image[index] = Some(shader);
+                        yuv_features.clear(); // start the next combination with an empty feature list
+                    }
+                }
+            }
+        }
+
+        let ps_border_corner = PrimitiveShader::new(
+            "ps_border_corner",
+             device,
+             &[],
+             options.precache_shaders,
+        )?;
+
+        let ps_border_edge = PrimitiveShader::new(
+            "ps_border_edge",
+             device,
+             &[],
+             options.precache_shaders,
+        )?;
+
+        let ps_hw_composite = LazilyCompiledShader::new(
+            ShaderKind::Primitive,
+            "ps_hardware_composite",
+            &[],
+            device,
+            options.precache_shaders,
+        )?;
+
+        let ps_split_composite = LazilyCompiledShader::new(
+            ShaderKind::Primitive,
+            "ps_split_composite",
+            &[],
+            device,
+            options.precache_shaders,
+        )?;
+
+        Ok(Shaders {
+            cs_text_run,
+            cs_blur_a8,
+            cs_blur_rgba8,
+            brush_solid,
+            brush_line,
+            brush_image,
+            brush_blend,
+            brush_mix_blend,
+            brush_yuv_image,
+            brush_radial_gradient,
+            brush_linear_gradient,
+            cs_clip_rectangle,
+            cs_clip_box_shadow,
+            cs_clip_border,
+            cs_clip_image,
+            ps_text_run,
+            ps_text_run_dual_source,
+            ps_image,
+            ps_border_corner,
+            ps_border_edge,
+            ps_hw_composite,
+            ps_split_composite,
+        })
+    }
+    /// Flattens (buffer kind, format, color space) into one index, using each enum's `as usize` value.
+    fn get_yuv_shader_index(
+        buffer_kind: ImageBufferKind,
+        format: YuvFormat,
+        color_space: YuvColorSpace,
+    ) -> usize {
+        ((buffer_kind as usize) * YUV_FORMATS.len() + (format as usize)) * YUV_COLOR_SPACES.len() +
+            (color_space as usize)
+    }
+    /// Shader for `key`. Panics for image/YUV kinds left as `None` and for text-run batches.
+    pub fn get(&mut self, key: &BatchKey) -> &mut LazilyCompiledShader {
+        match key.kind {
+            BatchKind::HardwareComposite => {
+                &mut self.ps_hw_composite
+            }
+            BatchKind::SplitComposite => {
+                &mut self.ps_split_composite
+            }
+            BatchKind::Brush(brush_kind) => {
+                let brush_shader = match brush_kind {
+                    BrushBatchKind::Solid => {
+                        &mut self.brush_solid
+                    }
+                    BrushBatchKind::Image(image_buffer_kind) => {
+                        self.brush_image[image_buffer_kind as usize]
+                            .as_mut()
+                            .expect("Unsupported image shader kind")
+                    }
+                    BrushBatchKind::Line => {
+                        &mut self.brush_line
+                    }
+                    BrushBatchKind::Blend => {
+                        &mut self.brush_blend
+                    }
+                    BrushBatchKind::MixBlend { .. } => {
+                        &mut self.brush_mix_blend
+                    }
+                    BrushBatchKind::RadialGradient => {
+                        &mut self.brush_radial_gradient
+                    }
+                    BrushBatchKind::LinearGradient => {
+                        &mut self.brush_linear_gradient
+                    }
+                    BrushBatchKind::YuvImage(image_buffer_kind, format, color_space) => {
+                        let shader_index =
+                            Self::get_yuv_shader_index(image_buffer_kind, format, color_space);
+                        self.brush_yuv_image[shader_index]
+                            .as_mut()
+                            .expect("Unsupported YUV shader kind")
+                    }
+                };
+                brush_shader.get(key.blend_mode) // the brush shader picks its variant from the blend mode
+            }
+            BatchKind::Transformable(transform_kind, batch_kind) => {
+                let prim_shader = match batch_kind {
+                    TransformBatchKind::TextRun(..) => {
+                        unreachable!("bug: text batches are special cased");
+                    }
+                    TransformBatchKind::Image(image_buffer_kind) => {
+                        self.ps_image[image_buffer_kind as usize]
+                            .as_mut()
+                            .expect("Unsupported image shader kind")
+                    }
+                    TransformBatchKind::BorderCorner => {
+                        &mut self.ps_border_corner
+                    }
+                    TransformBatchKind::BorderEdge => {
+                        &mut self.ps_border_edge
+                    }
+                };
+                prim_shader.get(transform_kind) // simple vs. transform variant
+            }
+        }
+    }
+    /// Consumes `self` and calls `deinit` on every shader it holds, skipping `None` slots.
+    pub fn deinit(self, device: &mut Device) {
+        self.cs_text_run.deinit(device);
+        self.cs_blur_a8.deinit(device);
+        self.cs_blur_rgba8.deinit(device);
+        self.brush_solid.deinit(device);
+        self.brush_line.deinit(device);
+        self.brush_blend.deinit(device);
+        self.brush_mix_blend.deinit(device);
+        self.brush_radial_gradient.deinit(device);
+        self.brush_linear_gradient.deinit(device);
+        self.cs_clip_rectangle.deinit(device);
+        self.cs_clip_box_shadow.deinit(device);
+        self.cs_clip_image.deinit(device);
+        self.cs_clip_border.deinit(device);
+        self.ps_text_run.deinit(device);
+        self.ps_text_run_dual_source.deinit(device);
+        for shader in self.brush_image {
+            if let Some(shader) = shader {
+                shader.deinit(device);
+            }
+        }
+        for shader in self.ps_image {
+            if let Some(shader) = shader {
+                shader.deinit(device);
+            }
+        }
+        for shader in self.brush_yuv_image {
+            if let Some(shader) = shader {
+                shader.deinit(device);
+            }
+        }
+        self.ps_border_corner.deinit(device);
+        self.ps_border_edge.deinit(device);
+        self.ps_hw_composite.deinit(device);
+        self.ps_split_composite.deinit(device);
+    }
+}
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -1,14 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{ColorF, DeviceUintPoint, DeviceUintRect, DeviceUintSize};
-use api::{ExternalImageType, ImageData, ImageFormat, PremultipliedColorF};
+use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize};
+use api::{ExternalImageType, ImageData, ImageFormat};
 use api::ImageDescriptor;
 use device::TextureFilter;
 use freelist::{FreeList, FreeListHandle, UpsertResult, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle};
 use gpu_types::ImageSource;
 use internal_types::{CacheTextureId, FastHashMap, TextureUpdateList, TextureUpdateSource};
 use internal_types::{RenderTargetInfo, SourceTexture, TextureUpdate, TextureUpdateOp};
 use profiler::{ResourceProfileCounter, TextureCacheProfileCounters};
@@ -98,18 +98,16 @@ struct CacheEntry {
     last_access: FrameId,
     // Handle to the resource rect in the GPU cache.
     uv_rect_handle: GpuCacheHandle,
     // Image format of the item.
     format: ImageFormat,
     filter: TextureFilter,
     // The actual device texture ID this is part of.
     texture_id: CacheTextureId,
-    // Color to modulate this cache item by.
-    color: PremultipliedColorF,
 }
 
 impl CacheEntry {
     // Create a new entry for a standalone texture.
     fn new_standalone(
         texture_id: CacheTextureId,
         size: DeviceUintSize,
         format: ImageFormat,
@@ -121,17 +119,16 @@ impl CacheEntry {
             size,
             user_data,
             last_access,
             kind: EntryKind::Standalone,
             texture_id,
             format,
             filter,
             uv_rect_handle: GpuCacheHandle::new(),
-            color: ColorF::new(1.0, 1.0, 1.0, 1.0).premultiplied(),
         }
     }
 
     // Update the GPU cache for this texture cache entry.
     // This ensures that the UV rect, and texture layer index
     // are up to date in the GPU cache for vertex shaders
     // to fetch from.
     fn update_gpu_cache(&mut self, gpu_cache: &mut GpuCache) {
@@ -142,17 +139,16 @@ impl CacheEntry {
                     origin,
                     layer_index,
                     ..
                 } => (origin, layer_index as f32),
             };
             let image_source = ImageSource {
                 p0: origin.to_f32(),
                 p1: (origin + self.size).to_f32(),
-                color: self.color,
                 texture_layer: layer_index,
                 user_data: self.user_data,
             };
             image_source.write_gpu_blocks(&mut request);
         }
     }
 }
 
@@ -1068,17 +1064,16 @@ impl TextureArray {
                 size: DeviceUintSize::new(width, height),
                 user_data,
                 last_access: frame_id,
                 kind,
                 uv_rect_handle: GpuCacheHandle::new(),
                 format: self.format,
                 filter: self.filter,
                 texture_id: self.texture_id.unwrap(),
-                color: ColorF::new(1.0, 1.0, 1.0, 1.0).premultiplied(),
             }
         })
     }
 }
 
 impl TextureUpdate {
     // Constructs a TextureUpdate operation to be passed to the
     // rendering thread in order to do an upload to the right
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -8,19 +8,19 @@ use api::{MixBlendMode, PipelineId};
 use batch::{AlphaBatchBuilder, AlphaBatchContainer, ClipBatcher, resolve_image};
 use clip::{ClipStore};
 use clip_scroll_tree::{ClipScrollTree, ClipScrollNodeIndex};
 use device::{FrameId, Texture};
 use gpu_cache::{GpuCache};
 use gpu_types::{BlurDirection, BlurInstance};
 use gpu_types::{ClipScrollNodeData};
 use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
-use picture::{PictureKind};
+use picture::PictureKind;
 use prim_store::{CachedGradient, PrimitiveIndex, PrimitiveKind, PrimitiveStore};
-use prim_store::{DeferredResolve};
+use prim_store::{BrushKind, DeferredResolve};
 use profiler::FrameProfileCounters;
 use render_task::{BlitSource, RenderTaskId, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, RenderTaskLocation, RenderTaskTree};
 use resource_cache::ResourceCache;
 use std::{cmp, usize, f32, i32};
 use texture_allocator::GuillotineAllocator;
 
 const MIN_TARGET_SIZE: u32 = 2048;
@@ -315,33 +315,41 @@ impl RenderTarget for ColorRenderTarget 
     ) {
         let mut merged_batches = AlphaBatchContainer::new(None);
 
         for task_id in &self.alpha_tasks {
             let task = &render_tasks[*task_id];
 
             match task.kind {
                 RenderTaskKind::Picture(ref pic_task) => {
-                    let pic_index = ctx.prim_store.cpu_metadata[pic_task.prim_index.0].cpu_prim_index;
-                    let pic = &ctx.prim_store.cpu_pictures[pic_index.0];
-                    let (target_rect, _) = task.get_target_rect();
+                    let brush_index = ctx.prim_store.cpu_metadata[pic_task.prim_index.0].cpu_prim_index;
+                    let brush = &ctx.prim_store.cpu_brushes[brush_index.0];
+                    match brush.kind {
+                        BrushKind::Picture { pic_index } => {
+                            let pic = &ctx.prim_store.pictures[pic_index.0];
+                            let (target_rect, _) = task.get_target_rect();
 
-                    let mut batch_builder = AlphaBatchBuilder::new(self.screen_size, target_rect);
+                            let mut batch_builder = AlphaBatchBuilder::new(self.screen_size, target_rect);
 
-                    batch_builder.add_pic_to_batch(
-                        pic,
-                        *task_id,
-                        ctx,
-                        gpu_cache,
-                        render_tasks,
-                        deferred_resolves,
-                    );
+                            batch_builder.add_pic_to_batch(
+                                pic,
+                                *task_id,
+                                ctx,
+                                gpu_cache,
+                                render_tasks,
+                                deferred_resolves,
+                            );
 
-                    if let Some(batch_container) = batch_builder.build(&mut merged_batches) {
-                        self.alpha_batch_containers.push(batch_container);
+                            if let Some(batch_container) = batch_builder.build(&mut merged_batches) {
+                                self.alpha_batch_containers.push(batch_container);
+                            }
+                        }
+                        _ => {
+                            unreachable!();
+                        }
                     }
                 }
                 _ => {
                     unreachable!();
                 }
             }
         }
 
@@ -376,22 +384,23 @@ impl RenderTarget for ColorRenderTarget 
                     task.children[0],
                     BlurDirection::Horizontal,
                     render_tasks,
                 );
             }
             RenderTaskKind::Picture(ref task_info) => {
                 let prim_metadata = ctx.prim_store.get_metadata(task_info.prim_index);
                 match prim_metadata.prim_kind {
-                    PrimitiveKind::Picture => {
-                        let prim = &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
+                    PrimitiveKind::Brush => {
+                        let brush = &ctx.prim_store.cpu_brushes[prim_metadata.cpu_prim_index.0];
+                        let pic = &ctx.prim_store.pictures[brush.get_picture_index().0];
 
                         self.alpha_tasks.push(task_id);
 
-                        if let PictureKind::Image { frame_output_pipeline_id, .. } = prim.kind {
+                        if let PictureKind::Image { frame_output_pipeline_id, .. } = pic.kind {
                             // If this pipeline is registered as a frame output
                             // store the information necessary to do the copy.
                             if let Some(pipeline_id) = frame_output_pipeline_id {
                                 self.outputs.push(FrameOutput {
                                     pipeline_id,
                                     task_id,
                                 });
                             }
--- a/gfx/webrender_api/src/color.rs
+++ b/gfx/webrender_api/src/color.rs
@@ -21,16 +21,18 @@ pub struct PremultipliedColorF {
     pub a: f32,
 }
 
 impl PremultipliedColorF {
     ///
     pub const BLACK: Self = PremultipliedColorF { r: 0.0, g: 0.0, b: 0.0, a: 1.0 };
     ///
     pub const TRANSPARENT: Self = PremultipliedColorF { r: 0.0, g: 0.0, b: 0.0, a: 0.0 };
+    ///
+    pub const WHITE: Self = PremultipliedColorF { r: 1.0, g: 1.0, b: 1.0, a: 1.0 };
 
     pub fn to_array(&self) -> [f32; 4] {
         [self.r, self.g, self.b, self.a]
     }
 }
 
 /// Represents RGBA screen colors with floating point numbers.
 ///
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -59,37 +59,33 @@ pub struct GenericDisplayItem<T> {
     pub info: LayoutPrimitiveInfo,
 }
 
 pub type DisplayItem = GenericDisplayItem<SpecificDisplayItem>;
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct PrimitiveInfo<T> {
     pub rect: TypedRect<f32, T>,
-    pub local_clip: LocalClip,
+    pub clip_rect: TypedRect<f32, T>,
     pub is_backface_visible: bool,
     pub tag: Option<ItemTag>,
 }
 
 impl LayerPrimitiveInfo {
     pub fn new(rect: TypedRect<f32, LayerPixel>) -> Self {
         Self::with_clip_rect(rect, rect)
     }
 
     pub fn with_clip_rect(
         rect: TypedRect<f32, LayerPixel>,
         clip_rect: TypedRect<f32, LayerPixel>,
     ) -> Self {
-        Self::with_clip(rect, LocalClip::from(clip_rect))
-    }
-
-    pub fn with_clip(rect: TypedRect<f32, LayerPixel>, clip: LocalClip) -> Self {
         PrimitiveInfo {
-            rect: rect,
-            local_clip: clip,
+            rect,
+            clip_rect,
             is_backface_visible: true,
             tag: None,
         }
     }
 }
 
 pub type LayoutPrimitiveInfo = PrimitiveInfo<LayoutPixel>;
 pub type LayerPrimitiveInfo = PrimitiveInfo<LayerPixel>;
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -14,18 +14,18 @@ use std::marker::PhantomData;
 use std::{io, mem, ptr, slice};
 use time::precise_time_ns;
 use {AlphaType, BorderDetails, BorderDisplayItem, BorderRadius, BorderWidths, BoxShadowClipMode};
 use {BoxShadowDisplayItem, ClipAndScrollInfo, ClipChainId, ClipChainItem, ClipDisplayItem, ClipId};
 use {ColorF, ComplexClipRegion, DisplayItem, ExtendMode, ExternalScrollId, FilterOp};
 use {FontInstanceKey, GlyphInstance, GlyphOptions, Gradient, GradientDisplayItem, GradientStop};
 use {IframeDisplayItem, ImageDisplayItem, ImageKey, ImageMask, ImageRendering, LayerPrimitiveInfo};
 use {LayoutPoint, LayoutPrimitiveInfo, LayoutRect, LayoutSize, LayoutTransform, LayoutVector2D};
-use {LineDisplayItem, LineOrientation, LineStyle, LocalClip, MixBlendMode, PipelineId};
-use {PropertyBinding, PushStackingContextDisplayItem, RadialGradient, RadialGradientDisplayItem};
+use {LineDisplayItem, LineOrientation, LineStyle, MixBlendMode, PipelineId, PropertyBinding};
+use {PushStackingContextDisplayItem, RadialGradient, RadialGradientDisplayItem};
 use {RectangleDisplayItem, ScrollFrameDisplayItem, ScrollPolicy, ScrollSensitivity, Shadow};
 use {SpecificDisplayItem, StackingContext, StickyFrameDisplayItem, StickyOffsetBounds};
 use {TextDisplayItem, TransformStyle, YuvColorSpace, YuvData, YuvImageDisplayItem};
 
 // We don't want to push a long text-run. If a text-run is too long, split it into several parts.
 // This needs to be set to (renderer::MAX_VERTEX_TEXTURE_WIDTH - VECS_PER_PRIM_HEADER - VECS_PER_TEXT_RUN) * 2
 pub const MAX_TEXT_RUN_LENGTH: usize = 2038;
 
@@ -331,24 +331,24 @@ impl<'a, 'b> DisplayItemRef<'a, 'b> {
     pub fn rect(&self) -> LayoutRect {
         self.iter.cur_item.info.rect
     }
 
     pub fn get_layer_primitive_info(&self, offset: &LayoutVector2D) -> LayerPrimitiveInfo {
         let info = self.iter.cur_item.info;
         LayerPrimitiveInfo {
             rect: info.rect.translate(&offset),
-            local_clip: info.local_clip.create_with_offset(offset),
+            clip_rect: info.clip_rect.translate(&offset),
             is_backface_visible: info.is_backface_visible,
             tag: info.tag,
         }
     }
 
-    pub fn local_clip(&self) -> &LocalClip {
-        &self.iter.cur_item.info.local_clip
+    pub fn clip_rect(&self) -> &LayoutRect {
+        &self.iter.cur_item.info.clip_rect
     }
 
     pub fn clip_and_scroll(&self) -> ClipAndScrollInfo {
         self.iter.cur_item.clip_and_scroll
     }
 
     pub fn item(&self) -> &SpecificDisplayItem {
         &self.iter.cur_item.item
--- a/gfx/webrender_bindings/revision.txt
+++ b/gfx/webrender_bindings/revision.txt
@@ -1,1 +1,1 @@
-4ccaede43b3944199f89a42f49093d93409c7f61
+486ee5f3aefb0172c2c5703e19f833e63eb295b9
--- a/gfx/wrench/src/cgfont_to_data.rs
+++ b/gfx/wrench/src/cgfont_to_data.rs
@@ -24,17 +24,17 @@ fn calc_table_checksum<D: Read>(mut data
     // which won't affect the checksum
     sum = sum.wrapping_add(BigEndian::read_u32(&buf));
     sum
 }
 
 fn max_pow_2_less_than(a: u16) -> u16 {
     let x = 1;
     let mut shift = 0;
-    while (x << (shift + 1)) < a {
+    while a > (x << (shift + 1)) {
         shift += 1;
     }
     shift
 }
 
 struct TableRecord {
     tag: u32,
     checksum: u32,
--- a/gfx/wrench/src/rawtest.rs
+++ b/gfx/wrench/src/rawtest.rs
@@ -597,23 +597,16 @@ impl<'a> RawtestHarness<'a> {
         let make_rounded_complex_clip = | rect: &LayoutRect, radius: f32 | -> ComplexClipRegion {
             ComplexClipRegion::new(
                 *rect,
                 BorderRadius::uniform_size(LayoutSize::new(radius, radius)),
                 ClipMode::Clip
             )
         };
 
-        // Add a rounded 100x100 rectangle at 200,0.
-        let rect = LayoutRect::new(LayoutPoint::new(200., 0.), LayoutSize::new(100., 100.));
-        let mut info = LayoutPrimitiveInfo::with_clip(
-            rect, LocalClip::RoundedRect(rect, make_rounded_complex_clip(&rect, 20.)));
-        info.tag = Some((0, 3));
-        builder.push_rect(&info, ColorF::new(1.0, 1.0, 1.0, 1.0));
-
 
         // Add a rectangle that is clipped by a rounded rect clip item.
         let rect = LayoutRect::new(LayoutPoint::new(100., 100.), LayoutSize::new(100., 100.));
         let clip_id = builder.define_clip(rect, vec![make_rounded_complex_clip(&rect, 20.)], None);
         builder.push_clip_id(clip_id);
         let mut info = LayoutPrimitiveInfo::new(rect);
         info.tag = Some((0, 4));
         builder.push_rect(&info, ColorF::new(1.0, 1.0, 1.0, 1.0));
@@ -682,14 +675,13 @@ impl<'a> RawtestHarness<'a> {
             );
 
             assert_hit_test(top_left, vec![(0, 1)]);
             assert_hit_test(WorldPoint::new(bottom_right.x, top_left.y), vec![(0, 1)]);
             assert_hit_test(WorldPoint::new(top_left.x, bottom_right.y), vec![(0, 1)]);
             assert_hit_test(bottom_right, vec![(0, 1)]);
         };
 
-        test_rounded_rectangle(WorldPoint::new(200., 0.), WorldSize::new(100., 100.), (0, 3));
         test_rounded_rectangle(WorldPoint::new(100., 100.), WorldSize::new(100., 100.), (0, 4));
         test_rounded_rectangle(WorldPoint::new(200., 100.), WorldSize::new(100., 100.), (0, 5));
     }
 
 }
--- a/gfx/wrench/src/yaml_frame_reader.rs
+++ b/gfx/wrench/src/yaml_frame_reader.rs
@@ -1252,24 +1252,22 @@ impl YamlFrameReader {
         item: &Yaml,
         info: &mut LayoutPrimitiveInfo,
     ) {
         info.rect = item["bounds"].as_rect().expect("iframe must have bounds");
         let pipeline_id = item["id"].as_pipeline_id().unwrap();
         dl.push_iframe(&info, pipeline_id);
     }
 
-    pub fn get_local_clip_for_item(&mut self, yaml: &Yaml, full_clip: LayoutRect) -> LocalClip {
-        let rect = yaml["clip-rect"].as_rect().unwrap_or(full_clip);
+    pub fn get_complex_clip_for_item(&mut self, yaml: &Yaml) -> Option<ComplexClipRegion> {
         let complex_clip = &yaml["complex-clip"];
-        if !complex_clip.is_badvalue() {
-            LocalClip::RoundedRect(rect, self.to_complex_clip_region(complex_clip))
-        } else {
-            LocalClip::from(rect)
+        if complex_clip.is_badvalue() {
+            return None;
         }
+        Some(self.to_complex_clip_region(complex_clip))
     }
 
     pub fn add_display_list_items_from_yaml(
         &mut self,
         dl: &mut DisplayListBuilder,
         wrench: &mut Wrench,
         yaml: &Yaml,
     ) {
@@ -1312,18 +1310,33 @@ impl YamlFrameReader {
 
             let clip_scroll_info = self.to_clip_and_scroll_info(
                 &item["clip-and-scroll"],
                 dl.pipeline_id
             );
             if let Some(clip_scroll_info) = clip_scroll_info {
                 dl.push_clip_and_scroll_info(clip_scroll_info);
             }
-            let local_clip = self.get_local_clip_for_item(item, full_clip);
-            let mut info = LayoutPrimitiveInfo::with_clip(LayoutRect::zero(), local_clip);
+
+            let complex_clip = self.get_complex_clip_for_item(item);
+            let clip_rect = item["clip-rect"].as_rect().unwrap_or(full_clip);
+
+            let mut pushed_clip = false;
+            if let Some(complex_clip) = complex_clip {
+                match item_type {
+                    "clip" | "clip-chain" | "scroll-frame" => {},
+                    _ => {
+                        let id = dl.define_clip(clip_rect, vec![complex_clip], None);
+                        dl.push_clip_id(id);
+                        pushed_clip = true;
+                    }
+                }
+            }
+
+            let mut info = LayoutPrimitiveInfo::with_clip_rect(LayoutRect::zero(), clip_rect);
             info.is_backface_visible = item["backface-visible"].as_bool().unwrap_or(true);;
             match item_type {
                 "rect" => self.handle_rect(dl, item, &mut info),
                 "clear-rect" => self.handle_clear_rect(dl, item, &mut info),
                 "line" => self.handle_line(dl, item, &mut info),
                 "image" => self.handle_image(dl, wrench, item, &mut info),
                 "yuv-image" => self.handle_yuv_image(dl, wrench, item, &mut info),
                 "text" | "glyphs" => self.handle_text(dl, wrench, item, &mut info),
@@ -1339,19 +1352,25 @@ impl YamlFrameReader {
                 "stacking-context" => {
                     self.add_stacking_context_from_yaml(dl, wrench, item, false, &mut info)
                 }
                 "shadow" => self.handle_push_shadow(dl, item, &mut info),
                 "pop-all-shadows" => self.handle_pop_all_shadows(dl),
                 _ => println!("Skipping unknown item type: {:?}", item),
             }
 
+            if pushed_clip {
+                dl.pop_clip_id();
+
+            }
+
             if clip_scroll_info.is_some() {
                 dl.pop_clip_id();
             }
+
         }
     }
 
     pub fn handle_scroll_frame(
         &mut self,
         dl: &mut DisplayListBuilder,
         wrench: &mut Wrench,
         yaml: &Yaml,
@@ -1430,17 +1449,17 @@ impl YamlFrameReader {
         dl: &mut DisplayListBuilder,
         yaml: &Yaml,
         info: &mut LayoutPrimitiveInfo,
     ) {
         let rect = yaml["bounds"]
             .as_rect()
             .expect("Text shadows require bounds");
         info.rect = rect;
-        info.local_clip = LocalClip::from(rect);
+        info.clip_rect = rect;
         let blur_radius = yaml["blur-radius"].as_f32().unwrap_or(0.0);
         let offset = yaml["offset"].as_vector().unwrap_or(LayoutVector2D::zero());
         let color = yaml["color"].as_colorf().unwrap_or(*BLACK_COLOR);
 
         dl.push_shadow(
             &info,
             Shadow {
                 blur_radius,
@@ -1550,17 +1569,17 @@ impl YamlFrameReader {
             if let Some(size) = yaml["scroll-offset"].as_point() {
                 let external_id = ExternalScrollId(0, dl.pipeline_id);
                 self.scroll_offsets.insert(external_id, LayerPoint::new(size.x, size.y));
             }
         }
 
         let filters = yaml["filters"].as_vec_filter_op().unwrap_or(vec![]);
         info.rect = bounds;
-        info.local_clip = LocalClip::from(bounds);
+        info.clip_rect = bounds;
 
         dl.push_stacking_context(
             &info,
             scroll_policy,
             transform.into(),
             transform_style,
             perspective,
             mix_blend_mode,
--- a/gfx/wrench/src/yaml_frame_writer.rs
+++ b/gfx/wrench/src/yaml_frame_writer.rs
@@ -660,20 +660,17 @@ impl YamlFrameWriter {
             let base = match list_iterator.next() {
                 Some(base) => base,
                 None => break,
             };
 
             let mut v = new_table();
             rect_node(&mut v, "bounds", &base.rect());
 
-            rect_node(&mut v, "clip-rect", base.local_clip().clip_rect());
-            if let &LocalClip::RoundedRect(_, ref region) = base.local_clip() {
-                yaml_node(&mut v, "complex-clip", self.make_complex_clip_node(region));
-            }
+            rect_node(&mut v, "clip-rect", base.clip_rect());
 
             let clip_and_scroll_yaml = match clip_id_mapper.map_info(&base.clip_and_scroll()) {
                 (scroll_id, Some(clip_id)) => {
                     Yaml::Array(vec![Yaml::Integer(scroll_id), Yaml::Integer(clip_id)])
                 }
                 (scroll_id, None) => Yaml::Integer(scroll_id),
             };
             yaml_node(&mut v, "clip-and-scroll", clip_and_scroll_yaml);
@@ -1029,17 +1026,17 @@ impl YamlFrameWriter {
                         let parent = ClipId::ClipChain(parent);
                         u32_node(&mut v, "parent", clip_id_mapper.map_id(&parent) as u32);
                     }
                 }
                 ScrollFrame(item) => {
                     str_node(&mut v, "type", "scroll-frame");
                     usize_node(&mut v, "id", clip_id_mapper.add_id(item.scroll_frame_id));
                     size_node(&mut v, "content-size", &base.rect().size);
-                    rect_node(&mut v, "bounds", &base.local_clip().clip_rect());
+                    rect_node(&mut v, "bounds", &base.clip_rect());
 
                     let (complex_clips, complex_clip_count) = base.complex_clip();
                     if let Some(complex) = self.make_complex_clips_node(
                         complex_clip_count,
                         complex_clips,
                         display_list,
                     ) {
                         yaml_node(&mut v, "complex", complex);
@@ -1047,17 +1044,17 @@ impl YamlFrameWriter {
 
                     if let Some(mask_yaml) = self.make_clip_mask_image_node(&item.image_mask) {
                         yaml_node(&mut v, "image-mask", mask_yaml);
                     }
                 }
                 StickyFrame(item) => {
                     str_node(&mut v, "type", "sticky-frame");
                     usize_node(&mut v, "id", clip_id_mapper.add_id(item.id));
-                    rect_node(&mut v, "bounds", &base.local_clip().clip_rect());
+                    rect_node(&mut v, "bounds", &base.clip_rect());
 
                     if let Some(margin) = item.margins.top {
                         f32_node(&mut v, "margin-top", margin);
                     }
                     if let Some(margin) = item.margins.bottom {
                         f32_node(&mut v, "margin-bottom", margin);
                     }
                     if let Some(margin) = item.margins.left {