Bug 1418315 - Update webrender to commit 81cfbcf0763205f25329adb9b2ff75d1cd56e3f1. r?nical draft
author Kartikaya Gupta <kgupta@mozilla.com>
Tue, 21 Nov 2017 10:40:43 -0500
changeset 701406 0d196daf8565e3f43778a2931e55fb8baac272e9
parent 701250 72ee4800d4156931c89b58bd807af4a3083702bb
child 701407 8459a24fc34e0419779e9d1eeff17684e4a70b9c
push id 90141
push user kgupta@mozilla.com
push date Tue, 21 Nov 2017 15:46:02 +0000
reviewers nical
bugs 1418315
milestone 59.0a1
Bug 1418315 - Update webrender to commit 81cfbcf0763205f25329adb9b2ff75d1cd56e3f1. r?nical MozReview-Commit-ID: 8eGY2WPahzB
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/res/brush.glsl
gfx/webrender/res/brush_image.glsl
gfx/webrender/res/clip_shared.glsl
gfx/webrender/res/cs_blur.glsl
gfx/webrender/res/cs_clip_border.glsl
gfx/webrender/res/cs_text_run.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_blend.glsl
gfx/webrender/res/ps_composite.glsl
gfx/webrender/res/ps_hardware_composite.glsl
gfx/webrender/res/ps_line.glsl
gfx/webrender/res/ps_split_composite.glsl
gfx/webrender/src/device.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/gpu_types.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/platform/macos/font.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/tiling.rs
gfx/webrender_api/Cargo.toml
gfx/webrender_api/src/color.rs
gfx/webrender_bindings/Cargo.toml
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 
 -------------------------------------------------------------------------------
 
 The version of WebRender currently in the tree is:
-d490a74c438d987122c600afca6bb2247ab38637
+81cfbcf0763205f25329adb9b2ff75d1cd56e3f1
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -1,11 +1,11 @@
 [package]
 name = "webrender"
-version = "0.53.2"
+version = "0.54.0"
 authors = ["Glenn Watson <gw@intuitionlibrary.com>"]
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 build = "build.rs"
 
 [features]
 default = ["freetype-lib"]
 freetype-lib = ["freetype/servo-freetype-sys"]
@@ -13,17 +13,17 @@ profiler = ["thread_profiler/thread_prof
 debugger = ["ws", "serde_json", "serde", "serde_derive"]
 
 [dependencies]
 app_units = "0.5.6"
 bincode = "0.9"
 byteorder = "1.0"
 euclid = "0.15.5"
 fxhash = "0.2.1"
-gleam = "0.4.8"
+gleam = "0.4.14"
 lazy_static = "0.2"
 log = "0.3"
 num-traits = "0.1.32"
 time = "0.1"
 rayon = "0.8"
 webrender_api = {path = "../webrender_api"}
 bitflags = "1.0"
 thread_profiler = "0.1.1"
@@ -32,20 +32,20 @@ ws = { optional = true, version = "0.7.3
 serde_json = { optional = true, version = "1.0" }
 serde = { optional = true, version = "1.0" }
 serde_derive = { optional = true, version = "1.0" }
 
 [dev-dependencies]
 angle = {git = "https://github.com/servo/angle", branch = "servo"}
 env_logger = "0.4"
 rand = "0.3"                # for the benchmarks
-servo-glutin = "0.12"     # for the example apps
+servo-glutin = "0.13"     # for the example apps
 
 [target.'cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))'.dependencies]
 freetype = { version = "0.3", default-features = false }
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.4"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.4"
-core-graphics = "0.9"
-core-text = { version = "7.0", default-features = false }
+core-graphics = "0.12.2"
+core-text = { version = "8.0", default-features = false }
--- a/gfx/webrender/res/brush.glsl
+++ b/gfx/webrender/res/brush.glsl
@@ -6,20 +6,18 @@
 
 void brush_vs(
     int prim_address,
     vec2 local_pos,
     RectWithSize local_rect,
     ivec2 user_data
 );
 
-// Whether this brush is being drawn on a Picture
-// task (new) or an alpha batch task (legacy).
-// Can be removed once everything uses pictures.
-#define BRUSH_FLAG_USES_PICTURE     (1 << 0)
+#define RASTERIZATION_MODE_LOCAL_SPACE      0.0
+#define RASTERIZATION_MODE_SCREEN_SPACE     1.0
 
 struct BrushInstance {
     int picture_address;
     int prim_address;
     int clip_node_id;
     int scroll_node_id;
     int clip_address;
     int z;
@@ -49,44 +47,45 @@ void main(void) {
     // Load the geometry for this brush. For now, this is simply the
     // local rect of the primitive. In the future, this will support
     // loading segment rects, and other rect formats (glyphs).
     PrimitiveGeometry geom = fetch_primitive_geometry(brush.prim_address);
 
     vec2 device_pos, local_pos;
     RectWithSize local_rect = geom.local_rect;
 
-    if ((brush.flags & BRUSH_FLAG_USES_PICTURE) != 0) {
-        // Fetch the dynamic picture that we are drawing on.
-        PictureTask pic_task = fetch_picture_task(brush.picture_address);
+    // Fetch the dynamic picture that we are drawing on.
+    PictureTask pic_task = fetch_picture_task(brush.picture_address);
+
+    if (pic_task.rasterization_mode == RASTERIZATION_MODE_LOCAL_SPACE) {
 
         local_pos = local_rect.p0 + aPosition.xy * local_rect.size;
 
         // Right now - pictures only support local positions. In the future, this
         // will be expanded to support transform picture types (the common kind).
-        device_pos = pic_task.target_rect.p0 + uDevicePixelRatio * (local_pos - pic_task.content_origin);
+        device_pos = pic_task.common_data.task_rect.p0 +
+                     uDevicePixelRatio * (local_pos - pic_task.content_origin);
 
         // Write the final position transformed by the orthographic device-pixel projection.
         gl_Position = uTransform * vec4(device_pos, 0.0, 1.0);
     } else {
-        AlphaBatchTask alpha_task = fetch_alpha_batch_task(brush.picture_address);
         Layer layer = fetch_layer(brush.clip_node_id, brush.scroll_node_id);
         ClipArea clip_area = fetch_clip_area(brush.clip_address);
 
         // Write the normal vertex information out.
         // TODO(gw): Support transform types in brushes. For now,
         //           the old cache image shader didn't support
         //           them yet anyway, so we're not losing any
         //           existing functionality.
         VertexInfo vi = write_vertex(
             geom.local_rect,
             geom.local_clip_rect,
             float(brush.z),
             layer,
-            alpha_task,
+            pic_task,
             geom.local_rect
         );
 
         local_pos = vi.local_pos;
 
         // For brush instances in the alpha pass, always write
         // out clip information.
         // TODO(gw): It's possible that we might want alpha
--- a/gfx/webrender/res/brush_image.glsl
+++ b/gfx/webrender/res/brush_image.glsl
@@ -26,30 +26,30 @@ void brush_vs(
     ivec2 user_data
 ) {
     // TODO(gw): For now, this brush_image shader is only
     //           being used to draw items from the intermediate
     //           surface cache (render tasks). In the future
     //           we can expand this to support items from
     //           the normal texture cache and unify this
     //           with the normal image shader.
-    BlurTask task = fetch_blur_task(user_data.x);
-    vUv.z = task.render_target_layer_index;
+    BlurTask blur_task = fetch_blur_task(user_data.x);
+    vUv.z = blur_task.common_data.texture_layer_index;
     vImageKind = user_data.y;
 
 #if defined WR_FEATURE_COLOR_TARGET
     vec2 texture_size = vec2(textureSize(sColor0, 0).xy);
 #else
     vec2 texture_size = vec2(textureSize(sColor1, 0).xy);
-    vColor = task.color;
+    vColor = blur_task.color;
 #endif
 
-    vec2 uv0 = task.target_rect.p0;
-    vec2 src_size = task.target_rect.size * task.scale_factor;
-    vec2 uv1 = uv0 + task.target_rect.size;
+    vec2 uv0 = blur_task.common_data.task_rect.p0;
+    vec2 src_size = blur_task.common_data.task_rect.size * blur_task.scale_factor;
+    vec2 uv1 = uv0 + blur_task.common_data.task_rect.size;
 
     // TODO(gw): In the future we'll probably draw these as segments
     //           with the brush shader. When that occurs, we can
     //           modify the UVs for each segment in the VS, and the
     //           FS can become a simple shader that doesn't need
     //           to adjust the UVs.
 
     switch (vImageKind) {
--- a/gfx/webrender/res/clip_shared.glsl
+++ b/gfx/webrender/res/clip_shared.glsl
@@ -47,18 +47,18 @@ RectWithSize intersect_rect(RectWithSize
 }
 
 // The transformed vertex function that always covers the whole clip area,
 // which is the intersection of all clip instances of a given primitive
 ClipVertexInfo write_clip_tile_vertex(RectWithSize local_clip_rect,
                                       Layer layer,
                                       ClipArea area,
                                       int segment) {
-    vec2 outer_p0 = area.screen_origin_target_index.xy;
-    vec2 outer_p1 = outer_p0 + area.task_bounds.zw - area.task_bounds.xy;
+    vec2 outer_p0 = area.screen_origin;
+    vec2 outer_p1 = outer_p0 + area.common_data.task_rect.size;
     vec2 inner_p0 = area.inner_rect.xy;
     vec2 inner_p1 = area.inner_rect.zw;
 
     vec2 p0, p1;
     switch (segment) {
         case SEGMENT_ALL:
             p0 = outer_p0;
             p1 = outer_p1;
@@ -81,17 +81,19 @@ ClipVertexInfo write_clip_tile_vertex(Re
             break;
     }
 
     vec2 actual_pos = mix(p0, p1, aPosition.xy);
 
     vec4 layer_pos = get_layer_pos(actual_pos / uDevicePixelRatio, layer);
 
     // compute the point position in side the layer, in CSS space
-    vec2 vertex_pos = actual_pos + area.task_bounds.xy - area.screen_origin_target_index.xy;
+    vec2 vertex_pos = actual_pos +
+                      area.common_data.task_rect.p0 -
+                      area.screen_origin;
 
     gl_Position = uTransform * vec4(vertex_pos, 0.0, 1);
 
     vLocalBounds = vec4(local_clip_rect.p0, local_clip_rect.p0 + local_clip_rect.size);
 
     ClipVertexInfo vi = ClipVertexInfo(layer_pos.xyw, actual_pos, local_clip_rect);
     return vi;
 }
--- a/gfx/webrender/res/cs_blur.glsl
+++ b/gfx/webrender/res/cs_blur.glsl
@@ -18,54 +18,55 @@ flat varying int vBlurRadius;
 #define DIR_VERTICAL    1
 
 in int aBlurRenderTaskAddress;
 in int aBlurSourceTaskAddress;
 in int aBlurDirection;
 in vec4 aBlurRegion;
 
 void main(void) {
-    RenderTaskData task = fetch_render_task(aBlurRenderTaskAddress);
-    RenderTaskData src_task = fetch_render_task(aBlurSourceTaskAddress);
+    BlurTask blur_task = fetch_blur_task(aBlurRenderTaskAddress);
+    RenderTaskCommonData src_task = fetch_render_task_common_data(aBlurSourceTaskAddress);
 
-    vec4 src_rect = src_task.data0;
-    vec4 target_rect = task.data0;
+    RectWithSize src_rect = src_task.task_rect;
+    RectWithSize target_rect = blur_task.common_data.task_rect;
 
 #if defined WR_FEATURE_COLOR_TARGET
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0).xy);
 #else
     vec2 texture_size = vec2(textureSize(sCacheA8, 0).xy);
 #endif
-    vUv.z = src_task.data1.x;
-    vBlurRadius = int(3.0 * task.data1.y);
-    vSigma = task.data1.y;
+    vUv.z = src_task.texture_layer_index;
+    vBlurRadius = int(3.0 * blur_task.blur_radius);
+    vSigma = blur_task.blur_radius;
 
     switch (aBlurDirection) {
         case DIR_HORIZONTAL:
             vOffsetScale = vec2(1.0 / texture_size.x, 0.0);
             break;
         case DIR_VERTICAL:
             vOffsetScale = vec2(0.0, 1.0 / texture_size.y);
             break;
     }
 
-    vUvRect = vec4(src_rect.xy + vec2(0.5),
-                   src_rect.xy + src_rect.zw - vec2(0.5));
+    vUvRect = vec4(src_rect.p0 + vec2(0.5),
+                   src_rect.p0 + src_rect.size - vec2(0.5));
     vUvRect /= texture_size.xyxy;
 
     if (aBlurRegion.z > 0.0) {
         vec4 blur_region = aBlurRegion * uDevicePixelRatio;
-        src_rect = vec4(src_rect.xy + blur_region.xy, blur_region.zw);
-        target_rect = vec4(target_rect.xy + blur_region.xy, blur_region.zw);
+        src_rect = RectWithSize(src_rect.p0 + blur_region.xy, blur_region.zw);
+        target_rect.p0 = target_rect.p0 + blur_region.xy;
+        target_rect.size = blur_region.zw;
     }
 
-    vec2 pos = target_rect.xy + target_rect.zw * aPosition.xy;
+    vec2 pos = target_rect.p0 + target_rect.size * aPosition.xy;
 
-    vec2 uv0 = src_rect.xy / texture_size;
-    vec2 uv1 = (src_rect.xy + src_rect.zw) / texture_size;
+    vec2 uv0 = src_rect.p0 / texture_size;
+    vec2 uv1 = (src_rect.p0 + src_rect.size) / texture_size;
     vUv.xy = mix(uv0, uv1, aPosition.xy);
 
     gl_Position = uTransform * vec4(pos, 0.0, 1.0);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 
--- a/gfx/webrender/res/cs_clip_border.glsl
+++ b/gfx/webrender/res/cs_clip_border.glsl
@@ -123,18 +123,18 @@ void main(void) {
     vec4 world_pos = layer.transform * vec4(pos, 0.0, 1.0);
     world_pos.xyz /= world_pos.w;
 
     // Scale into device pixels.
     vec2 device_pos = world_pos.xy * uDevicePixelRatio;
 
     // Position vertex within the render task area.
     vec2 final_pos = device_pos -
-                     area.screen_origin_target_index.xy +
-                     area.task_bounds.xy;
+                     area.screen_origin +
+                     area.common_data.task_rect.p0;
 
     // Calculate the local space position for this vertex.
     vec4 layer_pos = get_layer_pos(world_pos.xy, layer);
     vPos = layer_pos.xyw;
 
     gl_Position = uTransform * vec4(final_pos, 0.0, 1.0);
 }
 #endif
--- a/gfx/webrender/res/cs_text_run.glsl
+++ b/gfx/webrender/res/cs_text_run.glsl
@@ -1,14 +1,12 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#define PRIMITIVE_HAS_PICTURE_TASK
-
 #include shared,prim_shared
 
 varying vec3 vUv;
 flat varying vec4 vColor;
 
 #ifdef WR_VERTEX_SHADER
 // Draw a text run to a cache target. These are always
 // drawn un-transformed. These are used for effects such
@@ -27,17 +25,17 @@ void main(void) {
 
     GlyphResource res = fetch_glyph_resource(resource_address);
 
     // Glyphs size is already in device-pixels.
     // The render task origin is in device-pixels. Offset that by
     // the glyph offset, relative to its primitive bounding rect.
     vec2 size = (res.uv_rect.zw - res.uv_rect.xy) * res.scale;
     vec2 local_pos = glyph.offset + vec2(res.offset.x, -res.offset.y) / uDevicePixelRatio;
-    vec2 origin = prim.task.target_rect.p0 +
+    vec2 origin = prim.task.common_data.task_rect.p0 +
                   uDevicePixelRatio * (local_pos - prim.task.content_origin);
     vec4 local_rect = vec4(origin, size);
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
 
     vec2 pos = mix(local_rect.xy,
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -201,121 +201,139 @@ Layer fetch_layer(int clip_node_id, int 
     local_clip_rect.xy -= scroll_node.reference_frame_relative_scroll_offset;
     local_clip_rect.xy -= scroll_node.scroll_offset;
 
     layer.local_clip_rect = RectWithSize(local_clip_rect.xy, local_clip_rect.zw);
 
     return layer;
 }
 
+struct RenderTaskCommonData {
+    RectWithSize task_rect;
+    float texture_layer_index;
+};
+
 struct RenderTaskData {
-    vec4 data0;
-    vec4 data1;
+    RenderTaskCommonData common_data;
+    vec3 data1;
     vec4 data2;
 };
 
-RenderTaskData fetch_render_task(int index) {
-    RenderTaskData task;
-
+RenderTaskData fetch_render_task_data(int index) {
     ivec2 uv = get_fetch_uv(index, VECS_PER_RENDER_TASK);
 
-    task.data0 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(0, 0));
-    task.data1 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(1, 0));
-    task.data2 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(2, 0));
+    vec4 texel0 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(0, 0));
+    vec4 texel1 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(1, 0));
+    vec4 texel2 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(2, 0));
+
+    RectWithSize task_rect = RectWithSize(
+        texel0.xy,
+        texel0.zw
+    );
+
+    RenderTaskCommonData common_data = RenderTaskCommonData(
+        task_rect,
+        texel1.x
+    );
+
+    RenderTaskData data = RenderTaskData(
+        common_data,
+        texel1.yzw,
+        texel2
+    );
 
-    return task;
+    return data;
+}
+
+RenderTaskCommonData fetch_render_task_common_data(int index) {
+    ivec2 uv = get_fetch_uv(index, VECS_PER_RENDER_TASK);
+
+    vec4 texel0 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(0, 0));
+    vec4 texel1 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(1, 0));
+
+    RectWithSize task_rect = RectWithSize(
+        texel0.xy,
+        texel0.zw
+    );
+
+    RenderTaskCommonData data = RenderTaskCommonData(
+        task_rect,
+        texel1.x
+    );
+
+    return data;
 }
 
 /*
  The dynamic picture that this brush exists on. Right now, it
  contains minimal information. In the future, it will describe
  the transform mode of primitives on this picture, among other things.
  */
 struct PictureTask {
-    RectWithSize target_rect;
-    float render_target_layer_index;
+    RenderTaskCommonData common_data;
     vec2 content_origin;
+    float rasterization_mode;
     vec4 color;
 };
 
 PictureTask fetch_picture_task(int address) {
-    RenderTaskData task_data = fetch_render_task(address);
-
-    RectWithSize target_rect = RectWithSize(
-        task_data.data0.xy,
-        task_data.data0.zw
-    );
+    RenderTaskData task_data = fetch_render_task_data(address);
 
     PictureTask task = PictureTask(
-        target_rect,
-        task_data.data1.x,
-        task_data.data1.yz,
+        task_data.common_data,
+        task_data.data1.xy,
+        task_data.data1.z,
         task_data.data2
     );
 
     return task;
 }
 
 struct BlurTask {
-    RectWithSize target_rect;
-    float render_target_layer_index;
+    RenderTaskCommonData common_data;
     float blur_radius;
     float scale_factor;
     vec4 color;
 };
 
 BlurTask fetch_blur_task(int address) {
-    RenderTaskData task_data = fetch_render_task(address);
+    RenderTaskData task_data = fetch_render_task_data(address);
 
-    return BlurTask(
-        RectWithSize(task_data.data0.xy, task_data.data0.zw),
+    BlurTask task = BlurTask(
+        task_data.common_data,
         task_data.data1.x,
         task_data.data1.y,
-        task_data.data1.z,
         task_data.data2
     );
-}
-
-struct AlphaBatchTask {
-    vec2 screen_space_origin;
-    vec2 render_target_origin;
-    vec2 size;
-    float render_target_layer_index;
-};
-
-AlphaBatchTask fetch_alpha_batch_task(int index) {
-    RenderTaskData data = fetch_render_task(index);
-
-    AlphaBatchTask task;
-    task.render_target_origin = data.data0.xy;
-    task.size = data.data0.zw;
-    task.screen_space_origin = data.data1.xy;
-    task.render_target_layer_index = data.data1.z;
 
     return task;
 }
 
 struct ClipArea {
-    vec4 task_bounds;
-    vec4 screen_origin_target_index;
+    RenderTaskCommonData common_data;
+    vec2 screen_origin;
     vec4 inner_rect;
 };
 
 ClipArea fetch_clip_area(int index) {
     ClipArea area;
 
     if (index == 0x7FFFFFFF) { //special sentinel task index
-        area.task_bounds = vec4(0.0, 0.0, 0.0, 0.0);
-        area.screen_origin_target_index = vec4(0.0, 0.0, 0.0, 0.0);
+        area.common_data = RenderTaskCommonData(
+            RectWithSize(vec2(0.0), vec2(0.0)),
+            0.0
+        );
+        area.screen_origin = vec2(0.0);
         area.inner_rect = vec4(0.0);
     } else {
-        RenderTaskData task = fetch_render_task(index);
-        area.task_bounds = task.data0;
-        area.screen_origin_target_index = task.data1;
-        area.inner_rect = task.data2;
+        RenderTaskData task_data = fetch_render_task_data(index);
+
+        area.common_data = task_data.common_data;
+        area.screen_origin = task_data.data1.xy;
+        area.inner_rect = task_data.data2;
     }
 
     return area;
 }
 
 struct Gradient {
     vec4 start_end_point;
     vec4 tile_size_repeat;
@@ -440,21 +458,17 @@ CompositeInstance fetch_composite_instan
     ci.user_data3 = aData1.w;
 
     return ci;
 }
 
 struct Primitive {
     Layer layer;
     ClipArea clip_area;
-#ifdef PRIMITIVE_HAS_PICTURE_TASK
     PictureTask task;
-#else
-    AlphaBatchTask task;
-#endif
     RectWithSize local_rect;
     RectWithSize local_clip_rect;
     int specific_prim_address;
     int user_data0;
     int user_data1;
     int user_data2;
     float z;
 };
@@ -472,21 +486,17 @@ PrimitiveGeometry fetch_primitive_geomet
 
 Primitive load_primitive() {
     PrimitiveInstance pi = fetch_prim_instance();
 
     Primitive prim;
 
     prim.layer = fetch_layer(pi.clip_node_id, pi.scroll_node_id);
     prim.clip_area = fetch_clip_area(pi.clip_task_index);
-#ifdef PRIMITIVE_HAS_PICTURE_TASK
     prim.task = fetch_picture_task(pi.render_task_index);
-#else
-    prim.task = fetch_alpha_batch_task(pi.render_task_index);
-#endif
 
     PrimitiveGeometry geom = fetch_primitive_geometry(pi.prim_address);
     prim.local_rect = geom.local_rect;
     prim.local_clip_rect = geom.local_clip_rect;
 
     prim.specific_prim_address = pi.specific_prim_address;
     prim.user_data0 = pi.user_data0;
     prim.user_data1 = pi.user_data1;
@@ -572,17 +582,17 @@ struct VertexInfo {
     vec2 local_pos;
     vec2 screen_pos;
 };
 
 VertexInfo write_vertex(RectWithSize instance_rect,
                         RectWithSize local_clip_rect,
                         float z,
                         Layer layer,
-                        AlphaBatchTask task,
+                        PictureTask task,
                         RectWithSize snap_rect) {
 
     // Select the corner of the local rect that we are processing.
     vec2 local_pos = instance_rect.p0 + instance_rect.size * aPosition.xy;
 
     // Clamp to the two local clip rects.
     vec2 clamped_local_pos = clamp_rect(clamp_rect(local_pos, local_clip_rect), layer.local_clip_rect);
 
@@ -592,18 +602,18 @@ VertexInfo write_vertex(RectWithSize ins
     // Transform the current vertex to the world cpace.
     vec4 world_pos = layer.transform * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
 
     // Apply offsets for the render task to get correct screen location.
     vec2 final_pos = device_pos + snap_offset -
-                     task.screen_space_origin +
-                     task.render_target_origin;
+                     task.content_origin +
+                     task.common_data.task_rect.p0;
 
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
     VertexInfo vi = VertexInfo(clamped_local_pos, device_pos);
     return vi;
 }
 
 #ifdef WR_FEATURE_TRANSFORM
@@ -632,17 +642,17 @@ vec2 intersect_lines(vec2 p0, vec2 p1, v
     return vec2(nx / d, ny / d);
 }
 
 TransformVertexInfo write_transform_vertex(RectWithSize instance_rect,
                                            RectWithSize local_clip_rect,
                                            vec4 clip_edge_mask,
                                            float z,
                                            Layer layer,
-                                           AlphaBatchTask task) {
+                                           PictureTask task) {
     RectWithEndpoint local_rect = to_rect_with_endpoint(instance_rect);
     RectWithSize clip_rect;
     clip_rect.p0 = clamp_rect(local_clip_rect.p0, layer.local_clip_rect);
     clip_rect.size = clamp_rect(local_clip_rect.p0 + local_clip_rect.size, layer.local_clip_rect) - clip_rect.p0;
 
     vec2 current_local_pos, prev_local_pos, next_local_pos;
 
     // Clamp to the two local clip rects.
@@ -702,18 +712,18 @@ TransformVertexInfo write_transform_vert
                                       adjusted_prev_p1,
                                       adjusted_next_p0,
                                       adjusted_next_p1);
 
     vec4 layer_pos = get_layer_pos(device_pos / uDevicePixelRatio, layer);
 
     // Apply offsets for the render task to get correct screen location.
     vec2 final_pos = device_pos - //Note: `snap_rect` is not used
-                     task.screen_space_origin +
-                     task.render_target_origin;
+                     task.content_origin +
+                     task.common_data.task_rect.p0;
 
 
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
     vLocalBounds = mix(
         vec4(clip_rect.p0, clip_rect.p0 + clip_rect.size),
         vec4(local_rect.p0, local_rect.p1),
         clip_edge_mask
@@ -792,19 +802,24 @@ struct Image {
 };
 
 Image fetch_image(int address) {
     vec4 data[2] = fetch_from_resource_cache_2(address);
     return Image(data[0], data[1]);
 }
 
 void write_clip(vec2 global_pos, ClipArea area) {
-    vec2 uv = global_pos + area.task_bounds.xy - area.screen_origin_target_index.xy;
-    vClipMaskUvBounds = area.task_bounds;
-    vClipMaskUv = vec3(uv, area.screen_origin_target_index.z);
+    vec2 uv = global_pos +
+              area.common_data.task_rect.p0 -
+              area.screen_origin;
+    vClipMaskUvBounds = vec4(
+        area.common_data.task_rect.p0,
+        area.common_data.task_rect.p0 + area.common_data.task_rect.size
+    );
+    vClipMaskUv = vec3(uv, area.common_data.texture_layer_index);
 }
 #endif //WR_VERTEX_SHADER
 
 #ifdef WR_FRAGMENT_SHADER
 
 /// Find the appropriate half range to apply the AA smoothstep over.
 /// This range represents a coefficient to go from one CSS pixel to half a device pixel.
 float compute_aa_range(vec2 position) {
--- a/gfx/webrender/res/ps_blend.glsl
+++ b/gfx/webrender/res/ps_blend.glsl
@@ -8,33 +8,33 @@ varying vec3 vUv;
 flat varying vec4 vUvBounds;
 flat varying float vAmount;
 flat varying int vOp;
 flat varying mat4 vColorMat;
 
 #ifdef WR_VERTEX_SHADER
 void main(void) {
     CompositeInstance ci = fetch_composite_instance();
-    AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
-    AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
+    PictureTask dest_task = fetch_picture_task(ci.render_task_index);
+    PictureTask src_task = fetch_picture_task(ci.src_task_index);
 
-    vec2 dest_origin = dest_task.render_target_origin -
-                       dest_task.screen_space_origin +
-                       src_task.screen_space_origin;
+    vec2 dest_origin = dest_task.common_data.task_rect.p0 -
+                       dest_task.content_origin +
+                       src_task.content_origin;
 
     vec2 local_pos = mix(dest_origin,
-                         dest_origin + src_task.size,
+                         dest_origin + src_task.common_data.task_rect.size,
                          aPosition.xy);
 
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
-    vec2 st0 = src_task.render_target_origin;
-    vec2 st1 = src_task.render_target_origin + src_task.size;
+    vec2 st0 = src_task.common_data.task_rect.p0;
+    vec2 st1 = src_task.common_data.task_rect.p0 + src_task.common_data.task_rect.size;
 
-    vec2 uv = src_task.render_target_origin + aPosition.xy * src_task.size;
-    vUv = vec3(uv / texture_size, src_task.render_target_layer_index);
+    vec2 uv = src_task.common_data.task_rect.p0 + aPosition.xy * src_task.common_data.task_rect.size;
+    vUv = vec3(uv / texture_size, src_task.common_data.texture_layer_index);
     vUvBounds = vec4(st0 + 0.5, st1 - 0.5) / texture_size.xyxy;
 
     vOp = ci.user_data0;
     vAmount = float(ci.user_data1) / 65535.0;
 
     float lumR = 0.2126;
     float lumG = 0.7152;
     float lumB = 0.0722;
--- a/gfx/webrender/res/ps_composite.glsl
+++ b/gfx/webrender/res/ps_composite.glsl
@@ -4,56 +4,39 @@
 
 #include shared,prim_shared
 
 varying vec3 vUv0;
 varying vec3 vUv1;
 flat varying int vOp;
 
 #ifdef WR_VERTEX_SHADER
-struct ReadbackTask {
-    vec2 render_target_origin;
-    vec2 size;
-    float render_target_layer_index;
-};
-
-ReadbackTask fetch_readback_task(int index) {
-    RenderTaskData data = fetch_render_task(index);
-
-    ReadbackTask task;
-    task.render_target_origin = data.data0.xy;
-    task.size = data.data0.zw;
-    task.render_target_layer_index = data.data1.x;
-
-    return task;
-}
-
 void main(void) {
     CompositeInstance ci = fetch_composite_instance();
-    AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
-    ReadbackTask backdrop_task = fetch_readback_task(ci.backdrop_task_index);
-    AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
+    PictureTask dest_task = fetch_picture_task(ci.render_task_index);
+    RenderTaskCommonData backdrop_task = fetch_render_task_common_data(ci.backdrop_task_index);
+    PictureTask src_task = fetch_picture_task(ci.src_task_index);
 
-    vec2 dest_origin = dest_task.render_target_origin -
-                       dest_task.screen_space_origin +
-                       src_task.screen_space_origin;
+    vec2 dest_origin = dest_task.common_data.task_rect.p0 -
+                       dest_task.content_origin +
+                       src_task.content_origin;
 
     vec2 local_pos = mix(dest_origin,
-                         dest_origin + src_task.size,
+                         dest_origin + src_task.common_data.task_rect.size,
                          aPosition.xy);
 
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
 
-    vec2 st0 = backdrop_task.render_target_origin / texture_size;
-    vec2 st1 = (backdrop_task.render_target_origin + backdrop_task.size) / texture_size;
-    vUv0 = vec3(mix(st0, st1, aPosition.xy), backdrop_task.render_target_layer_index);
+    vec2 st0 = backdrop_task.task_rect.p0 / texture_size;
+    vec2 st1 = (backdrop_task.task_rect.p0 + backdrop_task.task_rect.size) / texture_size;
+    vUv0 = vec3(mix(st0, st1, aPosition.xy), backdrop_task.texture_layer_index);
 
-    st0 = src_task.render_target_origin / texture_size;
-    st1 = (src_task.render_target_origin + src_task.size) / texture_size;
-    vUv1 = vec3(mix(st0, st1, aPosition.xy), src_task.render_target_layer_index);
+    st0 = src_task.common_data.task_rect.p0 / texture_size;
+    st1 = (src_task.common_data.task_rect.p0 + src_task.common_data.task_rect.size) / texture_size;
+    vUv1 = vec3(mix(st0, st1, aPosition.xy), src_task.common_data.texture_layer_index);
 
     vOp = ci.user_data0;
 
     gl_Position = uTransform * vec4(local_pos, ci.z, 1.0);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
--- a/gfx/webrender/res/ps_hardware_composite.glsl
+++ b/gfx/webrender/res/ps_hardware_composite.glsl
@@ -5,31 +5,31 @@
 #include shared,prim_shared
 
 varying vec3 vUv;
 flat varying vec4 vUvBounds;
 
 #ifdef WR_VERTEX_SHADER
 void main(void) {
     CompositeInstance ci = fetch_composite_instance();
-    AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
-    AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
+    PictureTask dest_task = fetch_picture_task(ci.render_task_index);
+    PictureTask src_task = fetch_picture_task(ci.src_task_index);
 
-    vec2 dest_origin = dest_task.render_target_origin -
-                       dest_task.screen_space_origin +
+    vec2 dest_origin = dest_task.common_data.task_rect.p0 -
+                       dest_task.content_origin +
                        vec2(ci.user_data0, ci.user_data1);
 
     vec2 local_pos = mix(dest_origin,
                          dest_origin + vec2(ci.user_data2, ci.user_data3),
                          aPosition.xy);
 
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
-    vec2 st0 = src_task.render_target_origin;
-    vec2 st1 = src_task.render_target_origin + src_task.size;
-    vUv = vec3(mix(st0, st1, aPosition.xy) / texture_size, src_task.render_target_layer_index);
+    vec2 st0 = src_task.common_data.task_rect.p0;
+    vec2 st1 = src_task.common_data.task_rect.p0 + src_task.common_data.task_rect.size;
+    vUv = vec3(mix(st0, st1, aPosition.xy) / texture_size, src_task.common_data.texture_layer_index);
     vUvBounds = vec4(st0 + 0.5, st1 - 0.5) / texture_size.xyxy;
 
     gl_Position = uTransform * vec4(local_pos, ci.z, 1.0);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
--- a/gfx/webrender/res/ps_line.glsl
+++ b/gfx/webrender/res/ps_line.glsl
@@ -1,16 +1,12 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#ifdef WR_FEATURE_CACHE
-    #define PRIMITIVE_HAS_PICTURE_TASK
-#endif
-
 #include shared,prim_shared
 
 varying vec4 vColor;
 flat varying int vStyle;
 flat varying float vAxisSelect;
 flat varying vec4 vParams;
 flat varying vec2 vLocalOrigin;
 
@@ -94,17 +90,17 @@ void main(void) {
                            slope_length,
                            flat_length,
                            size.y);
             break;
         }
     }
 
 #ifdef WR_FEATURE_CACHE
-    vec2 device_origin = prim.task.target_rect.p0 +
+    vec2 device_origin = prim.task.common_data.task_rect.p0 +
                          uDevicePixelRatio * (prim.local_rect.p0 - prim.task.content_origin);
     vec2 device_size = uDevicePixelRatio * prim.local_rect.size;
 
     vec2 device_pos = mix(device_origin,
                           device_origin + device_size,
                           aPosition.xy);
 
     vColor = prim.task.color;
--- a/gfx/webrender/res/ps_split_composite.glsl
+++ b/gfx/webrender/res/ps_split_composite.glsl
@@ -32,35 +32,35 @@ vec3 bilerp(vec3 a, vec3 b, vec3 c, vec3
     vec3 x = mix(a, b, t);
     vec3 y = mix(c, d, t);
     return mix(x, y, s);
 }
 
 void main(void) {
     CompositeInstance ci = fetch_composite_instance();
     SplitGeometry geometry = fetch_split_geometry(ci.user_data0);
-    AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
-    AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
+    PictureTask src_task = fetch_picture_task(ci.src_task_index);
+    PictureTask dest_task = fetch_picture_task(ci.render_task_index);
 
-    vec2 dest_origin = dest_task.render_target_origin -
-                       dest_task.screen_space_origin;
+    vec2 dest_origin = dest_task.common_data.task_rect.p0 -
+                       dest_task.content_origin;
 
     vec3 world_pos = bilerp(geometry.points[0], geometry.points[1],
                             geometry.points[3], geometry.points[2],
                             aPosition.y, aPosition.x);
     vec4 final_pos = vec4((world_pos.xy + dest_origin) * uDevicePixelRatio, ci.z, 1.0);
 
     gl_Position = uTransform * final_pos;
 
-    vec2 uv_origin = src_task.render_target_origin;
-    vec2 uv_pos = uv_origin + world_pos.xy - src_task.screen_space_origin;
+    vec2 uv_origin = src_task.common_data.task_rect.p0;
+    vec2 uv_pos = uv_origin + world_pos.xy - src_task.content_origin;
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
-    vUv = vec3(uv_pos / texture_size, src_task.render_target_layer_index);
-    vUvTaskBounds = vec4(uv_origin, uv_origin + src_task.size) / texture_size.xyxy;
-    vUvSampleBounds = vec4(uv_origin + 0.5, uv_origin + src_task.size - 0.5) / texture_size.xyxy;
+    vUv = vec3(uv_pos / texture_size, src_task.common_data.texture_layer_index);
+    vUvTaskBounds = vec4(uv_origin, uv_origin + src_task.common_data.task_rect.size) / texture_size.xyxy;
+    vUvSampleBounds = vec4(uv_origin + 0.5, uv_origin + src_task.common_data.task_rect.size - 0.5) / texture_size.xyxy;
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     bvec4 inside = lessThanEqual(vec4(vUvTaskBounds.xy, vUv.xy),
                                  vec4(vUv.xy, vUvTaskBounds.zw));
     if (all(inside)) {
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -3,16 +3,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use super::shader_source;
 use api::{ColorF, ImageFormat};
 use api::{DeviceIntRect, DeviceUintSize};
 use euclid::Transform3D;
 use gleam::gl;
 use internal_types::RenderTargetMode;
+use internal_types::FastHashMap;
 use std::fs::File;
 use std::io::Read;
 use std::iter::repeat;
 use std::mem;
 use std::ops::Add;
 use std::path::PathBuf;
 use std::ptr;
 use std::rc::Rc;
@@ -480,16 +481,71 @@ pub struct FBOId(gl::GLuint);
 pub struct RBOId(gl::GLuint);
 
 #[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
 pub struct VBOId(gl::GLuint);
 
 #[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
 struct IBOId(gl::GLuint);
 
+#[derive(PartialEq, Eq, Hash, Debug)]
+struct ProgramSources { // hashable key type of `ProgramCache::binaries`
+    renderer_name: String, // `Device` fills this from `gl.get_string(gl::RENDERER)`
+    vs_source: String, // vertex shader source text
+    fs_source: String, // fragment shader source text
+}
+
+impl ProgramSources {
+    fn new(renderer_name: String, vs_source: String, fs_source: String) -> Self {
+        ProgramSources { // takes ownership of all three strings; nothing is copied here
+            renderer_name,
+            vs_source,
+            fs_source,
+        }
+    }
+}
+
+struct ProgramBinary { // value type of `ProgramCache::binaries`
+    binary: Vec<u8>, // bytes as returned by `gl.get_program_binary`
+    format: gl::GLenum, // format value returned with those bytes; handed back to `gl.program_binary`
+}
+
+impl ProgramBinary {
+    fn new(binary: Vec<u8>, format: gl::GLenum) -> Self {
+        ProgramBinary {
+            binary,
+            format
+        }
+    }
+}
+
+pub struct ProgramCache {
+    binaries: FastHashMap<ProgramSources, ProgramBinary>, // keyed by renderer name + shader sources
+}
+
+impl ProgramCache {
+    pub fn new() -> Self { // starts empty; entries are added through `insert`
+        ProgramCache {
+            binaries: FastHashMap::default(),
+        }
+    }
+
+    fn get(&self, sources: &ProgramSources) -> Option<&ProgramBinary> {
+        self.binaries.get(sources)
+    }
+
+    fn contains(&self, sources: &ProgramSources) -> bool {
+        self.binaries.contains_key(sources)
+    }
+
+    fn insert(&mut self, sources: ProgramSources, binary: ProgramBinary) {
+        self.binaries.insert(sources, binary);
+    }
+}
+
 #[derive(Debug, Copy, Clone)]
 pub enum VertexUsageHint {
     Static,
     Dynamic,
     Stream,
 }
 
 impl VertexUsageHint {
@@ -516,17 +572,17 @@ pub struct Capabilities {
 }
 
 #[derive(Clone, Debug)]
 pub enum ShaderError {
     Compilation(String, String), // name, error mssage
     Link(String, String),        // name, error message
 }
 
-pub struct Device {
+pub struct Device<'a> {
     gl: Rc<gl::Gl>,
     // device state
     bound_textures: [gl::GLuint; 16],
     bound_program: gl::GLuint,
     bound_vao: gl::GLuint,
     bound_pbo: gl::GLuint,
     bound_read_fbo: FBOId,
     bound_draw_fbo: FBOId,
@@ -539,29 +595,33 @@ pub struct Device {
 
     // debug
     inside_frame: bool,
 
     // resources
     resource_override_path: Option<PathBuf>,
 
     max_texture_size: u32,
+    renderer_name: String,
+    cached_programs: Option<&'a mut ProgramCache>,
 
     // Frame counter. This is used to map between CPU
     // frames and GPU frames.
     frame_id: FrameId,
 }
 
-impl Device {
+impl<'a> Device<'a> {
     pub fn new(
         gl: Rc<gl::Gl>,
         resource_override_path: Option<PathBuf>,
         _file_changed_handler: Box<FileWatcherHandler>,
+        cached_programs: Option<&mut ProgramCache>,
     ) -> Device {
         let max_texture_size = gl.get_integer_v(gl::MAX_TEXTURE_SIZE) as u32;
+        let renderer_name = gl.get_string(gl::RENDERER);
 
         Device {
             gl,
             resource_override_path,
             // This is initialized to 1 by default, but it is set
             // every frame by the call to begin_frame().
             device_pixel_ratio: 1.0,
             inside_frame: false,
@@ -575,28 +635,34 @@ impl Device {
             bound_vao: 0,
             bound_pbo: 0,
             bound_read_fbo: FBOId(0),
             bound_draw_fbo: FBOId(0),
             default_read_fbo: 0,
             default_draw_fbo: 0,
 
             max_texture_size,
+            renderer_name,
+            cached_programs,
             frame_id: FrameId(0),
         }
     }
 
     pub fn gl(&self) -> &gl::Gl {
         &*self.gl
     }
 
     pub fn rc_gl(&self) -> &Rc<gl::Gl> {
         &self.gl
     }
 
+    pub fn update_program_cache(&mut self, cached_programs: &'a mut ProgramCache) {
+        self.cached_programs = Some(cached_programs); // overwrites whatever was passed to `new` (cache or None)
+    }
+
     pub fn max_texture_size(&self) -> u32 {
         self.max_texture_size
     }
 
     pub fn get_capabilities(&self) -> &Capabilities {
         &self.capabilities
     }
 
@@ -607,17 +673,17 @@ impl Device {
         self.bound_read_fbo = FBOId(0);
         self.bound_draw_fbo = FBOId(0);
     }
 
     pub fn compile_shader(
         gl: &gl::Gl,
         name: &str,
         shader_type: gl::GLenum,
-        source: String,
+        source: &str, // borrowed so the caller can keep the text (it is reused as the cache key)
     ) -> Result<gl::GLuint, ShaderError> {
         debug!("compile {:?}", name);
         let id = gl.create_shader(shader_type);
         gl.shader_source(id, &[source.as_bytes()]);
         gl.compile_shader(id);
         let log = gl.get_shader_info_log(id);
         if gl.get_shader_iv(id, gl::COMPILE_STATUS) == (0 as gl::GLint) {
             println!("Failed to compile shader: {:?}\n{}", name, log);
@@ -1081,69 +1147,119 @@ impl Device {
 
         let (vs_source, fs_source) = build_shader_strings(
             gl_version_string,
             features,
             base_filename,
             &self.resource_override_path,
         );
 
-        // Compile the vertex shader
-        let vs_id =
-            match Device::compile_shader(&*self.gl, base_filename, gl::VERTEX_SHADER, vs_source) {
-                Ok(vs_id) => vs_id,
-                Err(err) => return Err(err),
-            };
+        let sources = ProgramSources::new(self.renderer_name.clone(), vs_source, fs_source);
+
+        // Create program
+        let pid = self.gl.create_program();
+
+        let mut loaded = false;
 
-        // Compiler the fragment shader
-        let fs_id =
-            match Device::compile_shader(&*self.gl, base_filename, gl::FRAGMENT_SHADER, fs_source) {
-                Ok(fs_id) => fs_id,
-                Err(err) => {
-                    self.gl.delete_shader(vs_id);
-                    return Err(err);
+        // Fast path: restore the program from a cached binary when one exists for
+        // exactly these sources on this renderer.
+        if let Some(ref cached_programs) = self.cached_programs {
+            if let Some(binary) = cached_programs.get(&sources)
+            {
+                self.gl.program_binary(pid, binary.format, &binary.binary);
+
+                if self.gl.get_program_iv(pid, gl::LINK_STATUS) == (0 as gl::GLint) {
+                    let error_log = self.gl.get_program_info_log(pid);
+                    println!(
+                        "Failed to load a program object with a program binary: {:?} renderer {}\n{}",
+                        base_filename,
+                        self.renderer_name,
+                        error_log
+                    );
+                } else {
+                    loaded = true;
                 }
-            };
-
-        // Create program and attach shaders
-        let pid = self.gl.create_program();
-        self.gl.attach_shader(pid, vs_id);
-        self.gl.attach_shader(pid, fs_id);
-
-        // Bind vertex attributes
-        for (i, attr) in descriptor
-            .vertex_attributes
-            .iter()
-            .chain(descriptor.instance_attributes.iter())
-            .enumerate()
-        {
-            self.gl
-                .bind_attrib_location(pid, i as gl::GLuint, attr.name);
+            }
         }
 
-        // Link!
-        self.gl.link_program(pid);
+        if !loaded {
+            // Compile the vertex shader
+            let vs_id =
+                match Device::compile_shader(&*self.gl, base_filename, gl::VERTEX_SHADER, &sources.vs_source) {
+                    Ok(vs_id) => vs_id,
+                    Err(err) => {
+                        // `pid` already exists; free it so this early return does not leak it.
+                        self.gl.delete_program(pid);
+                        return Err(err);
+                    }
+                };
+
+            // Compile the fragment shader
+            let fs_id =
+                match Device::compile_shader(&*self.gl, base_filename, gl::FRAGMENT_SHADER, &sources.fs_source) {
+                    Ok(fs_id) => fs_id,
+                    Err(err) => {
+                        self.gl.delete_shader(vs_id);
+                        self.gl.delete_program(pid);
+                        return Err(err);
+                    }
+                };
+
+            // Attach shaders
+            self.gl.attach_shader(pid, vs_id);
+            self.gl.attach_shader(pid, fs_id);
+
+            // Bind vertex attributes
+            for (i, attr) in descriptor
+                .vertex_attributes
+                .iter()
+                .chain(descriptor.instance_attributes.iter())
+                .enumerate()
+            {
+                self.gl
+                    .bind_attrib_location(pid, i as gl::GLuint, attr.name);
+            }
 
-        // GL recommends detaching and deleting shaders once the link
-        // is complete (whether successful or not). This allows the driver
-        // to free any memory associated with the parsing and compilation.
-        self.gl.detach_shader(pid, vs_id);
-        self.gl.detach_shader(pid, fs_id);
-        self.gl.delete_shader(vs_id);
-        self.gl.delete_shader(fs_id);
+            if self.cached_programs.is_some() {
+                self.gl.program_parameter_i(pid, gl::PROGRAM_BINARY_RETRIEVABLE_HINT, gl::TRUE as gl::GLint);
+            }
+
+            // Link!
+            self.gl.link_program(pid);
+
+            // GL recommends detaching and deleting shaders once the link
+            // is complete (whether successful or not). This allows the driver
+            // to free any memory associated with the parsing and compilation.
+            self.gl.detach_shader(pid, vs_id);
+            self.gl.detach_shader(pid, fs_id);
+            self.gl.delete_shader(vs_id);
+            self.gl.delete_shader(fs_id);
 
-        if self.gl.get_program_iv(pid, gl::LINK_STATUS) == (0 as gl::GLint) {
-            let error_log = self.gl.get_program_info_log(pid);
-            println!(
-                "Failed to link shader program: {:?}\n{}",
-                base_filename,
-                error_log
-            );
-            self.gl.delete_program(pid);
-            return Err(ShaderError::Link(base_filename.to_string(), error_log));
+            if self.gl.get_program_iv(pid, gl::LINK_STATUS) == (0 as gl::GLint) {
+                let error_log = self.gl.get_program_info_log(pid);
+                println!(
+                    "Failed to link shader program: {:?}\n{}",
+                    base_filename,
+                    error_log
+                );
+                self.gl.delete_program(pid);
+                return Err(ShaderError::Link(base_filename.to_string(), error_log));
+            }
+        }
+
+        if let Some(ref mut cached_programs) = self.cached_programs {
+            // Refresh the cache whenever the program was linked from source. `!loaded` also
+            // covers a cached binary the driver rejected above: that entry is stale and has
+            // to be replaced, which a `contains` check alone would never do.
+            if !loaded || !cached_programs.contains(&sources) {
+                let (buffer, format) = self.gl.get_program_binary(pid);
+                if !buffer.is_empty() {
+                    cached_programs.insert(sources, ProgramBinary::new(buffer, format));
+                }
+            }
         }
 
         let u_transform = self.gl.get_uniform_location(pid, "uTransform");
         let u_device_pixel_ratio = self.gl.get_uniform_location(pid, "uDevicePixelRatio");
         let u_mode = self.gl.get_uniform_location(pid, "uMode");
 
         let program = Program {
             id: pid,
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -6,41 +6,40 @@ use api::{BorderDetails, BorderDisplayIt
 use api::{ClipAndScrollInfo, ClipId, ColorF, PropertyBinding};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceUintRect, DeviceUintSize};
 use api::{ExtendMode, FontRenderMode, LayoutTransform};
 use api::{GlyphInstance, GlyphOptions, GradientStop, HitTestFlags, HitTestItem, HitTestResult};
 use api::{ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerPrimitiveInfo, LayerRect};
 use api::{LayerSize, LayerToScrollTransform, LayerVector2D, LayoutVector2D, LineOrientation};
 use api::{LineStyle, LocalClip, PipelineId, RepeatMode};
 use api::{ScrollSensitivity, Shadow, TileOffset, TransformStyle};
-use api::{WorldPoint, YuvColorSpace, YuvData};
+use api::{PremultipliedColorF, WorldPoint, YuvColorSpace, YuvData};
 use app_units::Au;
 use border::ImageBorderSegment;
 use clip::{ClipRegion, ClipSource, ClipSources, ClipStore, Contains, MAX_CLIP};
 use clip_scroll_node::{ClipScrollNode, NodeType};
 use clip_scroll_tree::ClipScrollTree;
 use euclid::{SideOffsets2D, vec2};
 use frame::FrameId;
 use glyph_rasterizer::FontInstance;
 use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet};
-use picture::{PictureCompositeMode, PictureKind, PicturePrimitive};
+use picture::{PictureCompositeMode, PictureKind, PicturePrimitive, RasterizationSpace};
 use prim_store::{TexelRect, YuvImagePrimitiveCpu};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, LinePrimitive, PrimitiveKind};
 use prim_store::{PrimitiveContainer, PrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
 use prim_store::{RectangleContent, RectanglePrimitive, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
-use render_task::{RenderTask, RenderTaskLocation};
-use render_task::RenderTaskTree;
+use render_task::{ClearMode, RenderTask, RenderTaskTree};
 use resource_cache::ResourceCache;
 use scene::{ScenePipeline, SceneProperties};
 use std::{mem, usize, f32, i32};
 use tiling::{CompositeOps, Frame};
-use tiling::{RenderPass};
+use tiling::{RenderPass, RenderTargetKind};
 use tiling::{RenderTargetContext, ScrollbarPrimitive};
 use util::{self, pack_as_float, RectHelpers, recycle_vec};
 
 #[derive(Debug)]
 pub struct ScrollbarInfo(pub ClipId, pub LayerRect);
 
 /// Properties of a stacking context that are maintained
 /// during creation of the scene. These structures are
@@ -1574,21 +1573,25 @@ impl FrameBuilder {
             profile_counters,
             None,
             scene_properties,
         );
 
         let pic = &mut self.prim_store.cpu_pictures[0];
         pic.runs = prim_run_cmds;
 
-        let root_render_task = RenderTask::new_alpha_batch(
-            DeviceIntPoint::zero(),
-            RenderTaskLocation::Fixed,
+        let root_render_task = RenderTask::new_picture(
+            None,
             PrimitiveIndex(0),
-            None,
+            RenderTargetKind::Color,
+            0.0,
+            0.0,
+            PremultipliedColorF::TRANSPARENT,
+            ClearMode::Transparent,
+            RasterizationSpace::Screen,
             child_tasks,
         );
 
         pic.render_task_id = Some(render_tasks.add(root_render_task));
     }
 
     fn update_scroll_bars(&mut self, clip_scroll_tree: &ClipScrollTree, gpu_cache: &mut GpuCache) {
         static SCROLLBAR_PADDING: f32 = 8.0;
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -134,21 +134,16 @@ impl From<CompositePrimitiveInstance> fo
                 instance.data1,
                 instance.data2,
                 instance.data3,
             ],
         }
     }
 }
 
-// Whether this brush is being drawn on a Picture
-// task (new) or an alpha batch task (legacy).
-// Can be removed once everything uses pictures.
-pub const BRUSH_FLAG_USES_PICTURE: i32 = (1 << 0);
-
 // TODO(gw): While we are comverting things over, we
 //           need to have the instance be the same
 //           size as an old PrimitiveInstance. In the
 //           future, we can compress this vertex
 //           format a lot - e.g. z, render task
 //           addresses etc can reasonably become
 //           a u16 type.
 #[repr(C)]
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -145,15 +145,15 @@ extern crate serde_derive;
 #[cfg(feature = "debugger")]
 extern crate serde_json;
 extern crate time;
 #[cfg(feature = "debugger")]
 extern crate ws;
 pub extern crate webrender_api;
 
 #[doc(hidden)]
-pub use device::build_shader_strings;
+pub use device::{build_shader_strings, ProgramCache};
 pub use renderer::{CpuProfile, DebugFlags, GpuProfile, OutputImageHandler, RendererKind};
 pub use renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource};
 pub use renderer::{GraphicsApi, GraphicsApiInfo, ReadPixelsFormat, Renderer, RendererOptions};
 pub use renderer::{ThreadListener};
 pub use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 pub use webrender_api as api;
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -32,16 +32,25 @@ pub enum PictureCompositeMode {
     MixBlend(MixBlendMode),
     /// Apply a CSS filter.
     Filter(FilterOp),
     /// Draw to intermediate surface, copy straight across. This
     /// is used for CSS isolation, and plane splitting.
     Blit,
 }
 
+/// Configure whether the primitives on this picture
+/// should be rasterized in screen space or local space.
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub enum RasterizationSpace {
+    Local = 0, // selected by `PicturePrimitive::new_text_shadow`
+    Screen = 1, // selected by `PicturePrimitive::new_image`
+}
+
 #[derive(Debug)]
 pub enum PictureKind {
     TextShadow {
         offset: LayerVector2D,
         color: ColorF,
         blur_radius: f32,
         content_rect: LayerRect,
     },
@@ -90,34 +99,35 @@ pub struct PicturePrimitive {
     // The pipeline that the primitives on this picture belong to.
     pub pipeline_id: PipelineId,
 
     // If true, apply visibility culling to primitives on this
     // picture. For text shadows and box shadows, we want to
     // unconditionally draw them.
     pub cull_children: bool,
 
-    // TODO(gw): Add a mode that specifies if this
-    //           picture should be rasterized in
-    //           screen-space or local-space.
+    /// Configure whether the primitives on this picture
+    /// should be rasterized in screen space or local space.
+    pub rasterization_kind: RasterizationSpace,
 }
 
 impl PicturePrimitive {
     pub fn new_text_shadow(shadow: Shadow, pipeline_id: PipelineId) -> Self {
         PicturePrimitive {
             runs: Vec::new(),
             render_task_id: None,
             kind: PictureKind::TextShadow {
                 offset: shadow.offset,
                 color: shadow.color,
                 blur_radius: shadow.blur_radius,
                 content_rect: LayerRect::zero(),
             },
             pipeline_id,
             cull_children: false,
+            rasterization_kind: RasterizationSpace::Local,
         }
     }
 
     pub fn resolve_scene_properties(&mut self, properties: &SceneProperties) -> bool {
         match self.kind {
             PictureKind::Image { ref mut composite_mode, .. } => {
                 match composite_mode {
                     &mut Some(PictureCompositeMode::Filter(ref mut filter)) => {
@@ -153,16 +163,17 @@ impl PicturePrimitive {
                 color,
                 blur_regions,
                 clip_mode,
                 radii_kind,
                 content_rect: LayerRect::zero(),
             },
             pipeline_id,
             cull_children: false,
+            rasterization_kind: RasterizationSpace::Local,
         }
     }
 
     pub fn new_image(
         composite_mode: Option<PictureCompositeMode>,
         is_in_3d_context: bool,
         pipeline_id: PipelineId,
         reference_frame_id: ClipId,
@@ -176,16 +187,19 @@ impl PicturePrimitive {
                 composite_mode,
                 is_in_3d_context,
                 frame_output_pipeline_id,
                 reference_frame_id,
                 real_local_rect: LayerRect::zero(),
             },
             pipeline_id,
             cull_children: true,
+            // TODO(gw): Make this configurable based on an
+            //           exposed API parameter in StackingContext.
+            rasterization_kind: RasterizationSpace::Screen,
         }
     }
 
     pub fn add_primitive(
         &mut self,
         prim_index: PrimitiveIndex,
         clip_and_scroll: ClipAndScrollInfo
     ) {
@@ -276,33 +290,37 @@ impl PicturePrimitive {
         }
     }
 
     pub fn prepare_for_render(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_context: &PrimitiveContext,
         render_tasks: &mut RenderTaskTree,
-        screen_rect: &DeviceIntRect,
+        prim_screen_rect: &DeviceIntRect,
         child_tasks: Vec<RenderTaskId>,
         parent_tasks: &mut Vec<RenderTaskId>,
     ) {
         match self.kind {
             PictureKind::Image {
                 ref mut readback_render_task_id,
                 composite_mode,
-                frame_output_pipeline_id,
                 ..
             } => {
                 match composite_mode {
                     Some(PictureCompositeMode::Filter(FilterOp::Blur(blur_radius))) => {
-                        let picture_task = RenderTask::new_dynamic_alpha_batch(
-                            screen_rect,
+                        let picture_task = RenderTask::new_picture(
+                            Some(prim_screen_rect.size),
                             prim_index,
-                            None,
+                            RenderTargetKind::Color,
+                            prim_screen_rect.origin.x as f32,
+                            prim_screen_rect.origin.y as f32,
+                            PremultipliedColorF::TRANSPARENT,
+                            ClearMode::Transparent,
+                            self.rasterization_kind,
                             child_tasks,
                         );
 
                         let blur_radius = device_length(blur_radius, prim_context.device_pixel_ratio);
                         let blur_std_deviation = blur_radius.0 as f32;
                         let picture_task_id = render_tasks.add(picture_task);
 
                         let blur_render_task = RenderTask::new_blur(
@@ -314,35 +332,45 @@ impl PicturePrimitive {
                             ClearMode::Transparent,
                             PremultipliedColorF::TRANSPARENT,
                         );
 
                         let blur_render_task_id = render_tasks.add(blur_render_task);
                         self.render_task_id = Some(blur_render_task_id);
                     }
                     Some(PictureCompositeMode::MixBlend(..)) => {
-                        let picture_task = RenderTask::new_dynamic_alpha_batch(
-                            screen_rect,
+                        let picture_task = RenderTask::new_picture(
+                            Some(prim_screen_rect.size),
                             prim_index,
-                            None,
+                            RenderTargetKind::Color,
+                            prim_screen_rect.origin.x as f32,
+                            prim_screen_rect.origin.y as f32,
+                            PremultipliedColorF::TRANSPARENT,
+                            ClearMode::Transparent,
+                            self.rasterization_kind,
                             child_tasks,
                         );
 
-                        let readback_task_id = render_tasks.add(RenderTask::new_readback(*screen_rect));
+                        let readback_task_id = render_tasks.add(RenderTask::new_readback(*prim_screen_rect));
 
                         *readback_render_task_id = Some(readback_task_id);
                         parent_tasks.push(readback_task_id);
 
                         self.render_task_id = Some(render_tasks.add(picture_task));
                     }
                     Some(PictureCompositeMode::Filter(..)) | Some(PictureCompositeMode::Blit) => {
-                        let picture_task = RenderTask::new_dynamic_alpha_batch(
-                            screen_rect,
+                        let picture_task = RenderTask::new_picture(
+                            Some(prim_screen_rect.size),
                             prim_index,
-                            frame_output_pipeline_id,
+                            RenderTargetKind::Color,
+                            prim_screen_rect.origin.x as f32,
+                            prim_screen_rect.origin.y as f32,
+                            PremultipliedColorF::TRANSPARENT,
+                            ClearMode::Transparent,
+                            self.rasterization_kind,
                             child_tasks,
                         );
 
                         self.render_task_id = Some(render_tasks.add(picture_task));
                     }
                     None => {
                         parent_tasks.extend(child_tasks);
                         self.render_task_id = None;
@@ -369,22 +397,25 @@ impl PicturePrimitive {
                 let cache_size = DeviceIntSize::new(cache_width, cache_height);
 
                 // Quote from https://drafts.csswg.org/css-backgrounds-3/#shadow-blur
                 // "the image that would be generated by applying to the shadow a
                 // Gaussian blur with a standard deviation equal to half the blur radius."
                 let blur_std_deviation = blur_radius.0 as f32 * 0.5;
 
                 let picture_task = RenderTask::new_picture(
-                    cache_size,
+                    Some(cache_size),
                     prim_index,
                     RenderTargetKind::Color,
-                    content_rect.origin,
+                    content_rect.origin.x,
+                    content_rect.origin.y,
                     color.premultiplied(),
                     ClearMode::Transparent,
+                    self.rasterization_kind,
+                    Vec::new(),
                 );
 
                 let picture_task_id = render_tasks.add(picture_task);
 
                 let render_task = RenderTask::new_blur(
                     blur_std_deviation,
                     picture_task_id,
                     render_tasks,
@@ -420,22 +451,25 @@ impl PicturePrimitive {
                         ClearMode::One
                     }
                     BoxShadowClipMode::Inset => {
                         ClearMode::Zero
                     }
                 };
 
                 let picture_task = RenderTask::new_picture(
-                    cache_size,
+                    Some(cache_size),
                     prim_index,
                     RenderTargetKind::Alpha,
-                    content_rect.origin,
+                    content_rect.origin.x,
+                    content_rect.origin.y,
                     color.premultiplied(),
                     ClearMode::Zero,
+                    self.rasterization_kind,
+                    Vec::new(),
                 );
 
                 let picture_task_id = render_tasks.add(picture_task);
 
                 let render_task = RenderTask::new_blur(
                     blur_std_deviation,
                     picture_task_id,
                     render_tasks,
--- a/gfx/webrender/src/platform/macos/font.rs
+++ b/gfx/webrender/src/platform/macos/font.rs
@@ -11,17 +11,17 @@ use core_foundation::base::TCFType;
 use core_foundation::dictionary::{CFDictionary, CFDictionaryRef};
 use core_foundation::number::{CFNumber, CFNumberRef};
 use core_foundation::string::{CFString, CFStringRef};
 use core_graphics::base::{kCGImageAlphaNoneSkipFirst, kCGImageAlphaPremultipliedFirst};
 use core_graphics::base::kCGBitmapByteOrder32Little;
 use core_graphics::color_space::CGColorSpace;
 use core_graphics::context::{CGContext, CGTextDrawingMode};
 use core_graphics::data_provider::CGDataProvider;
-use core_graphics::font::{CGFont, CGFontRef, CGGlyph};
+use core_graphics::font::{CGFont, CGGlyph};
 use core_graphics::geometry::{CGPoint, CGRect, CGSize};
 use core_text;
 use core_text::font::{CTFont, CTFontRef};
 use core_text::font_descriptor::{kCTFontDefaultOrientation, kCTFontColorGlyphsTrait};
 use gamma_lut::{ColorLut, GammaLut};
 use glyph_rasterizer::{FontInstance, GlyphFormat, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::collections::hash_map::Entry;
@@ -143,18 +143,16 @@ fn get_glyph_metrics(
 extern {
     static kCTFontVariationAxisIdentifierKey: CFStringRef;
     static kCTFontVariationAxisNameKey: CFStringRef;
     static kCTFontVariationAxisMinimumValueKey: CFStringRef;
     static kCTFontVariationAxisMaximumValueKey: CFStringRef;
     static kCTFontVariationAxisDefaultValueKey: CFStringRef;
 
     fn CTFontCopyVariationAxes(font: CTFontRef) -> CFArrayRef;
-
-    fn CGFontCreateCopyWithVariations(font: CGFontRef, vars: CFDictionaryRef) -> CGFontRef;
 }
 
 fn new_ct_font_with_variations(cg_font: &CGFont, size: Au, variations: &[FontVariation]) -> CTFont {
     unsafe {
         let ct_font = core_text::font::new_from_CGFont(cg_font, size.to_f64_px());
         if variations.is_empty() {
             return ct_font;
         }
@@ -239,18 +237,17 @@ fn new_ct_font_with_variations(cg_font: 
             if val != def_val {
                 vals.push((name, CFNumber::from_f64(val)));
             }
         }
         if vals.is_empty() {
             return ct_font;
         }
         let vals_dict = CFDictionary::from_CFType_pairs(&vals);
-        let cg_var_font_ref = CGFontCreateCopyWithVariations(cg_font.as_concrete_TypeRef(), vals_dict.as_concrete_TypeRef());
-        let cg_var_font: CGFont = TCFType::wrap_under_create_rule(cg_var_font_ref);
+        let cg_var_font = cg_font.create_copy_from_variations(&vals_dict).unwrap();
         core_text::font::new_from_CGFont(&cg_var_font, size.to_f64_px())
     }
 }
 
 impl FontContext {
     pub fn new() -> FontContext {
         debug!("Test for subpixel AA support: {}", supports_subpixel_aa());
 
@@ -270,17 +267,17 @@ impl FontContext {
     }
 
     pub fn add_raw_font(&mut self, font_key: &FontKey, bytes: Arc<Vec<u8>>, index: u32) {
         if self.cg_fonts.contains_key(font_key) {
             return;
         }
 
         assert_eq!(index, 0);
-        let data_provider = CGDataProvider::from_buffer(&**bytes);
+        let data_provider = CGDataProvider::from_buffer(bytes);
         let cg_font = match CGFont::from_data_provider(data_provider) {
             Err(_) => return,
             Ok(cg_font) => cg_font,
         };
         self.cg_fonts.insert((*font_key).clone(), cg_font);
     }
 
     pub fn add_native_font(&mut self, font_key: &FontKey, native_font_handle: NativeFontHandle) {
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -1,18 +1,18 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ClipId, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
-use api::{LayerPoint, LayerRect};
-use api::{PipelineId, PremultipliedColorF};
+use api::{LayerPoint, LayerRect, PremultipliedColorF};
 use clip::{ClipSource, ClipSourcesWeakHandle, ClipStore};
 use clip_scroll_tree::CoordinateSystemId;
 use gpu_types::{ClipScrollNodeIndex};
+use picture::RasterizationSpace;
 use prim_store::{PrimitiveIndex};
 use std::{cmp, usize, f32, i32};
 use std::rc::Rc;
 use tiling::{RenderPass, RenderTargetIndex};
 use tiling::{RenderTargetKind};
 
 const FLOATS_PER_RENDER_TASK_INFO: usize = 12;
 pub const MAX_BLUR_STD_DEVIATION: f32 = 4.0;
@@ -145,25 +145,16 @@ pub enum RenderTaskKey {
 }
 
 #[derive(Debug)]
 pub enum RenderTaskLocation {
     Fixed,
     Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
 }
 
-#[derive(Debug)]
-pub struct AlphaRenderTask {
-    pub screen_origin: DeviceIntPoint,
-    pub prim_index: PrimitiveIndex,
-    // If this render task is a registered frame output, this
-    // contains the pipeline ID it maps to.
-    pub frame_output_pipeline_id: Option<PipelineId>,
-}
-
 #[derive(Debug, Copy, Clone)]
 #[repr(C)]
 pub enum MaskSegment {
     // This must match the SEGMENT_ values in clip_shared.glsl!
     All = 0,
     TopLeftCorner,
     TopRightCorner,
     BottomLeftCorner,
@@ -235,16 +226,17 @@ pub struct CacheMaskTask {
 }
 
 #[derive(Debug)]
 pub struct PictureTask {
     pub prim_index: PrimitiveIndex,
     pub target_kind: RenderTargetKind,
     pub content_origin: LayerPoint,
     pub color: PremultipliedColorF,
+    pub rasterization_kind: RasterizationSpace,
 }
 
 #[derive(Debug)]
 pub struct BlurTask {
     pub blur_std_deviation: f32,
     pub target_kind: RenderTargetKind,
     pub regions: Vec<LayerRect>,
     pub color: PremultipliedColorF,
@@ -253,17 +245,16 @@ pub struct BlurTask {
 
 #[derive(Debug)]
 pub struct RenderTaskData {
     pub data: [f32; FLOATS_PER_RENDER_TASK_INFO],
 }
 
 #[derive(Debug)]
 pub enum RenderTaskKind {
-    Alpha(AlphaRenderTask),
     Picture(PictureTask),
     CacheMask(CacheMaskTask),
     VerticalBlur(BlurTask),
     HorizontalBlur(BlurTask),
     Readback(DeviceIntRect),
     Alias(RenderTaskId),
     Scaling(RenderTargetKind),
 }
@@ -283,72 +274,42 @@ pub struct RenderTask {
     pub cache_key: Option<RenderTaskKey>,
     pub location: RenderTaskLocation,
     pub children: Vec<RenderTaskId>,
     pub kind: RenderTaskKind,
     pub clear_mode: ClearMode,
 }
 
 impl RenderTask {
-    // TODO(gw): In the future we'll remove this
-    //           completely and convert everything
-    //           that is an alpha task to a Picture.
-    pub fn new_alpha_batch(
-        screen_origin: DeviceIntPoint,
-        location: RenderTaskLocation,
+    pub fn new_picture(
+        size: Option<DeviceIntSize>,
         prim_index: PrimitiveIndex,
-        frame_output_pipeline_id: Option<PipelineId>,
+        target_kind: RenderTargetKind,
+        content_origin_x: f32,
+        content_origin_y: f32,
+        color: PremultipliedColorF,
+        clear_mode: ClearMode,
+        rasterization_kind: RasterizationSpace,
         children: Vec<RenderTaskId>,
     ) -> Self {
+        let location = match size {
+            Some(size) => RenderTaskLocation::Dynamic(None, size),
+            None => RenderTaskLocation::Fixed,
+        };
+
         RenderTask {
             cache_key: None,
             children,
             location,
-            kind: RenderTaskKind::Alpha(AlphaRenderTask {
-                screen_origin,
-                prim_index,
-                frame_output_pipeline_id,
-            }),
-            clear_mode: ClearMode::Transparent,
-        }
-    }
-
-    pub fn new_dynamic_alpha_batch(
-        rect: &DeviceIntRect,
-        prim_index: PrimitiveIndex,
-        frame_output_pipeline_id: Option<PipelineId>,
-        children: Vec<RenderTaskId>,
-    ) -> Self {
-        let location = RenderTaskLocation::Dynamic(None, rect.size);
-        Self::new_alpha_batch(
-            rect.origin,
-            location,
-            prim_index,
-            frame_output_pipeline_id,
-            children,
-        )
-    }
-
-    pub fn new_picture(
-        size: DeviceIntSize,
-        prim_index: PrimitiveIndex,
-        target_kind: RenderTargetKind,
-        content_origin: LayerPoint,
-        color: PremultipliedColorF,
-        clear_mode: ClearMode,
-    ) -> Self {
-        RenderTask {
-            cache_key: None,
-            children: Vec::new(),
-            location: RenderTaskLocation::Dynamic(None, size),
             kind: RenderTaskKind::Picture(PictureTask {
                 prim_index,
                 target_kind,
-                content_origin,
+                content_origin: LayerPoint::new(content_origin_x, content_origin_y),
                 color,
+                rasterization_kind,
             }),
             clear_mode,
         }
     }
 
     pub fn new_readback(screen_rect: DeviceIntRect) -> Self {
         RenderTask {
             cache_key: None,
@@ -537,140 +498,92 @@ impl RenderTask {
             kind: RenderTaskKind::Scaling(target_kind),
             clear_mode: match target_kind {
                 RenderTargetKind::Color => ClearMode::Transparent,
                 RenderTargetKind::Alpha => ClearMode::One,
             },
         }
     }
 
-    pub fn as_alpha_batch<'a>(&'a self) -> &'a AlphaRenderTask {
-        match self.kind {
-            RenderTaskKind::Alpha(ref task) => task,
-            RenderTaskKind::Picture(..) |
-            RenderTaskKind::CacheMask(..) |
-            RenderTaskKind::VerticalBlur(..) |
-            RenderTaskKind::Readback(..) |
-            RenderTaskKind::HorizontalBlur(..) |
-            RenderTaskKind::Alias(..) |
-            RenderTaskKind::Scaling(..) => unreachable!(),
-        }
-    }
-
     // Write (up to) 8 floats of data specific to the type
     // of render task that is provided to the GPU shaders
     // via a vertex texture.
     pub fn write_task_data(&self) -> RenderTaskData {
         // NOTE: The ordering and layout of these structures are
         //       required to match both the GPU structures declared
         //       in prim_shared.glsl, and also the uses in submit_batch()
         //       in renderer.rs.
         // TODO(gw): Maybe there's a way to make this stuff a bit
         //           more type-safe. Although, it will always need
         //           to be kept in sync with the GLSL code anyway.
 
-        match self.kind {
-            RenderTaskKind::Alpha(ref task) => {
-                let (target_rect, target_index) = self.get_target_rect();
-                RenderTaskData {
-                    data: [
-                        target_rect.origin.x as f32,
-                        target_rect.origin.y as f32,
-                        target_rect.size.width as f32,
-                        target_rect.size.height as f32,
-                        task.screen_origin.x as f32,
-                        task.screen_origin.y as f32,
-                        target_index.0 as f32,
-                        0.0,
-                        0.0,
-                        0.0,
-                        0.0,
+        let (data1, data2) = match self.kind {
+            RenderTaskKind::Picture(ref task) => {
+                (
+                    [
+                        task.content_origin.x,
+                        task.content_origin.y,
+                        task.rasterization_kind as u32 as f32,
+                    ],
+                    task.color.to_array()
+                )
+            }
+            RenderTaskKind::CacheMask(ref task) => {
+                (
+                    [
+                        task.actual_rect.origin.x as f32,
+                        task.actual_rect.origin.y as f32,
                         0.0,
                     ],
-                }
-            }
-            RenderTaskKind::Picture(ref task) => {
-                let (target_rect, target_index) = self.get_target_rect();
-                RenderTaskData {
-                    data: [
-                        target_rect.origin.x as f32,
-                        target_rect.origin.y as f32,
-                        target_rect.size.width as f32,
-                        target_rect.size.height as f32,
-                        target_index.0 as f32,
-                        task.content_origin.x,
-                        task.content_origin.y,
-                        0.0,
-                        task.color.r,
-                        task.color.g,
-                        task.color.b,
-                        task.color.a,
-                    ],
-                }
-            }
-            RenderTaskKind::CacheMask(ref task) => {
-                let (target_rect, target_index) = self.get_target_rect();
-                RenderTaskData {
-                    data: [
-                        target_rect.origin.x as f32,
-                        target_rect.origin.y as f32,
-                        (target_rect.origin.x + target_rect.size.width) as f32,
-                        (target_rect.origin.y + target_rect.size.height) as f32,
-                        task.actual_rect.origin.x as f32,
-                        task.actual_rect.origin.y as f32,
-                        target_index.0 as f32,
-                        0.0,
+                    [
                         task.inner_rect.origin.x as f32,
                         task.inner_rect.origin.y as f32,
                         (task.inner_rect.origin.x + task.inner_rect.size.width) as f32,
                         (task.inner_rect.origin.y + task.inner_rect.size.height) as f32,
                     ],
-                }
+                )
             }
             RenderTaskKind::VerticalBlur(ref task) |
             RenderTaskKind::HorizontalBlur(ref task) => {
-                let (target_rect, target_index) = self.get_target_rect();
-                RenderTaskData {
-                    data: [
-                        target_rect.origin.x as f32,
-                        target_rect.origin.y as f32,
-                        target_rect.size.width as f32,
-                        target_rect.size.height as f32,
-                        target_index.0 as f32,
+                (
+                    [
                         task.blur_std_deviation,
                         task.scale_factor,
                         0.0,
-                        task.color.r,
-                        task.color.g,
-                        task.color.b,
-                        task.color.a,
                     ],
-                }
+                    task.color.to_array()
+                )
             }
             RenderTaskKind::Readback(..) |
-            RenderTaskKind::Scaling(..) => {
-                let (target_rect, target_index) = self.get_target_rect();
-                RenderTaskData {
-                    data: [
-                        target_rect.origin.x as f32,
-                        target_rect.origin.y as f32,
-                        target_rect.size.width as f32,
-                        target_rect.size.height as f32,
-                        target_index.0 as f32,
-                        0.0,
-                        0.0,
-                        0.0,
-                        0.0,
-                        0.0,
-                        0.0,
-                        0.0,
-                    ],
-                }
+            RenderTaskKind::Scaling(..) |
+            RenderTaskKind::Alias(..) => {
+                (
+                    [0.0; 3],
+                    [0.0; 4],
+                )
             }
-            RenderTaskKind::Alias(..) => RenderTaskData { data: [0.0; 12] },
+        };
+
+        let (target_rect, target_index) = self.get_target_rect();
+
+        RenderTaskData {
+            data: [
+                target_rect.origin.x as f32,
+                target_rect.origin.y as f32,
+                target_rect.size.width as f32,
+                target_rect.size.height as f32,
+                target_index.0 as f32,
+                data1[0],
+                data1[1],
+                data1[2],
+                data2[0],
+                data2[1],
+                data2[2],
+                data2[3],
+            ]
         }
     }
 
     pub fn get_dynamic_size(&self) -> DeviceIntSize {
         match self.location {
             RenderTaskLocation::Fixed => DeviceIntSize::zero(),
             RenderTaskLocation::Dynamic(_, size) => size,
         }
@@ -701,17 +614,16 @@ impl RenderTask {
             RenderTaskLocation::Dynamic(None, _) => {
                 (DeviceIntRect::zero(), RenderTargetIndex(0))
             }
         }
     }
 
     pub fn target_kind(&self) -> RenderTargetKind {
         match self.kind {
-            RenderTaskKind::Alpha(..) |
             RenderTaskKind::Readback(..) => RenderTargetKind::Color,
 
             RenderTaskKind::CacheMask(..) => {
                 RenderTargetKind::Alpha
             }
 
             RenderTaskKind::VerticalBlur(ref task_info) |
             RenderTaskKind::HorizontalBlur(ref task_info) => {
@@ -735,17 +647,16 @@ impl RenderTask {
     // Check if this task wants to be made available as an input
     // to all passes (except the first) in the render task tree.
     // To qualify for this, the task needs to have no children / dependencies.
     // Currently, this is only supported for A8 targets, but it can be
     // trivially extended to also support RGBA8 targets in the future
     // if we decide that is useful.
     pub fn is_shared(&self) -> bool {
         match self.kind {
-            RenderTaskKind::Alpha(..) |
             RenderTaskKind::Picture(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::HorizontalBlur(..) |
             RenderTaskKind::Scaling(..) => false,
 
             RenderTaskKind::CacheMask(..) => true,
 
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -25,33 +25,34 @@ use debug_render::DebugRenderer;
 #[cfg(feature = "debugger")]
 use debug_server::{self, DebugServer};
 use device::{DepthFunction, Device, FrameId, Program, Texture,
              VertexDescriptor, PBO};
 use device::{get_gl_format_bgra, ExternalTexture, FBOId, TextureSlot, VertexAttribute,
              VertexAttributeKind};
 use device::{FileWatcherHandler, ShaderError, TextureFilter, TextureTarget,
              VertexUsageHint, VAO};
+use device::ProgramCache;
 use euclid::{rect, Transform3D};
 use frame_builder::FrameBuilderConfig;
 use gleam::gl;
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 use gpu_types::PrimitiveInstance;
 use internal_types::{BatchTextures, SourceTexture, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE};
 use internal_types::{CacheTextureId, FastHashMap, RendererFrame, ResultMsg, TextureUpdateOp};
 use internal_types::{DebugOutput, RenderTargetMode, TextureUpdateList, TextureUpdateSource};
 use profiler::{BackendProfileCounters, Profiler};
 use profiler::{GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
 use query::{GpuProfiler, GpuTimer};
 use rayon::Configuration as ThreadPoolConfig;
 use rayon::ThreadPool;
 use record::ApiRecordingReceiver;
 use render_backend::RenderBackend;
-use render_task::RenderTaskTree;
+use render_task::{RenderTaskKind, RenderTaskTree};
 #[cfg(feature = "debugger")]
 use serde_json;
 use std;
 use std::cmp;
 use std::collections::VecDeque;
 use std::collections::hash_map::Entry;
 use std::f32;
 use std::mem;
@@ -166,46 +167,75 @@ const GPU_SAMPLER_TAG_OPAQUE: GpuProfile
     label: "Opaque Pass",
     color: debug_colors::BLACK,
 };
 const GPU_SAMPLER_TAG_TRANSPARENT: GpuProfileTag = GpuProfileTag {
     label: "Transparent Pass",
     color: debug_colors::BLACK,
 };
 
-#[cfg(feature = "debugger")]
+impl TransformBatchKind {
+    #[cfg(feature = "debugger")]
+    fn debug_name(&self) -> &'static str {
+        match *self {
+            TransformBatchKind::Rectangle(..) => "Rectangle",
+            TransformBatchKind::TextRun(..) => "TextRun",
+            TransformBatchKind::Image(image_buffer_kind, ..) => match image_buffer_kind {
+                ImageBufferKind::Texture2D => "Image (2D)",
+                ImageBufferKind::TextureRect => "Image (Rect)",
+                ImageBufferKind::TextureExternal => "Image (External)",
+                ImageBufferKind::Texture2DArray => "Image (Array)",
+            },
+            TransformBatchKind::YuvImage(..) => "YuvImage",
+            TransformBatchKind::AlignedGradient => "AlignedGradient",
+            TransformBatchKind::AngleGradient => "AngleGradient",
+            TransformBatchKind::RadialGradient => "RadialGradient",
+            TransformBatchKind::BorderCorner => "BorderCorner",
+            TransformBatchKind::BorderEdge => "BorderEdge",
+            TransformBatchKind::Line => "Line",
+        }
+    }
+
+    fn gpu_sampler_tag(&self) -> GpuProfileTag {
+        match *self {
+            TransformBatchKind::Rectangle(_) => GPU_TAG_PRIM_RECT,
+            TransformBatchKind::Line => GPU_TAG_PRIM_LINE,
+            TransformBatchKind::TextRun(..) => GPU_TAG_PRIM_TEXT_RUN,
+            TransformBatchKind::Image(..) => GPU_TAG_PRIM_IMAGE,
+            TransformBatchKind::YuvImage(..) => GPU_TAG_PRIM_YUV_IMAGE,
+            TransformBatchKind::BorderCorner => GPU_TAG_PRIM_BORDER_CORNER,
+            TransformBatchKind::BorderEdge => GPU_TAG_PRIM_BORDER_EDGE,
+            TransformBatchKind::AlignedGradient => GPU_TAG_PRIM_GRADIENT,
+            TransformBatchKind::AngleGradient => GPU_TAG_PRIM_ANGLE_GRADIENT,
+            TransformBatchKind::RadialGradient => GPU_TAG_PRIM_RADIAL_GRADIENT,
+        }
+    }
+}
+
 impl BatchKind {
+    #[cfg(feature = "debugger")]
     fn debug_name(&self) -> &'static str {
         match *self {
             BatchKind::Composite { .. } => "Composite",
             BatchKind::HardwareComposite => "HardwareComposite",
             BatchKind::SplitComposite => "SplitComposite",
             BatchKind::Blend => "Blend",
-            BatchKind::Brush(kind) => {
-                match kind {
-                    BrushBatchKind::Image(..) => "Brush (Image)",
-                }
-            }
-            BatchKind::Transformable(_, kind) => match kind {
-                TransformBatchKind::Rectangle(..) => "Rectangle",
-                TransformBatchKind::TextRun(..) => "TextRun",
-                TransformBatchKind::Image(image_buffer_kind, ..) => match image_buffer_kind {
-                    ImageBufferKind::Texture2D => "Image (2D)",
-                    ImageBufferKind::TextureRect => "Image (Rect)",
-                    ImageBufferKind::TextureExternal => "Image (External)",
-                    ImageBufferKind::Texture2DArray => "Image (Array)",
-                },
-                TransformBatchKind::YuvImage(..) => "YuvImage",
-                TransformBatchKind::AlignedGradient => "AlignedGradient",
-                TransformBatchKind::AngleGradient => "AngleGradient",
-                TransformBatchKind::RadialGradient => "RadialGradient",
-                TransformBatchKind::BorderCorner => "BorderCorner",
-                TransformBatchKind::BorderEdge => "BorderEdge",
-                TransformBatchKind::Line => "Line",
-            },
+            BatchKind::Brush(BrushBatchKind::Image(..)) => "Brush (Image)",
+            BatchKind::Transformable(_, batch_kind) => batch_kind.debug_name(),
+        }
+    }
+
+    fn gpu_sampler_tag(&self) -> GpuProfileTag {
+        match *self {
+            BatchKind::Composite { .. } => GPU_TAG_PRIM_COMPOSITE,
+            BatchKind::HardwareComposite => GPU_TAG_PRIM_HW_COMPOSITE,
+            BatchKind::SplitComposite => GPU_TAG_PRIM_SPLIT_COMPOSITE,
+            BatchKind::Blend => GPU_TAG_PRIM_BLEND,
+            BatchKind::Brush(BrushBatchKind::Image(_)) => GPU_TAG_BRUSH_IMAGE,
+            BatchKind::Transformable(_, batch_kind) => batch_kind.gpu_sampler_tag(),
         }
     }
 }
 
 bitflags! {
     #[derive(Default)]
     pub struct DebugFlags: u32 {
         const PROFILER_DBG      = 1 << 0;
@@ -1170,20 +1200,20 @@ pub enum ReadPixelsFormat {
 
 struct FrameOutput {
     last_access: FrameId,
     fbo_id: FBOId,
 }
 
 /// The renderer is responsible for submitting to the GPU the work prepared by the
 /// RenderBackend.
-pub struct Renderer {
+pub struct Renderer<'a> {
     result_rx: Receiver<ResultMsg>,
     debug_server: DebugServer,
-    device: Device,
+    device: Device<'a>,
     pending_texture_updates: Vec<TextureUpdateList>,
     pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
     pending_shader_updates: Vec<PathBuf>,
     current_frame: Option<RendererFrame>,
 
     // These are "cache shaders". These shaders are used to
     // draw intermediate results to cache targets. The results
     // of these shaders are then used by the primitive shaders.
@@ -1300,17 +1330,17 @@ impl From<ShaderError> for RendererError
 }
 
 impl From<std::io::Error> for RendererError {
     fn from(err: std::io::Error) -> Self {
         RendererError::Thread(err)
     }
 }
 
-impl Renderer {
+impl<'a> Renderer<'a> {
     /// Initializes webrender and creates a `Renderer` and `RenderApiSender`.
     ///
     /// # Examples
     /// Initializes a `Renderer` with some reasonable values. For more information see
     /// [`RendererOptions`][rendereroptions].
     ///
     /// ```rust,ignore
     /// # use webrender::renderer::Renderer;
@@ -1339,16 +1369,17 @@ impl Renderer {
             result_tx: result_tx.clone(),
             notifier: notifier.clone(),
         };
 
         let mut device = Device::new(
             gl,
             options.resource_override_path.clone(),
             Box::new(file_watch_handler),
+            options.cached_programs,
         );
 
         let device_max_size = device.max_texture_size();
         // 512 is the minimum that the texture cache can work with.
         // Broken GL contexts can return a max texture size of zero (See #1260). Better to
         // gracefully fail now than panic as soon as a texture is allocated.
         let min_texture_size = 512;
         if device_max_size < min_texture_size {
@@ -1935,16 +1966,22 @@ impl Renderer {
     }
 
     /// Returns a HashMap containing the pipeline ids that have been received by the renderer and
     /// their respective epochs since the last time the method was called.
     pub fn flush_rendered_epochs(&mut self) -> FastHashMap<PipelineId, Epoch> {
         mem::replace(&mut self.pipeline_epoch_map, FastHashMap::default())
     }
 
+    // Update the program cache with new binaries, e.g. when some of the lazy-loaded
+    // shader programs got activated in the meantime.
+    pub fn update_program_cache(&mut self, cached_programs: &'a mut ProgramCache) {
+        self.device.update_program_cache(cached_programs);
+    }
+
     /// Processes the result queue.
     ///
     /// Should be called before `render()`, as texture cache updates are done here.
     pub fn update(&mut self) {
         profile_scope!("update");
 
         // Pull any pending results and return the most recent.
         while let Ok(msg) = self.result_rx.try_recv() {
@@ -2472,56 +2509,49 @@ impl Renderer {
         &mut self,
         key: &BatchKey,
         instances: &[PrimitiveInstance],
         projection: &Transform3D<f32>,
         render_tasks: &RenderTaskTree,
         render_target: Option<(&Texture, i32)>,
         target_dimensions: DeviceUintSize,
     ) {
-        let marker = match key.kind {
+        match key.kind {
             BatchKind::Composite { .. } => {
-                self.ps_composite
-                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
-                GPU_TAG_PRIM_COMPOSITE
+                self.ps_composite.bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             }
             BatchKind::HardwareComposite => {
                 self.ps_hw_composite
                     .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
-                GPU_TAG_PRIM_HW_COMPOSITE
             }
             BatchKind::SplitComposite => {
                 self.ps_split_composite.bind(
                     &mut self.device,
                     projection,
                     0,
                     &mut self.renderer_errors,
                 );
-                GPU_TAG_PRIM_SPLIT_COMPOSITE
             }
             BatchKind::Blend => {
-                self.ps_blend
-                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
-                GPU_TAG_PRIM_BLEND
+                self.ps_blend.bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             }
             BatchKind::Brush(brush_kind) => {
                 match brush_kind {
                     BrushBatchKind::Image(target_kind) => {
                         let shader = match target_kind {
                             RenderTargetKind::Alpha => &mut self.brush_image_a8,
                             RenderTargetKind::Color => &mut self.brush_image_rgba8,
                         };
                         shader.bind(
                             &mut self.device,
                             key.blend_mode,
                             projection,
                             0,
                             &mut self.renderer_errors,
                         );
-                        GPU_TAG_BRUSH_IMAGE
                     }
                 }
             }
             BatchKind::Transformable(transform_kind, batch_kind) => match batch_kind {
                 TransformBatchKind::Rectangle(needs_clipping) => {
                     debug_assert!(
                         !needs_clipping || match key.blend_mode {
                             BlendMode::PremultipliedAlpha |
@@ -2545,108 +2575,99 @@ impl Renderer {
                         self.ps_rectangle.bind(
                             &mut self.device,
                             transform_kind,
                             projection,
                             0,
                             &mut self.renderer_errors,
                         );
                     }
-                    GPU_TAG_PRIM_RECT
                 }
                 TransformBatchKind::Line => {
                     self.ps_line.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
                         0,
                         &mut self.renderer_errors,
                     );
-                    GPU_TAG_PRIM_LINE
                 }
                 TransformBatchKind::TextRun(..) => {
                     unreachable!("bug: text batches are special cased");
                 }
                 TransformBatchKind::Image(image_buffer_kind) => {
                     self.ps_image[image_buffer_kind as usize]
                         .as_mut()
                         .expect("Unsupported image shader kind")
                         .bind(
                             &mut self.device,
                             transform_kind,
                             projection,
                             0,
                             &mut self.renderer_errors,
                         );
-                    GPU_TAG_PRIM_IMAGE
                 }
                 TransformBatchKind::YuvImage(image_buffer_kind, format, color_space) => {
                     let shader_index =
                         Renderer::get_yuv_shader_index(image_buffer_kind, format, color_space);
                     self.ps_yuv_image[shader_index]
                         .as_mut()
                         .expect("Unsupported YUV shader kind")
                         .bind(
                             &mut self.device,
                             transform_kind,
                             projection,
                             0,
                             &mut self.renderer_errors,
                         );
-                    GPU_TAG_PRIM_YUV_IMAGE
                 }
                 TransformBatchKind::BorderCorner => {
                     self.ps_border_corner.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
                         0,
                         &mut self.renderer_errors,
                     );
-                    GPU_TAG_PRIM_BORDER_CORNER
                 }
                 TransformBatchKind::BorderEdge => {
                     self.ps_border_edge.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
                         0,
                         &mut self.renderer_errors,
                     );
-                    GPU_TAG_PRIM_BORDER_EDGE
                 }
                 TransformBatchKind::AlignedGradient => {
                     self.ps_gradient.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
                         0,
                         &mut self.renderer_errors,
                     );
-                    GPU_TAG_PRIM_GRADIENT
                 }
                 TransformBatchKind::AngleGradient => {
                     self.ps_angle_gradient.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
                         0,
                         &mut self.renderer_errors,
                     );
-                    GPU_TAG_PRIM_ANGLE_GRADIENT
                 }
                 TransformBatchKind::RadialGradient => {
                     self.ps_radial_gradient.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
                         0,
                         &mut self.renderer_errors,
                     );
-                    GPU_TAG_PRIM_RADIAL_GRADIENT
                 }
             },
         };
 
         // Handle special case readback for composites.
         match key.kind {
             BatchKind::Composite {
                 task_id,
@@ -2666,31 +2687,39 @@ impl Renderer {
                 let cache_texture_dimensions = cache_texture.get_dimensions();
 
                 let source = render_tasks.get(source_id);
                 let backdrop = render_tasks.get(task_id);
                 let readback = render_tasks.get(backdrop_id);
 
                 let (readback_rect, readback_layer) = readback.get_target_rect();
                 let (backdrop_rect, _) = backdrop.get_target_rect();
-                let backdrop_screen_origin = backdrop.as_alpha_batch().screen_origin;
-                let source_screen_origin = source.as_alpha_batch().screen_origin;
+                let backdrop_screen_origin = match backdrop.kind {
+                    RenderTaskKind::Picture(ref task_info) => task_info.content_origin,
+                    _ => panic!("bug: composite on non-picture?"),
+                };
+                let source_screen_origin = match source.kind {
+                    RenderTaskKind::Picture(ref task_info) => task_info.content_origin,
+                    _ => panic!("bug: composite on non-picture?"),
+                };
 
                 // Bind the FBO to blit the backdrop to.
                 // Called per-instance in case the layer (and therefore FBO)
                 // changes. The device will skip the GL call if the requested
                 // target is already bound.
                 let cache_draw_target = (cache_texture, readback_layer.0 as i32);
                 self.device
                     .bind_draw_target(Some(cache_draw_target), Some(cache_texture_dimensions));
 
-                let src_x =
-                    backdrop_rect.origin.x - backdrop_screen_origin.x + source_screen_origin.x;
-                let src_y =
-                    backdrop_rect.origin.y - backdrop_screen_origin.y + source_screen_origin.y;
+                let src_x = backdrop_rect.origin.x -
+                            backdrop_screen_origin.x as i32 +
+                            source_screen_origin.x as i32;
+                let src_y = backdrop_rect.origin.y -
+                            backdrop_screen_origin.y as i32 +
+                            source_screen_origin.y as i32;
 
                 let dest_x = readback_rect.origin.x;
                 let dest_y = readback_rect.origin.y;
 
                 let width = readback_rect.size.width;
                 let height = readback_rect.size.height;
 
                 let mut src = DeviceIntRect::new(
@@ -2715,17 +2744,17 @@ impl Renderer {
 
                 // Restore draw target to current pass render target + layer.
                 self.device
                     .bind_draw_target(render_target, Some(target_dimensions));
             }
             _ => {}
         }
 
-        let _timer = self.gpu_profile.start_timer(marker);
+        let _timer = self.gpu_profile.start_timer(key.kind.gpu_sampler_tag());
         self.draw_instanced_batch(instances, VertexArrayKind::Primitive, &key.textures);
     }
 
     fn handle_scaling(
         &mut self,
         render_tasks: &RenderTaskTree,
         scalings: &Vec<ScalingInfo>,
         source: SourceTexture,
@@ -3595,17 +3624,17 @@ impl Renderer {
                 } else {
                     true
                 });
         }
 
         self.unlock_external_images();
     }
 
-    pub fn debug_renderer<'a>(&'a mut self) -> &'a mut DebugRenderer {
+    pub fn debug_renderer<'b>(&'b mut self) -> &'b mut DebugRenderer {
         &mut self.debug
     }
 
     pub fn get_debug_flags(&self) -> DebugFlags {
         self.debug_flags
     }
 
     pub fn set_debug_flags(&mut self, flags: DebugFlags) {
@@ -3854,17 +3883,17 @@ pub trait OutputImageHandler {
     fn unlock(&mut self, pipeline_id: PipelineId);
 }
 
 pub trait ThreadListener {
     fn thread_started(&self, thread_name: &str);
     fn thread_stopped(&self, thread_name: &str);
 }
 
-pub struct RendererOptions {
+pub struct RendererOptions<'a> {
     pub device_pixel_ratio: f32,
     pub resource_override_path: Option<PathBuf>,
     pub enable_aa: bool,
     pub enable_dithering: bool,
     pub max_recorded_profiles: usize,
     pub debug: bool,
     pub enable_scrollbars: bool,
     pub precache_shaders: bool,
@@ -3875,22 +3904,23 @@ pub struct RendererOptions {
     pub enable_clear_scissor: bool,
     pub enable_batcher: bool,
     pub max_texture_size: Option<u32>,
     pub workers: Option<Arc<ThreadPool>>,
     pub blob_image_renderer: Option<Box<BlobImageRenderer>>,
     pub recorder: Option<Box<ApiRecordingReceiver>>,
     pub thread_listener: Option<Box<ThreadListener + Send + Sync>>,
     pub enable_render_on_scroll: bool,
+    pub cached_programs: Option<&'a mut ProgramCache>,
     pub debug_flags: DebugFlags,
     pub renderer_id: Option<u64>,
 }
 
-impl Default for RendererOptions {
-    fn default() -> RendererOptions {
+impl<'a> Default for RendererOptions<'a> {
+    fn default() -> RendererOptions<'a> {
         RendererOptions {
             device_pixel_ratio: 1.0,
             resource_override_path: None,
             enable_aa: true,
             enable_dithering: true,
             debug_flags: DebugFlags::empty(),
             max_recorded_profiles: 0,
             debug: false,
@@ -3904,16 +3934,17 @@ impl Default for RendererOptions {
             enable_batcher: true,
             max_texture_size: None,
             workers: None,
             blob_image_renderer: None,
             recorder: None,
             thread_listener: None,
             enable_render_on_scroll: true,
             renderer_id: None,
+            cached_programs: None,
         }
     }
 }
 
 #[cfg(not(feature = "debugger"))]
 pub struct DebugServer;
 
 #[cfg(not(feature = "debugger"))]
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -12,17 +12,17 @@ use border::{BorderCornerInstance, Borde
 use clip::{ClipSource, ClipStore};
 use clip_scroll_tree::{ClipScrollTree, CoordinateSystemId};
 use device::Texture;
 use euclid::{TypedTransform3D, vec3};
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle, GpuCacheUpdateList};
 use gpu_types::{BlurDirection, BlurInstance, BrushInstance, BrushImageKind, ClipMaskInstance};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
-use gpu_types::{BRUSH_FLAG_USES_PICTURE, ClipScrollNodeIndex, ClipScrollNodeData};
+use gpu_types::{ClipScrollNodeIndex, ClipScrollNodeData};
 use internal_types::{FastHashMap, SourceTexture};
 use internal_types::BatchTextures;
 use picture::{PictureCompositeMode, PictureKind, PicturePrimitive};
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
 use prim_store::{BrushMaskKind, BrushKind, DeferredResolve, PrimitiveRun, RectangleContent};
 use profiler::FrameProfileCounters;
 use render_task::{ClipWorkItem, MaskGeometryKind, MaskSegment};
@@ -1016,27 +1016,35 @@ impl AlphaBatcher {
         &mut self,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
     ) {
         for task_id in &self.tasks {
             let task_id = *task_id;
-            let task = render_tasks.get(task_id).as_alpha_batch();
-            let pic = &ctx.prim_store.cpu_pictures[ctx.prim_store.cpu_metadata[task.prim_index.0].cpu_prim_index.0];
-            pic.add_to_batch(
-                task_id,
-                ctx,
-                gpu_cache,
-                render_tasks,
-                deferred_resolves,
-                &mut self.batch_list,
-                &mut self.glyph_fetch_buffer
-            );
+            let task = render_tasks.get(task_id);
+            match task.kind {
+                RenderTaskKind::Picture(ref pic_task) => {
+                    let pic_index = ctx.prim_store.cpu_metadata[pic_task.prim_index.0].cpu_prim_index;
+                    let pic = &ctx.prim_store.cpu_pictures[pic_index.0];
+                    pic.add_to_batch(
+                        task_id,
+                        ctx,
+                        gpu_cache,
+                        render_tasks,
+                        deferred_resolves,
+                        &mut self.batch_list,
+                        &mut self.glyph_fetch_buffer
+                    );
+                }
+                _ => {
+                    unreachable!();
+                }
+            }
         }
 
         self.batch_list.finalize();
     }
 
     pub fn is_empty(&self) -> bool {
         self.batch_list.opaque_batch_list.batches.is_empty() &&
             self.batch_list.alpha_batch_list.batches.is_empty()
@@ -1408,28 +1416,16 @@ impl RenderTarget for ColorRenderTarget 
         _: &ClipStore,
     ) {
         let task = render_tasks.get(task_id);
 
         match task.kind {
             RenderTaskKind::Alias(..) => {
                 panic!("BUG: add_task() called on invalidated task");
             }
-            RenderTaskKind::Alpha(ref info) => {
-                self.alpha_batcher.add_task(task_id);
-
-                // If this pipeline is registered as a frame output
-                // store the information necessary to do the copy.
-                if let Some(pipeline_id) = info.frame_output_pipeline_id {
-                    self.outputs.push(FrameOutput {
-                        pipeline_id,
-                        task_id,
-                    });
-                }
-            }
             RenderTaskKind::VerticalBlur(ref info) => {
                 info.add_instances(
                     &mut self.vertical_blurs,
                     task_id,
                     task.children[0],
                     BlurDirection::Vertical,
                     render_tasks,
                 );
@@ -1444,71 +1440,88 @@ impl RenderTarget for ColorRenderTarget 
                 );
             }
             RenderTaskKind::Picture(ref task_info) => {
                 let prim_metadata = ctx.prim_store.get_metadata(task_info.prim_index);
                 match prim_metadata.prim_kind {
                     PrimitiveKind::Picture => {
                         let prim = &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
 
-                        let task_index = render_tasks.get_task_address(task_id);
-
-                        for run in &prim.runs {
-                            for i in 0 .. run.count {
-                                let sub_prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
+                        match prim.kind {
+                            PictureKind::Image { frame_output_pipeline_id, .. } => {
+                                self.alpha_batcher.add_task(task_id);
 
-                                let sub_metadata = ctx.prim_store.get_metadata(sub_prim_index);
-                                let sub_prim_address =
-                                    gpu_cache.get_address(&sub_metadata.gpu_location);
-                                let instance = SimplePrimitiveInstance::new(
-                                    sub_prim_address,
-                                    task_index,
-                                    RenderTaskAddress(0),
-                                    ClipScrollNodeIndex(0),
-                                    ClipScrollNodeIndex(0),
-                                    0,
-                                ); // z is disabled for rendering cache primitives
+                                // If this pipeline is registered as a frame output
+                                // store the information necessary to do the copy.
+                                if let Some(pipeline_id) = frame_output_pipeline_id {
+                                    self.outputs.push(FrameOutput {
+                                        pipeline_id,
+                                        task_id,
+                                    });
+                                }
+                            }
+                            PictureKind::TextShadow { .. } |
+                            PictureKind::BoxShadow { .. } => {
+                                let task_index = render_tasks.get_task_address(task_id);
 
-                                match sub_metadata.prim_kind {
-                                    PrimitiveKind::TextRun => {
-                                        // Add instances that reference the text run GPU location. Also supply
-                                        // the parent shadow prim address as a user data field, allowing
-                                        // the shader to fetch the shadow parameters.
-                                        let text = &ctx.prim_store.cpu_text_runs
-                                            [sub_metadata.cpu_prim_index.0];
-                                        let text_run_cache_prims = &mut self.text_run_cache_prims;
+                                for run in &prim.runs {
+                                    for i in 0 .. run.count {
+                                        let sub_prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
+
+                                        let sub_metadata = ctx.prim_store.get_metadata(sub_prim_index);
+                                        let sub_prim_address =
+                                            gpu_cache.get_address(&sub_metadata.gpu_location);
+                                        let instance = SimplePrimitiveInstance::new(
+                                            sub_prim_address,
+                                            task_index,
+                                            RenderTaskAddress(0),
+                                            ClipScrollNodeIndex(0),
+                                            ClipScrollNodeIndex(0),
+                                            0,
+                                        ); // z is disabled for rendering cache primitives
 
-                                        let font = text.get_font(ctx.device_pixel_ratio);
+                                        match sub_metadata.prim_kind {
+                                            PrimitiveKind::TextRun => {
+                                                // Add instances that reference the text run GPU location. Also supply
+                                                // the parent shadow prim address as a user data field, allowing
+                                                // the shader to fetch the shadow parameters.
+                                                let text = &ctx.prim_store.cpu_text_runs
+                                                    [sub_metadata.cpu_prim_index.0];
+                                                let text_run_cache_prims = &mut self.text_run_cache_prims;
 
-                                        ctx.resource_cache.fetch_glyphs(
-                                            font,
-                                            &text.glyph_keys,
-                                            &mut self.glyph_fetch_buffer,
-                                            gpu_cache,
-                                            |texture_id, _glyph_format, glyphs| {
-                                                let batch = text_run_cache_prims
-                                                    .entry(texture_id)
-                                                    .or_insert(Vec::new());
+                                                let font = text.get_font(ctx.device_pixel_ratio);
 
-                                                for glyph in glyphs {
-                                                    batch.push(instance.build(
-                                                        glyph.index_in_text_run,
-                                                        glyph.uv_rect_address.as_int(),
-                                                        0
-                                                    ));
-                                                }
-                                            },
-                                        );
-                                    }
-                                    PrimitiveKind::Line => {
-                                        self.line_cache_prims
-                                            .push(instance.build(0, 0, 0));
-                                    }
-                                    _ => {
-                                        unreachable!("Unexpected sub primitive type");
+                                                ctx.resource_cache.fetch_glyphs(
+                                                    font,
+                                                    &text.glyph_keys,
+                                                    &mut self.glyph_fetch_buffer,
+                                                    gpu_cache,
+                                                    |texture_id, _glyph_format, glyphs| {
+                                                        let batch = text_run_cache_prims
+                                                            .entry(texture_id)
+                                                            .or_insert(Vec::new());
+
+                                                        for glyph in glyphs {
+                                                            batch.push(instance.build(
+                                                                glyph.index_in_text_run,
+                                                                glyph.uv_rect_address.as_int(),
+                                                                0
+                                                            ));
+                                                        }
+                                                    },
+                                                );
+                                            }
+                                            PrimitiveKind::Line => {
+                                                self.line_cache_prims
+                                                    .push(instance.build(0, 0, 0));
+                                            }
+                                            _ => {
+                                                unreachable!("Unexpected sub primitive type");
+                                            }
+                                        }
                                     }
                                 }
                             }
                         }
                     }
                     _ => {
                         // No other primitives make use of primitive caching yet!
                         unreachable!()
@@ -1587,17 +1600,16 @@ impl RenderTarget for AlphaRenderTarget 
                 panic!("bug: invalid clear mode for alpha task");
             }
         }
 
         match task.kind {
             RenderTaskKind::Alias(..) => {
                 panic!("BUG: add_task() called on invalidated task");
             }
-            RenderTaskKind::Alpha(..) |
             RenderTaskKind::Readback(..) => {
                 panic!("Should not be added to alpha target!");
             }
             RenderTaskKind::VerticalBlur(ref info) => {
                 info.add_instances(
                     &mut self.vertical_blurs,
                     task_id,
                     task.children[0],
@@ -1640,17 +1652,17 @@ impl RenderTarget for AlphaRenderTarget 
                                             //           primitives on picture backed
                                             //           tasks support clip masks and
                                             //           transform primitives, these
                                             //           will need to be filled out!
                                             clip_id: ClipScrollNodeIndex(0),
                                             scroll_id: ClipScrollNodeIndex(0),
                                             clip_task_address: RenderTaskAddress(0),
                                             z: 0,
-                                            flags: BRUSH_FLAG_USES_PICTURE,
+                                            flags: 0,
                                             user_data0: 0,
                                             user_data1: 0,
                                         };
                                         let brush = &ctx.prim_store.cpu_brushes[sub_metadata.cpu_prim_index.0];
                                         let batch = match brush.kind {
                                             BrushKind::Mask { ref kind, .. } => {
                                                 match *kind {
                                                     BrushMaskKind::Corner(..) => &mut self.brush_mask_corners,
--- a/gfx/webrender_api/Cargo.toml
+++ b/gfx/webrender_api/Cargo.toml
@@ -1,11 +1,11 @@
 [package]
 name = "webrender_api"
-version = "0.53.2"
+version = "0.54.0"
 authors = ["Glenn Watson <gw@intuitionlibrary.com>"]
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 
 [features]
 nightly = ["euclid/unstable", "serde/unstable"]
 ipc = ["ipc-channel"]
 
@@ -16,12 +16,12 @@ bincode = "0.9"
 byteorder = "1.0"
 euclid = "0.15"
 ipc-channel = {version = "0.9", optional = true}
 serde = { version = "1.0", features = ["rc", "derive"] }
 time = "0.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.4"
-core-graphics = "0.9"
+core-graphics = "0.12.2"
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.4"
--- a/gfx/webrender_api/src/color.rs
+++ b/gfx/webrender_api/src/color.rs
@@ -21,16 +21,20 @@ pub struct PremultipliedColorF {
     pub a: f32,
 }
 
 impl PremultipliedColorF {
     ///
     pub const BLACK: Self = PremultipliedColorF { r: 0.0, g: 0.0, b: 0.0, a: 1.0 };
     ///
     pub const TRANSPARENT: Self = PremultipliedColorF { r: 0.0, g: 0.0, b: 0.0, a: 0.0 };
+
+    pub fn to_array(&self) -> [f32; 4] {
+        [self.r, self.g, self.b, self.a]
+    }
 }
 
 /// Represents RGBA screen colors with floating point numbers.
 ///
 /// All components must be between 0.0 and 1.0.
 /// An alpha value of 1.0 is opaque while 0.0 is fully transparent.
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
--- a/gfx/webrender_bindings/Cargo.toml
+++ b/gfx/webrender_bindings/Cargo.toml
@@ -1,27 +1,27 @@
 [package]
 name = "webrender_bindings"
 version = "0.1.0"
 authors = ["The Mozilla Project Developers"]
 license = "MPL-2.0"
 
 [dependencies]
-webrender_api = {path = "../webrender_api", version = "0.53.2"}
+webrender_api = {path = "../webrender_api", version = "0.54.0"}
 rayon = "0.8"
 thread_profiler = "0.1.1"
 euclid = "0.15"
 app_units = "0.5.6"
-gleam = "0.4"
+gleam = "0.4.14"
 log = "0.3"
 
 [dependencies.webrender]
 path = "../webrender"
-version = "0.53.2"
+version = "0.54.0"
 default-features = false
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.4"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.4"
-core-graphics = "0.9"
+core-graphics = "0.12.2"