Bug 1453688 - Update webrender to commit 5bcb7f46c6931633fd20813c46cd69af164effe7. r?jrmuizel draft
author Kartikaya Gupta <kgupta@mozilla.com>
Tue, 17 Apr 2018 08:22:52 -0400
changeset 783578 aa15aefef0733399173e1431cf53213330d131ae
parent 783549 f94b64e0020225c71701930f193bd96c3ad1d150
child 783579 761c958520b3462cc128d78bc423279580bf71dc
push id 106722
push user kgupta@mozilla.com
push date Tue, 17 Apr 2018 12:24:20 +0000
reviewers jrmuizel
bugs 1453688
milestone 61.0a1
Bug 1453688 - Update webrender to commit 5bcb7f46c6931633fd20813c46cd69af164effe7. r?jrmuizel MozReview-Commit-ID: B71quJ8o6RG
gfx/webrender/res/ps_border_edge.glsl
gfx/webrender/src/batch.rs
gfx/webrender/src/border.rs
gfx/webrender/src/box_shadow.rs
gfx/webrender/src/clip.rs
gfx/webrender/src/display_list_flattener.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/freelist.rs
gfx/webrender/src/gpu_cache.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender_api/src/api.rs
gfx/webrender_api/src/units.rs
gfx/webrender_bindings/revision.txt
gfx/wrench/src/blob.rs
gfx/wrench/src/rawtest.rs
--- a/gfx/webrender/res/ps_border_edge.glsl
+++ b/gfx/webrender/res/ps_border_edge.glsl
@@ -95,17 +95,19 @@ void write_color1(vec4 color, float styl
 
     vColor1 = vec4(min(color.rgb * modulate.y, vec3(color.a)), color.a);
 }
 
 void write_clip_params(float style,
                        float border_width,
                        float edge_length,
                        float edge_offset,
-                       float center_line) {
+                       float center_line,
+                       bool start_corner_has_radius,
+                       bool end_corner_has_radius) {
     // x = offset
     // y = dash on + off length
     // z = dash length
     // w = center line of edge cross-axis (for dots only)
     switch (int(style)) {
         case BORDER_STYLE_DASHED: {
             float desired_dash_length = border_width * 3.0;
             // Consider half total length since there is an equal on/off for each dash.
@@ -116,33 +118,51 @@ void write_clip_params(float style,
                                dash_length,
                                0.0);
             vClipSelect = 0.0;
             break;
         }
         case BORDER_STYLE_DOTTED: {
             float diameter = border_width;
             float radius = 0.5 * diameter;
-            float dot_count = ceil(0.5 * edge_length / diameter);
-            float empty_space = edge_length - dot_count * diameter;
+
+            // If this edge connects a corner with a radius to a corner without a radius, we
+            // act as if we have space for one more dot. This will position the dots so that
+            // there is a half dot on one of the ends.
+            float full_edge_length = edge_length +
+                (float(start_corner_has_radius ^^ end_corner_has_radius) * diameter);
+
+            float dot_count = ceil(0.5 * full_edge_length / diameter);
+            float empty_space = full_edge_length - (dot_count * diameter);
             float distance_between_centers = diameter + empty_space / dot_count;
-            vClipParams = vec4(edge_offset - radius,
+
+            // If the starting corner has a radius, we want to position the half dot right
+            // against that edge.
+            float starting_offset =
+                edge_offset + radius + (-diameter * float(start_corner_has_radius));
+
+            vClipParams = vec4(starting_offset,
                                distance_between_centers,
                                radius,
                                center_line);
+
             vClipSelect = 1.0;
             break;
         }
         default:
             vClipParams = vec4(1.0);
             vClipSelect = 0.0;
             break;
     }
 }
 
+bool hasRadius(vec2 radius) {
+    return any(notEqual(radius, vec2(0.0)));
+}
+
 void main(void) {
     Primitive prim = load_primitive();
     Border border = fetch_border(prim.specific_prim_address);
     int sub_part = prim.user_data0;
     BorderCorners corners = get_border_corners(border, prim.local_rect);
     vec4 color = border.colors[sub_part];
 
     // TODO(gw): Now that all border styles are supported, the
@@ -161,62 +181,70 @@ void main(void) {
             vec4 adjusted_widths = get_effective_border_widths(border, int(border.style.x));
             write_edge_distance(segment_rect.p0.x, border.widths.x, adjusted_widths.x, border.style.x, 0.0, 1.0);
             style = border.style.x;
             color_flip = false;
             write_clip_params(border.style.x,
                               border.widths.x,
                               segment_rect.size.y,
                               segment_rect.p0.y,
-                              segment_rect.p0.x + 0.5 * segment_rect.size.x);
+                              segment_rect.p0.x + 0.5 * segment_rect.size.x,
+                              hasRadius(border.radii[0].xy),
+                              hasRadius(border.radii[1].zw));
             edge_mask = vec4(1.0, 0.0, 1.0, 0.0);
             break;
         }
         case 1: {
             segment_rect.p0 = vec2(corners.tl_inner.x, corners.tl_outer.y);
             segment_rect.size = vec2(corners.tr_inner.x - corners.tl_inner.x, border.widths.y);
             vec4 adjusted_widths = get_effective_border_widths(border, int(border.style.y));
             write_edge_distance(segment_rect.p0.y, border.widths.y, adjusted_widths.y, border.style.y, 1.0, 1.0);
             style = border.style.y;
             color_flip = false;
             write_clip_params(border.style.y,
                               border.widths.y,
                               segment_rect.size.x,
                               segment_rect.p0.x,
-                              segment_rect.p0.y + 0.5 * segment_rect.size.y);
+                              segment_rect.p0.y + 0.5 * segment_rect.size.y,
+                              hasRadius(border.radii[0].xy),
+                              hasRadius(border.radii[0].zw));
             edge_mask = vec4(0.0, 1.0, 0.0, 1.0);
             break;
         }
         case 2: {
             segment_rect.p0 = vec2(corners.tr_outer.x - border.widths.z, corners.tr_inner.y);
             segment_rect.size = vec2(border.widths.z, corners.br_inner.y - corners.tr_inner.y);
             vec4 adjusted_widths = get_effective_border_widths(border, int(border.style.z));
             write_edge_distance(segment_rect.p0.x, border.widths.z, adjusted_widths.z, border.style.z, 0.0, -1.0);
             style = border.style.z;
             color_flip = true;
             write_clip_params(border.style.z,
                               border.widths.z,
                               segment_rect.size.y,
                               segment_rect.p0.y,
-                              segment_rect.p0.x + 0.5 * segment_rect.size.x);
+                              segment_rect.p0.x + 0.5 * segment_rect.size.x,
+                              hasRadius(border.radii[0].zw),
+                              hasRadius(border.radii[1].xy));
             edge_mask = vec4(1.0, 0.0, 1.0, 0.0);
             break;
         }
         case 3: {
             segment_rect.p0 = vec2(corners.bl_inner.x, corners.bl_outer.y - border.widths.w);
             segment_rect.size = vec2(corners.br_inner.x - corners.bl_inner.x, border.widths.w);
             vec4 adjusted_widths = get_effective_border_widths(border, int(border.style.w));
             write_edge_distance(segment_rect.p0.y, border.widths.w, adjusted_widths.w, border.style.w, 1.0, -1.0);
             style = border.style.w;
             color_flip = true;
             write_clip_params(border.style.w,
                               border.widths.w,
                               segment_rect.size.x,
                               segment_rect.p0.x,
-                              segment_rect.p0.y + 0.5 * segment_rect.size.y);
+                              segment_rect.p0.y + 0.5 * segment_rect.size.y,
+                              hasRadius(border.radii[1].zw),
+                              hasRadius(border.radii[1].xy));
             edge_mask = vec4(0.0, 1.0, 0.0, 1.0);
             break;
         }
         default:
             segment_rect.p0 = segment_rect.size = vec2(0.0);
             style = 0.0;
             color_flip = false;
             edge_mask = vec4(0.0);
--- a/gfx/webrender/src/batch.rs
+++ b/gfx/webrender/src/batch.rs
@@ -11,17 +11,17 @@ use clip::{ClipSource, ClipStore, ClipWo
 use clip_scroll_tree::{CoordinateSystemId};
 use euclid::{TypedTransform3D, vec3};
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheAddress};
 use gpu_types::{BrushFlags, BrushInstance, ClipChainRectIndex, ZBufferId, ZBufferIdGenerator};
 use gpu_types::{ClipMaskInstance, ClipScrollNodeIndex, RasterizationSpace};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
 use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
-use picture::{PictureCompositeMode, PicturePrimitive};
+use picture::{PictureCompositeMode, PicturePrimitive, PictureSurface};
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{CachedGradient, ImageSource, PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
 use prim_store::{BrushPrimitive, BrushKind, DeferredResolve, EdgeAaSegmentMask, PictureIndex, PrimitiveRun};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKind, RenderTaskTree};
 use renderer::{BlendMode, ImageBufferKind};
 use renderer::BLOCKS_PER_UV_RECT;
 use resource_cache::{CacheItem, GlyphFetchResult, ImageRequest, ResourceCache};
 use std::{usize, f32, i32};
@@ -509,18 +509,22 @@ impl AlphaBatchBuilder {
                 BlendMode::PremultipliedAlpha,
                 BatchTextures::no_texture(),
             );
             let pic_metadata = &ctx.prim_store.cpu_metadata[prim_index.0];
             let brush = &ctx.prim_store.cpu_brushes[pic_metadata.cpu_prim_index.0];
             let pic = &ctx.prim_store.pictures[brush.get_picture_index().0];
             let batch = self.batch_list.get_suitable_batch(key, &pic_metadata.screen_rect.as_ref().expect("bug").clipped);
 
-            let render_task_id = pic.surface.expect("BUG: unexpected surface in splitting");
-            let source_task_address = render_tasks.get_task_address(render_task_id);
+            let source_task_id = pic
+                .surface
+                .as_ref()
+                .expect("BUG: unexpected surface in splitting")
+                .resolve_render_task_id();
+            let source_task_address = render_tasks.get_task_address(source_task_id);
             let gpu_address = gpu_handle.as_int(gpu_cache);
 
             let instance = CompositePrimitiveInstance::new(
                 task_address,
                 source_task_address,
                 RenderTaskAddress(0),
                 gpu_address,
                 0,
@@ -633,17 +637,17 @@ impl AlphaBatchBuilder {
             BlendMode::None
         };
 
         match prim_metadata.prim_kind {
             PrimitiveKind::Brush => {
                 let brush = &ctx.prim_store.cpu_brushes[prim_metadata.cpu_prim_index.0];
 
                 match brush.kind {
-                    BrushKind::Picture { pic_index, source_kind, .. } => {
+                    BrushKind::Picture { pic_index, .. } => {
                         let picture =
                             &ctx.prim_store.pictures[pic_index.0];
 
                         // If this picture is participating in a 3D rendering context,
                         // then don't add it to any batches here. Instead, create a polygon
                         // for it and add it to the current plane splitter.
                         if picture.is_in_3d_context {
                             // Push into parent plane splitter.
@@ -664,118 +668,144 @@ impl AlphaBatchBuilder {
                             return;
                         }
 
                         let add_to_parent_pic = match picture.composite_mode {
                             Some(PictureCompositeMode::Filter(filter)) => {
                                 match filter {
                                     FilterOp::Blur(..) => {
                                         match picture.surface {
-                                            Some(cache_task_id) => {
+                                            Some(ref surface) => {
                                                 let kind = BatchKind::Brush(
                                                     BrushBatchKind::Image(ImageBufferKind::Texture2DArray)
                                                 );
+                                                let (uv_rect_address, textures) = surface
+                                                    .resolve(
+                                                        render_tasks,
+                                                        ctx.resource_cache,
+                                                        gpu_cache,
+                                                    );
                                                 let key = BatchKey::new(
                                                     kind,
                                                     non_segmented_blend_mode,
-                                                    BatchTextures::render_target_cache(),
+                                                    textures,
                                                 );
                                                 let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
 
-                                                let uv_rect_address = render_tasks[cache_task_id]
-                                                    .get_texture_handle()
-                                                    .as_int(gpu_cache);
-
                                                 let instance = BrushInstance {
                                                     picture_address: task_address,
                                                     prim_address: prim_cache_address,
                                                     clip_chain_rect_index,
                                                     scroll_id,
                                                     clip_task_address,
                                                     z,
                                                     segment_index: 0,
                                                     edge_flags: EdgeAaSegmentMask::empty(),
                                                     brush_flags: BrushFlags::empty(),
                                                     user_data: [
-                                                        uv_rect_address,
+                                                        uv_rect_address.as_int(),
                                                         (BrushImageSourceKind::Color as i32) << 16 |
                                                         RasterizationSpace::Screen as i32,
                                                         picture.extra_gpu_data_handle.as_int(gpu_cache),
                                                     ],
                                                 };
                                                 batch.push(PrimitiveInstance::from(instance));
                                                 false
                                             }
                                             None => {
                                                 true
                                             }
                                         }
                                     }
                                     FilterOp::DropShadow(..) => {
-                                        if let Some(cache_task_id) = picture.surface {
+                                        // Draw an instance of the shadow first, followed by the content.
+
+                                        // Both the shadow and the content get drawn as a brush image.
+                                        if let Some(ref surface) = picture.surface {
                                             let kind = BatchKind::Brush(
                                                 BrushBatchKind::Image(ImageBufferKind::Texture2DArray),
                                             );
 
-                                            let (textures, task_id) = match source_kind {
-                                                BrushImageSourceKind::Color => {
-                                                    let secondary_id = picture.secondary_render_task_id.expect("no secondary!?");
-                                                    let saved_index = render_tasks[secondary_id].saved_index.expect("no saved index!?");
-                                                    debug_assert_ne!(saved_index, SavedTargetIndex::PENDING);
-                                                    let textures = BatchTextures {
-                                                        colors: [
-                                                            SourceTexture::RenderTaskCache(saved_index),
-                                                            SourceTexture::Invalid,
-                                                            SourceTexture::Invalid,
-                                                        ],
-                                                    };
-                                                    (textures, secondary_id)
-                                                }
-                                                BrushImageSourceKind::ColorAlphaMask => {
-                                                    (BatchTextures::render_target_cache(), cache_task_id)
-                                                }
+                                            // Gets the saved render task ID of the content, which is
+                                            // deeper in the render task tree than the direct child.
+                                            let secondary_id = picture.secondary_render_task_id.expect("no secondary!?");
+                                            let saved_index = render_tasks[secondary_id].saved_index.expect("no saved index!?");
+                                            debug_assert_ne!(saved_index, SavedTargetIndex::PENDING);
+
+                                            // Build BatchTextures for shadow/content
+                                            let shadow_textures = BatchTextures::render_target_cache();
+                                            let content_textures = BatchTextures {
+                                                colors: [
+                                                    SourceTexture::RenderTaskCache(saved_index),
+                                                    SourceTexture::Invalid,
+                                                    SourceTexture::Invalid,
+                                                ],
                                             };
 
-                                            let key = BatchKey::new(
-                                                kind,
-                                                non_segmented_blend_mode,
-                                                textures,
-                                            );
+                                            // Build batch keys for shadow/content
+                                            let shadow_key = BatchKey::new(kind, non_segmented_blend_mode, shadow_textures);
+                                            let content_key = BatchKey::new(kind, non_segmented_blend_mode, content_textures);
 
-                                            let uv_rect_address = render_tasks[task_id]
-                                                .get_texture_handle()
-                                                .as_int(gpu_cache);
+                                            // Retrieve the UV rect addresses for shadow/content.
+                                            let cache_task_id = surface.resolve_render_task_id();
+                                            let shadow_uv_rect_address = render_tasks[cache_task_id]
+                                                .get_texture_address(gpu_cache)
+                                                .as_int();
+                                            let content_uv_rect_address = render_tasks[secondary_id]
+                                                .get_texture_address(gpu_cache)
+                                                .as_int();
 
-                                            let instance = BrushInstance {
+                                            // Get the GPU cache address of the extra data handle.
+                                            let extra_data_address = gpu_cache.get_address(&picture.extra_gpu_data_handle);
+                                            let shadow_prim_address = extra_data_address.offset(3);
+                                            let shadow_data_address = extra_data_address.offset(7);
+
+                                            let shadow_instance = BrushInstance {
                                                 picture_address: task_address,
-                                                prim_address: prim_cache_address,
+                                                prim_address: shadow_prim_address,
                                                 clip_chain_rect_index,
                                                 scroll_id,
                                                 clip_task_address,
                                                 z,
                                                 segment_index: 0,
                                                 edge_flags: EdgeAaSegmentMask::empty(),
                                                 brush_flags: BrushFlags::empty(),
                                                 user_data: [
-                                                    uv_rect_address,
-                                                    (source_kind as i32) << 16 |
+                                                    shadow_uv_rect_address,
+                                                    (BrushImageSourceKind::ColorAlphaMask as i32) << 16 |
                                                     RasterizationSpace::Screen as i32,
-                                                    picture.extra_gpu_data_handle.as_int(gpu_cache),
+                                                    shadow_data_address.as_int(),
                                                 ],
                                             };
 
-                                            let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
-                                            batch.push(PrimitiveInstance::from(instance));
+                                            let content_instance = BrushInstance {
+                                                prim_address: prim_cache_address,
+                                                user_data: [
+                                                    content_uv_rect_address,
+                                                    (BrushImageSourceKind::Color as i32) << 16 |
+                                                    RasterizationSpace::Screen as i32,
+                                                    extra_data_address.as_int(),
+                                                ],
+                                                ..shadow_instance
+                                            };
+
+                                            self.batch_list
+                                                .get_suitable_batch(shadow_key, &task_relative_bounding_rect)
+                                                .push(PrimitiveInstance::from(shadow_instance));
+
+                                            self.batch_list
+                                                .get_suitable_batch(content_key, &task_relative_bounding_rect)
+                                                .push(PrimitiveInstance::from(content_instance));
                                         }
 
                                         false
                                     }
                                     _ => {
                                         match picture.surface {
-                                            Some(cache_task_id) => {
+                                            Some(ref surface) => {
                                                 let key = BatchKey::new(
                                                     BatchKind::Brush(BrushBatchKind::Blend),
                                                     BlendMode::PremultipliedAlpha,
                                                     BatchTextures::render_target_cache(),
                                                 );
 
                                                 let filter_mode = match filter {
                                                     FilterOp::Blur(..) => 0,
@@ -809,16 +839,17 @@ impl AlphaBatchBuilder {
                                                     FilterOp::DropShadow(..) => {
                                                         unreachable!();
                                                     }
                                                     FilterOp::ColorMatrix(_) => {
                                                         picture.extra_gpu_data_handle.as_int(gpu_cache)
                                                     }
                                                 };
 
+                                                let cache_task_id = surface.resolve_render_task_id();
                                                 let cache_task_address = render_tasks.get_task_address(cache_task_id);
 
                                                 let instance = BrushInstance {
                                                     picture_address: task_address,
                                                     prim_address: prim_cache_address,
                                                     clip_chain_rect_index,
                                                     scroll_id,
                                                     clip_task_address,
@@ -840,17 +871,21 @@ impl AlphaBatchBuilder {
                                             None => {
                                                 true
                                             }
                                         }
                                     }
                                 }
                             }
                             Some(PictureCompositeMode::MixBlend(mode)) => {
-                                let cache_task_id = picture.surface.expect("bug: no surface allocated");
+                                let cache_task_id = picture
+                                    .surface
+                                    .as_ref()
+                                    .expect("bug: no surface allocated")
+                                    .resolve_render_task_id();
                                 let backdrop_id = picture.secondary_render_task_id.expect("no backdrop!?");
 
                                 let key = BatchKey::new(
                                     BatchKind::Brush(
                                         BrushBatchKind::MixBlend {
                                             task_id,
                                             source_id: cache_task_id,
                                             backdrop_id,
@@ -879,34 +914,37 @@ impl AlphaBatchBuilder {
                                         source_task_address.0 as i32,
                                     ],
                                 };
 
                                 batch.push(PrimitiveInstance::from(instance));
                                 false
                             }
                             Some(PictureCompositeMode::Blit) => {
-                                let cache_task_id =
-                                    picture.surface.expect("bug: no surface allocated");
+                                let cache_task_id = picture
+                                    .surface
+                                    .as_ref()
+                                    .expect("bug: no surface allocated")
+                                    .resolve_render_task_id();
                                 let kind = BatchKind::Brush(
                                     BrushBatchKind::Image(ImageBufferKind::Texture2DArray)
                                 );
                                 let key = BatchKey::new(
                                     kind,
                                     non_segmented_blend_mode,
                                     BatchTextures::render_target_cache(),
                                 );
                                 let batch = self.batch_list.get_suitable_batch(
                                     key,
                                     &task_relative_bounding_rect
                                 );
 
                                 let uv_rect_address = render_tasks[cache_task_id]
-                                    .get_texture_handle()
-                                    .as_int(gpu_cache);
+                                    .get_texture_address(gpu_cache)
+                                    .as_int();
 
                                 let instance = BrushInstance {
                                     picture_address: task_address,
                                     prim_address: prim_cache_address,
                                     clip_chain_rect_index,
                                     scroll_id,
                                     clip_task_address,
                                     z,
@@ -1039,18 +1077,24 @@ impl AlphaBatchBuilder {
                     ImageSource::Default => {
                         resolve_image(
                             image_cpu.key.request,
                             ctx.resource_cache,
                             gpu_cache,
                             deferred_resolves,
                         )
                     }
-                    ImageSource::Cache { ref item, .. } => {
-                        item.clone()
+                    ImageSource::Cache { ref handle, .. } => {
+                        let rt_handle = handle
+                            .as_ref()
+                            .expect("bug: render task handle not allocated");
+                        let rt_cache_entry = ctx
+                            .resource_cache
+                            .get_cached_render_task(rt_handle);
+                        ctx.resource_cache.get_texture_cache_item(&rt_cache_entry.handle)
                     }
                 };
 
                 if cache_item.texture_id == SourceTexture::Invalid {
                     warn!("Warnings: skip a PrimitiveKind::Image");
                     debug!("at {:?}.", task_relative_bounding_rect);
                     return;
                 }
@@ -1258,28 +1302,32 @@ impl BrushPrimitive {
         &self,
         resource_cache: &ResourceCache,
         gpu_cache: &mut GpuCache,
         deferred_resolves: &mut Vec<DeferredResolve>,
         cached_gradients: &[CachedGradient],
     ) -> Option<(BrushBatchKind, BatchTextures, [i32; 3])> {
         match self.kind {
             BrushKind::Image { request, ref source, .. } => {
-
                 let cache_item = match *source {
                     ImageSource::Default => {
                         resolve_image(
                             request,
                             resource_cache,
                             gpu_cache,
                             deferred_resolves,
                         )
                     }
-                    ImageSource::Cache { ref item, .. } => {
-                        item.clone()
+                    ImageSource::Cache { ref handle, .. } => {
+                        let rt_handle = handle
+                            .as_ref()
+                            .expect("bug: render task handle not allocated");
+                        let rt_cache_entry = resource_cache
+                            .get_cached_render_task(rt_handle);
+                        resource_cache.get_texture_cache_item(&rt_cache_entry.handle)
                     }
                 };
 
                 if cache_item.texture_id == SourceTexture::Invalid {
                     None
                 } else {
                     let textures = BatchTextures::color(cache_item.texture_id);
 
@@ -1437,16 +1485,60 @@ impl AlphaBatchHelpers for PrimitiveStor
                     AlphaType::PremultipliedAlpha => BlendMode::PremultipliedAlpha,
                     AlphaType::Alpha => BlendMode::Alpha,
                 }
             }
         }
     }
 }
 
+impl PictureSurface {
+    // Resolve the uv rect GPU cache address and batch textures for this surface.
+    fn resolve(
+        &self,
+        render_tasks: &RenderTaskTree,
+        resource_cache: &ResourceCache,
+        gpu_cache: &GpuCache,
+    ) -> (GpuCacheAddress, BatchTextures) {
+        match *self {
+            PictureSurface::TextureCache(ref handle) => {
+                let rt_cache_entry = resource_cache
+                    .get_cached_render_task(handle);
+                let cache_item = resource_cache
+                    .get_texture_cache_item(&rt_cache_entry.handle);
+
+                (
+                    gpu_cache.get_address(&cache_item.uv_rect_handle),
+                    BatchTextures::color(cache_item.texture_id),
+                )
+            }
+            PictureSurface::RenderTask(task_id) => {
+                (
+                    render_tasks[task_id].get_texture_address(gpu_cache),
+                    BatchTextures::render_target_cache(),
+                )
+            }
+        }
+    }
+
+    // Retrieve the render task id for a picture surface. Panics if the
+    // surface is a `TextureCache` entry, so only call this where the
+    // picture is known never to be persisted in the texture cache.
+    fn resolve_render_task_id(&self) -> RenderTaskId {
+        match *self {
+            PictureSurface::TextureCache(..) => {
+                panic!("BUG: unexpectedly cached render task");
+            }
+            PictureSurface::RenderTask(task_id) => {
+                task_id
+            }
+        }
+    }
+}
+
 pub fn resolve_image(
     request: ImageRequest,
     resource_cache: &ResourceCache,
     gpu_cache: &mut GpuCache,
     deferred_resolves: &mut Vec<DeferredResolve>,
 ) -> CacheItem {
     match resource_cache.get_image_properties(request.key) {
         Some(image_properties) => {
@@ -1615,24 +1707,32 @@ impl ClipBatcher {
                     }
                     ClipSource::LineDecoration(..) => {
                         self.line_decorations.push(ClipMaskInstance {
                             clip_data_address: gpu_address,
                             ..instance
                         });
                     }
                     ClipSource::BoxShadow(ref info) => {
-                        debug_assert_ne!(info.cache_item.texture_id, SourceTexture::Invalid);
+                        let rt_handle = info
+                            .cache_handle
+                            .as_ref()
+                            .expect("bug: render task handle not allocated");
+                        let rt_cache_entry = resource_cache
+                            .get_cached_render_task(rt_handle);
+                        let cache_item = resource_cache
+                            .get_texture_cache_item(&rt_cache_entry.handle);
+                        debug_assert_ne!(cache_item.texture_id, SourceTexture::Invalid);
 
                         self.box_shadows
-                            .entry(info.cache_item.texture_id)
+                            .entry(cache_item.texture_id)
                             .or_insert(Vec::new())
                             .push(ClipMaskInstance {
                                 clip_data_address: gpu_address,
-                                resource_address: gpu_cache.get_address(&info.cache_item.uv_rect_handle),
+                                resource_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
                                 ..instance
                             });
                     }
                     ClipSource::Rectangle(_, mode) => {
                         if work_item.coordinate_system_id != coordinate_system_id ||
                            mode == ClipMode::ClipOut {
                             self.rectangles.push(ClipMaskInstance {
                                 clip_data_address: gpu_address,
--- a/gfx/webrender/src/border.rs
+++ b/gfx/webrender/src/border.rs
@@ -89,16 +89,25 @@ impl BorderCornerKind {
         let clip_data = BorderCornerClipData {
             corner_rect: LayerRect::new(origin, size),
             clip_center,
             corner: pack_as_float(corner as u32),
             kind: pack_as_float(kind as u32),
         };
         BorderCornerKind::Mask(clip_data, radius, LayerSize::new(width0, width1), kind)
     }
+
+    // Mask corners carry their own radius; all other kinds use the original.
+    fn get_radius(&self, original_radius: &LayerSize) -> LayerSize {
+        match *self {
+            BorderCornerKind::Mask(_, ref radius, _, _) => *radius,
+            BorderCornerKind::Solid | BorderCornerKind::Clip(..) |
+            BorderCornerKind::None => *original_radius,
+        }
+    }
 }
 
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub enum BorderEdgeKind {
     None,
     Solid,
     Clip,
 }
@@ -163,24 +172,33 @@ fn get_corner(
         (BorderStyle::Dashed, BorderStyle::Dashed) => BorderCornerKind::new_mask(
             BorderCornerClipKind::Dash,
             width0,
             width1,
             corner,
             *radius,
             *border_rect,
         ),
-        (BorderStyle::Dotted, BorderStyle::Dotted) => BorderCornerKind::new_mask(
-            BorderCornerClipKind::Dot,
-            width0,
-            width1,
-            corner,
-            *radius,
-            *border_rect,
-        ),
+        (BorderStyle::Dotted, BorderStyle::Dotted) => {
+            let mut radius = *radius;
+            if radius.width < width0 {
+                radius.width = 0.0;
+            }
+            if radius.height < width1 {
+                radius.height = 0.0;
+            }
+            BorderCornerKind::new_mask(
+                BorderCornerClipKind::Dot,
+                width0,
+                width1,
+                corner,
+                radius,
+                *border_rect,
+             )
+        }
 
         // Draw border transitions with dots and/or dashes as
         // solid segments. The old border path didn't support
         // this anyway, so we might as well start using the new
         // border path here, since the dashing in the edges is
         // much higher quality anyway.
         (BorderStyle::Dotted, _) |
         (_, BorderStyle::Dotted) |
@@ -261,23 +279,23 @@ pub fn ensure_no_corner_overlap(
     }
 }
 
 impl<'a> DisplayListFlattener<'a> {
     fn add_normal_border_primitive(
         &mut self,
         info: &LayerPrimitiveInfo,
         border: &NormalBorder,
+        radius: &BorderRadius,
         widths: &BorderWidths,
         clip_and_scroll: ScrollNodeAndClipChain,
         corner_instances: [BorderCornerInstance; 4],
         edges: [BorderEdgeKind; 4],
         clip_sources: Vec<ClipSource>,
     ) {
-        let radius = &border.radius;
         let left = &border.left;
         let right = &border.right;
         let top = &border.top;
         let bottom = &border.bottom;
 
         // These colors are used during inset/outset scaling.
         let left_color = left.border_color(1.0, 2.0 / 3.0, 0.3, 0.7).premultiplied();
         let top_color = top.border_color(1.0, 2.0 / 3.0, 0.3, 0.7).premultiplied();
@@ -564,19 +582,27 @@ impl<'a> DisplayListFlattener<'a> {
                     Vec::new(),
                 );
             }
         } else {
             // Create clip masks for border corners, if required.
             let mut extra_clips = Vec::new();
             let mut corner_instances = [BorderCornerInstance::Single; 4];
 
+            let radius = &border.radius;
+            let radius = BorderRadius {
+                top_left: corners[0].get_radius(&radius.top_left),
+                top_right: corners[1].get_radius(&radius.top_right),
+                bottom_right: corners[2].get_radius(&radius.bottom_right),
+                bottom_left: corners[3].get_radius(&radius.bottom_left),
+            };
+
             for (i, corner) in corners.iter().enumerate() {
                 match *corner {
-                    BorderCornerKind::Mask(corner_data, corner_radius, widths, kind) => {
+                    BorderCornerKind::Mask(corner_data, mut corner_radius, widths, kind) => {
                         let clip_source =
                             BorderCornerClipSource::new(corner_data, corner_radius, widths, kind);
                         extra_clips.push(ClipSource::BorderCorner(clip_source));
                     }
                     BorderCornerKind::Clip(instance_kind) => {
                         corner_instances[i] = instance_kind;
                     }
                     BorderCornerKind::Solid => {}
@@ -584,16 +610,17 @@ impl<'a> DisplayListFlattener<'a> {
                         corner_instances[i] = BorderCornerInstance::None;
                     }
                 }
             }
 
             self.add_normal_border_primitive(
                 info,
                 &border,
+                &radius,
                 widths,
                 clip_and_scroll,
                 corner_instances,
                 edges,
                 extra_clips,
             );
         }
     }
@@ -685,33 +712,45 @@ impl BorderCornerClipSource {
                 let desired_count = 0.5 * ellipse.total_arc_length / desired_dash_arc_length;
 
                 // Round that up to the nearest integer, so that the dash length
                 // doesn't exceed the ratio above. Add one extra dash to cover
                 // the last half-dash of the arc.
                 (ellipse, 1 + desired_count.ceil() as usize)
             }
             BorderCornerClipKind::Dot => {
-                // The centers of dots follow an ellipse along the middle of the
-                // border radius.
-                let inner_radius = (corner_radius - widths * 0.5).abs();
-                let ellipse = Ellipse::new(inner_radius);
+                let mut corner_radius = corner_radius;
+                if corner_radius.width < (widths.width / 2.0) {
+                    corner_radius.width = 0.0;
+                }
+                if corner_radius.height < (widths.height / 2.0) {
+                    corner_radius.height = 0.0;
+                }
 
-                // Allocate a "worst case" number of dot clips. This can be
-                // calculated by taking the minimum edge radius, since that
-                // will result in the maximum number of dots along the path.
-                let min_diameter = widths.width.min(widths.height);
+                if corner_radius.width == 0. && corner_radius.height == 0. {
+                    (Ellipse::new(corner_radius), 1)
+                } else {
+                    // The centers of dots follow an ellipse along the middle of the
+                    // border radius.
+                    let inner_radius = (corner_radius - widths * 0.5).abs();
+                    let ellipse = Ellipse::new(inner_radius);
 
-                // Get the number of circles (assuming spacing of one diameter
-                // between dots).
-                let max_dot_count = 0.5 * ellipse.total_arc_length / min_diameter;
+                    // Allocate a "worst case" number of dot clips. This can be
+                    // calculated by taking the minimum edge radius, since that
+                    // will result in the maximum number of dots along the path.
+                    let min_diameter = widths.width.min(widths.height);
 
-                // Add space for one extra dot since they are centered at the
-                // start of the arc.
-                (ellipse, 1 + max_dot_count.ceil() as usize)
+                    // Get the number of circles (assuming spacing of one diameter
+                    // between dots).
+                    let max_dot_count = 0.5 * ellipse.total_arc_length / min_diameter;
+
+                    // Add space for one extra dot since they are centered at the
+                    // start of the arc.
+                    (ellipse, 1 + max_dot_count.ceil() as usize)
+                }
             }
         };
 
         BorderCornerClipSource {
             kind,
             corner_data,
             max_clip_count,
             actual_clip_count: 0,
@@ -739,16 +778,26 @@ impl BorderCornerClipSource {
 
                     let dash_data =
                         BorderCornerDashClipData::new(arc_length0, arc_length1, &self.ellipse);
                     dash_data.write(&mut request);
                 }
 
                 assert_eq!(request.close(), 2 + 2 * self.actual_clip_count);
             }
+            BorderCornerClipKind::Dot if self.max_clip_count == 1 => {
+                let dot_diameter = lerp(self.widths.width, self.widths.height, 0.5);
+                let dot = BorderCornerDotClipData {
+                    center: LayerPoint::new(self.widths.width / 2.0, self.widths.height / 2.0),
+                    radius: 0.5 * dot_diameter,
+                };
+                self.actual_clip_count = 1;
+                dot.write(&mut request);
+                assert_eq!(request.close(), 3);
+            }
             BorderCornerClipKind::Dot => {
                 let mut forward_dots = Vec::new();
                 let mut back_dots = Vec::new();
                 let mut leftover_arc_length = 0.0;
 
                 // Alternate between adding dots at the start and end of the
                 // ellipse arc. This ensures that we always end up with an exact
                 // half dot at each end of the arc, to match up with the edges.
--- a/gfx/webrender/src/box_shadow.rs
+++ b/gfx/webrender/src/box_shadow.rs
@@ -5,32 +5,32 @@
 use api::{BorderRadius, BoxShadowClipMode, ClipMode, ColorF, DeviceIntSize, LayerPrimitiveInfo};
 use api::{LayerRect, LayerSize, LayerVector2D, LayoutSize};
 use clip::ClipSource;
 use display_list_flattener::DisplayListFlattener;
 use gpu_cache::GpuCacheHandle;
 use gpu_types::BoxShadowStretchMode;
 use prim_store::{BrushKind, BrushPrimitive, PrimitiveContainer};
 use prim_store::ScrollNodeAndClipChain;
-use resource_cache::CacheItem;
+use render_task::RenderTaskCacheEntryHandle;
 use util::RectHelpers;
 
 #[derive(Debug)]
 pub struct BoxShadowClipSource {
     // Parameters that define the shadow and are constant.
     pub shadow_radius: BorderRadius,
     pub blur_radius: f32,
     pub clip_mode: BoxShadowClipMode,
     pub stretch_mode_x: BoxShadowStretchMode,
     pub stretch_mode_y: BoxShadowStretchMode,
 
     // The current cache key (in device-pixels), and handles
     // to the cached clip region and blurred texture.
     pub cache_key: Option<(DeviceIntSize, BoxShadowCacheKey)>,
-    pub cache_item: CacheItem,
+    pub cache_handle: Option<RenderTaskCacheEntryHandle>,
     pub clip_data_handle: GpuCacheHandle,
 
     // Local-space size of the required render task size.
     pub shadow_rect_alloc_size: LayerSize,
 
     // The minimal shadow rect for the parameters above,
     // used when drawing the shadow rect to be blurred.
     pub minimal_shadow_rect: LayerRect,
--- a/gfx/webrender/src/clip.rs
+++ b/gfx/webrender/src/clip.rs
@@ -9,24 +9,27 @@ use border::{BorderCornerClipSource, ens
 use box_shadow::{BLUR_SAMPLE_SCALE, BoxShadowClipSource, BoxShadowCacheKey};
 use clip_scroll_tree::{ClipChainIndex, CoordinateSystemId};
 use ellipse::Ellipse;
 use freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle, ToGpuBlocks};
 use gpu_types::{BoxShadowStretchMode, ClipScrollNodeIndex};
 use prim_store::{ClipData, ImageMaskData};
 use render_task::to_cache_size;
-use resource_cache::{CacheItem, ImageRequest, ResourceCache};
+use resource_cache::{ImageRequest, ResourceCache};
 use util::{LayerToWorldFastTransform, MaxRect, calculate_screen_bounding_rect};
 use util::{extract_inner_rect_safe, pack_as_float};
 use std::sync::Arc;
 
-pub type ClipStore = FreeList<ClipSources>;
-pub type ClipSourcesHandle = FreeListHandle<ClipSources>;
-pub type ClipSourcesWeakHandle = WeakFreeListHandle<ClipSources>;
+#[derive(Debug)]
+pub enum ClipStoreMarker {}
+
+pub type ClipStore = FreeList<ClipSources, ClipStoreMarker>;
+pub type ClipSourcesHandle = FreeListHandle<ClipStoreMarker>;
+pub type ClipSourcesWeakHandle = WeakFreeListHandle<ClipStoreMarker>;
 
 #[derive(Debug)]
 pub struct LineDecorationClipSource {
     rect: LayerRect,
     style: LineStyle,
     orientation: LineOrientation,
     wavy_line_thickness: f32,
 }
@@ -232,17 +235,17 @@ impl ClipSource {
         ClipSource::BoxShadow(BoxShadowClipSource {
             shadow_rect_alloc_size,
             shadow_radius,
             prim_shadow_rect,
             blur_radius,
             clip_mode,
             stretch_mode_x,
             stretch_mode_y,
-            cache_item: CacheItem::invalid(),
+            cache_handle: None,
             cache_key: None,
             clip_data_handle: GpuCacheHandle::new(),
             minimal_shadow_rect,
         })
     }
 
     // Return a modified clip source that is the same as self
     // but offset in local-space by a specified amount.
@@ -553,25 +556,27 @@ pub struct ClipChainNode {
 }
 
 #[derive(Debug, Clone)]
 pub struct ClipChain {
     pub parent_index: Option<ClipChainIndex>,
     pub combined_outer_screen_rect: DeviceIntRect,
     pub combined_inner_screen_rect: DeviceIntRect,
     pub nodes: ClipChainNodeRef,
+    pub has_non_root_coord_system: bool,
 }
 
 impl ClipChain {
     pub fn empty(screen_rect: &DeviceIntRect) -> ClipChain {
         ClipChain {
             parent_index: None,
             combined_inner_screen_rect: *screen_rect,
             combined_outer_screen_rect: *screen_rect,
             nodes: None,
+            has_non_root_coord_system: false,
         }
     }
 
     pub fn new_with_added_node(&self, new_node: &ClipChainNode) -> ClipChain {
         // If the new node's inner rectangle completely surrounds our outer rectangle,
         // we can discard the new node entirely since it isn't going to affect anything.
         if new_node.screen_inner_rect.contains_rect(&self.combined_outer_screen_rect) {
             return self.clone();
@@ -594,16 +599,18 @@ impl ClipChain {
 
         self.combined_outer_screen_rect =
             self.combined_outer_screen_rect.intersection(&new_node.screen_outer_rect)
             .unwrap_or_else(DeviceIntRect::zero);
         self.combined_inner_screen_rect =
             self.combined_inner_screen_rect.intersection(&new_node.screen_inner_rect)
             .unwrap_or_else(DeviceIntRect::zero);
 
+        self.has_non_root_coord_system |= new_node.work_item.coordinate_system_id != CoordinateSystemId::root();
+
         self.nodes = Some(Arc::new(new_node));
     }
 }
 
 pub struct ClipChainNodeIter {
     pub current: ClipChainNodeRef,
 }
 
--- a/gfx/webrender/src/display_list_flattener.rs
+++ b/gfx/webrender/src/display_list_flattener.rs
@@ -9,17 +9,16 @@ use api::{DevicePixelScale, DeviceUintRe
 use api::{FilterOp, FontInstanceKey, FontRenderMode, GlyphInstance, GlyphOptions, GradientStop};
 use api::{IframeDisplayItem, ImageKey, ImageRendering, ItemRange, LayerPoint, LayerPrimitiveInfo};
 use api::{LayerRect, LayerSize, LayerVector2D, LayoutRect, LayoutSize, LayoutTransform};
 use api::{LayoutVector2D, LineOrientation, LineStyle, LocalClip, PipelineId, PropertyBinding};
 use api::{RepeatMode, ScrollFrameDisplayItem, ScrollPolicy, ScrollSensitivity, Shadow};
 use api::{SpecificDisplayItem, StackingContext, StickyFrameDisplayItem, TexelRect, TileOffset};
 use api::{TransformStyle, YuvColorSpace, YuvData};
 use app_units::Au;
-use batch::BrushImageSourceKind;
 use border::ImageBorderSegment;
 use clip::{ClipRegion, ClipSource, ClipSources, ClipStore};
 use clip_scroll_node::{ClipScrollNode, NodeType, StickyFrameInfo};
 use clip_scroll_tree::{ClipChainIndex, ClipScrollNodeIndex, ClipScrollTree};
 use euclid::{SideOffsets2D, vec2};
 use frame_builder::{FrameBuilder, FrameBuilderConfig};
 use glyph_rasterizer::FontInstance;
 use hit_test::{HitTestingItem, HitTestingRun};
@@ -1065,21 +1064,17 @@ impl<'a> DisplayListFlattener<'a> {
                 None,
                 false,
                 pipeline_id,
                 current_reference_frame_index,
                 None,
                 true,
             );
 
-            let prim = BrushPrimitive::new_picture(
-                container_index,
-                BrushImageSourceKind::Color,
-                LayerVector2D::zero(),
-            );
+            let prim = BrushPrimitive::new_picture(container_index);
 
             let prim_index = self.prim_store.add_primitive(
                 &LayerRect::zero(),
                 &max_clip,
                 is_backface_visible,
                 None,
                 None,
                 PrimitiveContainer::Brush(prim),
@@ -1119,84 +1114,46 @@ impl<'a> DisplayListFlattener<'a> {
                 Some(PictureCompositeMode::Filter(*filter)),
                 false,
                 pipeline_id,
                 current_reference_frame_index,
                 None,
                 true,
             );
 
-            // For drop shadows, add an extra brush::picture primitive
-            // that will draw the picture as an alpha mask.
-            let shadow_prim_index = match *filter {
-                FilterOp::DropShadow(offset, ..) => {
-                    let shadow_prim = BrushPrimitive::new_picture(
-                        src_pic_index,
-                        BrushImageSourceKind::ColorAlphaMask,
-                        offset,
-                    );
-                    Some(self.prim_store.add_primitive(
-                        &LayerRect::zero(),
-                        &max_clip,
-                        is_backface_visible,
-                        None,
-                        None,
-                        PrimitiveContainer::Brush(shadow_prim),
-                    ))
-                }
-                _ => {
-                    None
-                }
-            };
-
-            let src_prim = BrushPrimitive::new_picture(
-                src_pic_index,
-                BrushImageSourceKind::Color,
-                LayoutVector2D::zero(),
-            );
+            let src_prim = BrushPrimitive::new_picture(src_pic_index);
             let src_prim_index = self.prim_store.add_primitive(
                 &LayerRect::zero(),
                 &max_clip,
                 is_backface_visible,
                 None,
                 None,
                 PrimitiveContainer::Brush(src_prim),
             );
 
             let parent_pic = &mut self.prim_store.pictures[parent_pic_index.0];
             parent_pic_index = src_pic_index;
 
-            if let Some(shadow_prim_index) = shadow_prim_index {
-                parent_pic.add_primitive(
-                    shadow_prim_index,
-                    clip_and_scroll,
-                );
-            }
-
             parent_pic.add_primitive(src_prim_index, clip_and_scroll);
 
             self.picture_stack.push(src_pic_index);
         }
 
         // Same for mix-blend-mode.
         if let Some(mix_blend_mode) = composite_ops.mix_blend_mode {
             let src_pic_index = self.prim_store.add_image_picture(
                 Some(PictureCompositeMode::MixBlend(mix_blend_mode)),
                 false,
                 pipeline_id,
                 current_reference_frame_index,
                 None,
                 true,
             );
 
-            let src_prim = BrushPrimitive::new_picture(
-                src_pic_index,
-                BrushImageSourceKind::Color,
-                LayoutVector2D::zero(),
-            );
+            let src_prim = BrushPrimitive::new_picture(src_pic_index);
 
             let src_prim_index = self.prim_store.add_primitive(
                 &LayerRect::zero(),
                 &max_clip,
                 is_backface_visible,
                 None,
                 None,
                 PrimitiveContainer::Brush(src_prim),
@@ -1242,21 +1199,17 @@ impl<'a> DisplayListFlattener<'a> {
             participating_in_3d_context,
             pipeline_id,
             current_reference_frame_index,
             frame_output_pipeline_id,
             true,
         );
 
         // Create a brush primitive that draws this picture.
-        let sc_prim = BrushPrimitive::new_picture(
-            pic_index,
-            BrushImageSourceKind::Color,
-            LayoutVector2D::zero(),
-        );
+        let sc_prim = BrushPrimitive::new_picture(pic_index);
 
         // Add the brush to the parent picture.
         let sc_prim_index = self.prim_store.add_primitive(
             &LayerRect::zero(),
             &max_clip,
             is_backface_visible,
             None,
             None,
@@ -1267,17 +1220,18 @@ impl<'a> DisplayListFlattener<'a> {
         parent_pic.add_primitive(sc_prim_index, clip_and_scroll);
 
         // Add this as the top-most picture for primitives to be added to.
         self.picture_stack.push(pic_index);
 
         // TODO(gw): This is super conservative. We can expand on this a lot
         //           once all the picture code is in place and landed.
         let allow_subpixel_aa = composite_ops.count() == 0 &&
-                                transform_style == TransformStyle::Flat;
+                                transform_style == TransformStyle::Flat &&
+                                composite_mode.is_none();
 
         // Push the SC onto the stack, so we know how to handle things in
         // pop_stacking_context.
         let sc = FlattenedStackingContext {
             composite_ops,
             is_backface_visible,
             pipeline_id,
             allow_subpixel_aa,
@@ -1481,21 +1435,17 @@ impl<'a> DisplayListFlattener<'a> {
             false,
             pipeline_id,
             current_reference_frame_index,
             None,
             apply_local_clip_rect,
         );
 
         // Create the primitive to draw the shadow picture into the scene.
-        let shadow_prim = BrushPrimitive::new_picture(
-            shadow_pic_index,
-            BrushImageSourceKind::Color,
-            LayoutVector2D::zero(),
-        );
+        let shadow_prim = BrushPrimitive::new_picture(shadow_pic_index);
         let shadow_prim_index = self.prim_store.add_primitive(
             &LayerRect::zero(),
             &max_clip,
             info.is_backface_visible,
             None,
             None,
             PrimitiveContainer::Brush(shadow_prim),
         );
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -8,16 +8,17 @@ use api::{LayerRect, LayerSize, Pipeline
 use clip::{ClipChain, ClipStore};
 use clip_scroll_node::{ClipScrollNode};
 use clip_scroll_tree::{ClipScrollNodeIndex, ClipScrollTree};
 use display_list_flattener::{DisplayListFlattener};
 use gpu_cache::GpuCache;
 use gpu_types::{ClipChainRectIndex, ClipScrollNodeData};
 use hit_test::{HitTester, HitTestingRun};
 use internal_types::{FastHashMap};
+use picture::PictureSurface;
 use prim_store::{CachedGradient, PrimitiveIndex, PrimitiveRun, PrimitiveStore};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_backend::FrameId;
 use render_task::{RenderTask, RenderTaskId, RenderTaskLocation, RenderTaskTree};
 use resource_cache::{ResourceCache};
 use scene::{ScenePipeline, SceneProperties};
 use std::{mem, f32};
 use std::sync::Arc;
@@ -75,22 +76,24 @@ pub struct PictureContext<'a> {
     pub display_list: &'a BuiltDisplayList,
     pub inv_world_transform: Option<WorldToLayerFastTransform>,
     pub apply_local_clip_rect: bool,
     pub inflation_factor: f32,
 }
 
 pub struct PictureState {
     pub tasks: Vec<RenderTaskId>,
+    pub has_non_root_coord_system: bool,
 }
 
 impl PictureState {
     pub fn new() -> PictureState {
         PictureState {
             tasks: Vec::new(),
+            has_non_root_coord_system: false,
         }
     }
 }
 
 pub struct PrimitiveRunContext<'a> {
     pub clip_chain: &'a ClipChain,
     pub scroll_node: &'a ClipScrollNode,
     pub clip_chain_rect_index: ClipChainRectIndex,
@@ -226,17 +229,17 @@ impl FrameBuilder {
         let root_render_task = RenderTask::new_picture(
             RenderTaskLocation::Fixed(frame_context.screen_rect),
             PrimitiveIndex(0),
             DeviceIntPoint::zero(),
             pic_state.tasks,
         );
 
         let render_task_id = frame_state.render_tasks.add(root_render_task);
-        pic.surface = Some(render_task_id);
+        pic.surface = Some(PictureSurface::RenderTask(render_task_id));
         Some(render_task_id)
     }
 
     fn update_scroll_bars(&mut self, clip_scroll_tree: &ClipScrollTree, gpu_cache: &mut GpuCache) {
         static SCROLLBAR_PADDING: f32 = 8.0;
 
         for scrollbar_prim in &self.scrollbar_prims {
             let metadata = &mut self.prim_store.cpu_metadata[scrollbar_prim.prim_index.0];
@@ -384,17 +387,17 @@ impl FrameBuilder {
 
             if let RenderPassKind::OffScreen { ref texture_cache, .. } = pass.kind {
                 has_texture_cache_tasks |= !texture_cache.is_empty();
             }
         }
 
         let gpu_cache_frame_id = gpu_cache.end_frame(gpu_cache_profile);
 
-        render_tasks.build();
+        render_tasks.write_task_data();
 
         resource_cache.end_frame();
 
         Frame {
             window_size: self.window_size,
             inner_rect: self.screen_rect,
             device_pixel_ratio: device_pixel_scale.0,
             background_color: self.background_color,
--- a/gfx/webrender/src/freelist.rs
+++ b/gfx/webrender/src/freelist.rs
@@ -12,139 +12,142 @@ use util::recycle_vec;
 #[derive(Debug, Copy, Clone, PartialEq)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 struct Epoch(u32);
 
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct FreeListHandle<T> {
+pub struct FreeListHandle<M> {
     index: u32,
     epoch: Epoch,
-    _marker: PhantomData<T>,
+    _marker: PhantomData<M>,
 }
 
-impl<T> FreeListHandle<T> {
-    pub fn weak(&self) -> WeakFreeListHandle<T> {
+impl<M> FreeListHandle<M> {
+    pub fn weak(&self) -> WeakFreeListHandle<M> {
         WeakFreeListHandle {
             index: self.index,
             epoch: self.epoch,
             _marker: PhantomData,
         }
     }
 }
 
-impl<T> Clone for WeakFreeListHandle<T> {
+impl<M> Clone for WeakFreeListHandle<M> {
     fn clone(&self) -> Self {
         WeakFreeListHandle {
             index: self.index,
             epoch: self.epoch,
             _marker: PhantomData,
         }
     }
 }
 
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct WeakFreeListHandle<T> {
+pub struct WeakFreeListHandle<M> {
     index: u32,
     epoch: Epoch,
-    _marker: PhantomData<T>,
+    _marker: PhantomData<M>,
 }
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 struct Slot<T> {
     next: Option<u32>,
     epoch: Epoch,
     value: Option<T>,
 }
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct FreeList<T> {
+pub struct FreeList<T, M> {
     slots: Vec<Slot<T>>,
     free_list_head: Option<u32>,
     active_count: usize,
+    _marker: PhantomData<M>,
 }
 
-pub enum UpsertResult<T> {
+pub enum UpsertResult<T, M> {
     Updated(T),
-    Inserted(FreeListHandle<T>),
+    Inserted(FreeListHandle<M>),
 }
 
-impl<T> FreeList<T> {
+impl<T, M> FreeList<T, M> {
     pub fn new() -> Self {
         FreeList {
             slots: Vec::new(),
             free_list_head: None,
             active_count: 0,
+            _marker: PhantomData,
         }
     }
 
-    pub fn recycle(self) -> FreeList<T> {
+    pub fn recycle(self) -> FreeList<T, M> {
         FreeList {
             slots: recycle_vec(self.slots),
             free_list_head: None,
             active_count: 0,
+            _marker: PhantomData,
         }
     }
 
     pub fn clear(&mut self) {
         self.slots.clear();
         self.free_list_head = None;
         self.active_count = 0;
     }
 
     #[allow(dead_code)]
-    pub fn get(&self, id: &FreeListHandle<T>) -> &T {
+    pub fn get(&self, id: &FreeListHandle<M>) -> &T {
         self.slots[id.index as usize].value.as_ref().unwrap()
     }
 
     #[allow(dead_code)]
-    pub fn get_mut(&mut self, id: &FreeListHandle<T>) -> &mut T {
+    pub fn get_mut(&mut self, id: &FreeListHandle<M>) -> &mut T {
         self.slots[id.index as usize].value.as_mut().unwrap()
     }
 
-    pub fn get_opt(&self, id: &WeakFreeListHandle<T>) -> Option<&T> {
+    pub fn get_opt(&self, id: &WeakFreeListHandle<M>) -> Option<&T> {
         let slot = &self.slots[id.index as usize];
         if slot.epoch == id.epoch {
             slot.value.as_ref()
         } else {
             None
         }
     }
 
-    pub fn get_opt_mut(&mut self, id: &WeakFreeListHandle<T>) -> Option<&mut T> {
+    pub fn get_opt_mut(&mut self, id: &WeakFreeListHandle<M>) -> Option<&mut T> {
         let slot = &mut self.slots[id.index as usize];
         if slot.epoch == id.epoch {
             slot.value.as_mut()
         } else {
             None
         }
     }
 
     // Perform a database style UPSERT operation. If the provided
     // handle is a valid entry, update the value and return the
     // previous data. If the provided handle is invalid, then
     // insert the data into a new slot and return the new handle.
-    pub fn upsert(&mut self, id: &WeakFreeListHandle<T>, data: T) -> UpsertResult<T> {
+    pub fn upsert(&mut self, id: &WeakFreeListHandle<M>, data: T) -> UpsertResult<T, M> {
         if self.slots[id.index as usize].epoch == id.epoch {
             let slot = &mut self.slots[id.index as usize];
             let result = UpsertResult::Updated(slot.value.take().unwrap());
             slot.value = Some(data);
             result
         } else {
             UpsertResult::Inserted(self.insert(data))
         }
     }
 
-    pub fn insert(&mut self, item: T) -> FreeListHandle<T> {
+    pub fn insert(&mut self, item: T) -> FreeListHandle<M> {
         self.active_count += 1;
 
         match self.free_list_head {
             Some(free_index) => {
                 let slot = &mut self.slots[free_index as usize];
 
                 // Remove from free list.
                 self.free_list_head = slot.next;
@@ -171,17 +174,17 @@ impl<T> FreeList<T> {
                     index,
                     epoch,
                     _marker: PhantomData,
                 }
             }
         }
     }
 
-    pub fn free(&mut self, id: FreeListHandle<T>) -> T {
+    pub fn free(&mut self, id: FreeListHandle<M>) -> T {
         self.active_count -= 1;
         let slot = &mut self.slots[id.index as usize];
         slot.next = self.free_list_head;
         slot.epoch = Epoch(slot.epoch.0 + 1);
         self.free_list_head = Some(id.index);
         slot.value.take().unwrap()
     }
 
--- a/gfx/webrender/src/gpu_cache.rs
+++ b/gfx/webrender/src/gpu_cache.rs
@@ -146,16 +146,23 @@ impl GpuCacheAddress {
     }
 
     pub fn invalid() -> Self {
         GpuCacheAddress {
             u: u16::MAX,
             v: u16::MAX,
         }
     }
+
+    pub fn offset(&self, offset: usize) -> Self {
+        GpuCacheAddress {
+            u: self.u + offset as u16,
+            v: self.v
+        }
+    }
 }
 
 impl Add<usize> for GpuCacheAddress {
     type Output = GpuCacheAddress;
 
     fn add(self, other: usize) -> GpuCacheAddress {
         GpuCacheAddress {
             u: self.u + other as u16,
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -177,14 +177,14 @@ extern crate image as image_loader;
 extern crate base64;
 #[cfg(all(feature = "capture", feature = "png"))]
 extern crate png;
 
 pub extern crate webrender_api;
 
 #[doc(hidden)]
 pub use device::{build_shader_strings, ProgramCache, ReadPixelsFormat, UploadMethod, VertexUsageHint};
-pub use renderer::{CpuProfile, DebugFlags, GpuProfile, OutputImageHandler, RendererKind};
-pub use renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource};
+pub use renderer::{AsyncPropertySampler, CpuProfile, DebugFlags, OutputImageHandler, RendererKind};
+pub use renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource, GpuProfile};
 pub use renderer::{GraphicsApi, GraphicsApiInfo, PipelineInfo, Renderer, RendererOptions};
 pub use renderer::{RendererStats, SceneBuilderHooks, ThreadListener};
 pub use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 pub use webrender_api as api;
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -1,23 +1,25 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{FilterOp, LayerVector2D, MixBlendMode, PipelineId, PremultipliedColorF};
-use api::{DeviceIntRect, LayerRect};
+use api::{FilterOp, MixBlendMode, PipelineId, PremultipliedColorF};
+use api::{DeviceIntRect, DeviceIntSize, LayerRect};
+use api::{PictureIntPoint, PictureIntRect, PictureIntSize};
 use box_shadow::{BLUR_SAMPLE_SCALE};
 use clip_scroll_tree::ClipScrollNodeIndex;
 use frame_builder::{FrameBuildingContext, FrameBuildingState, PictureState};
 use gpu_cache::{GpuCacheHandle};
 use prim_store::{PrimitiveIndex, PrimitiveRun, PrimitiveRunLocalRect};
 use prim_store::{PrimitiveMetadata, ScrollNodeAndClipChain};
-use render_task::{ClearMode, RenderTask};
-use render_task::{RenderTaskId, RenderTaskLocation};
+use render_task::{ClearMode, RenderTask, RenderTaskCacheEntryHandle};
+use render_task::{RenderTaskCacheKey, RenderTaskCacheKeyKind, RenderTaskId, RenderTaskLocation};
 use scene::{FilterOpHelpers, SceneProperties};
+use std::mem;
 use tiling::RenderTargetKind;
 
 /*
  A picture represents a dynamically rendered image. It consists of:
 
  * A number of primitives that are drawn onto the picture.
  * A composite operation describing how to composite this
    picture into its parent.
@@ -33,21 +35,89 @@ pub enum PictureCompositeMode {
     MixBlend(MixBlendMode),
     /// Apply a CSS filter.
     Filter(FilterOp),
     /// Draw to intermediate surface, copy straight across. This
     /// is used for CSS isolation, and plane splitting.
     Blit,
 }
 
+// Stores the location of the picture if it is drawn to
+// an intermediate surface. This can be a render task if
+// it is not persisted, or a texture cache item if the
+// picture is cached in the texture cache.
+#[derive(Debug)]
+pub enum PictureSurface {
+    RenderTask(RenderTaskId),
+    TextureCache(RenderTaskCacheEntryHandle),
+}
+
+// A unique identifier for a Picture. Once we start
+// doing deep compares of picture content, these
+// may be the same across display lists, but that's
+// not currently supported.
+#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PictureId(pub u64);
+
+// Cache key that determines whether a pre-existing
+// picture in the texture cache matches the content
+// of the current picture.
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PictureCacheKey {
+    // NOTE: We specifically want to ensure that we
+    //       don't include the device space origin
+    //       of this picture in the cache key, because
+    //       we want the cache to remain valid as it
+    //       is scrolled and/or translated by animation.
+    //       This is valid while we have the restriction
+    //       in place that only pictures that use the
+    //       root coordinate system are cached - once
+    //       we relax that, we'll need to consider some
+    //       extra parameters, depending on transform.
+
+    // The unique identifier for this picture.
+    // TODO(gw): Currently, these will not be
+    //           shared across new display lists,
+    //           so will only remain valid during
+    //           scrolling. Next step will be to
+    //           allow deep comparisons on pictures
+    //           between display lists, allowing
+    //           pictures that are the same to be
+    //           cached across display lists!
+    picture_id: PictureId,
+
+    // Store the rect within the unclipped device
+    // rect that we are actually rendering. This ensures
+    // that if the 'clipped' rect changes, we will see
+    // that the cache is invalid and re-draw the picture.
+    // TODO(gw): To reduce the number of invalidations that
+    //           happen as a cached picture scrolls off-screen,
+    //           we could round up the size of the off-screen
+    //           targets we draw (e.g. 512 pixels). This may
+    //           also simplify other parts of the code that
+    //           deal with clipped/unclipped rects, such as
+    //           the code to inflate the device rect for blurs.
+    pic_relative_render_rect: PictureIntRect,
+
+    // Ensure that if the overall size of the picture
+    // changes, the cache key will not match. This can
+    // happen, for example, during zooming or changes
+    // in device-pixel-ratio.
+    unclipped_size: DeviceIntSize,
+}
+
 #[derive(Debug)]
 pub struct PicturePrimitive {
     // If this picture is drawn to an intermediate surface,
     // the associated target information.
-    pub surface: Option<RenderTaskId>,
+    pub surface: Option<PictureSurface>,
 
     // List of primitive runs that make up this picture.
     pub runs: Vec<PrimitiveRun>,
 
     // The pipeline that the primitives on this picture belong to.
     pub pipeline_id: PipelineId,
 
     // If true, apply the local clip rect to primitive drawn
@@ -78,16 +148,19 @@ pub struct PicturePrimitive {
     // It is only different if this is part of a 3D
     // rendering context.
     pub reference_frame_index: ClipScrollNodeIndex,
     pub real_local_rect: LayerRect,
     // An optional cache handle for storing extra data
     // in the GPU cache, depending on the type of
     // picture.
     pub extra_gpu_data_handle: GpuCacheHandle,
+
+    // Unique identifier for this picture.
+    pub id: PictureId,
 }
 
 impl PicturePrimitive {
     pub fn resolve_scene_properties(&mut self, properties: &SceneProperties) -> bool {
         match self.composite_mode {
             Some(PictureCompositeMode::Filter(ref mut filter)) => {
                 match *filter {
                     FilterOp::Opacity(ref binding, ref mut value) => {
@@ -98,16 +171,17 @@ impl PicturePrimitive {
 
                 filter.is_visible()
             }
             _ => true,
         }
     }
 
     pub fn new_image(
+        id: PictureId,
         composite_mode: Option<PictureCompositeMode>,
         is_in_3d_context: bool,
         pipeline_id: PipelineId,
         reference_frame_index: ClipScrollNodeIndex,
         frame_output_pipeline_id: Option<PipelineId>,
         apply_local_clip_rect: bool,
     ) -> Self {
         PicturePrimitive {
@@ -118,16 +192,17 @@ impl PicturePrimitive {
             is_in_3d_context,
             frame_output_pipeline_id,
             reference_frame_index,
             real_local_rect: LayerRect::zero(),
             extra_gpu_data_handle: GpuCacheHandle::new(),
             apply_local_clip_rect,
             pipeline_id,
             task_rect: DeviceIntRect::zero(),
+            id,
         }
     }
 
     pub fn add_primitive(
         &mut self,
         prim_index: PrimitiveIndex,
         clip_and_scroll: ScrollNodeAndClipChain
     ) {
@@ -157,16 +232,26 @@ impl PicturePrimitive {
         match self.composite_mode {
             Some(PictureCompositeMode::Filter(FilterOp::Blur(blur_radius))) => {
                 let inflate_size = (blur_radius * BLUR_SAMPLE_SCALE).ceil();
                 local_content_rect.inflate(inflate_size, inflate_size)
             }
             Some(PictureCompositeMode::Filter(FilterOp::DropShadow(_, blur_radius, _))) => {
                 let inflate_size = (blur_radius * BLUR_SAMPLE_SCALE).ceil();
                 local_content_rect.inflate(inflate_size, inflate_size)
+
+                // TODO(gw): When we support culling rect being separate from
+                //           the task/screen rect, we should include both the
+                //           content and shadow rect here, which will prevent
+                //           drop-shadows from disappearing if the main content
+                //           rect is not visible. Something like:
+                // let shadow_rect = local_content_rect
+                //     .inflate(inflate_size, inflate_size)
+                //     .translate(&offset);
+                // shadow_rect.union(&local_content_rect)
             }
             _ => {
                 local_content_rect
             }
         }
     }
 
     pub fn can_draw_directly_to_parent_surface(&self) -> bool {
@@ -183,17 +268,17 @@ impl PicturePrimitive {
             }
         }
     }
 
     pub fn prepare_for_render_inner(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_metadata: &mut PrimitiveMetadata,
-        pic_state_for_children: PictureState,
+        mut pic_state_for_children: PictureState,
         pic_state: &mut PictureState,
         frame_context: &FrameBuildingContext,
         frame_state: &mut FrameBuildingState,
     ) -> Option<DeviceIntRect> {
         let prim_screen_rect = prim_metadata
                                 .screen_rect
                                 .as_ref()
                                 .expect("bug: trying to draw an off-screen picture!?");
@@ -224,36 +309,106 @@ impl PicturePrimitive {
                 // then intersect with the total screen rect, to minimize the
                 // allocation size.
                 let device_rect = prim_screen_rect
                     .clipped
                     .inflate(blur_range, blur_range)
                     .intersection(&prim_screen_rect.unclipped)
                     .unwrap();
 
-                let picture_task = RenderTask::new_picture(
-                    RenderTaskLocation::Dynamic(None, device_rect.size),
-                    prim_index,
-                    device_rect.origin,
-                    pic_state_for_children.tasks,
-                );
+                // If we are drawing a blur that has primitives or clips that contain
+                // a complex coordinate system, don't bother caching them (for now).
+                // It's likely that they are animating and caching may not help here
+                // anyway. In the future we should relax this a bit, so that we can
+                // cache tasks with complex coordinate systems if we detect the
+                // relevant transforms haven't changed from frame to frame.
+                let surface = if pic_state_for_children.has_non_root_coord_system {
+                    let picture_task = RenderTask::new_picture(
+                        RenderTaskLocation::Dynamic(None, Some(device_rect.size)),
+                        prim_index,
+                        device_rect.origin,
+                        pic_state_for_children.tasks,
+                    );
+
+                    let picture_task_id = frame_state.render_tasks.add(picture_task);
 
-                let picture_task_id = frame_state.render_tasks.add(picture_task);
+                    let blur_render_task = RenderTask::new_blur(
+                        blur_std_deviation,
+                        picture_task_id,
+                        frame_state.render_tasks,
+                        RenderTargetKind::Color,
+                        ClearMode::Transparent,
+                    );
+
+                    let render_task_id = frame_state.render_tasks.add(blur_render_task);
+
+                    pic_state.tasks.push(render_task_id);
+
+                    PictureSurface::RenderTask(render_task_id)
+                } else {
+                    // Get the relative clipped rect within the overall prim rect, that
+                    // forms part of the cache key.
+                    let pic_relative_render_rect = PictureIntRect::new(
+                        PictureIntPoint::new(
+                            device_rect.origin.x - prim_screen_rect.unclipped.origin.x,
+                            device_rect.origin.y - prim_screen_rect.unclipped.origin.y,
+                        ),
+                        PictureIntSize::new(
+                            device_rect.size.width,
+                            device_rect.size.height,
+                        ),
+                    );
 
-                let blur_render_task = RenderTask::new_blur(
-                    blur_std_deviation,
-                    picture_task_id,
-                    frame_state.render_tasks,
-                    RenderTargetKind::Color,
-                    ClearMode::Transparent,
-                );
+                    // Request a render task that will cache the output in the
+                    // texture cache.
+                    let cache_item = frame_state.resource_cache.request_render_task(
+                        RenderTaskCacheKey {
+                            size: device_rect.size,
+                            kind: RenderTaskCacheKeyKind::Picture(PictureCacheKey {
+                                picture_id: self.id,
+                                unclipped_size: prim_screen_rect.unclipped.size,
+                                pic_relative_render_rect,
+                            }),
+                        },
+                        frame_state.gpu_cache,
+                        frame_state.render_tasks,
+                        None,
+                        false,
+                        |render_tasks| {
+                            let child_tasks = mem::replace(&mut pic_state_for_children.tasks, Vec::new());
 
-                let render_task_id = frame_state.render_tasks.add(blur_render_task);
-                pic_state.tasks.push(render_task_id);
-                self.surface = Some(render_task_id);
+                            let picture_task = RenderTask::new_picture(
+                                RenderTaskLocation::Dynamic(None, Some(device_rect.size)),
+                                prim_index,
+                                device_rect.origin,
+                                child_tasks,
+                            );
+
+                            let picture_task_id = render_tasks.add(picture_task);
+
+                            let blur_render_task = RenderTask::new_blur(
+                                blur_std_deviation,
+                                picture_task_id,
+                                render_tasks,
+                                RenderTargetKind::Color,
+                                ClearMode::Transparent,
+                            );
+
+                            let render_task_id = render_tasks.add(blur_render_task);
+
+                            pic_state.tasks.push(render_task_id);
+
+                            render_task_id
+                        }
+                    );
+
+                    PictureSurface::TextureCache(cache_item)
+                };
+
+                self.surface = Some(surface);
 
                 Some(device_rect)
             }
             Some(PictureCompositeMode::Filter(FilterOp::DropShadow(_, blur_radius, _))) => {
                 let blur_std_deviation = blur_radius * frame_context.device_pixel_scale.0;
                 let blur_range = (blur_std_deviation * BLUR_SAMPLE_SCALE).ceil() as i32;
 
                 // The clipped field is the part of the picture that is visible
@@ -266,17 +421,17 @@ impl PicturePrimitive {
                 // allocation size.
                 let device_rect = prim_screen_rect
                     .clipped
                     .inflate(blur_range, blur_range)
                     .intersection(&prim_screen_rect.unclipped)
                     .unwrap();
 
                 let mut picture_task = RenderTask::new_picture(
-                    RenderTaskLocation::Dynamic(None, device_rect.size),
+                    RenderTaskLocation::Dynamic(None, Some(device_rect.size)),
                     prim_index,
                     device_rect.origin,
                     pic_state_for_children.tasks,
                 );
                 picture_task.mark_for_saving();
 
                 let picture_task_id = frame_state.render_tasks.add(picture_task);
 
@@ -287,38 +442,38 @@ impl PicturePrimitive {
                     RenderTargetKind::Color,
                     ClearMode::Transparent,
                 );
 
                 self.secondary_render_task_id = Some(picture_task_id);
 
                 let render_task_id = frame_state.render_tasks.add(blur_render_task);
                 pic_state.tasks.push(render_task_id);
-                self.surface = Some(render_task_id);
+                self.surface = Some(PictureSurface::RenderTask(render_task_id));
 
                 Some(device_rect)
             }
             Some(PictureCompositeMode::MixBlend(..)) => {
                 let picture_task = RenderTask::new_picture(
-                    RenderTaskLocation::Dynamic(None, prim_screen_rect.clipped.size),
+                    RenderTaskLocation::Dynamic(None, Some(prim_screen_rect.clipped.size)),
                     prim_index,
                     prim_screen_rect.clipped.origin,
                     pic_state_for_children.tasks,
                 );
 
                 let readback_task_id = frame_state.render_tasks.add(
                     RenderTask::new_readback(prim_screen_rect.clipped)
                 );
 
                 self.secondary_render_task_id = Some(readback_task_id);
                 pic_state.tasks.push(readback_task_id);
 
                 let render_task_id = frame_state.render_tasks.add(picture_task);
                 pic_state.tasks.push(render_task_id);
-                self.surface = Some(render_task_id);
+                self.surface = Some(PictureSurface::RenderTask(render_task_id));
 
                 Some(prim_screen_rect.clipped)
             }
             Some(PictureCompositeMode::Filter(filter)) => {
                 let device_rect = match filter {
                     FilterOp::ColorMatrix(m) => {
                         if let Some(mut request) = frame_state.gpu_cache.request(&mut self.extra_gpu_data_handle) {
                             for i in 0..5 {
@@ -327,39 +482,39 @@ impl PicturePrimitive {
                         }
 
                         None
                     }
                     _ => Some(prim_screen_rect.clipped),
                 };
 
                 let picture_task = RenderTask::new_picture(
-                    RenderTaskLocation::Dynamic(None, prim_screen_rect.clipped.size),
+                    RenderTaskLocation::Dynamic(None, Some(prim_screen_rect.clipped.size)),
                     prim_index,
                     prim_screen_rect.clipped.origin,
                     pic_state_for_children.tasks,
                 );
 
                 let render_task_id = frame_state.render_tasks.add(picture_task);
                 pic_state.tasks.push(render_task_id);
-                self.surface = Some(render_task_id);
+                self.surface = Some(PictureSurface::RenderTask(render_task_id));
 
                 device_rect
             }
             Some(PictureCompositeMode::Blit) | None => {
                 let picture_task = RenderTask::new_picture(
-                    RenderTaskLocation::Dynamic(None, prim_screen_rect.clipped.size),
+                    RenderTaskLocation::Dynamic(None, Some(prim_screen_rect.clipped.size)),
                     prim_index,
                     prim_screen_rect.clipped.origin,
                     pic_state_for_children.tasks,
                 );
 
                 let render_task_id = frame_state.render_tasks.add(picture_task);
                 pic_state.tasks.push(render_task_id);
-                self.surface = Some(render_task_id);
+                self.surface = Some(PictureSurface::RenderTask(render_task_id));
 
                 Some(prim_screen_rect.clipped)
             }
         }
     }
 
     pub fn prepare_for_render(
         &mut self,
@@ -387,28 +542,45 @@ impl PicturePrimitive {
             // cache entry for this picture to ensure that the correct
             // task rect is provided to the image shader.
             if self.task_rect != device_rect {
                 frame_state.gpu_cache.invalidate(&self.extra_gpu_data_handle);
                 self.task_rect = device_rect;
             }
 
             if let Some(mut request) = frame_state.gpu_cache.request(&mut self.extra_gpu_data_handle) {
+                // [GLSL ImageBrush: task_rect, offset, color]
                 request.push(self.task_rect.to_f32());
+                request.push([0.0; 4]);
+                request.push(PremultipliedColorF::WHITE);
 
                 // TODO(gw): It would make the shaders a bit simpler if the offset
                 //           was provided as part of the brush::picture instance,
                 //           rather than in the Picture data itself.
-                let (offset, color) = match self.composite_mode {
-                    Some(PictureCompositeMode::Filter(FilterOp::DropShadow(offset, _, color))) => {
-                        (offset, color.premultiplied())
-                    }
-                    _ => {
-                        (LayerVector2D::zero(), PremultipliedColorF::WHITE)
-                    }
-                };
+                if let Some(PictureCompositeMode::Filter(FilterOp::DropShadow(offset, _, color))) = self.composite_mode {
+                    // TODO(gw): This is very hacky code below! It stores an extra
+                    //           brush primitive below for the special case of a
+                    //           drop-shadow where we need a different local
+                    //           rect for the shadow. To tidy this up in future,
+                    //           we could consider abstracting the code in prim_store.rs
+                    //           that writes a brush primitive header.
 
-                request.push([offset.x, offset.y, 0.0, 0.0]);
-                request.push(color);
+                    // Basic brush primitive header is (see end of prepare_prim_for_render_inner in prim_store.rs)
+                    //  local_rect
+                    //  clip_rect
+                    //  [segment_rect, segment_data]
+                    let shadow_rect = prim_metadata.local_rect.translate(&offset);
+                    let shadow_clip_rect = prim_metadata.local_clip_rect.translate(&offset);
+
+                    request.push(shadow_rect);
+                    request.push(shadow_clip_rect);
+                    request.push(shadow_rect);
+                    request.push([0.0; 4]);
+
+                    // Now write another GLSL ImageBrush struct, for the shadow to reference.
+                    request.push(self.task_rect.to_f32());
+                    request.push([offset.x, offset.y, 0.0, 0.0]);
+                    request.push(color.premultiplied());
+                }
             }
         }
     }
 }
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -2,34 +2,33 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AlphaType, BorderRadius, BoxShadowClipMode, BuiltDisplayList, ClipMode, ColorF, ComplexClipRegion};
 use api::{DeviceIntRect, DeviceIntSize, DevicePixelScale, Epoch, ExtendMode, FontRenderMode};
 use api::{FilterOp, GlyphInstance, GlyphKey, GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag};
 use api::{LayerPoint, LayerRect, LayerSize, LayerToWorldTransform, LayerVector2D};
 use api::{PipelineId, PremultipliedColorF, Shadow, YuvColorSpace, YuvFormat};
-use batch::BrushImageSourceKind;
 use border::{BorderCornerInstance, BorderEdgeKind};
 use box_shadow::BLUR_SAMPLE_SCALE;
 use clip_scroll_tree::{ClipChainIndex, ClipScrollNodeIndex, CoordinateSystemId};
 use clip_scroll_node::ClipScrollNode;
 use clip::{ClipChain, ClipChainNode, ClipChainNodeIter, ClipChainNodeRef, ClipSource};
 use clip::{ClipSourcesHandle, ClipWorkItem};
 use frame_builder::{FrameBuildingContext, FrameBuildingState, PictureContext, PictureState};
 use frame_builder::PrimitiveRunContext;
 use glyph_rasterizer::{FontInstance, FontTransform};
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
                 ToGpuBlocks};
 use gpu_types::{ClipChainRectIndex};
-use picture::{PictureCompositeMode, PicturePrimitive};
+use picture::{PictureCompositeMode, PictureId, PicturePrimitive};
 use render_task::{BlitSource, RenderTask, RenderTaskCacheKey};
-use render_task::{RenderTaskCacheKeyKind, RenderTaskId};
+use render_task::{RenderTaskCacheKeyKind, RenderTaskId, RenderTaskCacheEntryHandle};
 use renderer::{MAX_VERTEX_TEXTURE_WIDTH};
-use resource_cache::{CacheItem, ImageProperties, ImageRequest};
+use resource_cache::{ImageProperties, ImageRequest};
 use segment::SegmentBuilder;
 use std::{mem, usize};
 use std::sync::Arc;
 use util::{MatrixHelpers, WorldToLayerFastTransform, calculate_screen_bounding_rect};
 use util::{pack_as_float, recycle_vec};
 
 
 const MIN_BRUSH_SPLIT_AREA: f32 = 256.0 * 256.0;
@@ -197,22 +196,16 @@ pub struct PrimitiveMetadata {
 #[derive(Debug)]
 pub enum BrushKind {
     Solid {
         color: ColorF,
     },
     Clear,
     Picture {
         pic_index: PictureIndex,
-        // What kind of texels to sample from the
-        // picture (e.g color or alpha mask).
-        source_kind: BrushImageSourceKind,
-        // A local space offset to apply when drawing
-        // this picture.
-        local_offset: LayerVector2D,
     },
     Image {
         request: ImageRequest,
         current_epoch: Epoch,
         alpha_type: AlphaType,
         stretch_size: LayerSize,
         tile_spacing: LayerSize,
         source: ImageSource,
@@ -326,26 +319,20 @@ impl BrushPrimitive {
         segment_desc: Option<BrushSegmentDescriptor>,
     ) -> BrushPrimitive {
         BrushPrimitive {
             kind,
             segment_desc,
         }
     }
 
-    pub fn new_picture(
-        pic_index: PictureIndex,
-        source_kind: BrushImageSourceKind,
-        local_offset: LayerVector2D,
-    ) -> BrushPrimitive {
+    pub fn new_picture(pic_index: PictureIndex) -> BrushPrimitive {
         BrushPrimitive {
             kind: BrushKind::Picture {
                 pic_index,
-                source_kind,
-                local_offset,
             },
             segment_desc: None,
         }
     }
 
     fn write_gpu_blocks(
         &self,
         request: &mut GpuDataRequest,
@@ -408,17 +395,17 @@ pub struct ImageCacheKey {
 #[derive(Debug)]
 pub enum ImageSource {
     // A normal image - just reference the texture cache.
     Default,
     // An image that is pre-rendered into the texture cache
     // via a render task.
     Cache {
         size: DeviceIntSize,
-        item: CacheItem,
+        handle: Option<RenderTaskCacheEntryHandle>,
     },
 }
 
 #[derive(Debug)]
 pub struct ImagePrimitiveCpu {
     pub tile_spacing: LayerSize,
     pub alpha_type: AlphaType,
     pub stretch_size: LayerSize,
@@ -1012,63 +999,68 @@ pub struct PrimitiveStore {
     /// CPU side information only.
     pub cpu_brushes: Vec<BrushPrimitive>,
     pub cpu_text_runs: Vec<TextRunPrimitiveCpu>,
     pub cpu_images: Vec<ImagePrimitiveCpu>,
     pub cpu_metadata: Vec<PrimitiveMetadata>,
     pub cpu_borders: Vec<BorderPrimitiveCpu>,
 
     pub pictures: Vec<PicturePrimitive>,
+    next_picture_id: u64,
 }
 
 impl PrimitiveStore {
     pub fn new() -> PrimitiveStore {
         PrimitiveStore {
             cpu_metadata: Vec::new(),
             cpu_brushes: Vec::new(),
             cpu_text_runs: Vec::new(),
             cpu_images: Vec::new(),
             cpu_borders: Vec::new(),
 
             pictures: Vec::new(),
+            next_picture_id: 0,
         }
     }
 
     pub fn recycle(self) -> Self {
         PrimitiveStore {
             cpu_metadata: recycle_vec(self.cpu_metadata),
             cpu_brushes: recycle_vec(self.cpu_brushes),
             cpu_text_runs: recycle_vec(self.cpu_text_runs),
             cpu_images: recycle_vec(self.cpu_images),
             cpu_borders: recycle_vec(self.cpu_borders),
 
             pictures: recycle_vec(self.pictures),
+            next_picture_id: self.next_picture_id,
         }
     }
 
     pub fn add_image_picture(
         &mut self,
         composite_mode: Option<PictureCompositeMode>,
         is_in_3d_context: bool,
         pipeline_id: PipelineId,
         reference_frame_index: ClipScrollNodeIndex,
         frame_output_pipeline_id: Option<PipelineId>,
         apply_local_clip_rect: bool,
     ) -> PictureIndex {
         let picture = PicturePrimitive::new_image(
+            PictureId(self.next_picture_id),
             composite_mode,
             is_in_3d_context,
             pipeline_id,
             reference_frame_index,
             frame_output_pipeline_id,
             apply_local_clip_rect,
         );
 
         let picture_index = PictureIndex(self.pictures.len());
         self.pictures.push(picture);
+        self.next_picture_id += 1;
         picture_index
     }
 
     pub fn add_primitive(
         &mut self,
         local_rect: &LayerRect,
         local_clip_rect: &LayerRect,
         is_backface_visible: bool,
@@ -1212,17 +1204,17 @@ impl PrimitiveStore {
 
                         // Work out whether this image is a normal / simple type, or if
                         // we need to pre-render it to the render task cache.
                         image_cpu.source = match image_cpu.key.texel_rect {
                             Some(texel_rect) => {
                                 ImageSource::Cache {
                                     // Size in device-pixels we need to allocate in render task cache.
                                     size: texel_rect.size,
-                                    item: CacheItem::invalid(),
+                                    handle: None,
                                 }
                             }
                             None => {
                                 // Simple image - just use a normal texture cache entry.
                                 ImageSource::Default
                             }
                         };
                     }
@@ -1230,28 +1222,29 @@ impl PrimitiveStore {
                     // Set if we need to request the source image from the cache this frame.
                     let mut request_source_image = false;
 
                     // Every frame, for cached items, we need to request the render
                     // task cache item. The closure will be invoked on the first
                     // time through, and any time the render task output has been
                     // evicted from the texture cache.
                     match image_cpu.source {
-                        ImageSource::Cache { size, ref mut item } => {
+                        ImageSource::Cache { size, ref mut handle } => {
                             let key = image_cpu.key;
 
                             // Request a pre-rendered image task.
-                            *item = frame_state.resource_cache.request_render_task(
+                            *handle = Some(frame_state.resource_cache.request_render_task(
                                 RenderTaskCacheKey {
                                     size,
                                     kind: RenderTaskCacheKeyKind::Image(key),
                                 },
                                 frame_state.gpu_cache,
                                 frame_state.render_tasks,
                                 None,
+                                image_properties.descriptor.is_opaque,
                                 |render_tasks| {
                                     // We need to render the image cache this frame,
                                     // so will need access to the source texture.
                                     request_source_image = true;
 
                                     // Create a task to blit from the texture cache to
                                     // a normal transient render task surface. This will
                                     // copy only the sub-rect, if specified.
@@ -1274,19 +1267,19 @@ impl PrimitiveStore {
                                     );
                                     let target_to_cache_task_id = render_tasks.add(target_to_cache_task);
 
                                     // Hook this into the render task tree at the right spot.
                                     pic_state.tasks.push(target_to_cache_task_id);
 
                                     // Pass the image opacity, so that the cached render task
                                     // item inherits the same opacity properties.
-                                    (target_to_cache_task_id, image_properties.descriptor.is_opaque)
+                                    target_to_cache_task_id
                                 }
-                            );
+                            ));
                         }
                         ImageSource::Default => {
                             // Normal images just reference the source texture each frame.
                             request_source_image = true;
                         }
                     }
 
                     // Request source image from the texture cache, if required.
@@ -1317,42 +1310,43 @@ impl PrimitiveStore {
                             }
 
                             // Work out whether this image is a normal / simple type, or if
                             // we need to pre-render it to the render task cache.
                             if let Some(rect) = sub_rect {
                                 *source = ImageSource::Cache {
                                     // Size in device-pixels we need to allocate in render task cache.
                                     size: rect.size,
-                                    item: CacheItem::invalid(),
+                                    handle: None,
                                 };
                             }
 
                             let mut request_source_image = false;
 
                             // Every frame, for cached items, we need to request the render
                             // task cache item. The closure will be invoked on the first
                             // time through, and any time the render task output has been
                             // evicted from the texture cache.
                             match *source {
-                                ImageSource::Cache { size, ref mut item } => {
+                                ImageSource::Cache { size, ref mut handle } => {
                                     let image_cache_key = ImageCacheKey {
                                         request,
                                         texel_rect: sub_rect,
                                     };
 
                                     // Request a pre-rendered image task.
-                                    *item = frame_state.resource_cache.request_render_task(
+                                    *handle = Some(frame_state.resource_cache.request_render_task(
                                         RenderTaskCacheKey {
                                             size,
                                             kind: RenderTaskCacheKeyKind::Image(image_cache_key),
                                         },
                                         frame_state.gpu_cache,
                                         frame_state.render_tasks,
                                         None,
+                                        image_properties.descriptor.is_opaque,
                                         |render_tasks| {
                                             // We need to render the image cache this frame,
                                             // so will need access to the source texture.
                                             request_source_image = true;
 
                                             // Create a task to blit from the texture cache to
                                             // a normal transient render task surface. This will
                                             // copy only the sub-rect, if specified.
@@ -1373,20 +1367,19 @@ impl PrimitiveStore {
                                             );
                                             let target_to_cache_task_id = render_tasks.add(target_to_cache_task);
 
                                             // Hook this into the render task tree at the right spot.
                                             pic_state.tasks.push(target_to_cache_task_id);
 
                                             // Pass the image opacity, so that the cached render task
                                             // item inherits the same opacity properties.
-                                            (target_to_cache_task_id, image_properties.descriptor.is_opaque)
+                                            target_to_cache_task_id
                                         }
-                                    );
-
+                                    ));
                                 }
                                 ImageSource::Default => {
                                     // Normal images just reference the source texture each frame.
                                     request_source_image = true;
                                 }
                             }
 
                             if request_source_image {
@@ -1433,52 +1426,26 @@ impl PrimitiveStore {
                                 pic_context.display_list,
                             );
                             gradient_builder.build(
                                 reverse_stops,
                                 &mut request,
                             );
                         }
                     }
-                    BrushKind::Picture { pic_index, source_kind, .. } => {
+                    BrushKind::Picture { pic_index, .. } => {
                         let pic = &mut self.pictures[pic_index.0];
-                        // If this picture is referenced by multiple brushes,
-                        // we only want to prepare it once per frame. It
-                        // should be prepared for the main color pass.
-                        // TODO(gw): Make this a bit more explicit - perhaps
-                        //           we could mark which brush::picture is
-                        //           the owner of the picture, vs the shadow
-                        //           which is just referencing it.
-                        match source_kind {
-                            BrushImageSourceKind::Color => {
-                                pic.prepare_for_render(
-                                    prim_index,
-                                    metadata,
-                                    pic_state_for_children,
-                                    pic_state,
-                                    frame_context,
-                                    frame_state,
-                                );
-                            }
-                            BrushImageSourceKind::ColorAlphaMask => {
-                                // Since we will always visit the shadow
-                                // brush first, use this to clear out the
-                                // render tasks from the previous frame.
-                                // This ensures that if the primary brush
-                                // is found to be non-visible, then we
-                                // won't try to draw the drop-shadow either.
-                                // This isn't quite correct - it can result
-                                // in clipping artifacts if the image is
-                                // off-screen, but the drop-shadow is
-                                // partially visible - we can fix this edge
-                                // case as a follow up.
-                                pic.surface = None;
-                                pic.secondary_render_task_id = None;
-                            }
-                        }
+                        pic.prepare_for_render(
+                            prim_index,
+                            metadata,
+                            pic_state_for_children,
+                            pic_state,
+                            frame_context,
+                            frame_state,
+                        );
                     }
                     BrushKind::Solid { .. } |
                     BrushKind::Clear => {}
                 }
             }
         }
 
         // Mark this GPU resource as required for this frame.
@@ -1927,17 +1894,17 @@ impl PrimitiveStore {
         };
 
         // If we have dependencies, we need to prepare them first, in order
         // to know the actual rect of this primitive.
         // For example, scrolling may affect the location of an item in
         // local space, which may force us to render this item on a larger
         // picture target, if being composited.
         if let PrimitiveKind::Brush = prim_kind {
-            if let BrushKind::Picture { pic_index, local_offset, .. } = self.cpu_brushes[cpu_prim_index.0].kind {
+            if let BrushKind::Picture { pic_index, .. } = self.cpu_brushes[cpu_prim_index.0].kind {
                 let pic_context_for_children = {
                     let pic = &mut self.pictures[pic_index.0];
 
                     if !pic.resolve_scene_properties(frame_context.scene_properties) {
                         return None;
                     }
 
                     may_need_clip_mask = pic.composite_mode.is_some();
@@ -1959,16 +1926,20 @@ impl PrimitiveStore {
                         .expect("No display list?")
                         .display_list;
 
                     let inv_world_transform = prim_run_context
                         .scroll_node
                         .world_content_transform
                         .inverse();
 
+                    // Mark whether this picture has a complex coordinate system.
+                    pic_state_for_children.has_non_root_coord_system |=
+                        prim_run_context.scroll_node.coordinate_system_id != CoordinateSystemId::root();
+
                     PictureContext {
                         pipeline_id: pic.pipeline_id,
                         prim_runs: mem::replace(&mut pic.runs, Vec::new()),
                         original_reference_frame_index: Some(pic.reference_frame_index),
                         display_list,
                         inv_world_transform,
                         apply_local_clip_rect: pic.apply_local_clip_rect,
                         inflation_factor,
@@ -1982,21 +1953,17 @@ impl PrimitiveStore {
                     frame_state,
                 );
 
                 // Restore the dependencies (borrow check dance)
                 let pic = &mut self.pictures[pic_index.0];
                 pic.runs = pic_context_for_children.prim_runs;
 
                 let metadata = &mut self.cpu_metadata[prim_index.0];
-                // Store local rect of the picture for this brush,
-                // also applying any local offset for the instance.
-                metadata.local_rect = pic
-                    .update_local_rect(result)
-                    .translate(&local_offset);
+                metadata.local_rect = pic.update_local_rect(result);
             }
         }
 
         let (local_rect, unclipped_device_rect) = {
             let metadata = &mut self.cpu_metadata[prim_index.0];
             if metadata.local_rect.size.width <= 0.0 ||
                metadata.local_rect.size.height <= 0.0 {
                 //warn!("invalid primitive rect {:?}", metadata.local_rect);
@@ -2090,16 +2057,22 @@ impl PrimitiveStore {
             //           lookups ever show up in a profile).
             let scroll_node = &frame_context
                 .clip_scroll_tree
                 .nodes[run.clip_and_scroll.scroll_node_id.0];
             let clip_chain = frame_context
                 .clip_scroll_tree
                 .get_clip_chain(run.clip_and_scroll.clip_chain_index);
 
+            // Mark whether this picture contains any complex coordinate
+            // systems, due to either the scroll node or the clip-chain.
+            pic_state.has_non_root_coord_system |=
+                scroll_node.coordinate_system_id != CoordinateSystemId::root();
+            pic_state.has_non_root_coord_system |= clip_chain.has_non_root_coord_system;
+
             if !scroll_node.invertible {
                 debug!("{:?} {:?}: position not invertible", run.base_prim_index, pic_context.pipeline_id);
                 continue;
             }
 
             if clip_chain.combined_outer_screen_rect.is_empty() {
                 debug!("{:?} {:?}: clipped out", run.base_prim_index, pic_context.pipeline_id);
                 continue;
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -19,17 +19,17 @@ use clip_scroll_tree::ClipScrollTree;
 use debug_server;
 use display_list_flattener::DisplayListFlattener;
 use frame_builder::{FrameBuilder, FrameBuilderConfig};
 use gpu_cache::GpuCache;
 use hit_test::{HitTest, HitTester};
 use internal_types::{DebugOutput, FastHashMap, FastHashSet, RenderedDocument, ResultMsg};
 use profiler::{BackendProfileCounters, IpcProfileCounters, ResourceProfileCounters};
 use record::ApiRecordingReceiver;
-use renderer::PipelineInfo;
+use renderer::{AsyncPropertySampler, PipelineInfo};
 use resource_cache::ResourceCache;
 #[cfg(feature = "replay")]
 use resource_cache::PlainCacheOwn;
 #[cfg(any(feature = "capture", feature = "replay"))]
 use resource_cache::PlainResources;
 use scene::{Scene, SceneProperties};
 use scene_builder::*;
 #[cfg(feature = "serialize")]
@@ -424,32 +424,34 @@ pub struct RenderBackend {
     gpu_cache: GpuCache,
     resource_cache: ResourceCache,
 
     frame_config: FrameBuilderConfig,
     documents: FastHashMap<DocumentId, Document>,
 
     notifier: Box<RenderNotifier>,
     recorder: Option<Box<ApiRecordingReceiver>>,
+    sampler: Option<Box<AsyncPropertySampler + Send>>,
 
     enable_render_on_scroll: bool,
 }
 
 impl RenderBackend {
     pub fn new(
         api_rx: MsgReceiver<ApiMsg>,
         payload_rx: Receiver<Payload>,
         result_tx: Sender<ResultMsg>,
         scene_tx: Sender<SceneBuilderRequest>,
         scene_rx: Receiver<SceneBuilderResult>,
         default_device_pixel_ratio: f32,
         resource_cache: ResourceCache,
         notifier: Box<RenderNotifier>,
         frame_config: FrameBuilderConfig,
         recorder: Option<Box<ApiRecordingReceiver>>,
+        sampler: Option<Box<AsyncPropertySampler + Send>>,
         enable_render_on_scroll: bool,
     ) -> RenderBackend {
         // The namespace_id should start from 1.
         NEXT_NAMESPACE_ID.fetch_add(1, Ordering::Relaxed);
 
         RenderBackend {
             api_rx,
             payload_rx,
@@ -459,16 +461,17 @@ impl RenderBackend {
             payload_buffer: Vec::new(),
             default_device_pixel_ratio,
             resource_cache,
             gpu_cache: GpuCache::new(),
             frame_config,
             documents: FastHashMap::default(),
             notifier,
             recorder,
+            sampler,
             enable_render_on_scroll,
         }
     }
 
     fn process_scene_msg(
         &mut self,
         document_id: DocumentId,
         message: SceneMsg,
@@ -676,16 +679,20 @@ impl RenderBackend {
     fn next_namespace_id(&self) -> IdNamespace {
         IdNamespace(NEXT_NAMESPACE_ID.fetch_add(1, Ordering::Relaxed) as u32)
     }
 
     pub fn run(&mut self, mut profile_counters: BackendProfileCounters) {
         let mut frame_counter: u32 = 0;
         let mut keep_going = true;
 
+        if let Some(ref sampler) = self.sampler {
+            sampler.register();
+        }
+
         while keep_going {
             profile_scope!("handle_msg");
 
             while let Ok(msg) = self.scene_rx.try_recv() {
                 match msg {
                     SceneBuilderResult::Transaction {
                         document_id,
                         mut built_scene,
@@ -736,16 +743,21 @@ impl RenderBackend {
                     self.process_api_msg(msg, &mut profile_counters, &mut frame_counter)
                 }
                 Err(..) => { false }
             };
         }
 
         let _ = self.scene_tx.send(SceneBuilderRequest::Stop);
         self.notifier.shut_down();
+
+        if let Some(ref sampler) = self.sampler {
+            sampler.deregister();
+        }
+
     }
 
     fn process_api_msg(
         &mut self,
         msg: ApiMsg,
         profile_counters: &mut BackendProfileCounters,
         frame_counter: &mut u32,
     ) -> bool {
@@ -941,16 +953,27 @@ impl RenderBackend {
             let doc = self.documents.get_mut(&document_id).unwrap();
             let _timer = profile_counters.total_time.timer();
             profile_scope!("build scene");
 
             doc.build_scene(&mut self.resource_cache);
             doc.render_on_hittest = true;
         }
 
+        // If we have a sampler, get more frame ops from it and add them
+        // to the transaction. This is a hook to allow the WR user code to
+        // fiddle with things after a potentially long scene build, but just
+        // before rendering. This is useful for rendering with the latest
+        // async transforms.
+        if transaction_msg.generate_frame {
+            if let Some(ref sampler) = self.sampler {
+                transaction_msg.frame_ops.append(&mut sampler.sample());
+            }
+        }
+
         for frame_msg in transaction_msg.frame_ops {
             let _timer = profile_counters.total_time.timer();
             op.combine(self.process_frame_msg(document_id, frame_msg));
         }
 
         let doc = self.documents.get_mut(&document_id).unwrap();
 
         if transaction_msg.generate_frame {
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -6,23 +6,24 @@ use api::{DeviceIntPoint, DeviceIntRect,
 #[cfg(feature = "pathfinder")]
 use api::FontRenderMode;
 use box_shadow::{BoxShadowCacheKey};
 use clip::{ClipSource, ClipStore, ClipWorkItem};
 use clip_scroll_tree::CoordinateSystemId;
 use device::TextureFilter;
 #[cfg(feature = "pathfinder")]
 use euclid::{TypedPoint2D, TypedVector2D};
-use freelist::{FreeList, FreeListHandle};
+use freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
 use glyph_rasterizer::GpuGlyphCacheKey;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
 use gpu_types::{ImageSource, RasterizationSpace};
 use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
 #[cfg(feature = "pathfinder")]
 use pathfinder_partitioner::mesh::Mesh;
+use picture::PictureCacheKey;
 use prim_store::{PrimitiveIndex, ImageCacheKey};
 #[cfg(feature = "debugger")]
 use print_tree::{PrintTreePrinter};
 use render_backend::FrameId;
 use resource_cache::{CacheItem, ResourceCache};
 use std::{cmp, ops, usize, f32, i32};
 use texture_cache::{TextureCache, TextureCacheHandle};
 use tiling::{RenderPass, RenderTargetIndex};
@@ -114,22 +115,28 @@ impl RenderTaskTree {
         } else {
             pass_index
         };
 
         let pass = &mut passes[pass_index];
         pass.add_render_task(id, task.get_dynamic_size(), task.target_kind());
     }
 
+    pub fn prepare_for_render(&mut self) {
+        for task in &mut self.tasks {
+            task.prepare_for_render();
+        }
+    }
+
     pub fn get_task_address(&self, id: RenderTaskId) -> RenderTaskAddress {
         debug_assert_eq!(self.frame_id, id.1);
         RenderTaskAddress(id.0)
     }
 
-    pub fn build(&mut self) {
+    pub fn write_task_data(&mut self) {
         for task in &self.tasks {
             self.task_data.push(task.write_task_data());
         }
     }
 
     pub fn save_target(&mut self) -> SavedTargetIndex {
         let id = self.next_saved;
         self.next_saved.0 += 1;
@@ -152,17 +159,17 @@ impl ops::IndexMut<RenderTaskId> for Ren
     }
 }
 
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum RenderTaskLocation {
     Fixed(DeviceIntRect),
-    Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
+    Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, Option<DeviceIntSize>),
     TextureCache(SourceTexture, i32, DeviceIntRect),
 }
 
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct CacheMaskTask {
     actual_rect: DeviceIntRect,
@@ -306,17 +313,17 @@ impl RenderTask {
             clear_mode: ClearMode::Transparent,
             saved_index: None,
         }
     }
 
     pub fn new_readback(screen_rect: DeviceIntRect) -> Self {
         RenderTask {
             children: Vec::new(),
-            location: RenderTaskLocation::Dynamic(None, screen_rect.size),
+            location: RenderTaskLocation::Dynamic(None, Some(screen_rect.size)),
             kind: RenderTaskKind::Readback(screen_rect),
             clear_mode: ClearMode::Transparent,
             saved_index: None,
         }
     }
 
     pub fn new_blit(
         size: DeviceIntSize,
@@ -330,17 +337,17 @@ impl RenderTask {
         // and made available as an input when this task
         // executes.
         if let BlitSource::RenderTask { task_id } = source {
             children.push(task_id);
         }
 
         RenderTask {
             children,
-            location: RenderTaskLocation::Dynamic(None, size),
+            location: RenderTaskLocation::Dynamic(None, Some(size)),
             kind: RenderTaskKind::Blit(BlitTask {
                 source,
             }),
             clear_mode: ClearMode::Transparent,
             saved_index: None,
         }
     }
 
@@ -373,24 +380,25 @@ impl RenderTask {
                             .as_ref()
                             .expect("bug: no cache key set")
                             .clone();
                         let blur_radius_dp = cache_key.blur_radius_dp as f32;
                         let clip_data_address = gpu_cache.get_address(&info.clip_data_handle);
 
                         // Request a cacheable render task with a blurred, minimal
                         // sized box-shadow rect.
-                        info.cache_item = resource_cache.request_render_task(
+                        info.cache_handle = Some(resource_cache.request_render_task(
                             RenderTaskCacheKey {
                                 size: cache_size,
                                 kind: RenderTaskCacheKeyKind::BoxShadow(cache_key),
                             },
                             gpu_cache,
                             render_tasks,
                             None,
+                            false,
                             |render_tasks| {
                                 // Draw the rounded rect.
                                 let mask_task = RenderTask::new_rounded_rect_mask(
                                     cache_size,
                                     clip_data_address,
                                 );
 
                                 let mask_task_id = render_tasks.add(mask_task);
@@ -402,49 +410,49 @@ impl RenderTask {
                                     render_tasks,
                                     RenderTargetKind::Alpha,
                                     ClearMode::Zero,
                                 );
 
                                 let root_task_id = render_tasks.add(blur_render_task);
                                 children.push(root_task_id);
 
-                                (root_task_id, false)
+                                root_task_id
                             }
-                        );
+                        ));
                     }
                     ClipSource::Rectangle(..) |
                     ClipSource::RoundedRectangle(..) |
                     ClipSource::Image(..) |
                     ClipSource::LineDecoration(..) |
                     ClipSource::BorderCorner(..) => {}
                 }
             }
         }
 
         RenderTask {
             children,
-            location: RenderTaskLocation::Dynamic(None, outer_rect.size),
+            location: RenderTaskLocation::Dynamic(None, Some(outer_rect.size)),
             kind: RenderTaskKind::CacheMask(CacheMaskTask {
                 actual_rect: outer_rect,
                 clips,
                 coordinate_system_id: prim_coordinate_system_id,
             }),
             clear_mode: ClearMode::One,
             saved_index: None,
         }
     }
 
     pub fn new_rounded_rect_mask(
         size: DeviceIntSize,
         clip_data_address: GpuCacheAddress,
     ) -> Self {
         RenderTask {
             children: Vec::new(),
-            location: RenderTaskLocation::Dynamic(None, size),
+            location: RenderTaskLocation::Dynamic(None, Some(size)),
             kind: RenderTaskKind::ClipRegion(ClipRegionTask {
                 clip_data_address,
             }),
             clear_mode: ClearMode::One,
             saved_index: None,
         }
     }
 
@@ -492,31 +500,31 @@ impl RenderTask {
                 downscaling_src_task_id,
                 adjusted_blur_target_size,
             );
             downscaling_src_task_id = render_tasks.add(downscaling_task);
         }
 
         let blur_task_v = RenderTask {
             children: vec![downscaling_src_task_id],
-            location: RenderTaskLocation::Dynamic(None, adjusted_blur_target_size),
+            location: RenderTaskLocation::Dynamic(None, Some(adjusted_blur_target_size)),
             kind: RenderTaskKind::VerticalBlur(BlurTask {
                 blur_std_deviation: adjusted_blur_std_deviation,
                 target_kind,
                 uv_rect_handle: GpuCacheHandle::new(),
             }),
             clear_mode,
             saved_index: None,
         };
 
         let blur_task_v_id = render_tasks.add(blur_task_v);
 
         RenderTask {
             children: vec![blur_task_v_id],
-            location: RenderTaskLocation::Dynamic(None, adjusted_blur_target_size),
+            location: RenderTaskLocation::Dynamic(None, Some(adjusted_blur_target_size)),
             kind: RenderTaskKind::HorizontalBlur(BlurTask {
                 blur_std_deviation: adjusted_blur_std_deviation,
                 target_kind,
                 uv_rect_handle: GpuCacheHandle::new(),
             }),
             clear_mode,
             saved_index: None,
         }
@@ -524,17 +532,17 @@ impl RenderTask {
 
     pub fn new_scaling(
         target_kind: RenderTargetKind,
         src_task_id: RenderTaskId,
         target_size: DeviceIntSize,
     ) -> Self {
         RenderTask {
             children: vec![src_task_id],
-            location: RenderTaskLocation::Dynamic(None, target_size),
+            location: RenderTaskLocation::Dynamic(None, Some(target_size)),
             kind: RenderTaskKind::Scaling(target_kind),
             clear_mode: match target_kind {
                 RenderTargetKind::Color => ClearMode::Transparent,
                 RenderTargetKind::Alpha => ClearMode::One,
             },
             saved_index: None,
         }
     }
@@ -626,40 +634,43 @@ impl RenderTask {
                 target_index.0 as f32,
                 data[0],
                 data[1],
                 data[2],
             ]
         }
     }
 
-    pub fn get_texture_handle(&self) -> &GpuCacheHandle {
+    pub fn get_texture_address(&self, gpu_cache: &GpuCache) -> GpuCacheAddress {
         match self.kind {
             RenderTaskKind::Picture(ref info) => {
-                &info.uv_rect_handle
+                gpu_cache.get_address(&info.uv_rect_handle)
             }
             RenderTaskKind::VerticalBlur(ref info) |
             RenderTaskKind::HorizontalBlur(ref info) => {
-                &info.uv_rect_handle
+                gpu_cache.get_address(&info.uv_rect_handle)
             }
             RenderTaskKind::ClipRegion(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::Scaling(..) |
             RenderTaskKind::Blit(..) |
             RenderTaskKind::CacheMask(..) |
             RenderTaskKind::Glyph(..) => {
                 panic!("texture handle not supported for this task kind");
             }
         }
     }
 
     pub fn get_dynamic_size(&self) -> DeviceIntSize {
         match self.location {
             RenderTaskLocation::Fixed(..) => DeviceIntSize::zero(),
-            RenderTaskLocation::Dynamic(_, size) => size,
+            RenderTaskLocation::Dynamic(_, Some(size)) => size,
+            RenderTaskLocation::Dynamic(_, None) => {
+                panic!("bug: render task must have assigned size by now");
+            }
             RenderTaskLocation::TextureCache(_, _, rect) => rect.size,
         }
     }
 
     pub fn get_target_rect(&self) -> (DeviceIntRect, RenderTargetIndex) {
         match self.location {
             RenderTaskLocation::Fixed(rect) => {
                 (rect, RenderTargetIndex(0))
@@ -674,16 +685,17 @@ impl RenderTask {
             // Render tasks that are created but not assigned to
             // passes consume a row in the render task texture, but
             // don't allocate any space in render targets nor
             // draw any pixels.
             // TODO(gw): Consider some kind of tag or other method
             //           to mark a task as unused explicitly. This
             //           would allow us to restore this debug check.
             RenderTaskLocation::Dynamic(Some((origin, target_index)), size) => {
+                let size = size.expect("bug: must be assigned a size by now");
                 (DeviceIntRect::new(origin, size), target_index)
             }
             RenderTaskLocation::Dynamic(None, _) => {
                 (DeviceIntRect::zero(), RenderTargetIndex(0))
             }
             RenderTaskLocation::TextureCache(_, layer, rect) => {
                 (rect, RenderTargetIndex(layer as usize))
             }
@@ -743,17 +755,24 @@ impl RenderTask {
             //           optimization. It's of dubious value in the
             //           future once we start to cache clip tasks anyway.
             //           I have left shared texture support here though,
             //           just in case we want it in the future.
             RenderTaskKind::CacheMask(..) => false,
         }
     }
 
-    pub fn prepare_for_render(
+    // Optionally, prepare the render task for drawing. This is executed
+    // after all resource cache items (textures and glyphs) have been
+    // resolved and can be queried. It also allows certain render tasks
+    // to defer calculating an exact size until now, if desired.
+    pub fn prepare_for_render(&mut self) {
+    }
+
+    pub fn write_gpu_blocks(
         &mut self,
         gpu_cache: &mut GpuCache,
     ) {
         let (target_rect, target_index) = self.get_target_rect();
 
         let cache_handle = match self.kind {
             RenderTaskKind::HorizontalBlur(ref mut info) |
             RenderTaskKind::VerticalBlur(ref mut info) => {
@@ -850,41 +869,51 @@ impl RenderTask {
 #[derive(Clone, Debug, Hash, PartialEq, Eq)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum RenderTaskCacheKeyKind {
     BoxShadow(BoxShadowCacheKey),
     Image(ImageCacheKey),
     #[allow(dead_code)]
     Glyph(GpuGlyphCacheKey),
+    Picture(PictureCacheKey),
 }
 
 #[derive(Clone, Debug, Hash, PartialEq, Eq)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct RenderTaskCacheKey {
     pub size: DeviceIntSize,
     pub kind: RenderTaskCacheKeyKind,
 }
 
+#[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct RenderTaskCacheEntry {
+    pending_render_task_id: Option<RenderTaskId>,
+    user_data: Option<[f32; 3]>,
+    is_opaque: bool,
     pub handle: TextureCacheHandle,
 }
 
+#[derive(Debug)]
+pub enum RenderTaskCacheMarker {}
+
 // A cache of render tasks that are stored in the texture
 // cache for usage across frames.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct RenderTaskCache {
-    map: FastHashMap<RenderTaskCacheKey, FreeListHandle<RenderTaskCacheEntry>>,
-    cache_entries: FreeList<RenderTaskCacheEntry>,
+    map: FastHashMap<RenderTaskCacheKey, FreeListHandle<RenderTaskCacheMarker>>,
+    cache_entries: FreeList<RenderTaskCacheEntry, RenderTaskCacheMarker>,
 }
 
+pub type RenderTaskCacheEntryHandle = WeakFreeListHandle<RenderTaskCacheMarker>;
+
 impl RenderTaskCache {
     pub fn new() -> Self {
         RenderTaskCache {
             map: FastHashMap::default(),
             cache_entries: FreeList::new(),
         }
     }
 
@@ -920,102 +949,137 @@ impl RenderTaskCache {
         }
 
         for key in &keys_to_remove {
             let handle = self.map.remove(key).unwrap();
             self.cache_entries.free(handle);
         }
     }
 
+    pub fn update(
+        &mut self,
+        gpu_cache: &mut GpuCache,
+        texture_cache: &mut TextureCache,
+        render_tasks: &mut RenderTaskTree,
+    ) {
+        // Iterate the list of render task cache entries,
+        // and allocate / update the texture cache location
+        // if the entry has been evicted or not yet allocated.
+        for (_, handle) in &self.map {
+            let entry = self.cache_entries.get_mut(handle);
+
+            if let Some(pending_render_task_id) = entry.pending_render_task_id.take() {
+                let render_task = &mut render_tasks[pending_render_task_id];
+                let target_kind = render_task.target_kind();
+
+                // Find out what size to alloc in the texture cache.
+                let size = match render_task.location {
+                    RenderTaskLocation::Fixed(..) |
+                    RenderTaskLocation::TextureCache(..) => {
+                        panic!("BUG: dynamic task was expected");
+                    }
+                    RenderTaskLocation::Dynamic(_, None) => {
+                        panic!("BUG: must have assigned size by now");
+                    }
+                    RenderTaskLocation::Dynamic(_, Some(size)) => size,
+                };
+
+                // Select the right texture page to allocate from.
+                let image_format = match target_kind {
+                    RenderTargetKind::Color => ImageFormat::BGRA8,
+                    RenderTargetKind::Alpha => ImageFormat::R8,
+                };
+
+                let descriptor = ImageDescriptor::new(
+                    size.width as u32,
+                    size.height as u32,
+                    image_format,
+                    entry.is_opaque,
+                    false,
+                );
+
+                // Allocate space in the texture cache, but don't supply
+                // any CPU-side data to be uploaded.
+                texture_cache.update(
+                    &mut entry.handle,
+                    descriptor,
+                    TextureFilter::Linear,
+                    None,
+                    entry.user_data.unwrap_or([0.0; 3]),
+                    None,
+                    gpu_cache,
+                    None,
+                );
+
+                // Get the allocation details in the texture cache, and store
+                // this in the render task. The renderer will draw this
+                // task into the appropriate layer and rect of the texture
+                // cache on this frame.
+                let (texture_id, texture_layer, uv_rect) =
+                    texture_cache.get_cache_location(&entry.handle);
+
+                render_task.location = RenderTaskLocation::TextureCache(
+                    texture_id,
+                    texture_layer,
+                    uv_rect.to_i32()
+                );
+            }
+        }
+    }
+
     pub fn request_render_task<F>(
         &mut self,
         key: RenderTaskCacheKey,
         texture_cache: &mut TextureCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         user_data: Option<[f32; 3]>,
+        is_opaque: bool,
         mut f: F,
-    ) -> Result<CacheItem, ()>
-         where F: FnMut(&mut RenderTaskTree) -> Result<(RenderTaskId, bool), ()> {
+    ) -> Result<RenderTaskCacheEntryHandle, ()>
+         where F: FnMut(&mut RenderTaskTree) -> Result<RenderTaskId, ()> {
         // Get the texture cache handle for this cache key,
         // or create one.
         let cache_entries = &mut self.cache_entries;
         let entry_handle = self.map
                                .entry(key)
                                .or_insert_with(|| {
                                     let entry = RenderTaskCacheEntry {
                                         handle: TextureCacheHandle::new(),
+                                        pending_render_task_id: None,
+                                        user_data,
+                                        is_opaque,
                                     };
                                     cache_entries.insert(entry)
                                 });
         let cache_entry = cache_entries.get_mut(entry_handle);
 
-        // Check if this texture cache handle is valid.
-        if texture_cache.request(&cache_entry.handle, gpu_cache) {
-            // Invoke user closure to get render task chain
-            // to draw this into the texture cache.
-            let (render_task_id, is_opaque) = try!(f(render_tasks));
-            let render_task = &mut render_tasks[render_task_id];
-
-            // Select the right texture page to allocate from.
-            let image_format = match render_task.target_kind() {
-                RenderTargetKind::Color => ImageFormat::BGRA8,
-                RenderTargetKind::Alpha => ImageFormat::R8,
-            };
-
-            // Find out what size to alloc in the texture cache.
-            let size = match render_task.location {
-                RenderTaskLocation::Fixed(..) |
-                RenderTaskLocation::TextureCache(..) => {
-                    panic!("BUG: dynamic task was expected");
-                }
-                RenderTaskLocation::Dynamic(_, size) => size,
-            };
+        if cache_entry.pending_render_task_id.is_none() {
+            // Check if this texture cache handle is valid.
+            if texture_cache.request(&cache_entry.handle, gpu_cache) {
+                // Invoke user closure to get render task chain
+                // to draw this into the texture cache.
+                let render_task_id = try!(f(render_tasks));
 
-            // TODO(gw): Support color tasks in the texture cache,
-            //           and perhaps consider if we can determine
-            //           if some tasks are opaque as an optimization.
-            let descriptor = ImageDescriptor::new(
-                size.width as u32,
-                size.height as u32,
-                image_format,
-                is_opaque,
-                false,
-            );
-
-            // Allocate space in the texture cache, but don't supply
-            // and CPU-side data to be uploaded.
-            texture_cache.update(
-                &mut cache_entry.handle,
-                descriptor,
-                TextureFilter::Linear,
-                None,
-                user_data.unwrap_or([0.0; 3]),
-                None,
-                gpu_cache,
-                None,
-            );
-
-            // Get the allocation details in the texture cache, and store
-            // this in the render task. The renderer will draw this
-            // task into the appropriate layer and rect of the texture
-            // cache on this frame.
-            let (texture_id, texture_layer, uv_rect) =
-                texture_cache.get_cache_location(&cache_entry.handle);
-
-            render_task.location = RenderTaskLocation::TextureCache(
-                texture_id,
-                texture_layer,
-                uv_rect.to_i32()
-            );
+                cache_entry.pending_render_task_id = Some(render_task_id);
+                cache_entry.user_data = user_data;
+                cache_entry.is_opaque = is_opaque;
+            }
         }
 
-        // Finally, return the texture cache handle that we know
-        // is now up to date.
-        Ok(texture_cache.get(&cache_entry.handle))
+        Ok(entry_handle.weak())
+    }
+
+    pub fn get_cache_entry(
+        &self,
+        handle: &RenderTaskCacheEntryHandle,
+    ) -> &RenderTaskCacheEntry {
+        self.cache_entries
+            .get_opt(handle)
+            .expect("bug: invalid render task cache handle")
     }
 
     #[allow(dead_code)]
     pub fn get_cache_item_for_render_task(&self,
                                           texture_cache: &TextureCache,
                                           key: &RenderTaskCacheKey)
                                           -> CacheItem {
         // Get the texture cache handle for this cache key.
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -6,17 +6,17 @@
 //!
 //! The `webrender::renderer` module provides the interface to webrender, which
 //! is accessible through [`Renderer`][renderer]
 //!
 //! [renderer]: struct.Renderer.html
 
 use api::{BlobImageRenderer, ColorF, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize, DocumentId, Epoch, ExternalImageId};
-use api::{ExternalImageType, FontRenderMode, ImageFormat, PipelineId};
+use api::{ExternalImageType, FontRenderMode, FrameMsg, ImageFormat, PipelineId};
 use api::{RenderApiSender, RenderNotifier, TexelRect, TextureTarget};
 use api::{channel};
 use api::DebugCommand;
 use api::channel::PayloadReceiverHelperMethods;
 use batch::{BatchKey, BatchKind, BatchTextures, BrushBatchKind, TransformBatchKind};
 #[cfg(any(feature = "capture", feature = "replay"))]
 use capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage};
 use debug_colors;
@@ -59,17 +59,17 @@ use std::path::PathBuf;
 use std::rc::Rc;
 use std::sync::Arc;
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use tiling::{AlphaRenderTarget, ColorRenderTarget};
 use tiling::{BlitJob, BlitJobSource, RenderPass, RenderPassKind, RenderTargetList};
-use tiling::{Frame, RenderTarget, ScalingInfo, TextureCacheRenderTarget};
+use tiling::{Frame, RenderTarget, RenderTargetKind, ScalingInfo, TextureCacheRenderTarget};
 #[cfg(not(feature = "pathfinder"))]
 use tiling::GlyphJob;
 use time::precise_time_ns;
 
 cfg_if! {
     if #[cfg(feature = "debugger")] {
         use serde_json;
         use debug_server::{self, DebugServer};
@@ -1602,16 +1602,17 @@ impl Renderer {
                     .exit_handler(move |idx| {
                         if let Some(ref thread_listener) = *thread_listener_for_rayon_end {
                             thread_listener.thread_stopped(&format!("WRWorker#{}", idx));
                         }
                     })
                     .build();
                 Arc::new(worker.unwrap())
             });
+        let sampler = options.sampler;
         let enable_render_on_scroll = options.enable_render_on_scroll;
 
         let blob_image_renderer = options.blob_image_renderer.take();
         let thread_listener_for_render_backend = thread_listener.clone();
         let thread_listener_for_scene_builder = thread_listener.clone();
         let scene_builder_hooks = options.scene_builder_hooks;
         let rb_thread_name = format!("WRRenderBackend#{}", options.renderer_id.unwrap_or(0));
         let scene_thread_name = format!("WRSceneBuilder#{}", options.renderer_id.unwrap_or(0));
@@ -1654,16 +1655,17 @@ impl Renderer {
                 result_tx,
                 scene_tx,
                 scene_rx,
                 device_pixel_ratio,
                 resource_cache,
                 backend_notifier,
                 config,
                 recorder,
+                sampler,
                 enable_render_on_scroll,
             );
             backend.run(backend_profile_counters);
             if let Some(ref thread_listener) = *thread_listener_for_render_backend {
                 thread_listener.thread_stopped(&rb_thread_name);
             }
         })?;
 
@@ -3075,17 +3077,22 @@ impl Renderer {
                     }
                     Entry::Occupied(mut entry) => {
                         let target = entry.get_mut();
                         target.last_access = frame_id;
                         target.fbo_id
                     }
                 };
                 let (src_rect, _) = render_tasks[output.task_id].get_target_rect();
-                let dest_rect = DeviceIntRect::new(DeviceIntPoint::zero(), output_size);
+                let mut dest_rect = DeviceIntRect::new(DeviceIntPoint::zero(), output_size);
+
+                // Invert Y coordinates, to correctly convert between coordinate systems.
+                dest_rect.origin.y += dest_rect.size.height;
+                dest_rect.size.height *= -1;
+
                 self.device.bind_read_target(render_target);
                 self.device.bind_external_draw_target(fbo_id);
                 self.device.blit_render_target(src_rect, dest_rect);
                 handler.unlock(output.pipeline_id);
             }
         }
     }
 
@@ -3330,18 +3337,20 @@ impl Renderer {
 
         // Handle any blits to this texture from child tasks.
         self.handle_blits(&target.blits, render_tasks);
 
         // Draw any blurs for this target.
         if !target.horizontal_blurs.is_empty() {
             let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);
 
-            self.shaders.cs_blur_a8
-                .bind(&mut self.device, &projection, &mut self.renderer_errors);
+            match target.target_kind {
+                RenderTargetKind::Alpha => &mut self.shaders.cs_blur_a8,
+                RenderTargetKind::Color => &mut self.shaders.cs_blur_rgba8,
+            }.bind(&mut self.device, &projection, &mut self.renderer_errors);
 
             self.draw_instanced_batch(
                 &target.horizontal_blurs,
                 VertexArrayKind::Blur,
                 &BatchTextures::no_texture(),
                 stats,
             );
         }
@@ -4018,16 +4027,32 @@ pub trait SceneBuilderHooks {
     /// loop of the scene builder thread, but outside of any specific message
     /// handler.
     fn poke(&self);
     /// This is called exactly once, when the scene builder thread is about to
     /// terminate.
     fn deregister(&self);
 }
 
+/// Allows callers to hook into the main render_backend loop and provide
+/// additional frame ops for generate_frame transactions. These functions
+/// are all called from the render backend thread.
+pub trait AsyncPropertySampler {
+    /// This is called exactly once, when the render backend thread is started
+    /// and before it processes anything.
+    fn register(&self);
+    /// This is called for each transaction with the generate_frame flag set
+    /// (i.e. that will trigger a render). The frame messages returned
+    /// are processed as though they were part of the original transaction.
+    fn sample(&self) -> Vec<FrameMsg>;
+    /// This is called exactly once, when the render backend thread is about to
+    /// terminate.
+    fn deregister(&self);
+}
+
 pub struct RendererOptions {
     pub device_pixel_ratio: f32,
     pub resource_override_path: Option<PathBuf>,
     pub enable_aa: bool,
     pub enable_dithering: bool,
     pub max_recorded_profiles: usize,
     pub enable_scrollbars: bool,
     pub precache_shaders: bool,
@@ -4043,16 +4068,17 @@ pub struct RendererOptions {
     pub recorder: Option<Box<ApiRecordingReceiver>>,
     pub thread_listener: Option<Box<ThreadListener + Send + Sync>>,
     pub enable_render_on_scroll: bool,
     pub cached_programs: Option<Rc<ProgramCache>>,
     pub debug_flags: DebugFlags,
     pub renderer_id: Option<u64>,
     pub disable_dual_source_blending: bool,
     pub scene_builder_hooks: Option<Box<SceneBuilderHooks + Send>>,
+    pub sampler: Option<Box<AsyncPropertySampler + Send>>,
 }
 
 impl Default for RendererOptions {
     fn default() -> Self {
         RendererOptions {
             device_pixel_ratio: 1.0,
             resource_override_path: None,
             enable_aa: true,
@@ -4075,16 +4101,17 @@ impl Default for RendererOptions {
             blob_image_renderer: None,
             recorder: None,
             thread_listener: None,
             enable_render_on_scroll: true,
             renderer_id: None,
             cached_programs: None,
             disable_dual_source_blending: false,
             scene_builder_hooks: None,
+            sampler: None,
         }
     }
 }
 
 #[cfg(not(feature = "debugger"))]
 pub struct DebugServer;
 
 #[cfg(not(feature = "debugger"))]
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -22,17 +22,18 @@ use device::TextureFilter;
 use glyph_cache::GlyphCache;
 #[cfg(not(feature = "pathfinder"))]
 use glyph_cache::GlyphCacheEntry;
 use glyph_rasterizer::{FontInstance, GlyphFormat, GlyphRasterizer, GlyphRequest};
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
 use internal_types::{FastHashMap, FastHashSet, SourceTexture, TextureUpdateList};
 use profiler::{ResourceProfileCounters, TextureCacheProfileCounters};
 use render_backend::FrameId;
-use render_task::{RenderTaskCache, RenderTaskCacheKey, RenderTaskId, RenderTaskTree};
+use render_task::{RenderTaskCache, RenderTaskCacheKey, RenderTaskId};
+use render_task::{RenderTaskCacheEntry, RenderTaskCacheEntryHandle, RenderTaskTree};
 use std::collections::hash_map::Entry::{self, Occupied, Vacant};
 use std::cmp;
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::mem;
 #[cfg(any(feature = "capture", feature = "replay"))]
 use std::path::PathBuf;
 use std::sync::{Arc, RwLock};
@@ -142,16 +143,39 @@ struct CachedImageInfo {
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ResourceClassCache<K: Hash + Eq, V, U: Default> {
     resources: FastHashMap<K, V>,
     pub user_data: U,
 }
 
+fn intersect_for_tile(
+    dirty: DeviceUintRect,
+    width: u32,
+    height: u32,
+    tile_size: TileSize,
+    tile_offset: TileOffset,
+
+) -> Option<DeviceUintRect> {
+        dirty.intersection(&DeviceUintRect::new(
+            DeviceUintPoint::new(
+                tile_offset.x as u32 * tile_size as u32,
+                tile_offset.y as u32 * tile_size as u32
+            ),
+            DeviceUintSize::new(width, height),
+        )).map(|mut r| {
+                // we can't translate by a negative size so do it manually
+                r.origin.x -= tile_offset.x as u32 * tile_size as u32;
+                r.origin.y -= tile_offset.y as u32 * tile_size as u32;
+                r
+            })
+}
+
+
 impl<K, V, U> ResourceClassCache<K, V, U>
 where
     K: Clone + Hash + Eq + Debug,
     U: Default,
 {
     pub fn new() -> ResourceClassCache<K, V, U> {
         ResourceClassCache {
             resources: FastHashMap::default(),
@@ -318,24 +342,26 @@ impl ResourceCache {
     // closure will be invoked to generate the render task
     // chain that is required to draw this task.
     pub fn request_render_task<F>(
         &mut self,
         key: RenderTaskCacheKey,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         user_data: Option<[f32; 3]>,
+        is_opaque: bool,
         mut f: F,
-    ) -> CacheItem where F: FnMut(&mut RenderTaskTree) -> (RenderTaskId, bool) {
+    ) -> RenderTaskCacheEntryHandle where F: FnMut(&mut RenderTaskTree) -> RenderTaskId {
         self.cached_render_tasks.request_render_task(
             key,
             &mut self.texture_cache,
             gpu_cache,
             render_tasks,
             user_data,
+            is_opaque,
             |render_task_tree| Ok(f(render_task_tree))
         ).expect("Failed to request a render task from the resource cache!")
     }
 
     pub fn update_resources(
         &mut self,
         updates: ResourceUpdates,
         profile_counters: &mut ResourceProfileCounters,
@@ -604,29 +630,37 @@ impl ResourceCache {
             return;
         }
 
         // We can start a worker thread rasterizing right now, if:
         //  - The image is a blob.
         //  - The blob hasn't already been requested this frame.
         if self.pending_image_requests.insert(request) && template.data.is_blob() {
             if let Some(ref mut renderer) = self.blob_image_renderer {
+                let mut dirty_rect = template.dirty_rect;
                 let (offset, w, h) = match template.tiling {
                     Some(tile_size) => {
                         let tile_offset = request.tile.unwrap();
                         let (w, h) = compute_tile_size(
                             &template.descriptor,
                             tile_size,
                             tile_offset,
                         );
                         let offset = DevicePoint::new(
                             tile_offset.x as f32 * tile_size as f32,
                             tile_offset.y as f32 * tile_size as f32,
                         );
 
+                        if let Some(dirty) = dirty_rect {
+                            dirty_rect = intersect_for_tile(dirty, w, h, tile_size, tile_offset);
+                            if dirty_rect.is_none() {
+                                return
+                            }
+                        }
+
                         (offset, w, h)
                     }
                     None => (
                         DevicePoint::zero(),
                         template.descriptor.width,
                         template.descriptor.height,
                     ),
                 };
@@ -635,17 +669,17 @@ impl ResourceCache {
                     &self.resources,
                     request.into(),
                     &BlobImageDescriptor {
                         width: w,
                         height: h,
                         offset,
                         format: template.descriptor.format,
                     },
-                    template.dirty_rect,
+                    dirty_rect,
                 );
             }
         }
     }
 
     pub fn request_glyphs(
         &mut self,
         mut font: FontInstance,
@@ -805,16 +839,27 @@ impl ResourceCache {
                 Ok(self.texture_cache.get(&image_info.texture_cache_handle))
             }
             Err(_) => {
                 Err(())
             }
         }
     }
 
+    pub fn get_cached_render_task(
+        &self,
+        handle: &RenderTaskCacheEntryHandle,
+    ) -> &RenderTaskCacheEntry {
+        self.cached_render_tasks.get_cache_entry(handle)
+    }
+
+    pub fn get_texture_cache_item(&self, handle: &TextureCacheHandle) -> CacheItem {
+        self.texture_cache.get(handle)
+    }
+
     pub fn get_image_properties(&self, image_key: ImageKey) -> Option<ImageProperties> {
         let image_template = &self.resources.image_templates.get(image_key);
 
         image_template.map(|image_template| {
             let external_image = match image_template.data {
                 ImageData::External(ext_image) => match ext_image.image_type {
                     ExternalImageType::TextureHandle(_) => Some(ext_image),
                     // external buffer uses resource_cache.
@@ -881,23 +926,30 @@ impl ResourceCache {
             gpu_cache,
             &mut self.cached_render_tasks,
             render_tasks,
             texture_cache_profile,
         );
 
         // Apply any updates of new / updated images (incl. blobs) to the texture cache.
         self.update_texture_cache(gpu_cache);
+        render_tasks.prepare_for_render();
+        self.cached_render_tasks.update(
+            gpu_cache,
+            &mut self.texture_cache,
+            render_tasks,
+        );
         self.texture_cache.end_frame(texture_cache_profile);
     }
 
     fn update_texture_cache(&mut self, gpu_cache: &mut GpuCache) {
         for request in self.pending_image_requests.drain() {
             let image_template = self.resources.image_templates.get_mut(request.key).unwrap();
             debug_assert!(image_template.data.uses_texture_cache());
+            let mut dirty_rect = image_template.dirty_rect;
 
             let image_data = match image_template.data {
                 ImageData::Raw(..) | ImageData::External(..) => {
                     // Safe to clone here since the Raw image data is an
                     // Arc, and the external image data is small.
                     image_template.data.clone()
                 }
                 ImageData::Blob(..) => {
@@ -930,16 +982,23 @@ impl ResourceCache {
 
             let descriptor = if let Some(tile) = request.tile {
                 let tile_size = image_template.tiling.unwrap();
                 let image_descriptor = &image_template.descriptor;
 
                 let (actual_width, actual_height) =
                     compute_tile_size(image_descriptor, tile_size, tile);
 
+                if let Some(dirty) = dirty_rect {
+                    dirty_rect = intersect_for_tile(dirty, actual_width, actual_height, tile_size, tile);
+                    if dirty_rect.is_none() {
+                        continue
+                    }
+                }
+
                 // The tiled image could be stored on the CPU as one large image or be
                 // already broken up into tiles. This affects the way we compute the stride
                 // and offset.
                 let tiled_on_cpu = image_template.data.is_blob();
 
                 let (stride, offset) = if tiled_on_cpu {
                     (image_descriptor.stride, 0)
                 } else {
@@ -990,17 +1049,17 @@ impl ResourceCache {
 
             let entry = self.cached_images.get_mut(&request).as_mut().unwrap();
             self.texture_cache.update(
                 &mut entry.texture_cache_handle,
                 descriptor,
                 filter,
                 Some(image_data),
                 [0.0; 3],
-                image_template.dirty_rect,
+                dirty_rect,
                 gpu_cache,
                 None,
             );
             image_template.dirty_rect = None;
         }
     }
 
     pub fn end_frame(&mut self) {
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -78,16 +78,19 @@ enum EntryKind {
         origin: DeviceUintPoint,
         // The layer index of the texture array.
         layer_index: u16,
         // The region that this entry belongs to in the layer.
         region_index: u16,
     },
 }
 
+#[derive(Debug)]
+pub enum CacheEntryMarker {} // Variant-less tag type: parameterizes the CacheEntry free-list handles.
+
 // Stores information related to a single entry in the texture
 // cache. This is stored for each item whether it's in the shared
 // cache or a standalone texture.
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 struct CacheEntry {
     // Size the requested item, in device pixels.
@@ -158,17 +161,17 @@ impl CacheEntry {
 
     fn evict(&self) {
         if let Some(eviction_notice) = self.eviction_notice.as_ref() {
             eviction_notice.notify();
         }
     }
 }
 
-type WeakCacheEntryHandle = WeakFreeListHandle<CacheEntry>;
+type WeakCacheEntryHandle = WeakFreeListHandle<CacheEntryMarker>;
 
 // A texture cache handle is a weak reference to a cache entry.
 // If the handle has not been inserted into the cache yet, the
 // value will be None. Even when the value is Some(), the location
 // may not actually be valid if it has been evicted by the cache.
 // In this case, the cache handle needs to re-upload this item
 // to the texture cache (see request() below).
 #[derive(Debug)]
@@ -235,27 +238,27 @@ pub struct TextureCache {
     #[cfg_attr(feature = "serde", serde(skip))]
     pending_updates: TextureUpdateList,
 
     // The current frame ID. Used for cache eviction policies.
     frame_id: FrameId,
 
     // Maintains the list of all current items in
     // the texture cache.
-    entries: FreeList<CacheEntry>,
+    entries: FreeList<CacheEntry, CacheEntryMarker>,
 
     // A list of the strong handles of items that were
     // allocated in the standalone texture pool. Used
     // for evicting old standalone textures.
-    standalone_entry_handles: Vec<FreeListHandle<CacheEntry>>,
+    standalone_entry_handles: Vec<FreeListHandle<CacheEntryMarker>>,
 
     // A list of the strong handles of items that were
     // allocated in the shared texture cache. Used
     // for evicting old cache items.
-    shared_entry_handles: Vec<FreeListHandle<CacheEntry>>,
+    shared_entry_handles: Vec<FreeListHandle<CacheEntryMarker>>,
 }
 
 impl TextureCache {
     pub fn new(max_texture_size: u32) -> Self {
         TextureCache {
             max_texture_size,
             array_a8_linear: TextureArray::new(
                 ImageFormat::R8,
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -618,27 +618,26 @@ impl RenderTarget for AlphaRenderTarget 
     fn needs_depth(&self) -> bool {
         false
     }
 }
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct TextureCacheRenderTarget {
+    pub target_kind: RenderTargetKind, // target kind of the task this target was created for
     pub horizontal_blurs: Vec<BlurInstance>,
     pub blits: Vec<BlitJob>,
     pub glyphs: Vec<GlyphJob>,
 }
 
 impl TextureCacheRenderTarget {
-    fn new(
-        _size: Option<DeviceUintSize>,
-        _screen_size: DeviceIntSize,
-    ) -> Self {
+    fn new(target_kind: RenderTargetKind) -> Self {
         TextureCacheRenderTarget {
+            target_kind,
             horizontal_blurs: vec![],
             blits: vec![],
             glyphs: vec![],
         }
     }
 
     fn add_task(
         &mut self,
@@ -821,60 +820,57 @@ impl RenderPass {
                 // Step through each task, adding to batches as appropriate.
                 for &task_id in &self.tasks {
                     let (target_kind, texture_target) = {
                         let task = &mut render_tasks[task_id];
                         let target_kind = task.target_kind();
 
                         // Find a target to assign this task to, or create a new
                         // one if required.
-                        let (target_kind, texture_target) = match task.location {
+                        let texture_target = match task.location {
                             RenderTaskLocation::TextureCache(texture_id, layer, _) => {
-                                // TODO(gw): When we support caching color items, we will
-                                //           need to calculate that here to get the
-                                //           correct target kind.
-                                (RenderTargetKind::Alpha, Some((texture_id, layer)))
+                                Some((texture_id, layer))
                             }
                             RenderTaskLocation::Fixed(..) => {
-                                (RenderTargetKind::Color, None)
+                                None
                             }
                             RenderTaskLocation::Dynamic(ref mut origin, size) => {
+                                let size = size.expect("bug: size must be assigned by now");
                                 let alloc_size = DeviceUintSize::new(size.width as u32, size.height as u32);
                                 let (alloc_origin, target_index) =  match target_kind {
                                     RenderTargetKind::Color => color.allocate(alloc_size),
                                     RenderTargetKind::Alpha => alpha.allocate(alloc_size),
                                 };
                                 *origin = Some((alloc_origin.to_i32(), target_index));
-
-                                (target_kind, None)
+                                None
                             }
                         };
 
                         // Replace the pending saved index with a real one
                         if let Some(index) = task.saved_index {
                             assert_eq!(index, SavedTargetIndex::PENDING);
                             task.saved_index = match target_kind {
                                 RenderTargetKind::Color => saved_color,
                                 RenderTargetKind::Alpha => saved_alpha,
                             };
                         }
 
                         // Give the render task an opportunity to add any
                         // information to the GPU cache, if appropriate.
-                        task.prepare_for_render(gpu_cache);
+                        task.write_gpu_blocks(gpu_cache);
 
                         (target_kind, texture_target)
                     };
 
                     match texture_target {
                         Some(texture_target) => {
                             let texture = texture_cache
                                 .entry(texture_target)
                                 .or_insert(
-                                    TextureCacheRenderTarget::new(None, DeviceIntSize::zero())
+                                    TextureCacheRenderTarget::new(target_kind)
                                 );
                             texture.add_task(task_id, render_tasks);
                         }
                         None => {
                             match target_kind {
                                 RenderTargetKind::Color => color.add_task(
                                     task_id,
                                     ctx,
--- a/gfx/webrender_api/src/api.rs
+++ b/gfx/webrender_api/src/api.rs
@@ -333,16 +333,21 @@ impl Transaction {
     }
 
     /// Enable copying of the output of this pipeline id to
     /// an external texture for callers to consume.
     pub fn enable_frame_output(&mut self, pipeline_id: PipelineId, enable: bool) {
         self.frame_ops.push(FrameMsg::EnableFrameOutput(pipeline_id, enable));
     }
 
+    /// Consumes the transaction, returning only its frame ops; everything else it holds is dropped.
+    pub fn get_frame_ops(self) -> Vec<FrameMsg> {
+        self.frame_ops
+    }
+
     fn finalize(self) -> (TransactionMsg, Vec<Payload>) {
         (
             TransactionMsg {
                 scene_ops: self.scene_ops,
                 frame_ops: self.frame_ops,
                 resource_updates: self.resource_updates,
                 use_scene_builder_thread: self.use_scene_builder_thread,
                 generate_frame: self.generate_frame,
--- a/gfx/webrender_api/src/units.rs
+++ b/gfx/webrender_api/src/units.rs
@@ -30,16 +30,25 @@ pub type DeviceUintRect = TypedRect<u32,
 pub type DeviceUintPoint = TypedPoint2D<u32, DevicePixel>;
 pub type DeviceUintSize = TypedSize2D<u32, DevicePixel>;
 
 pub type DeviceRect = TypedRect<f32, DevicePixel>;
 pub type DevicePoint = TypedPoint2D<f32, DevicePixel>;
 pub type DeviceVector2D = TypedVector2D<f32, DevicePixel>;
 pub type DeviceSize = TypedSize2D<f32, DevicePixel>;
 
+/// Geometry in the coordinate system of a Picture (intermediate
+/// surface) in physical pixels.
+#[derive(Hash, Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
+pub struct PicturePixel;
+
+pub type PictureIntRect = TypedRect<i32, PicturePixel>;
+pub type PictureIntPoint = TypedPoint2D<i32, PicturePixel>;
+pub type PictureIntSize = TypedSize2D<i32, PicturePixel>;
+
 /// Geometry in a stacking context's local coordinate space (logical pixels).
 ///
 /// For now layout pixels are equivalent to layer pixels, but it may change.
 pub type LayoutPixel = LayerPixel;
 
 pub type LayoutRect = LayerRect;
 pub type LayoutPoint = LayerPoint;
 pub type LayoutVector2D = LayerVector2D;
--- a/gfx/webrender_bindings/revision.txt
+++ b/gfx/webrender_bindings/revision.txt
@@ -1,1 +1,1 @@
-6f997974cec5772b1797725f4a7942d742e7d7ff
+5bcb7f46c6931633fd20813c46cd69af164effe7
--- a/gfx/wrench/src/blob.rs
+++ b/gfx/wrench/src/blob.rs
@@ -25,53 +25,58 @@ fn deserialize_blob(blob: &[u8]) -> Resu
 }
 
 // This is the function that applies the deserialized drawing commands and generates
 // actual image data.
 fn render_blob(
     color: ColorU,
     descriptor: &BlobImageDescriptor,
     tile: Option<TileOffset>,
+    dirty_rect: Option<DeviceUintRect>,
 ) -> BlobImageResult {
     // Allocate storage for the result. Right now the resource cache expects the
     // tiles to have have no stride or offset.
-    let mut texels = Vec::with_capacity((descriptor.width * descriptor.height * 4) as usize);
+    let mut texels = vec![0u8; (descriptor.width * descriptor.height * descriptor.format.bytes_per_pixel()) as usize];
 
     // Generate a per-tile pattern to see it in the demo. For a real use case it would not
     // make sense for the rendered content to depend on its tile.
     let tile_checker = match tile {
         Some(tile) => (tile.x % 2 == 0) != (tile.y % 2 == 0),
         None => true,
     };
 
-    for y in 0 .. descriptor.height {
-        for x in 0 .. descriptor.width {
+    let dirty_rect = dirty_rect.unwrap_or(DeviceUintRect::new(
+        DeviceUintPoint::new(0, 0),
+        DeviceUintSize::new(descriptor.width, descriptor.height)));
+
+    for y in dirty_rect.min_y() .. dirty_rect.max_y() {
+        for x in dirty_rect.min_x() .. dirty_rect.max_x() {
             // Apply the tile's offset. This is important: all drawing commands should be
             // translated by this offset to give correct results with tiled blob images.
             let x2 = x + descriptor.offset.x as u32;
             let y2 = y + descriptor.offset.y as u32;
 
             // Render a simple checkerboard pattern
             let checker = if (x2 % 20 >= 10) != (y2 % 20 >= 10) {
                 1
             } else {
                 0
             };
             // ..nested in the per-tile checkerboard pattern
             let tc = if tile_checker { 0 } else { (1 - checker) * 40 };
 
             match descriptor.format {
                 ImageFormat::BGRA8 => {
-                    texels.push(color.b * checker + tc);
-                    texels.push(color.g * checker + tc);
-                    texels.push(color.r * checker + tc);
-                    texels.push(color.a * checker + tc);
+                    texels[((y * descriptor.width + x) * 4 + 0) as usize] = color.b * checker + tc;
+                    texels[((y * descriptor.width + x) * 4 + 1) as usize] = color.g * checker + tc;
+                    texels[((y * descriptor.width + x) * 4 + 2) as usize] = color.r * checker + tc;
+                    texels[((y * descriptor.width + x) * 4 + 3) as usize] = color.a * checker + tc;
                 }
                 ImageFormat::R8 => {
-                    texels.push(color.a * checker + tc);
+                    texels[(y * descriptor.width + x) as usize] = color.a * checker + tc;
                 }
                 _ => {
                     return Err(BlobImageError::Other(
                         format!("Unsupported image format {:?}", descriptor.format),
                     ));
                 }
             }
         }
@@ -130,28 +135,28 @@ impl BlobImageRenderer for CheckerboardR
         self.image_cmds.remove(&key);
     }
 
     fn request(
         &mut self,
         _resources: &BlobImageResources,
         request: BlobImageRequest,
         descriptor: &BlobImageDescriptor,
-        _dirty_rect: Option<DeviceUintRect>,
+        dirty_rect: Option<DeviceUintRect>,
     ) {
         (self.callbacks.lock().unwrap().request)(&request);
         assert!(!self.rendered_images.contains_key(&request));
         // This method is where we kick off our rendering jobs.
         // It should avoid doing work on the calling thread as much as possible.
         // In this example we will use the thread pool to render individual tiles.
 
         // Gather the input data to send to a worker thread.
         let cmds = self.image_cmds.get(&request.key).unwrap();
 
-        let result = render_blob(*cmds, descriptor, request.tile);
+        let result = render_blob(*cmds, descriptor, request.tile, dirty_rect);
 
         self.rendered_images.insert(request, result);
     }
 
     fn resolve(&mut self, request: BlobImageRequest) -> BlobImageResult {
         (self.callbacks.lock().unwrap().resolve)();
         self.rendered_images.remove(&request).unwrap()
     }
--- a/gfx/wrench/src/rawtest.rs
+++ b/gfx/wrench/src/rawtest.rs
@@ -158,42 +158,43 @@ impl<'a> RawtestHarness<'a> {
             AlphaType::PremultipliedAlpha,
             blob_img,
         );
 
         let mut epoch = Epoch(0);
 
         self.submit_dl(&mut epoch, layout_size, builder, Some(resources));
 
+        let called = Arc::new(AtomicIsize::new(0));
+        let called_inner = Arc::clone(&called);
+
+        self.wrench.callbacks.lock().unwrap().request = Box::new(move |_| {
+            called_inner.fetch_add(1, Ordering::SeqCst);
+        });
+
+        let pixels_first = self.render_and_get_pixels(window_rect);
+
+        assert!(called.load(Ordering::SeqCst) == 1);
+
         // draw the blob image a second time at a different location
 
         // make a new display list that refers to the first image
         let mut builder = DisplayListBuilder::new(self.wrench.root_pipeline_id, layout_size);
         let info = LayoutPrimitiveInfo::new(rect(1.0, 60.0, 200.0, 200.0));
         builder.push_image(
             &info,
             size(200.0, 200.0),
             size(0.0, 0.0),
             ImageRendering::Auto,
             AlphaType::PremultipliedAlpha,
             blob_img,
         );
 
         self.submit_dl(&mut epoch, layout_size, builder, None);
 
-        let called = Arc::new(AtomicIsize::new(0));
-        let called_inner = Arc::clone(&called);
-
-        self.wrench.callbacks.lock().unwrap().request = Box::new(move |_| {
-            called_inner.fetch_add(1, Ordering::SeqCst);
-        });
-
-        let pixels_first = self.render_and_get_pixels(window_rect);
-        assert!(called.load(Ordering::SeqCst) == 1);
-
         let pixels_second = self.render_and_get_pixels(window_rect);
 
         // make sure we only requested once
         assert!(called.load(Ordering::SeqCst) == 1);
 
         // use png;
         // png::save_flipped("out1.png", &pixels_first, window_rect.size);
         // png::save_flipped("out2.png", &pixels_second, window_rect.size);