Bug 1431776 - Update webrender to commit c0943271eb8c6440a61db37e2f1e84201dcac2e3. r?jrmuizel draft
author Kartikaya Gupta <kgupta@mozilla.com>
Wed, 24 Jan 2018 09:13:07 -0500
changeset 724094 7fde47cf400cdffc3e0646c9dd6d87504a0df0cc
parent 724080 0e62eb7804c00c0996a9bdde5350328a384fb7af
child 724095 b243aa977463889202fe6b0aad3eca308eb8b3f1
push id 96642
push user kgupta@mozilla.com
push date Wed, 24 Jan 2018 14:17:59 +0000
reviewers jrmuizel
bugs 1431776
milestone 60.0a1
Bug 1431776 - Update webrender to commit c0943271eb8c6440a61db37e2f1e84201dcac2e3. r?jrmuizel MozReview-Commit-ID: 57ZV2mKiGm0
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/examples/frame_output.rs
gfx/webrender/examples/texture_cache_stress.rs
gfx/webrender/res/brush_picture.glsl
gfx/webrender/res/cs_clip_image.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_image.glsl
gfx/webrender/res/ps_yuv_image.glsl
gfx/webrender/src/batch.rs
gfx/webrender/src/border.rs
gfx/webrender/src/capture.rs
gfx/webrender/src/debug_render.rs
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/gpu_cache.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender_api/src/api.rs
gfx/webrender_api/src/color.rs
gfx/webrender_api/src/image.rs
gfx/webrender_api/src/units.rs
gfx/webrender_bindings/Cargo.toml
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 
 -------------------------------------------------------------------------------
 
 The version of WebRender currently in the tree is:
-e9269c7e06e20363be0b2a2a1be98d292ff7acca
+c0943271eb8c6440a61db37e2f1e84201dcac2e3
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -14,26 +14,27 @@ debugger = ["ws", "serde_json", "serde",
 capture = ["webrender_api/debug-serialization", "ron", "serde"]
 
 [dependencies]
 app_units = "0.6"
 bincode = "0.9"
 byteorder = "1.0"
 euclid = "0.16"
 fxhash = "0.2.1"
-gleam = "0.4.19"
+gleam = "0.4.20"
 lazy_static = "1"
 log = "0.3"
 num-traits = "0.1.32"
 time = "0.1"
 rayon = "0.8"
 webrender_api = {path = "../webrender_api"}
 bitflags = "1.0"
 thread_profiler = "0.1.1"
 plane-split = "0.7"
+png = { optional = true, version = "0.11" }
 smallvec = "0.6"
 ws = { optional = true, version = "0.7.3" }
 serde_json = { optional = true, version = "1.0" }
 serde = { optional = true, version = "1.0", features = ["serde_derive"] }
 image = { optional = true, version = "0.17" }
 base64 = { optional = true, version = "0.3.0" }
 ron = { optional = true, version = "0.1.5" }
 
--- a/gfx/webrender/examples/frame_output.rs
+++ b/gfx/webrender/examples/frame_output.rs
@@ -46,20 +46,17 @@ impl webrender::OutputImageHandler for O
     }
 
     fn unlock(&mut self, _id: PipelineId) {}
 }
 
 impl webrender::ExternalImageHandler for ExternalHandler {
     fn lock(&mut self, _key: ExternalImageId, _channel_index: u8) -> webrender::ExternalImage {
         webrender::ExternalImage {
-            u0: 0.0,
-            v0: 0.0,
-            u1: 1.0,
-            v1: 1.0,
+            uv: TexelRect::new(0.0, 0.0, 1.0, 1.0),
             source: webrender::ExternalImageSource::NativeTexture(self.texture_id),
         }
     }
     fn unlock(&mut self, _key: ExternalImageId, _channel_index: u8) {}
 }
 
 impl App {
     fn init_output_document(
@@ -72,17 +69,17 @@ impl App {
         self.external_image_key = Some(api.generate_image_key());
         let mut resources = ResourceUpdates::new();
         resources.add_image(
             self.external_image_key.unwrap(),
             ImageDescriptor::new(100, 100, ImageFormat::BGRA8, true),
             ImageData::External(ExternalImageData {
                 id: ExternalImageId(0),
                 channel_index: 0,
-                image_type: ExternalImageType::Texture2DHandle,
+                image_type: ExternalImageType::TextureHandle(TextureTarget::Default),
             }),
             None,
         );
 
         let pipeline_id = PipelineId(1, 0);
         let layer = 1;
         let color = ColorF::new(1., 1., 0., 1.);
         let bounds = DeviceUintRect::new(DeviceUintPoint::zero(), framebuffer_size);
--- a/gfx/webrender/examples/texture_cache_stress.rs
+++ b/gfx/webrender/examples/texture_cache_stress.rs
@@ -16,17 +16,17 @@ use webrender::api::*;
 
 struct ImageGenerator {
     patterns: [[u8; 3]; 6],
     next_pattern: usize,
     current_image: Vec<u8>,
 }
 
 impl ImageGenerator {
-    fn new() -> ImageGenerator {
+    fn new() -> Self {
         ImageGenerator {
             next_pattern: 0,
             patterns: [
                 [1, 0, 0],
                 [0, 1, 0],
                 [0, 0, 1],
                 [1, 1, 0],
                 [0, 1, 1],
@@ -58,20 +58,17 @@ impl ImageGenerator {
         mem::replace(&mut self.current_image, Vec::new())
     }
 }
 
 impl webrender::ExternalImageHandler for ImageGenerator {
     fn lock(&mut self, _key: ExternalImageId, channel_index: u8) -> webrender::ExternalImage {
         self.generate_image(channel_index as u32);
         webrender::ExternalImage {
-            u0: 0.0,
-            v0: 0.0,
-            u1: 1.0,
-            v1: 1.0,
+            uv: TexelRect::new(0.0, 0.0, 1.0, 1.0),
             source: webrender::ExternalImageSource::RawData(&self.current_image),
         }
     }
     fn unlock(&mut self, _key: ExternalImageId, _channel_index: u8) {}
 }
 
 struct App {
     stress_keys: Vec<ImageKey>,
@@ -240,17 +237,17 @@ impl Example for App {
                         }
 
                         let size = 32;
                         let image_key = api.generate_image_key();
 
                         let image_data = ExternalImageData {
                             id: ExternalImageId(0),
                             channel_index: size as u8,
-                            image_type: ExternalImageType::ExternalBuffer,
+                            image_type: ExternalImageType::Buffer,
                         };
 
                         updates.add_image(
                             image_key,
                             ImageDescriptor::new(size, size, ImageFormat::BGRA8, true),
                             ImageData::External(image_data),
                             None,
                         );
@@ -285,17 +282,17 @@ impl Example for App {
         }
 
         false
     }
 
     fn get_image_handlers(
         &mut self,
         _gl: &gl::Gl,
-    ) -> (Option<Box<webrender::ExternalImageHandler>>, 
+    ) -> (Option<Box<webrender::ExternalImageHandler>>,
           Option<Box<webrender::OutputImageHandler>>) {
         (Some(Box::new(ImageGenerator::new())), None)
     }
 }
 
 fn main() {
     let mut app = App {
         image_key: None,
--- a/gfx/webrender/res/brush_picture.glsl
+++ b/gfx/webrender/res/brush_picture.glsl
@@ -58,23 +58,23 @@ void brush_vs(
 #if defined WR_FEATURE_COLOR_TARGET_ALPHA_MASK
     vColor = blur_task.color;
 #endif
     vec2 uv0 = blur_task.common_data.task_rect.p0;
     vec2 src_size = blur_task.common_data.task_rect.size * blur_task.scale_factor;
     vec2 uv1 = uv0 + blur_task.common_data.task_rect.size;
 #else
     Picture pic = fetch_picture(prim_address);
-    ImageResource uv_rect = fetch_image_resource(user_data.x);
+    ImageResource res = fetch_image_resource(user_data.x);
     vec2 texture_size = vec2(textureSize(sColor1, 0).xy);
     vColor = pic.color;
-    vec2 uv0 = uv_rect.uv_rect.xy;
-    vec2 uv1 = uv_rect.uv_rect.zw;
-    vec2 src_size = (uv1 - uv0) * uv_rect.user_data.x;
-    vUv.z = uv_rect.layer;
+    vec2 uv0 = res.uv_rect.p0;
+    vec2 uv1 = res.uv_rect.p1;
+    vec2 src_size = (uv1 - uv0) * res.user_data.x;
+    vUv.z = res.layer;
 #endif
 
     // TODO(gw): In the future we'll probably draw these as segments
     //           with the brush shader. When that occurs, we can
     //           modify the UVs for each segment in the VS, and the
     //           FS can become a simple shader that doesn't need
     //           to adjust the UVs.
 
--- a/gfx/webrender/res/cs_clip_image.glsl
+++ b/gfx/webrender/res/cs_clip_image.glsl
@@ -33,19 +33,19 @@ void main(void) {
                                                scroll_node,
                                                area);
 
     vPos = vi.local_pos;
     vLayer = res.layer;
 
     vClipMaskUv = vec3((vPos.xy / vPos.z - local_rect.p0) / local_rect.size, 0.0);
     vec2 texture_size = vec2(textureSize(sColor0, 0));
-    vClipMaskUvRect = vec4(res.uv_rect.xy, res.uv_rect.zw - res.uv_rect.xy) / texture_size.xyxy;
+    vClipMaskUvRect = vec4(res.uv_rect.p0, res.uv_rect.p1 - res.uv_rect.p0) / texture_size.xyxy;
     // applying a half-texel offset to the UV boundaries to prevent linear samples from the outside
-    vec4 inner_rect = vec4(res.uv_rect.xy, res.uv_rect.zw);
+    vec4 inner_rect = vec4(res.uv_rect.p0, res.uv_rect.p1);
     vClipMaskUvInnerRect = (inner_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     float alpha = init_transform_fs(vPos.xy / vPos.z);
 
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -690,52 +690,53 @@ struct GlyphResource {
 };
 
 GlyphResource fetch_glyph_resource(int address) {
     vec4 data[2] = fetch_from_resource_cache_2(address);
     return GlyphResource(data[0], data[1].x, data[1].yz, data[1].w);
 }
 
 struct ImageResource {
-    vec4 uv_rect;
+    RectWithEndpoint uv_rect;
     float layer;
     vec3 user_data;
 };
 
 ImageResource fetch_image_resource(int address) {
     //Note: number of blocks has to match `renderer::BLOCKS_PER_UV_RECT`
     vec4 data[2] = fetch_from_resource_cache_2(address);
-    return ImageResource(data[0], data[1].x, data[1].yzw);
+    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
+    return ImageResource(uv_rect, data[1].x, data[1].yzw);
 }
 
 ImageResource fetch_image_resource_direct(ivec2 address) {
     vec4 data[2] = fetch_from_resource_cache_2_direct(address);
-    return ImageResource(data[0], data[1].x, data[1].yzw);
+    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
+    return ImageResource(uv_rect, data[1].x, data[1].yzw);
 }
 
 struct TextRun {
     vec4 color;
     vec4 bg_color;
     vec2 offset;
 };
 
 TextRun fetch_text_run(int address) {
     vec4 data[3] = fetch_from_resource_cache_3(address);
     return TextRun(data[0], data[1], data[2].xy);
 }
 
 struct Image {
     vec4 stretch_size_and_tile_spacing;  // Size of the actual image and amount of space between
                                          //     tiled instances of this image.
-    vec4 sub_rect;                          // If negative, ignored.
 };
 
 Image fetch_image(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
-    return Image(data[0], data[1]);
+    vec4 data = fetch_from_resource_cache_1(address);
+    return Image(data);
 }
 
 void write_clip(vec2 global_pos, ClipArea area) {
     vec2 uv = global_pos +
               area.common_data.task_rect.p0 -
               area.screen_origin;
     vClipMaskUvBounds = vec4(
         area.common_data.task_rect.p0,
--- a/gfx/webrender/res/ps_image.glsl
+++ b/gfx/webrender/res/ps_image.glsl
@@ -45,25 +45,18 @@ void main(void) {
     // If this is in WR_FEATURE_TEXTURE_RECT mode, the rect and size use
     // non-normalized texture coordinates.
 #ifdef WR_FEATURE_TEXTURE_RECT
     vec2 texture_size_normalization_factor = vec2(1, 1);
 #else
     vec2 texture_size_normalization_factor = vec2(textureSize(sColor0, 0));
 #endif
 
-    vec2 uv0, uv1;
-
-    if (image.sub_rect.x < 0.0) {
-        uv0 = res.uv_rect.xy;
-        uv1 = res.uv_rect.zw;
-    } else {
-        uv0 = res.uv_rect.xy + image.sub_rect.xy;
-        uv1 = res.uv_rect.xy + image.sub_rect.zw;
-    }
+    vec2 uv0 = res.uv_rect.p0;
+    vec2 uv1 = res.uv_rect.p1;
 
     // vUv will contain how many times this image has wrapped around the image size.
     vec2 st0 = uv0 / texture_size_normalization_factor;
     vec2 st1 = uv1 / texture_size_normalization_factor;
 
     vLayer = res.layer;
     vTextureSize = st1 - st0;
     vTextureOffset = st0;
--- a/gfx/webrender/res/ps_yuv_image.glsl
+++ b/gfx/webrender/res/ps_yuv_image.glsl
@@ -65,34 +65,34 @@ void main(void) {
 
     // If this is in WR_FEATURE_TEXTURE_RECT mode, the rect and size use
     // non-normalized texture coordinates.
 #ifdef WR_FEATURE_TEXTURE_RECT
     vec2 y_texture_size_normalization_factor = vec2(1, 1);
 #else
     vec2 y_texture_size_normalization_factor = vec2(textureSize(sColor0, 0));
 #endif
-    vec2 y_st0 = y_rect.uv_rect.xy / y_texture_size_normalization_factor;
-    vec2 y_st1 = y_rect.uv_rect.zw / y_texture_size_normalization_factor;
+    vec2 y_st0 = y_rect.uv_rect.p0 / y_texture_size_normalization_factor;
+    vec2 y_st1 = y_rect.uv_rect.p1 / y_texture_size_normalization_factor;
 
     vTextureSizeY = y_st1 - y_st0;
     vTextureOffsetY = y_st0;
 
 #ifndef WR_FEATURE_INTERLEAVED_Y_CB_CR
     // This assumes the U and V surfaces have the same size.
 #ifdef WR_FEATURE_TEXTURE_RECT
     vec2 uv_texture_size_normalization_factor = vec2(1, 1);
 #else
     vec2 uv_texture_size_normalization_factor = vec2(textureSize(sColor1, 0));
 #endif
-    vec2 u_st0 = u_rect.uv_rect.xy / uv_texture_size_normalization_factor;
-    vec2 u_st1 = u_rect.uv_rect.zw / uv_texture_size_normalization_factor;
+    vec2 u_st0 = u_rect.uv_rect.p0 / uv_texture_size_normalization_factor;
+    vec2 u_st1 = u_rect.uv_rect.p1 / uv_texture_size_normalization_factor;
 
 #ifndef WR_FEATURE_NV12
-    vec2 v_st0 = v_rect.uv_rect.xy / uv_texture_size_normalization_factor;
+    vec2 v_st0 = v_rect.uv_rect.p0 / uv_texture_size_normalization_factor;
 #endif
 
     vTextureSizeUv = u_st1 - u_st0;
     vTextureOffsetU = u_st0;
 #ifndef WR_FEATURE_NV12
     vTextureOffsetV = v_st0;
 #endif
 #endif
--- a/gfx/webrender/src/batch.rs
+++ b/gfx/webrender/src/batch.rs
@@ -1,36 +1,36 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AlphaType, DeviceIntRect, DeviceIntSize, ImageKey, LayerToWorldScale};
-use api::{ExternalImageType, FilterOp, ImageRendering, LayerRect};
+use api::{DeviceUintRect, DeviceUintPoint, DeviceUintSize, ExternalImageType, FilterOp, ImageRendering, LayerRect};
 use api::{SubpixelDirection, TileOffset, YuvColorSpace, YuvFormat};
 use api::{LayerToWorldTransform, WorldPixel};
 use border::{BorderCornerInstance, BorderCornerSide, BorderEdgeKind};
 use clip::{ClipSource, ClipStore};
 use clip_scroll_tree::{CoordinateSystemId};
 use euclid::{TypedTransform3D, vec3};
 use glyph_rasterizer::GlyphFormat;
-use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
+use gpu_cache::{GpuCache, GpuCacheAddress};
 use gpu_types::{BrushImageKind, BrushInstance, ClipChainRectIndex};
 use gpu_types::{ClipMaskInstance, ClipScrollNodeIndex, PictureType};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
 use internal_types::{FastHashMap, SourceTexture};
 use picture::{PictureCompositeMode, PictureKind, PicturePrimitive, PictureSurface};
 use plane_split::{BspSplitter, Polygon, Splitter};
-use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
+use prim_store::{ImageSource, PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
 use prim_store::{BrushPrimitive, BrushKind, DeferredResolve, EdgeAaSegmentMask, PrimitiveRun};
 use render_task::{ClipWorkItem};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKind};
 use render_task::{RenderTaskTree};
 use renderer::{BlendMode, ImageBufferKind};
 use renderer::BLOCKS_PER_UV_RECT;
-use resource_cache::{GlyphFetchResult, ResourceCache};
+use resource_cache::{CacheItem, GlyphFetchResult, ResourceCache};
 use std::{usize, f32, i32};
 use tiling::{RenderTargetContext, RenderTargetKind};
 use util::{MatrixHelpers, TransformedRectKind};
 
 // Special sentinel value recognized by the shader. It is considered to be
 // a dummy task that doesn't mask out anything.
 const OPAQUE_TASK_ADDRESS: RenderTaskAddress = RenderTaskAddress(i32::MAX as u32);
 
@@ -542,16 +542,34 @@ impl AlphaBatcher {
                     deferred_resolves,
                     splitter,
                     pic_type,
                 );
             }
         }
     }
 
+    fn get_buffer_kind(texture: SourceTexture) -> ImageBufferKind {
+        match texture {
+            SourceTexture::External(ext_image) => {
+                match ext_image.image_type {
+                    ExternalImageType::TextureHandle(target) => {
+                        target.into()
+                    }
+                    ExternalImageType::Buffer => {
+                        // The ExternalImageType::Buffer should be handled by resource_cache.
+                        // It should go through the non-external case.
+                        panic!("Unexpected non-texture handle type");
+                    }
+                }
+            }
+            _ => ImageBufferKind::Texture2DArray,
+        }
+    }
+
     // Adds a primitive to a batch.
     // It can recursively call itself in some situations, for
     // example if it encounters a picture where the items
     // in that picture are being drawn into the same target.
     fn add_prim_to_batch(
         &mut self,
         clip_chain_rect_index: ClipChainRectIndex,
         scroll_id: ClipScrollNodeIndex,
@@ -672,72 +690,51 @@ impl AlphaBatcher {
                           batch.push(base_instance.build(border_segment as i32, 0, 0));
                         }
                     }
                 }
             }
             PrimitiveKind::Image => {
                 let image_cpu = &ctx.prim_store.cpu_images[prim_metadata.cpu_prim_index.0];
 
-                let (color_texture_id, uv_address) = resolve_image(
-                    image_cpu.image_key,
-                    image_cpu.image_rendering,
-                    image_cpu.tile_offset,
-                    ctx.resource_cache,
-                    gpu_cache,
-                    deferred_resolves,
-                );
+                let cache_item = match image_cpu.source {
+                    ImageSource::Default => {
+                        resolve_image(
+                            image_cpu.key.image_key,
+                            image_cpu.key.image_rendering,
+                            image_cpu.key.tile_offset,
+                            ctx.resource_cache,
+                            gpu_cache,
+                            deferred_resolves,
+                        )
+                    }
+                    ImageSource::Cache { ref item, .. } => {
+                        item.clone()
+                    }
+                };
 
-                if color_texture_id == SourceTexture::Invalid {
+                if cache_item.texture_id == SourceTexture::Invalid {
                     warn!("Warnings: skip a PrimitiveKind::Image at {:?}.\n", item_bounding_rect);
                     return;
                 }
 
-                let batch_kind = match color_texture_id {
-                    SourceTexture::External(ext_image) => {
-                        match ext_image.image_type {
-                            ExternalImageType::Texture2DHandle => {
-                                TransformBatchKind::Image(ImageBufferKind::Texture2D)
-                            }
-                            ExternalImageType::Texture2DArrayHandle => {
-                                TransformBatchKind::Image(ImageBufferKind::Texture2DArray)
-                            }
-                            ExternalImageType::TextureRectHandle => {
-                                TransformBatchKind::Image(ImageBufferKind::TextureRect)
-                            }
-                            ExternalImageType::TextureExternalHandle => {
-                                TransformBatchKind::Image(ImageBufferKind::TextureExternal)
-                            }
-                            ExternalImageType::ExternalBuffer => {
-                                // The ExternalImageType::ExternalBuffer should be handled by resource_cache.
-                                // It should go through the non-external case.
-                                panic!(
-                                    "Non-texture handle type should be handled in other way"
-                                );
-                            }
-                        }
-                    }
-                    _ => TransformBatchKind::Image(ImageBufferKind::Texture2DArray),
-                };
-
-                let textures = BatchTextures {
-                    colors: [
-                        color_texture_id,
-                        SourceTexture::Invalid,
-                        SourceTexture::Invalid,
-                    ],
-                };
-
+                let batch_kind = TransformBatchKind::Image(Self::get_buffer_kind(cache_item.texture_id));
                 let key = BatchKey::new(
                     BatchKind::Transformable(transform_kind, batch_kind),
                     blend_mode,
-                    textures,
+                    BatchTextures {
+                        colors: [
+                            cache_item.texture_id,
+                            SourceTexture::Invalid,
+                            SourceTexture::Invalid,
+                        ],
+                    },
                 );
                 let batch = self.batch_list.get_suitable_batch(key, item_bounding_rect);
-                batch.push(base_instance.build(uv_address.as_int(gpu_cache), 0, 0));
+                batch.push(base_instance.build(cache_item.uv_rect_handle.as_int(gpu_cache), 0, 0));
             }
             PrimitiveKind::TextRun => {
                 let text_cpu =
                     &ctx.prim_store.cpu_text_runs[prim_metadata.cpu_prim_index.0];
                 let is_shadow = pic_type == PictureType::TextShadow;
 
                 // TODO(gw): It probably makes sense to base this decision on the content
                 //           origin field in the future (once that's configurable).
@@ -1159,67 +1156,40 @@ impl AlphaBatcher {
                     &ctx.prim_store.cpu_yuv_images[prim_metadata.cpu_prim_index.0];
 
                 //yuv channel
                 let channel_count = image_yuv_cpu.format.get_plane_num();
                 debug_assert!(channel_count <= 3);
                 for channel in 0 .. channel_count {
                     let image_key = image_yuv_cpu.yuv_key[channel];
 
-                    let (texture, address) = resolve_image(
+                    let cache_item = resolve_image(
                         image_key,
                         image_yuv_cpu.image_rendering,
                         None,
                         ctx.resource_cache,
                         gpu_cache,
                         deferred_resolves,
                     );
 
-                    if texture == SourceTexture::Invalid {
+                    if cache_item.texture_id == SourceTexture::Invalid {
                         warn!("Warnings: skip a PrimitiveKind::YuvImage at {:?}.\n", item_bounding_rect);
                         return;
                     }
 
-                    textures.colors[channel] = texture;
-                    uv_rect_addresses[channel] = address.as_int(gpu_cache);
+                    textures.colors[channel] = cache_item.texture_id;
+                    uv_rect_addresses[channel] = cache_item.uv_rect_handle.as_int(gpu_cache);
                 }
 
-                let get_buffer_kind = |texture: SourceTexture| {
-                    match texture {
-                        SourceTexture::External(ext_image) => {
-                            match ext_image.image_type {
-                                ExternalImageType::Texture2DHandle => {
-                                    ImageBufferKind::Texture2D
-                                }
-                                ExternalImageType::Texture2DArrayHandle => {
-                                    ImageBufferKind::Texture2DArray
-                                }
-                                ExternalImageType::TextureRectHandle => {
-                                    ImageBufferKind::TextureRect
-                                }
-                                ExternalImageType::TextureExternalHandle => {
-                                    ImageBufferKind::TextureExternal
-                                }
-                                ExternalImageType::ExternalBuffer => {
-                                    // The ExternalImageType::ExternalBuffer should be handled by resource_cache.
-                                    // It should go through the non-external case.
-                                    panic!("Unexpected non-texture handle type");
-                                }
-                            }
-                        }
-                        _ => ImageBufferKind::Texture2DArray,
-                    }
-                };
-
                 // All yuv textures should be the same type.
-                let buffer_kind = get_buffer_kind(textures.colors[0]);
+                let buffer_kind = Self::get_buffer_kind(textures.colors[0]);
                 assert!(
                     textures.colors[1 .. image_yuv_cpu.format.get_plane_num()]
                         .iter()
-                        .all(|&tid| buffer_kind == get_buffer_kind(tid))
+                        .all(|&tid| buffer_kind == Self::get_buffer_kind(tid))
                 );
 
                 let kind = BatchKind::Transformable(
                     transform_kind,
                     TransformBatchKind::YuvImage(
                         buffer_kind,
                         image_yuv_cpu.format,
                         image_yuv_cpu.color_space,
@@ -1394,52 +1364,67 @@ impl AlphaBatchHelpers for PrimitiveStor
                 }
             } else {
                 BlendMode::None
             },
         }
     }
 }
 
-fn resolve_image(
+pub fn resolve_image(
     image_key: ImageKey,
     image_rendering: ImageRendering,
     tile_offset: Option<TileOffset>,
     resource_cache: &ResourceCache,
     gpu_cache: &mut GpuCache,
     deferred_resolves: &mut Vec<DeferredResolve>,
-) -> (SourceTexture, GpuCacheHandle) {
+) -> CacheItem {
     match resource_cache.get_image_properties(image_key) {
         Some(image_properties) => {
             // Check if an external image that needs to be resolved
             // by the render thread.
             match image_properties.external_image {
                 Some(external_image) => {
                     // This is an external texture - we will add it to
                     // the deferred resolves list to be patched by
                     // the render thread...
                     let cache_handle = gpu_cache.push_deferred_per_frame_blocks(BLOCKS_PER_UV_RECT);
+                    let cache_item = CacheItem {
+                        texture_id: SourceTexture::External(external_image),
+                        uv_rect_handle: cache_handle,
+                        uv_rect: DeviceUintRect::new(
+                            DeviceUintPoint::zero(),
+                            DeviceUintSize::new(
+                                image_properties.descriptor.width,
+                                image_properties.descriptor.height,
+                            )
+                        ),
+                        texture_layer: 0,
+                    };
+
                     deferred_resolves.push(DeferredResolve {
                         image_properties,
                         address: gpu_cache.get_address(&cache_handle),
                     });
 
-                    (SourceTexture::External(external_image), cache_handle)
+                    cache_item
                 }
                 None => {
                     if let Ok(cache_item) = resource_cache.get_cached_image(image_key, image_rendering, tile_offset) {
-                        (cache_item.texture_id, cache_item.uv_rect_handle)
+                        cache_item
                     } else {
                         // There is no usable texture entry for the image key. Just return an invalid texture here.
-                        (SourceTexture::Invalid, GpuCacheHandle::new())
+                        CacheItem::invalid()
                     }
                 }
             }
         }
-        None => (SourceTexture::Invalid, GpuCacheHandle::new()),
+        None => {
+            CacheItem::invalid()
+        }
     }
 }
 
 /// Construct a polygon from stacking context boundaries.
 /// `anchor` here is an index that's going to be preserved in all the
 /// splits of the polygon.
 fn make_polygon(
     rect: LayerRect,
--- a/gfx/webrender/src/border.rs
+++ b/gfx/webrender/src/border.rs
@@ -1,21 +1,21 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, BorderSide, BorderStyle, BorderWidths, ClipAndScrollInfo, ColorF};
-use api::{LayerPoint, LayerRect};
-use api::{LayerPrimitiveInfo, LayerSize, NormalBorder, RepeatMode};
+use api::{LayerPoint, LayerRect, LayerPrimitiveInfo, LayerSize};
+use api::{NormalBorder, RepeatMode, TexelRect};
 use clip::ClipSource;
 use ellipse::Ellipse;
 use frame_builder::FrameBuilder;
 use gpu_cache::GpuDataRequest;
 use prim_store::{BorderPrimitiveCpu, BrushSegment, BrushSegmentDescriptor};
-use prim_store::{BrushClipMaskKind, EdgeAaSegmentMask, PrimitiveContainer, TexelRect};
+use prim_store::{BrushClipMaskKind, EdgeAaSegmentMask, PrimitiveContainer};
 use util::{lerp, pack_as_float};
 
 #[repr(u8)]
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub enum BorderCornerInstance {
     None,
     Single, // Single instance needed - corner styles are same or similar.
     Double, // Different corner styles. Draw two instances, one per style.
--- a/gfx/webrender/src/capture.rs
+++ b/gfx/webrender/src/capture.rs
@@ -1,17 +1,19 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use std::fs::File;
 use std::io::{Read, Write};
 use std::path::{Path, PathBuf};
 
-use api::{CaptureBits, ExternalImageData, ImageDescriptor};
+use api::{CaptureBits, ExternalImageData, ExternalImageId, ImageDescriptor, TexelRect};
+#[cfg(feature = "png")]
+use device::ReadPixelsFormat;
 use ron::{de, ser};
 use serde::{Deserialize, Serialize};
 
 
 pub struct CaptureConfig {
     pub root: PathBuf,
     pub bits: CaptureBits,
     pretty: ser::PrettyConfig,
@@ -53,16 +55,67 @@ impl CaptureConfig {
             .with_extension("ron");
         File::open(path)
             .ok()?
             .read_to_string(&mut string)
             .unwrap();
         Some(de::from_str(&string)
             .unwrap())
     }
+
+    #[cfg(feature = "png")]
+    pub fn save_png(
+        path: PathBuf, size: (u32, u32), format: ReadPixelsFormat, data: &[u8],
+    ) {
+        use api::ImageFormat;
+        use png::{BitDepth, ColorType, Encoder, HasParameters};
+        use std::io::BufWriter;
+
+        let color_type = match format {
+            ReadPixelsFormat::Rgba8 => ColorType::RGBA,
+            ReadPixelsFormat::Standard(ImageFormat::BGRA8) => {
+                warn!("Unable to swizzle PNG of BGRA8 type");
+                ColorType::RGBA
+            },
+            ReadPixelsFormat::Standard(ImageFormat::R8) => ColorType::Grayscale,
+            ReadPixelsFormat::Standard(ImageFormat::RG8) => ColorType::GrayscaleAlpha,
+            ReadPixelsFormat::Standard(fm) => {
+                error!("Unable to save PNG of {:?}", fm);
+                return;
+            }
+        };
+        let w = BufWriter::new(File::create(path).unwrap());
+        let mut enc = Encoder::new(w, size.0, size.1);
+        enc
+            .set(color_type)
+            .set(BitDepth::Eight);
+        enc
+            .write_header()
+            .unwrap()
+            .write_image_data(&data)
+            .unwrap();
+    }
 }
 
+/// An image that `ResourceCache` is unable to resolve during a capture.
+/// The image has to be transferred to `Renderer` and locked with the
+/// external image handler to get the actual contents and serialize them.
 #[derive(Deserialize, Serialize)]
 pub struct ExternalCaptureImage {
     pub short_path: String,
     pub descriptor: ImageDescriptor,
     pub external: ExternalImageData,
 }
+
+/// A short description of an external image to be saved separately as
+/// "externals/XX.ron", redirecting into a specific texture/blob with
+/// the corresponding UV rectangle.
+#[derive(Deserialize, Serialize)]
+pub struct PlainExternalImage {
+    /// Path to the RON file describing the texel data.
+    pub data: String,
+    /// Public ID of the external image.
+    pub id: ExternalImageId,
+    /// Channel index of an external image.
+    pub channel_index: u8,
+    /// UV sub-rectangle of the image.
+    pub uv: TexelRect,
+}
--- a/gfx/webrender/src/debug_render.rs
+++ b/gfx/webrender/src/debug_render.rs
@@ -1,16 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{ColorU, DeviceIntRect, DeviceUintSize, ImageFormat};
+use api::{ColorU, DeviceIntRect, DeviceUintSize, ImageFormat, TextureTarget};
 use debug_font_data;
 use device::{Device, Program, Texture, TextureSlot, VertexDescriptor, VAO};
-use device::{TextureFilter, TextureTarget, VertexAttribute, VertexAttributeKind, VertexUsageHint};
+use device::{TextureFilter, VertexAttribute, VertexAttributeKind, VertexUsageHint};
 use euclid::{Point2D, Rect, Size2D, Transform3D};
 use internal_types::{ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE};
 use std::f32;
 
 #[derive(Debug, Copy, Clone)]
 enum DebugSampler {
     Font,
 }
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -1,15 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use super::shader_source;
 use api::{ColorF, ImageDescriptor, ImageFormat};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceUintRect, DeviceUintSize};
+use api::TextureTarget;
 use euclid::Transform3D;
 use gleam::gl;
 use internal_types::{FastHashMap, RenderTargetInfo};
 use smallvec::SmallVec;
 use std::cell::RefCell;
 use std::fs::File;
 use std::io::Read;
 use std::marker::PhantomData;
@@ -21,17 +22,17 @@ use std::rc::Rc;
 use std::thread;
 
 
 #[derive(Debug, Copy, Clone, PartialEq, Ord, Eq, PartialOrd)]
 #[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub struct FrameId(usize);
 
 impl FrameId {
-    pub fn new(value: usize) -> FrameId {
+    pub fn new(value: usize) -> Self {
         FrameId(value)
     }
 }
 
 impl Add<usize> for FrameId {
     type Output = FrameId;
 
     fn add(self, other: usize) -> FrameId {
@@ -58,35 +59,16 @@ const DEFAULT_TEXTURE: TextureSlot = Tex
 
 #[repr(u32)]
 pub enum DepthFunction {
     Less = gl::LESS,
     LessEqual = gl::LEQUAL,
 }
 
 #[derive(Copy, Clone, Debug, PartialEq)]
-pub enum TextureTarget {
-    Default,
-    Array,
-    Rect,
-    External,
-}
-
-impl TextureTarget {
-    pub fn to_gl_target(&self) -> gl::GLuint {
-        match *self {
-            TextureTarget::Default => gl::TEXTURE_2D,
-            TextureTarget::Array => gl::TEXTURE_2D_ARRAY,
-            TextureTarget::Rect => gl::TEXTURE_RECTANGLE,
-            TextureTarget::External => gl::TEXTURE_EXTERNAL_OES,
-        }
-    }
-}
-
-#[derive(Copy, Clone, Debug, PartialEq)]
 #[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub enum TextureFilter {
     Nearest,
     Linear,
 }
 
 #[derive(Debug)]
 pub enum VertexAttributeKind {
@@ -125,16 +107,25 @@ pub enum UploadMethod {
 }
 
 #[derive(Clone, Copy, Debug, PartialEq)]
 pub enum ReadPixelsFormat {
     Standard(ImageFormat),
     Rgba8,
 }
 
+pub fn get_gl_target(target: TextureTarget) -> gl::GLuint {
+    match target {
+        TextureTarget::Default => gl::TEXTURE_2D,
+        TextureTarget::Array => gl::TEXTURE_2D_ARRAY,
+        TextureTarget::Rect => gl::TEXTURE_RECTANGLE,
+        TextureTarget::External => gl::TEXTURE_EXTERNAL_OES,
+    }
+}
+
 pub fn get_gl_format_bgra(gl: &gl::Gl) -> gl::GLuint {
     match gl.get_type() {
         gl::GlType::Gl => GL_FORMAT_BGRA_GL,
         gl::GlType::Gles => GL_FORMAT_BGRA_GLES,
     }
 }
 
 fn get_shader_version(gl: &gl::Gl) -> &'static str {
@@ -418,19 +409,24 @@ pub struct ExternalTexture {
     id: gl::GLuint,
     target: gl::GLuint,
 }
 
 impl ExternalTexture {
     pub fn new(id: u32, target: TextureTarget) -> Self {
         ExternalTexture {
             id,
-            target: target.to_gl_target(),
+            target: get_gl_target(target),
         }
     }
+
+    #[cfg(feature = "capture")]
+    pub fn internal_id(&self) -> gl::GLuint {
+        self.id
+    }
 }
 
 pub struct Texture {
     id: gl::GLuint,
     target: gl::GLuint,
     layer_count: i32,
     format: ImageFormat,
     width: u32,
@@ -925,17 +921,17 @@ impl Device {
         }
     }
 
     pub fn create_texture(
         &mut self, target: TextureTarget, format: ImageFormat,
     ) -> Texture {
         Texture {
             id: self.gl.gen_textures(1)[0],
-            target: target.to_gl_target(),
+            target: get_gl_target(target),
             width: 0,
             height: 0,
             layer_count: 0,
             format,
             filter: TextureFilter::Nearest,
             render_target: None,
             fbo_ids: vec![],
             depth_rb: None,
@@ -1506,17 +1502,17 @@ impl Device {
             0, 0,
             img_desc.width as i32,
             img_desc.height as i32,
             desc.external,
             desc.pixel_type,
         )
     }
 
-    /// Read rectangle of RGBA8 or BGRA8 pixels into the specified output slice.
+    /// Read rectangle of pixels into the specified output slice.
     pub fn read_pixels_into(
         &mut self,
         rect: DeviceUintRect,
         format: ReadPixelsFormat,
         output: &mut [u8],
     ) {
         let (bytes_per_pixel, desc) = match format {
             ReadPixelsFormat::Standard(imf) => {
@@ -1540,16 +1536,34 @@ impl Device {
             rect.size.width as _,
             rect.size.height as _,
             desc.external,
             desc.pixel_type,
             output,
         );
     }
 
+    /// Get texels of a texture into the specified output slice.
+    pub fn get_tex_image_into(
+        &mut self,
+        texture: &Texture,
+        format: ImageFormat,
+        output: &mut [u8],
+    ) {
+        self.bind_texture(DEFAULT_TEXTURE, texture);
+        let desc = gl_describe_format(self.gl(), format);
+        self.gl.get_tex_image_into_buffer(
+            texture.target,
+            0,
+            desc.external,
+            desc.pixel_type,
+            output,
+        );
+    }
+
     /// Attaches the provided texture to the current Read FBO binding.
     fn attach_read_texture_raw(
         &mut self, texture_id: gl::GLuint, target: gl::GLuint, layer_id: i32
     ) {
         match target {
             gl::TEXTURE_2D_ARRAY => {
                 self.gl.framebuffer_texture_layer(
                     gl::READ_FRAMEBUFFER,
@@ -1570,17 +1584,17 @@ impl Device {
                 )
             }
         }
     }
 
     pub fn attach_read_texture_external(
         &mut self, texture_id: gl::GLuint, target: TextureTarget, layer_id: i32
     ) {
-        self.attach_read_texture_raw(texture_id, target.to_gl_target(), layer_id)
+        self.attach_read_texture_raw(texture_id, get_gl_target(target), layer_id)
     }
 
     pub fn attach_read_texture(&mut self, texture: &Texture, layer_id: i32) {
         self.attach_read_texture_raw(texture.id, texture.target, layer_id)
     }
 
     fn bind_vao_impl(&mut self, id: gl::GLuint) {
         debug_assert!(self.inside_frame);
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -414,18 +414,18 @@ impl<'a> FlattenContext<'a> {
                             tiling.image_size,
                             tiling.tile_size as u32,
                         );
                     }
                     None => {
                         self.builder.add_image(
                             clip_and_scroll,
                             &prim_info,
-                            &info.stretch_size,
-                            &info.tile_spacing,
+                            info.stretch_size,
+                            info.tile_spacing,
                             None,
                             info.image_key,
                             info.image_rendering,
                             info.alpha_type,
                             None,
                         );
                     }
                 }
@@ -929,18 +929,18 @@ impl<'a> FlattenContext<'a> {
 
         // Fix up the primitive's rect if it overflows the original item rect.
         if let Some(prim_rect) = prim_rect.intersection(&prim_info.rect) {
             let mut prim_info = prim_info.clone();
             prim_info.rect = prim_rect;
             self.builder.add_image(
                 clip_and_scroll,
                 &prim_info,
-                &stretched_size,
-                &info.tile_spacing,
+                stretched_size,
+                info.tile_spacing,
                 None,
                 info.image_key,
                 info.image_rendering,
                 info.alpha_type,
                 Some(tile_offset),
             );
         }
     }
@@ -1096,16 +1096,18 @@ impl FrameContext {
         self.pipeline_epoch_map.insert(pipeline_id, epoch);
     }
 
     pub fn make_rendered_document(&self, frame: Frame) -> RenderedDocument {
         let nodes_bouncing_back = self.clip_scroll_tree.collect_nodes_bouncing_back();
         RenderedDocument::new(self.pipeline_epoch_map.clone(), nodes_bouncing_back, frame)
     }
 
+    //TODO: this can probably be simplified if `build()` is called directly by RB.
+    // The only things it needs from the frame context are the CST and frame ID.
     pub fn build_rendered_document(
         &mut self,
         frame_builder: &mut FrameBuilder,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         device_pixel_scale: DevicePixelScale,
         layer: DocumentLayer,
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -1,35 +1,35 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AlphaType, BorderDetails, BorderDisplayItem, BuiltDisplayList, ClipAndScrollInfo, ClipId};
 use api::{ColorF, ColorU, DeviceIntPoint, DevicePixelScale, DeviceUintPoint, DeviceUintRect};
 use api::{DeviceUintSize, DocumentLayer, ExtendMode, FontRenderMode, GlyphInstance, GlyphOptions};
 use api::{GradientStop, HitTestFlags, HitTestItem, HitTestResult, ImageKey, ImageRendering};
-use api::{ItemRange, ItemTag, LayerPoint, LayerPrimitiveInfo, LayerRect, LayerSize};
+use api::{Epoch, ItemRange, ItemTag, LayerPoint, LayerPrimitiveInfo, LayerRect, LayerSize};
 use api::{LayerTransform, LayerVector2D, LayoutTransform, LayoutVector2D, LineOrientation};
 use api::{LineStyle, LocalClip, PipelineId, PremultipliedColorF, PropertyBinding, RepeatMode};
-use api::{ScrollSensitivity, Shadow, TileOffset, TransformStyle, WorldPoint, YuvColorSpace};
-use api::YuvData;
+use api::{ScrollSensitivity, Shadow, TexelRect, TileOffset, TransformStyle, WorldPoint};
+use api::{DeviceIntRect, DeviceIntSize, YuvColorSpace, YuvData};
 use app_units::Au;
 use border::ImageBorderSegment;
 use clip::{ClipRegion, ClipSource, ClipSources, ClipStore, Contains};
 use clip_scroll_node::{ClipScrollNode, NodeType};
 use clip_scroll_tree::ClipScrollTree;
 use euclid::{SideOffsets2D, vec2};
 use frame::FrameId;
 use glyph_rasterizer::FontInstance;
 use gpu_cache::GpuCache;
 use gpu_types::{ClipScrollNodeData, PictureType};
 use internal_types::{FastHashMap, FastHashSet, RenderPassIndex};
 use picture::{ContentOrigin, PictureCompositeMode, PictureKind, PicturePrimitive, PictureSurface};
-use prim_store::{BrushKind, BrushPrimitive, TexelRect, YuvImagePrimitiveCpu};
-use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, PrimitiveKind};
+use prim_store::{BrushKind, BrushPrimitive, ImageCacheKey, YuvImagePrimitiveCpu};
+use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, ImageSource, PrimitiveKind};
 use prim_store::{PrimitiveContainer, PrimitiveIndex, SpecificPrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
 use prim_store::{BrushSegmentDescriptor, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_task::{ClearMode, ClipChain, RenderTask, RenderTaskId, RenderTaskTree};
 use resource_cache::ResourceCache;
 use scene::{ScenePipeline, SceneProperties};
 use std::{mem, usize, f32};
@@ -1089,18 +1089,18 @@ impl FrameBuilder {
                 );
 
                 for segment in segments {
                     let mut info = info.clone();
                     info.rect = segment.geom_rect;
                     self.add_image(
                         clip_and_scroll,
                         &info,
-                        &segment.stretch_size,
-                        &segment.tile_spacing,
+                        segment.stretch_size,
+                        segment.tile_spacing,
                         Some(segment.sub_rect),
                         border.image_key,
                         ImageRendering::Auto,
                         AlphaType::PremultipliedAlpha,
                         None,
                     );
                 }
             }
@@ -1417,51 +1417,55 @@ impl FrameBuilder {
             }
         }
     }
 
     pub fn add_image(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
-        stretch_size: &LayerSize,
-        tile_spacing: &LayerSize,
+        stretch_size: LayerSize,
+        mut tile_spacing: LayerSize,
         sub_rect: Option<TexelRect>,
         image_key: ImageKey,
         image_rendering: ImageRendering,
         alpha_type: AlphaType,
-        tile: Option<TileOffset>,
+        tile_offset: Option<TileOffset>,
     ) {
-        let sub_rect_block = sub_rect.unwrap_or(TexelRect::invalid()).into();
-
         // If the tile spacing is the same as the rect size,
         // then it is effectively zero. We use this later on
         // in prim_store to detect if an image can be considered
         // opaque.
-        let tile_spacing = if *tile_spacing == info.rect.size {
-            LayerSize::zero()
-        } else {
-            *tile_spacing
-        };
+        if tile_spacing == info.rect.size {
+            tile_spacing = LayerSize::zero();
+        }
 
         let prim_cpu = ImagePrimitiveCpu {
-            image_key,
-            image_rendering,
-            tile_offset: tile,
             tile_spacing,
             alpha_type,
-            gpu_blocks: [
-                [
-                    stretch_size.width,
-                    stretch_size.height,
-                    tile_spacing.width,
-                    tile_spacing.height,
-                ].into(),
-                sub_rect_block,
-            ],
+            stretch_size,
+            current_epoch: Epoch::invalid(),
+            source: ImageSource::Default,
+            key: ImageCacheKey {
+                image_key,
+                image_rendering,
+                tile_offset,
+                texel_rect: sub_rect.map(|texel_rect| {
+                    DeviceIntRect::new(
+                        DeviceIntPoint::new(
+                            texel_rect.uv0.x as i32,
+                            texel_rect.uv0.y as i32,
+                        ),
+                        DeviceIntSize::new(
+                            (texel_rect.uv1.x - texel_rect.uv0.x) as i32,
+                            (texel_rect.uv1.y - texel_rect.uv0.y) as i32,
+                        ),
+                    )
+                }),
+            },
         };
 
         self.add_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
             PrimitiveContainer::Image(prim_cpu),
         );
@@ -1472,19 +1476,19 @@ impl FrameBuilder {
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
         yuv_data: YuvData,
         color_space: YuvColorSpace,
         image_rendering: ImageRendering,
     ) {
         let format = yuv_data.get_format();
         let yuv_key = match yuv_data {
-            YuvData::NV12(plane_0, plane_1) => [plane_0, plane_1, ImageKey::dummy()],
+            YuvData::NV12(plane_0, plane_1) => [plane_0, plane_1, ImageKey::DUMMY],
             YuvData::PlanarYCbCr(plane_0, plane_1, plane_2) => [plane_0, plane_1, plane_2],
-            YuvData::InterleavedYCbCr(plane_0) => [plane_0, ImageKey::dummy(), ImageKey::dummy()],
+            YuvData::InterleavedYCbCr(plane_0) => [plane_0, ImageKey::DUMMY, ImageKey::DUMMY],
         };
 
         let prim_cpu = YuvImagePrimitiveCpu {
             yuv_key,
             format,
             color_space,
             image_rendering,
             gpu_block: [info.rect.size.width, info.rect.size.height, 0.0, 0.0].into(),
--- a/gfx/webrender/src/gpu_cache.rs
+++ b/gfx/webrender/src/gpu_cache.rs
@@ -19,19 +19,19 @@
 //! data is not in the cache, the user provided closure
 //! will be invoked to build the data.
 //!
 //! After ```end_frame``` has occurred, callers can
 //! use the ```get_address``` API to get the allocated
 //! address in the GPU cache of a given resource slot
 //! for this frame.
 
-use api::{LayerRect, PremultipliedColorF};
+use api::{PremultipliedColorF, TexelRect};
 use device::FrameId;
-use internal_types::UvRect;
+use euclid::TypedRect;
 use profiler::GpuCacheProfileCounters;
 use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 use std::{mem, u16, u32};
 use std::ops::Add;
 
 
 pub const GPU_CACHE_INITIAL_HEIGHT: u32 = 512;
 const FRAMES_BEFORE_EVICTION: usize = 10;
@@ -53,61 +53,60 @@ struct CacheLocation {
     block_index: BlockIndex,
     epoch: Epoch,
 }
 
 /// A single texel in RGBAF32 texture - 16 bytes.
 #[derive(Copy, Clone, Debug)]
 #[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub struct GpuBlockData {
-    pub data: [f32; 4],
+    data: [f32; 4],
 }
 
 impl GpuBlockData {
-    pub fn empty() -> Self {
-        GpuBlockData { data: [0.0; 4] }
-    }
+    pub const EMPTY: Self = GpuBlockData { data: [0.0; 4] };
 }
 
 /// Conversion helpers for GpuBlockData
-impl Into<GpuBlockData> for PremultipliedColorF {
-    fn into(self) -> GpuBlockData {
+impl From<PremultipliedColorF> for GpuBlockData {
+    fn from(c: PremultipliedColorF) -> Self {
         GpuBlockData {
-            data: [self.r, self.g, self.b, self.a],
+            data: [c.r, c.g, c.b, c.a],
         }
     }
 }
 
-impl Into<GpuBlockData> for [f32; 4] {
-    fn into(self) -> GpuBlockData {
-        GpuBlockData { data: self }
+impl From<[f32; 4]> for GpuBlockData {
+    fn from(data: [f32; 4]) -> Self {
+        GpuBlockData { data }
     }
 }
 
-impl Into<GpuBlockData> for LayerRect {
-    fn into(self) -> GpuBlockData {
+impl<P> From<TypedRect<f32, P>> for GpuBlockData {
+    fn from(r: TypedRect<f32, P>) -> Self {
         GpuBlockData {
             data: [
-                self.origin.x,
-                self.origin.y,
-                self.size.width,
-                self.size.height,
+                r.origin.x,
+                r.origin.y,
+                r.size.width,
+                r.size.height,
             ],
         }
     }
 }
 
-impl Into<GpuBlockData> for UvRect {
-    fn into(self) -> GpuBlockData {
+impl From<TexelRect> for GpuBlockData {
+    fn from(tr: TexelRect) -> Self {
         GpuBlockData {
-            data: [self.uv0.x, self.uv0.y, self.uv1.x, self.uv1.y],
+            data: [tr.uv0.x, tr.uv0.y, tr.uv1.x, tr.uv1.y],
         }
     }
 }
 
+
 // Any data type that can be stored in the GPU cache should
 // implement this trait.
 pub trait ToGpuBlocks {
     // Request an arbitrary number of GPU data blocks.
     fn write_gpu_blocks(&self, GpuDataRequest);
 }
 
 // A handle to a GPU resource.
@@ -217,16 +216,17 @@ impl Row {
 pub enum GpuCacheUpdate {
     Copy {
         block_index: usize,
         block_count: usize,
         address: GpuCacheAddress,
     },
 }
 
+#[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub struct GpuCacheUpdateList {
     // The current height of the texture. The render thread
     // should resize the texture if required.
     pub height: u32,
     // List of updates to apply.
     pub updates: Vec<GpuCacheUpdate>,
     // A flat list of GPU blocks that are pending upload
     // to GPU memory.
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -1,28 +1,28 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{ClipId, DevicePoint, DeviceUintRect, DocumentId, Epoch};
+use api::{ClipId, DeviceUintRect, DocumentId, Epoch};
 use api::{ExternalImageData, ExternalImageId};
 use api::{ImageFormat, PipelineId};
 use api::DebugCommand;
 use device::TextureFilter;
 use fxhash::FxHasher;
 use profiler::BackendProfileCounters;
 use std::{usize, i32};
 use std::collections::{HashMap, HashSet};
 use std::f32;
 use std::hash::BuildHasherDefault;
 use std::path::PathBuf;
 use std::sync::Arc;
 
 #[cfg(feature = "capture")]
-use capture::{CaptureConfig, ExternalCaptureImage};
+use capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage};
 use tiling;
 
 pub type FastHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;
 pub type FastHashSet<K> = HashSet<K, BuildHasherDefault<FxHasher>>;
 
 // An ID for a texture that is owned by the
 // texture cache module. This can include atlases
 // or standalone textures allocated via the
@@ -149,17 +149,17 @@ impl RenderedDocument {
 }
 
 pub enum DebugOutput {
     FetchDocuments(String),
     FetchClipScrollTree(String),
     #[cfg(feature = "capture")]
     SaveCapture(CaptureConfig, Vec<ExternalCaptureImage>),
     #[cfg(feature = "capture")]
-    LoadCapture(PathBuf),
+    LoadCapture(PathBuf, Vec<PlainExternalImage>),
 }
 
 pub enum ResultMsg {
     DebugCommand(DebugCommand),
     DebugOutput(DebugOutput),
     RefreshShader(PathBuf),
     PublishDocument(
         DocumentId,
@@ -167,14 +167,8 @@ pub enum ResultMsg {
         TextureUpdateList,
         BackendProfileCounters,
     ),
     UpdateResources {
         updates: TextureUpdateList,
         cancel_rendering: bool,
     },
 }
-
-#[derive(Clone, Copy, Debug)]
-pub struct UvRect {
-    pub uv0: DevicePoint,
-    pub uv1: DevicePoint,
-}
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -153,16 +153,18 @@ extern crate serde_json;
 extern crate smallvec;
 extern crate time;
 #[cfg(feature = "debugger")]
 extern crate ws;
 #[cfg(feature = "debugger")]
 extern crate image;
 #[cfg(feature = "debugger")]
 extern crate base64;
+#[cfg(all(feature = "capture", feature = "png"))]
+extern crate png;
 
 pub extern crate webrender_api;
 
 #[doc(hidden)]
 pub use device::{build_shader_strings, ProgramCache, ReadPixelsFormat, UploadMethod, VertexUsageHint};
 pub use renderer::{CpuProfile, DebugFlags, GpuProfile, OutputImageHandler, RendererKind};
 pub use renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource};
 pub use renderer::{GraphicsApi, GraphicsApiInfo, Renderer, RendererOptions};
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -580,17 +580,17 @@ impl PicturePrimitive {
                         parent_tasks.push(root_task_id);
 
                         // TODO(gw): Remove the nastiness with having to pass
                         //           the scale factor through the texture cache
                         //           item user data. This will disappear once
                         //           the brush_picture shader is updated to draw
                         //           segments, since the scale factor will not
                         //           be used at all then during drawing.
-                        (root_task_id, [scale_factor, 0.0, 0.0])
+                        (root_task_id, [scale_factor, 0.0, 0.0], false)
                     }
                 );
 
                 self.surface = Some(PictureSurface::TextureCache(cache_item));
             }
         }
     }
 
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -1,35 +1,35 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AlphaType, BorderRadius, BuiltDisplayList, ClipAndScrollInfo, ClipId, ClipMode};
-use api::{ColorF, ColorU, DeviceIntRect, DevicePixelScale, DevicePoint};
+use api::{ColorF, ColorU, DeviceIntRect, DeviceIntSize, DevicePixelScale, Epoch};
 use api::{ComplexClipRegion, ExtendMode, FontRenderMode};
 use api::{GlyphInstance, GlyphKey, GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag};
 use api::{LayerPoint, LayerRect, LayerSize, LayerToWorldTransform, LayerVector2D, LineOrientation};
-use api::{LineStyle, PipelineId, PremultipliedColorF, TileOffset, WorldToLayerTransform};
-use api::{YuvColorSpace, YuvFormat};
+use api::{LineStyle, PipelineId, PremultipliedColorF, TileOffset};
+use api::{WorldToLayerTransform, YuvColorSpace, YuvFormat};
 use border::{BorderCornerInstance, BorderEdgeKind};
 use clip_scroll_tree::{CoordinateSystemId, ClipScrollTree};
 use clip_scroll_node::ClipScrollNode;
 use clip::{ClipSource, ClipSourcesHandle, ClipStore};
 use frame_builder::PrimitiveContext;
 use glyph_rasterizer::{FontInstance, FontTransform};
 use internal_types::{FastHashMap};
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
                 ToGpuBlocks};
 use gpu_types::{ClipChainRectIndex, ClipScrollNodeData};
 use picture::{PictureKind, PicturePrimitive};
 use profiler::FrameProfileCounters;
-use render_task::{ClipChain, ClipChainNode, ClipChainNodeIter, ClipChainNodeRef, ClipWorkItem};
-use render_task::{RenderTask, RenderTaskId, RenderTaskTree};
-use renderer::{BLOCKS_PER_UV_RECT, MAX_VERTEX_TEXTURE_WIDTH};
-use resource_cache::{ImageProperties, ResourceCache};
+use render_task::{BlitSource, ClipChain, ClipChainNode, ClipChainNodeIter, ClipChainNodeRef, ClipWorkItem};
+use render_task::{RenderTask, RenderTaskCacheKey, RenderTaskCacheKeyKind, RenderTaskId, RenderTaskTree};
+use renderer::{MAX_VERTEX_TEXTURE_WIDTH};
+use resource_cache::{CacheItem, ImageProperties, ResourceCache};
 use scene::{ScenePipeline, SceneProperties};
 use segment::SegmentBuilder;
 use std::{mem, usize};
 use std::rc::Rc;
 use util::{MatrixHelpers, calculate_screen_bounding_rect, pack_as_float};
 use util::recycle_vec;
 
 
@@ -83,59 +83,25 @@ impl PrimitiveOpacity {
 //          polygons directly and store internally
 //          in the picture structure.
 #[derive(Debug)]
 pub struct PrimitiveRunLocalRect {
     pub local_rect_in_actual_parent_space: LayerRect,
     pub local_rect_in_original_parent_space: LayerRect,
 }
 
-/// Stores two coordinates in texel space. The coordinates
-/// are stored in texel coordinates because the texture atlas
-/// may grow. Storing them as texel coords and normalizing
-/// the UVs in the vertex shader means nothing needs to be
-/// updated on the CPU when the texture size changes.
-#[derive(Copy, Clone, Debug)]
-pub struct TexelRect {
-    pub uv0: DevicePoint,
-    pub uv1: DevicePoint,
-}
-
-impl TexelRect {
-    pub fn new(u0: f32, v0: f32, u1: f32, v1: f32) -> TexelRect {
-        TexelRect {
-            uv0: DevicePoint::new(u0, v0),
-            uv1: DevicePoint::new(u1, v1),
-        }
-    }
-
-    pub fn invalid() -> TexelRect {
-        TexelRect {
-            uv0: DevicePoint::new(-1.0, -1.0),
-            uv1: DevicePoint::new(-1.0, -1.0),
-        }
-    }
-}
-
-impl Into<GpuBlockData> for TexelRect {
-    fn into(self) -> GpuBlockData {
-        GpuBlockData {
-            data: [self.uv0.x, self.uv0.y, self.uv1.x, self.uv1.y],
-        }
-    }
-}
-
 /// For external images, it's not possible to know the
 /// UV coords of the image (or the image data itself)
 /// until the render thread receives the frame and issues
 /// callbacks to the client application. For external
 /// images that are visible, a DeferredResolve is created
 /// that is stored in the frame. This allows the render
 /// thread to iterate this list and update any changed
 /// texture data and update the UV rect.
+#[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub struct DeferredResolve {
     pub address: GpuCacheAddress,
     pub image_properties: ImageProperties,
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 pub struct SpecificPrimitiveIndex(pub usize);
 
@@ -349,30 +315,59 @@ impl BrushPrimitive {
                     pack_as_float(orientation as u32),
                     0.0,
                 ]);
             }
         }
     }
 }
 
-#[derive(Debug)]
-pub struct ImagePrimitiveCpu {
+// Key that identifies a unique (partial) image that is being
+// stored in the render task cache.
+#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
+pub struct ImageCacheKey {
+    // TODO(gw): Consider introducing a struct that collectively
+    //           identifies an image in the resource cache
+    //           uniquely. We pass this around to a few places.
     pub image_key: ImageKey,
     pub image_rendering: ImageRendering,
     pub tile_offset: Option<TileOffset>,
+    pub texel_rect: Option<DeviceIntRect>,
+}
+
+// Where to find the texture data for an image primitive.
+#[derive(Debug)]
+pub enum ImageSource {
+    // A normal image - just reference the texture cache.
+    Default,
+    // An image that is pre-rendered into the texture cache
+    // via a render task.
+    Cache {
+        size: DeviceIntSize,
+        item: CacheItem,
+    },
+}
+
+#[derive(Debug)]
+pub struct ImagePrimitiveCpu {
     pub tile_spacing: LayerSize,
     pub alpha_type: AlphaType,
-    // TODO(gw): Build on demand
-    pub gpu_blocks: [GpuBlockData; BLOCKS_PER_UV_RECT],
+    pub stretch_size: LayerSize,
+    pub current_epoch: Epoch,
+    pub source: ImageSource,
+    pub key: ImageCacheKey,
 }
 
 impl ToGpuBlocks for ImagePrimitiveCpu {
     fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
-        request.extend_from_slice(&self.gpu_blocks);
+        request.push([
+            self.stretch_size.width, self.stretch_size.height,
+            self.tile_spacing.width, self.tile_spacing.height,
+        ]);
     }
 }
 
 #[derive(Debug)]
 pub struct YuvImagePrimitiveCpu {
     pub yuv_key: [ImageKey; 3],
     pub format: YuvFormat,
     pub color_space: YuvColorSpace,
@@ -703,50 +698,48 @@ impl TextRunPrimitiveCpu {
         //           directly from the display list.
         if self.glyph_keys.is_empty() {
             let subpx_dir = font.subpx_dir.limit_by(font.render_mode);
             let src_glyphs = display_list.get(self.glyph_range);
 
             // TODO(gw): If we support chunks() on AuxIter
             //           in the future, this code below could
             //           be much simpler...
-            let mut gpu_block = GpuBlockData::empty();
+            let mut gpu_block = [0.0; 4];
             for (i, src) in src_glyphs.enumerate() {
                 let key = GlyphKey::new(src.index, src.point, font.render_mode, subpx_dir);
                 self.glyph_keys.push(key);
 
                 // Two glyphs are packed per GPU block.
 
                 if (i & 1) == 0 {
-                    gpu_block.data[0] = src.point.x;
-                    gpu_block.data[1] = src.point.y;
+                    gpu_block[0] = src.point.x;
+                    gpu_block[1] = src.point.y;
                 } else {
-                    gpu_block.data[2] = src.point.x;
-                    gpu_block.data[3] = src.point.y;
-                    self.glyph_gpu_blocks.push(gpu_block);
+                    gpu_block[2] = src.point.x;
+                    gpu_block[3] = src.point.y;
+                    self.glyph_gpu_blocks.push(gpu_block.into());
                 }
             }
 
             // Ensure the last block is added in the case
             // of an odd number of glyphs.
             if (self.glyph_keys.len() & 1) != 0 {
-                self.glyph_gpu_blocks.push(gpu_block);
+                self.glyph_gpu_blocks.push(gpu_block.into());
             }
         }
 
         resource_cache.request_glyphs(font, &self.glyph_keys, gpu_cache);
     }
 
     fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
         request.push(self.get_color().premultiplied());
         // this is the only case where we need to provide plain color to GPU
         let bg_color = ColorF::from(self.font.bg_color);
-        request.extend_from_slice(&[
-            GpuBlockData { data: [bg_color.r, bg_color.g, bg_color.b, 1.0] }
-        ]);
+        request.push([bg_color.r, bg_color.g, bg_color.b, 1.0]);
         request.push([
             self.offset.x,
             self.offset.y,
             0.0,
             0.0,
         ]);
         request.extend_from_slice(&self.glyph_gpu_blocks);
 
@@ -1192,34 +1185,107 @@ impl PrimitiveStore {
                     prim_context.device_pixel_scale,
                     transform,
                     prim_context.display_list,
                     gpu_cache,
                 );
             }
             PrimitiveKind::Image => {
                 let image_cpu = &mut self.cpu_images[metadata.cpu_prim_index.0];
+                let image_properties = resource_cache.get_image_properties(image_cpu.key.image_key);
 
-                resource_cache.request_image(
-                    image_cpu.image_key,
-                    image_cpu.image_rendering,
-                    image_cpu.tile_offset,
-                    gpu_cache,
-                );
+                // TODO(gw): Add image.rs and move this code out to a separate
+                //           source file as it gets more complicated, and we
+                //           start pre-rendering images for other reasons.
+
+                if let Some(image_properties) = image_properties {
+                    // See if this image has been updated since we last hit this code path.
+                    // If so, we need to (at least) update the opacity, and also rebuild
+                    // any render task cached portions of this image.
+                    if image_properties.epoch != image_cpu.current_epoch {
+                        image_cpu.current_epoch = image_properties.epoch;
+
+                        // Update the opacity.
+                        metadata.opacity.is_opaque = image_properties.descriptor.is_opaque &&
+                            image_cpu.tile_spacing.width == 0.0 &&
+                            image_cpu.tile_spacing.height == 0.0;
+
+                        // Work out whether this image is a normal / simple type, or if
+                        // we need to pre-render it to the render task cache.
+                        image_cpu.source = match image_cpu.key.texel_rect {
+                            Some(texel_rect) => {
+                                ImageSource::Cache {
+                                    // Size in device-pixels we need to allocate in render task cache.
+                                    size: texel_rect.size,
+                                    item: CacheItem::invalid(),
+                                }
+                            }
+                            None => {
+                                // Simple image - just use a normal texture cache entry.
+                                ImageSource::Default
+                            }
+                        };
+                    }
+
+                    // TODO(gw): Don't actually need this in cached source mode if
+                    //           the cache item is still valid...
+                    resource_cache.request_image(
+                        image_cpu.key.image_key,
+                        image_cpu.key.image_rendering,
+                        image_cpu.key.tile_offset,
+                        gpu_cache,
+                    );
 
-                // TODO(gw): This doesn't actually need to be calculated each frame.
-                // It's cheap enough that it's not worth introducing a cache for images
-                // right now, but if we introduce a cache for images for some other
-                // reason then we might as well cache this with it.
-                if let Some(image_properties) =
-                    resource_cache.get_image_properties(image_cpu.image_key)
-                {
-                    metadata.opacity.is_opaque = image_properties.descriptor.is_opaque &&
-                        image_cpu.tile_spacing.width == 0.0 &&
-                        image_cpu.tile_spacing.height == 0.0;
+                    // Every frame, for cached items, we need to request the render
+                    // task cache item. The closure will be invoked on the first
+                    // time through, and any time the render task output has been
+                    // evicted from the texture cache.
+                    if let ImageSource::Cache { size, ref mut item } = image_cpu.source {
+                        let key = image_cpu.key;
+
+                        // Request a pre-rendered image task.
+                        *item = resource_cache.request_render_task(
+                            RenderTaskCacheKey {
+                                size,
+                                kind: RenderTaskCacheKeyKind::Image(key),
+                            },
+                            gpu_cache,
+                            render_tasks,
+                            |render_tasks| {
+                                // Create a task to blit from the texture cache to
+                                // a normal transient render task surface. This will
+                                // copy only the sub-rect, if specified.
+                                let cache_to_target_task = RenderTask::new_blit(
+                                    size,
+                                    BlitSource::Image {
+                                        key,
+                                    },
+                                );
+                                let cache_to_target_task_id = render_tasks.add(cache_to_target_task);
+
+                                // Create a task to blit the rect from the child render
+                                // task above back into the right spot in the persistent
+                                // render target cache.
+                                let target_to_cache_task = RenderTask::new_blit(
+                                    size,
+                                    BlitSource::RenderTask {
+                                        task_id: cache_to_target_task_id,
+                                    },
+                                );
+                                let target_to_cache_task_id = render_tasks.add(target_to_cache_task);
+
+                                // Hook this into the render task tree at the right spot.
+                                parent_tasks.push(target_to_cache_task_id);
+
+                                // Pass the image opacity, so that the cached render task
+                                // item inherits the same opacity properties.
+                                (target_to_cache_task_id, [0.0; 3], image_properties.descriptor.is_opaque)
+                            }
+                        );
+                    }
                 }
             }
             PrimitiveKind::YuvImage => {
                 let image_cpu = &mut self.cpu_yuv_images[metadata.cpu_prim_index.0];
 
                 let channel_num = image_cpu.format.get_plane_num();
                 debug_assert!(channel_num <= 3);
                 for channel in 0 .. channel_num {
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -72,16 +72,21 @@ struct Document {
     // made available as output textures.
     output_pipelines: FastHashSet<PipelineId>,
     // A helper switch to prevent any frames rendering triggered by scrolling
     // messages between `SetDisplayList` and `GenerateFrame`.
     // If we allow them, then a reftest that scrolls a few layers before generating
     // the first frame would produce inconsistent rendering results, because
     // scroll events are not necessarily received in deterministic order.
     render_on_scroll: Option<bool>,
+    // A helper flag to prevent any hit-tests from happening between calls
+    // to build_scene and rendering the document. In between these two calls,
+    // hit-tests produce inconsistent results because the clip_scroll_tree
+    // is out of sync with the display list.
+    render_on_hittest: bool,
 }
 
 impl Document {
     pub fn new(
         config: FrameBuilderConfig,
         window_size: DeviceUintSize,
         layer: DocumentLayer,
         enable_render_on_scroll: bool,
@@ -100,18 +105,19 @@ impl Document {
                 layer,
                 pan: DeviceIntPoint::zero(),
                 page_zoom_factor: 1.0,
                 pinch_zoom_factor: 1.0,
                 device_pixel_ratio: default_device_pixel_ratio,
             },
             frame_ctx: FrameContext::new(config),
             frame_builder: Some(FrameBuilder::empty()),
+            output_pipelines: FastHashSet::default(),
             render_on_scroll,
-            output_pipelines: FastHashSet::default(),
+            render_on_hittest: false,
         }
     }
 
     fn build_scene(&mut self, resource_cache: &mut ResourceCache) {
         // this code is why we have `Option`, which is never `None`
         let frame_builder = self.frame_ctx.create(
             self.frame_builder.take().unwrap(),
             &self.scene,
@@ -398,16 +404,25 @@ impl RenderBackend {
                 DocumentOps {
                     scroll: true,
                     build: false,
                     render: should_render,
                 }
             }
             DocumentMsg::HitTest(pipeline_id, point, flags, tx) => {
                 profile_scope!("HitTest");
+                if doc.render_on_hittest {
+                    doc.render(
+                        &mut self.resource_cache,
+                        &mut self.gpu_cache,
+                        &mut profile_counters.resources,
+                    );
+                    doc.render_on_hittest = false;
+                }
+
                 let cst = doc.frame_ctx.get_clip_scroll_tree();
                 let result = doc.frame_builder
                     .as_ref()
                     .unwrap()
                     .hit_test(cst, pipeline_id, point, flags);
                 tx.send(result).unwrap();
                 DocumentOps::nop()
             }
@@ -575,26 +590,24 @@ impl RenderBackend {
                             let config = CaptureConfig::new(root, bits);
                             let deferred = self.save_capture(&config, &mut profile_counters);
                             ResultMsg::DebugOutput(DebugOutput::SaveCapture(config, deferred))
                         },
                         #[cfg(feature = "capture")]
                         DebugCommand::LoadCapture(root, tx) => {
                             NEXT_NAMESPACE_ID.fetch_add(1, Ordering::Relaxed);
                             frame_counter += 1;
-                            let msg = ResultMsg::DebugOutput(
-                                DebugOutput::LoadCapture(root.clone())
-                            );
-                            self.result_tx.send(msg).unwrap();
+
                             self.load_capture(&root, &mut profile_counters);
 
                             for (id, doc) in &self.documents {
                                 let captured = CapturedDocument {
                                     document_id: *id,
                                     root_pipeline_id: doc.scene.root_pipeline_id,
+                                    window_size: doc.view.window_size,
                                 };
                                 tx.send(captured).unwrap();
                             }
                             // Note: we can't pass `LoadCapture` here since it needs to arrive
                             // before the `PublishDocument` messages sent by `load_capture`.
                             continue
                         },
                         DebugCommand::EnableDualSourceBlending(enable) => {
@@ -653,16 +666,17 @@ impl RenderBackend {
             );
         }
 
         let doc = self.documents.get_mut(&document_id).unwrap();
 
         if op.build {
             profile_scope!("build scene");
             doc.build_scene(&mut self.resource_cache);
+            doc.render_on_hittest = true;
         }
 
         if op.render {
             profile_scope!("generate frame");
 
             *frame_counter += 1;
             let rendered_document = doc.render(
                 &mut self.resource_cache,
@@ -678,16 +692,17 @@ impl RenderBackend {
             let msg = ResultMsg::PublishDocument(
                 document_id,
                 rendered_document,
                 pending_update,
                 profile_counters.clone()
             );
             self.result_tx.send(msg).unwrap();
             profile_counters.reset();
+            doc.render_on_hittest = false;
         }
 
         if op.render || op.scroll {
             self.notifier.new_document_ready(document_id, op.scroll, op.render);
         }
     }
 
     #[cfg(not(feature = "debugger"))]
@@ -857,16 +872,25 @@ impl RenderBackend {
                 .map(|(id, doc)| (*id, doc.view.clone()))
                 .collect(),
             resources,
         };
 
         config.serialize(&backend, "backend");
 
         if config.bits.contains(CaptureBits::FRAME) {
+            // After we rendered the frames, there are pending updates.
+            // Instead of serializing them, we are going to make sure
+            // they are applied on the `Renderer` side.
+            let msg = ResultMsg::UpdateResources {
+                updates: self.resource_cache.pending_updates(),
+                cancel_rendering: false,
+            };
+            self.result_tx.send(msg).unwrap();
+            // Save the texture/glyph/image caches.
             info!("\tresource cache");
             let caches = self.resource_cache.save_caches(&config.root);
             config.serialize(&caches, "resource_cache");
             info!("\tgpu cache");
             config.serialize(&self.gpu_cache, "gpu_cache");
         }
 
         deferred
@@ -879,21 +903,25 @@ impl RenderBackend {
     ) {
         use tiling::Frame;
 
         info!("capture: loading {:?}", root);
         let backend = CaptureConfig::deserialize::<PlainRenderBackend, _>(root, "backend")
             .expect("Unable to open backend.ron");
         let caches_maybe = CaptureConfig::deserialize::<PlainCacheOwn, _>(root, "resource_cache");
 
-        // Note: it would be great to have RenderBackend to be split
+        // Note: it would be great for `RenderBackend` to be split
         // rather explicitly on what's used before and after scene building
         // so that, for example, we never miss anything in the code below:
 
-        self.resource_cache.load_capture(backend.resources, caches_maybe,root);
+        let plain_externals = self.resource_cache.load_capture(backend.resources, caches_maybe, root);
+        let msg_load = ResultMsg::DebugOutput(
+            DebugOutput::LoadCapture(root.clone(), plain_externals)
+        );
+        self.result_tx.send(msg_load).unwrap();
 
         self.gpu_cache = match CaptureConfig::deserialize::<GpuCache, _>(root, "gpu_cache") {
             Some(gpu_cache) => gpu_cache,
             None => GpuCache::new(),
         };
 
         self.documents.clear();
         self.default_device_pixel_ratio = backend.default_device_pixel_ratio;
@@ -908,16 +936,17 @@ impl RenderBackend {
 
             let mut doc = Document {
                 scene,
                 view,
                 frame_ctx: FrameContext::new(self.frame_config.clone()),
                 frame_builder: Some(FrameBuilder::empty()),
                 output_pipelines: FastHashSet::default(),
                 render_on_scroll: None,
+                render_on_hittest: false,
             };
 
             let frame_name = format!("frame-{}-{}", (id.0).0, id.1);
             let render_doc = match CaptureConfig::deserialize::<Frame, _>(root, frame_name) {
                 Some(frame) => {
                     info!("\tloaded a built frame with {} passes", frame.passes.len());
                     doc.frame_ctx.make_rendered_document(frame)
                 }
@@ -926,22 +955,22 @@ impl RenderBackend {
                     doc.render(
                         &mut self.resource_cache,
                         &mut self.gpu_cache,
                         &mut profile_counters.resources,
                     )
                 }
             };
 
-            let msg = ResultMsg::PublishDocument(
+            let msg_publish = ResultMsg::PublishDocument(
                 id,
                 render_doc,
                 self.resource_cache.pending_updates(),
                 profile_counters.clone(),
             );
-            self.result_tx.send(msg).unwrap();
+            self.result_tx.send(msg_publish).unwrap();
             profile_counters.reset();
 
             self.notifier.new_document_ready(id, false, true);
             self.documents.insert(id, doc);
         }
     }
 }
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -7,17 +7,17 @@ use api::{ImageDescriptor, ImageFormat, 
 use box_shadow::BoxShadowCacheKey;
 use clip::{ClipSourcesWeakHandle};
 use clip_scroll_tree::CoordinateSystemId;
 use device::TextureFilter;
 use gpu_cache::GpuCache;
 use gpu_types::{ClipScrollNodeIndex, PictureType};
 use internal_types::{FastHashMap, RenderPassIndex, SourceTexture};
 use picture::ContentOrigin;
-use prim_store::{PrimitiveIndex};
+use prim_store::{PrimitiveIndex, ImageCacheKey};
 #[cfg(feature = "debugger")]
 use print_tree::{PrintTreePrinter};
 use resource_cache::CacheItem;
 use std::{cmp, ops, usize, f32, i32};
 use std::rc::Rc;
 use texture_cache::{TextureCache, TextureCacheHandle};
 use tiling::{RenderPass, RenderTargetIndex};
 use tiling::{RenderTargetKind};
@@ -258,31 +258,50 @@ impl BlurTask {
     #[cfg(feature = "debugger")]
     fn print_with<T: PrintTreePrinter>(&self, pt: &mut T) {
         pt.add_item(format!("std deviation: {}", self.blur_std_deviation));
         pt.add_item(format!("target: {:?}", self.target_kind));
         pt.add_item(format!("scale: {}", self.scale_factor));
     }
 }
 
+// Where the source data for a blit task can be found.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
+pub enum BlitSource {
+    Image {
+        key: ImageCacheKey,
+    },
+    RenderTask {
+        task_id: RenderTaskId,
+    },
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
+pub struct BlitTask {
+    pub source: BlitSource,
+}
+
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub struct RenderTaskData {
     pub data: [f32; FLOATS_PER_RENDER_TASK_INFO],
 }
 
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub enum RenderTaskKind {
     Picture(PictureTask),
     CacheMask(CacheMaskTask),
     VerticalBlur(BlurTask),
     HorizontalBlur(BlurTask),
     Readback(DeviceIntRect),
     Scaling(RenderTargetKind),
+    Blit(BlitTask),
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub enum ClearMode {
     // Applicable to color and alpha targets.
     Zero,
     One,
@@ -337,16 +356,42 @@ impl RenderTask {
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, screen_rect.size),
             kind: RenderTaskKind::Readback(screen_rect),
             clear_mode: ClearMode::Transparent,
             pass_index: None,
         }
     }
 
+    pub fn new_blit(
+        size: DeviceIntSize,
+        source: BlitSource,
+    ) -> Self {
+        let mut children = Vec::new();
+
+        // If this blit uses a render task as a source,
+        // ensure it's added as a child task. This will
+        // ensure it gets allocated in the correct pass
+        // and made available as an input when this task
+        // executes.
+        if let BlitSource::RenderTask { task_id } = source {
+            children.push(task_id);
+        }
+
+        RenderTask {
+            children,
+            location: RenderTaskLocation::Dynamic(None, size),
+            kind: RenderTaskKind::Blit(BlitTask {
+                source,
+            }),
+            clear_mode: ClearMode::Transparent,
+            pass_index: None,
+        }
+    }
+
     pub fn new_mask(
         outer_rect: DeviceIntRect,
         clips: Vec<ClipWorkItem>,
         prim_coordinate_system_id: CoordinateSystemId,
     ) -> RenderTask {
         RenderTask {
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, outer_rect.size),
@@ -506,17 +551,18 @@ impl RenderTask {
                         task.blur_std_deviation,
                         task.scale_factor,
                         0.0,
                     ],
                     task.color.to_array()
                 )
             }
             RenderTaskKind::Readback(..) |
-            RenderTaskKind::Scaling(..) => {
+            RenderTaskKind::Scaling(..) |
+            RenderTaskKind::Blit(..) => {
                 (
                     [0.0; 3],
                     [0.0; 4],
                 )
             }
         };
 
         let (target_rect, target_index) = self.get_target_rect();
@@ -593,32 +639,37 @@ impl RenderTask {
 
             RenderTaskKind::Scaling(target_kind) => {
                 target_kind
             }
 
             RenderTaskKind::Picture(ref task_info) => {
                 task_info.target_kind
             }
+
+            RenderTaskKind::Blit(..) => {
+                RenderTargetKind::Color
+            }
         }
     }
 
     // Check if this task wants to be made available as an input
     // to all passes (except the first) in the render task tree.
     // To qualify for this, the task needs to have no children / dependencies.
     // Currently, this is only supported for A8 targets, but it can be
     // trivially extended to also support RGBA8 targets in the future
     // if we decide that is useful.
     pub fn is_shared(&self) -> bool {
         match self.kind {
             RenderTaskKind::Picture(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::HorizontalBlur(..) |
-            RenderTaskKind::Scaling(..) => false,
+            RenderTaskKind::Scaling(..) |
+            RenderTaskKind::Blit(..) => false,
 
             RenderTaskKind::CacheMask(..) => true,
         }
     }
 
     #[cfg(feature = "debugger")]
     pub fn print_with<T: PrintTreePrinter>(&self, pt: &mut T, tree: &RenderTaskTree) -> bool {
         match self.kind {
@@ -641,16 +692,20 @@ impl RenderTask {
             RenderTaskKind::Readback(ref rect) => {
                 pt.new_level("Readback".to_owned());
                 pt.add_item(format!("rect: {:?}", rect));
             }
             RenderTaskKind::Scaling(ref kind) => {
                 pt.new_level("Scaling".to_owned());
                 pt.add_item(format!("kind: {:?}", kind));
             }
+            RenderTaskKind::Blit(ref task) => {
+                pt.new_level("Blit".to_owned());
+                pt.add_item(format!("source: {:?}", task.source));
+            }
         }
 
         pt.add_item(format!("clear to: {:?}", self.clear_mode));
 
         for &child_id in &self.children {
             if tree[child_id].print_with(pt, tree) {
                 pt.add_item(format!("self: {:?}", child_id))
             }
@@ -659,16 +714,17 @@ impl RenderTask {
         pt.end_level();
         true
     }
 }
 
 #[derive(Debug, Hash, PartialEq, Eq)]
 pub enum RenderTaskCacheKeyKind {
     BoxShadow(BoxShadowCacheKey),
+    Image(ImageCacheKey),
 }
 
 #[derive(Debug, Hash, PartialEq, Eq)]
 pub struct RenderTaskCacheKey {
     pub size: DeviceIntSize,
     pub kind: RenderTaskCacheKeyKind,
 }
 
@@ -717,49 +773,55 @@ impl RenderTaskCache {
 
     pub fn request_render_task<F>(
         &mut self,
         key: RenderTaskCacheKey,
         texture_cache: &mut TextureCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         mut f: F,
-    ) -> CacheItem where F: FnMut(&mut RenderTaskTree) -> (RenderTaskId, [f32; 3]) {
+    ) -> CacheItem where F: FnMut(&mut RenderTaskTree) -> (RenderTaskId, [f32; 3], bool) {
         // Get the texture cache handle for this cache key,
         // or create one.
         let cache_entry = self.entries
                               .entry(key)
                               .or_insert(RenderTaskCacheEntry {
                                   handle: TextureCacheHandle::new(),
                               });
 
         // Check if this texture cache handle is valie.
         if texture_cache.request(&mut cache_entry.handle, gpu_cache) {
             // Invoke user closure to get render task chain
             // to draw this into the texture cache.
-            let (render_task_id, user_data) = f(render_tasks);
+            let (render_task_id, user_data, is_opaque) = f(render_tasks);
             let render_task = &mut render_tasks[render_task_id];
 
+            // Select the right texture page to allocate from.
+            let image_format = match render_task.target_kind() {
+                RenderTargetKind::Color => ImageFormat::BGRA8,
+                RenderTargetKind::Alpha => ImageFormat::R8,
+            };
+
             // Find out what size to alloc in the texture cache.
             let size = match render_task.location {
                 RenderTaskLocation::Fixed |
                 RenderTaskLocation::TextureCache(..) => {
                     panic!("BUG: dynamic task was expected");
                 }
                 RenderTaskLocation::Dynamic(_, size) => size,
             };
 
             // TODO(gw): Support color tasks in the texture cache,
             //           and perhaps consider if we can determine
             //           if some tasks are opaque as an optimization.
             let descriptor = ImageDescriptor::new(
                 size.width as u32,
                 size.height as u32,
-                ImageFormat::R8,
-                false,
+                image_format,
+                is_opaque,
             );
 
             // Allocate space in the texture cache, but don't supply
             // and CPU-side data to be uploaded.
             texture_cache.update(
                 &mut cache_entry.handle,
                 descriptor,
                 TextureFilter::Linear,
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -6,35 +6,36 @@
 //!
 //! The `webrender::renderer` module provides the interface to webrender, which
 //! is accessible through [`Renderer`][renderer]
 //!
 //! [renderer]: struct.Renderer.html
 
 use api::{BlobImageRenderer, ColorF, ColorU, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize, DocumentId, Epoch, ExternalImageId};
-use api::{ExternalImageType, FontRenderMode, ImageFormat, PipelineId, RenderApiSender};
-use api::{RenderNotifier, YUV_COLOR_SPACES, YUV_FORMATS, YuvColorSpace, YuvFormat, channel};
+use api::{ExternalImageType, FontRenderMode, ImageFormat, PipelineId};
+use api::{RenderApiSender, RenderNotifier, TexelRect, TextureTarget, YuvColorSpace, YuvFormat};
+use api::{YUV_COLOR_SPACES, YUV_FORMATS, channel};
 #[cfg(not(feature = "debugger"))]
 use api::ApiMsg;
 use api::DebugCommand;
 #[cfg(not(feature = "debugger"))]
 use api::channel::MsgSender;
 use batch::{BatchKey, BatchKind, BatchTextures, BrushBatchKind};
 use batch::{BrushImageSourceKind, TransformBatchKind};
 #[cfg(feature = "capture")]
-use capture::{CaptureConfig, ExternalCaptureImage};
+use capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage};
 use debug_colors;
 use debug_render::DebugRenderer;
 #[cfg(feature = "debugger")]
 use debug_server::{self, DebugServer};
 use device::{DepthFunction, Device, FrameId, Program, UploadMethod, Texture,
              VertexDescriptor, PBO};
 use device::{ExternalTexture, FBOId, TextureSlot, VertexAttribute, VertexAttributeKind};
-use device::{FileWatcherHandler, ShaderError, TextureFilter, TextureTarget,
+use device::{FileWatcherHandler, ShaderError, TextureFilter,
              VertexUsageHint, VAO, VBO, CustomVAO};
 use device::{ProgramCache, ReadPixelsFormat};
 use euclid::{rect, Transform3D};
 use frame_builder::FrameBuilderConfig;
 use gleam::gl;
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 use gpu_types::PrimitiveInstance;
@@ -62,17 +63,17 @@ use std::mem;
 use std::path::PathBuf;
 use std::rc::Rc;
 use std::sync::Arc;
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use tiling::{AlphaRenderTarget, ColorRenderTarget};
-use tiling::{RenderPass, RenderPassKind, RenderTargetList};
+use tiling::{BlitJob, BlitJobSource, RenderPass, RenderPassKind, RenderTargetList};
 use tiling::{Frame, RenderTarget, ScalingInfo, TextureCacheRenderTarget};
 use time::precise_time_ns;
 use util::TransformedRectKind;
 
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
 /// Enabling this toggle would force the GPU cache scattered texture to
 /// be resized every frame, which enables GPU debuggers to see if this
 /// is performed correctly.
@@ -160,16 +161,20 @@ const GPU_TAG_PRIM_BORDER_CORNER: GpuPro
 const GPU_TAG_PRIM_BORDER_EDGE: GpuProfileTag = GpuProfileTag {
     label: "BorderEdge",
     color: debug_colors::LAVENDER,
 };
 const GPU_TAG_BLUR: GpuProfileTag = GpuProfileTag {
     label: "Blur",
     color: debug_colors::VIOLET,
 };
+const GPU_TAG_BLIT: GpuProfileTag = GpuProfileTag {
+    label: "Blit",
+    color: debug_colors::LIME,
+};
 
 const GPU_SAMPLER_TAG_ALPHA: GpuProfileTag = GpuProfileTag {
     label: "Alpha Targets",
     color: debug_colors::BLACK,
 };
 const GPU_SAMPLER_TAG_OPAQUE: GpuProfileTag = GpuProfileTag {
     label: "Opaque Pass",
     color: debug_colors::BLACK,
@@ -485,16 +490,28 @@ pub struct GraphicsApiInfo {
 #[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub enum ImageBufferKind {
     Texture2D = 0,
     TextureRect = 1,
     TextureExternal = 2,
     Texture2DArray = 3,
 }
 
+//TODO: those types are the same, so let's merge them
+impl From<TextureTarget> for ImageBufferKind {
+    fn from(target: TextureTarget) -> Self {
+        match target {
+            TextureTarget::Default => ImageBufferKind::Texture2D,
+            TextureTarget::Rect => ImageBufferKind::TextureRect,
+            TextureTarget::Array => ImageBufferKind::Texture2DArray,
+            TextureTarget::External => ImageBufferKind::TextureExternal,
+        }
+    }
+}
+
 pub const IMAGE_BUFFER_KINDS: [ImageBufferKind; 4] = [
     ImageBufferKind::Texture2D,
     ImageBufferKind::TextureRect,
     ImageBufferKind::TextureExternal,
     ImageBufferKind::Texture2DArray,
 ];
 
 impl ImageBufferKind {
@@ -955,17 +972,17 @@ impl CacheTexture {
 
                             // Ensure that the CPU-side shadow copy of the GPU cache data has enough
                             // rows to apply this patch.
                             while rows.len() <= row {
                                 // Add a new row.
                                 rows.push(CacheRow::new());
                                 // Add enough GPU blocks for this row.
                                 cpu_blocks
-                                    .extend_from_slice(&[GpuBlockData::empty(); MAX_VERTEX_TEXTURE_WIDTH]);
+                                    .extend_from_slice(&[GpuBlockData::EMPTY; MAX_VERTEX_TEXTURE_WIDTH]);
                             }
 
                             // This row is dirty (needs to be updated in GPU texture).
                             rows[row].is_dirty = true;
 
                             // Copy the blocks from the patch array in the shadow CPU copy.
                             let block_offset = row * MAX_VERTEX_TEXTURE_WIDTH + address.u as usize;
                             let data = &mut cpu_blocks[block_offset .. (block_offset + block_count)];
@@ -1521,26 +1538,16 @@ fn create_clip_shader(name: &'static str
                 ("sLocalClipRects", TextureSampler::LocalClipRects),
             ],
         );
     }
 
     program
 }
 
-fn get_external_image_target(ext_type: ExternalImageType) -> Option<TextureTarget> {
-    Some(match ext_type {
-        ExternalImageType::Texture2DHandle => TextureTarget::Default,
-        ExternalImageType::Texture2DArrayHandle => TextureTarget::Array,
-        ExternalImageType::TextureRectHandle => TextureTarget::Rect,
-        ExternalImageType::TextureExternalHandle => TextureTarget::External,
-        ExternalImageType::ExternalBuffer => return None,
-    })
-}
-
 struct FileWatcher {
     notifier: Box<RenderNotifier>,
     result_tx: Sender<ResultMsg>,
 }
 
 impl FileWatcherHandler for FileWatcher {
     fn file_changed(&self, path: PathBuf) {
         self.result_tx.send(ResultMsg::RefreshShader(path)).ok();
@@ -2419,19 +2426,19 @@ impl Renderer {
                     DebugOutput::FetchClipScrollTree(string) => {
                         self.debug_server.send(string);
                     }
                     #[cfg(feature = "capture")]
                     DebugOutput::SaveCapture(config, deferred) => {
                         self.save_capture(config, deferred);
                     }
                     #[cfg(feature = "capture")]
-                    DebugOutput::LoadCapture(root) => {
+                    DebugOutput::LoadCapture(root, plain_externals) => {
                         self.active_documents.clear();
-                        self.load_capture(root);
+                        self.load_capture(root, plain_externals);
                     }
                 },
                 ResultMsg::DebugCommand(command) => {
                     self.handle_debug_command(command);
                 }
             }
         }
     }
@@ -3319,16 +3326,60 @@ impl Renderer {
         self.draw_instanced_batch(
             instances,
             VertexArrayKind::Primitive,
             &key.textures,
             stats
         );
     }
 
+    fn handle_blits(
+        &mut self,
+        blits: &[BlitJob],
+        render_tasks: &RenderTaskTree,
+    ) {
+        if blits.is_empty() {
+            return;
+        }
+
+        let _timer = self.gpu_profile.start_timer(GPU_TAG_BLIT);
+
+        // TODO(gw): For now, we don't bother batching these by source texture.
+        //           If it ever shows up as an issue, we can easily batch them.
+        for blit in blits {
+            let source_rect = match blit.source {
+                BlitJobSource::Texture(texture_id, layer, source_rect) => {
+                    // A blit from a texture into this target.
+                    let src_texture = self.texture_resolver
+                        .resolve(&texture_id)
+                        .expect("BUG: invalid source texture");
+                    self.device.bind_read_target(Some((src_texture, layer)));
+                    source_rect
+                }
+                BlitJobSource::RenderTask(task_id) => {
+                    // A blit from the child render task into this target.
+                    // TODO(gw): Support R8 format here once we start
+                    //           creating mips for alpha masks.
+                    let src_texture = self.texture_resolver
+                        .resolve(&SourceTexture::CacheRGBA8)
+                        .expect("BUG: invalid source texture");
+                    let source = &render_tasks[task_id];
+                    let (source_rect, layer) = source.get_target_rect();
+                    self.device.bind_read_target(Some((src_texture, layer.0 as i32)));
+                    source_rect
+                }
+            };
+            debug_assert_eq!(source_rect.size, blit.target_rect.size);
+            self.device.blit_render_target(
+                source_rect,
+                blit.target_rect,
+            );
+        }
+    }
+
     fn handle_scaling(
         &mut self,
         render_tasks: &RenderTaskTree,
         scalings: &Vec<ScalingInfo>,
         source: SourceTexture,
     ) {
         let cache_texture = self.texture_resolver
             .resolve(&source)
@@ -3408,16 +3459,19 @@ impl Renderer {
 
             self.device.clear_target(clear_color, depth_clear, clear_rect);
 
             if depth_clear.is_some() {
                 self.device.disable_depth_write();
             }
         }
 
+        // Handle any blits from the texture cache to this target.
+        self.handle_blits(&target.blits, render_tasks);
+
         // Draw any blurs for this target.
         // Blurs are rendered as a standard 2-pass
         // separable implementation.
         // TODO(gw): In the future, consider having
         //           fast path blur shaders for common
         //           blur radii with fixed weights.
         if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
             let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);
@@ -3972,16 +4026,17 @@ impl Renderer {
         self.gpu_profile.finish_sampler(alpha_sampler);
     }
 
     fn draw_texture_cache_target(
         &mut self,
         texture: &SourceTexture,
         layer: i32,
         target: &TextureCacheRenderTarget,
+        render_tasks: &RenderTaskTree,
         stats: &mut RendererStats,
     ) {
         let projection = {
             let texture = self.texture_resolver
                 .resolve(texture)
                 .expect("BUG: invalid target texture");
             let target_size = texture.get_dimensions();
 
@@ -3996,16 +4051,19 @@ impl Renderer {
                 target_size.width as f32,
                 0.0,
                 target_size.height as f32,
                 ORTHO_NEAR_PLANE,
                 ORTHO_FAR_PLANE,
             )
         };
 
+        // Handle any blits to this texture from child tasks.
+        self.handle_blits(&target.blits, render_tasks);
+
         // Draw any blurs for this target.
         if !target.horizontal_blurs.is_empty() {
             let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);
 
             self.cs_blur_a8
                 .bind(&mut self.device, &projection, 0, &mut self.renderer_errors);
 
             self.draw_instanced_batch(
@@ -4038,18 +4096,22 @@ impl Renderer {
 
         for deferred_resolve in deferred_resolves {
             self.gpu_profile.place_marker("deferred resolve");
             let props = &deferred_resolve.image_properties;
             let ext_image = props
                 .external_image
                 .expect("BUG: Deferred resolves must be external images!");
             let image = handler.lock(ext_image.id, ext_image.channel_index);
-            let texture_target = get_external_image_target(ext_image.image_type)
-                .expect(&format!("{:?} is not a suitable image type in update_deferred_resolves()", ext_image.image_type));
+            let texture_target = match ext_image.image_type {
+                ExternalImageType::TextureHandle(target) => target,
+                ExternalImageType::Buffer => {
+                    panic!("{:?} is not a suitable image type in update_deferred_resolves()", ext_image.image_type);
+                }
+            };
 
             // In order to produce the handle, the external image handler may call into
             // the GL context and change some states.
             self.device.reset_state();
 
             let texture = match image.source {
                 ExternalImageSource::NativeTexture(texture_id) => {
                     ExternalTexture::new(texture_id, texture_target)
@@ -4058,29 +4120,31 @@ impl Renderer {
                     warn!(
                         "Invalid ext-image for ext_id:{:?}, channel:{}.",
                         ext_image.id,
                         ext_image.channel_index
                     );
                     // Just use 0 as the gl handle for this failed case.
                     ExternalTexture::new(0, texture_target)
                 }
-                _ => panic!("No native texture found."),
+                ExternalImageSource::RawData(_) => {
+                    panic!("Raw external data is not expected for deferred resolves!");
+                }
             };
 
             self.texture_resolver
                 .external_images
                 .insert((ext_image.id, ext_image.channel_index), texture);
 
             list.updates.push(GpuCacheUpdate::Copy {
                 block_index: list.blocks.len(),
                 block_count: BLOCKS_PER_UV_RECT,
                 address: deferred_resolve.address,
             });
-            list.blocks.push([image.u0, image.v0, image.u1, image.v1].into());
+            list.blocks.push(image.uv.into());
             list.blocks.push([0f32; 4].into());
         }
 
         Some(list)
     }
 
     fn unlock_external_images(&mut self) {
         if !self.texture_resolver.external_images.is_empty() {
@@ -4204,19 +4268,19 @@ impl Renderer {
         &mut self,
         frame: &mut Frame,
         framebuffer_size: Option<DeviceUintSize>,
         framebuffer_depth_is_ready: bool,
         frame_id: FrameId,
         stats: &mut RendererStats,
     ) {
         let _gm = self.gpu_profile.start_marker("tile frame draw");
-        frame.has_been_rendered = true;
 
         if frame.passes.is_empty() {
+            frame.has_been_rendered = true;
             return;
         }
 
         self.device.disable_depth_write();
         self.device.disable_stencil();
         self.device.set_blend(false);
 
         self.bind_frame_data(frame);
@@ -4265,23 +4329,30 @@ impl Renderer {
                         );
                     }
 
                     (None, None)
                 }
                 RenderPassKind::OffScreen { ref mut alpha, ref mut color, ref mut texture_cache } => {
                     alpha.check_ready();
                     color.check_ready();
-                    for (&(texture_id, target_index), target) in texture_cache {
-                        self.draw_texture_cache_target(
-                            &texture_id,
-                            target_index,
-                            target,
-                            stats,
-                        );
+
+                    // If this frame has already been drawn, then any texture
+                    // cache targets have already been updated and can be
+                    // skipped this time.
+                    if !frame.has_been_rendered {
+                        for (&(texture_id, target_index), target) in texture_cache {
+                            self.draw_texture_cache_target(
+                                &texture_id,
+                                target_index,
+                                target,
+                                &frame.render_tasks,
+                                stats,
+                            );
+                        }
                     }
 
                     for (target_index, target) in alpha.targets.iter().enumerate() {
                         stats.alpha_target_count += 1;
 
                         let projection = Transform3D::ortho(
                             0.0,
                             alpha.max_size.width as f32,
@@ -4360,16 +4431,18 @@ impl Renderer {
         let device = &mut self.device;
         self.output_targets
             .retain(|_, target| if target.last_access != frame_id {
                 device.delete_fbo(target.fbo_id);
                 false
             } else {
                 true
             });
+
+        frame.has_been_rendered = true;
     }
 
     pub fn debug_renderer<'b>(&'b mut self) -> &'b mut DebugRenderer {
         &mut self.debug
     }
 
     pub fn get_debug_flags(&self) -> DebugFlags {
         self.debug_flags
@@ -4631,20 +4704,17 @@ pub enum ExternalImageSource<'a> {
 /// the renderer should upload new texture data this
 /// frame. For instance, if providing video frames, the
 /// application could call wr.render() whenever a new
 /// video frame is ready. If the callback increments
 /// the returned timestamp for a given image, the renderer
 /// will know to re-upload the image data to the GPU.
 /// Note that the UV coords are supplied in texel-space!
 pub struct ExternalImage<'a> {
-    pub u0: f32,
-    pub v0: f32,
-    pub u1: f32,
-    pub v1: f32,
+    pub uv: TexelRect,
     pub source: ExternalImageSource<'a>,
 }
 
 /// The interfaces that an application can implement to support providing
 /// external image buffers.
 /// When the the application passes an external image to WR, it should kepp that
 /// external image life time. People could check the epoch id in RenderNotifier
 /// at the client side to make sure that the external image is not used by WR.
@@ -4782,30 +4852,36 @@ struct PlainTexture {
 #[derive(Deserialize, Serialize)]
 struct PlainRenderer {
     gpu_cache: PlainTexture,
     textures: Vec<PlainTexture>,
     external_images: Vec<ExternalCaptureImage>
 }
 
 #[cfg(feature = "capture")]
+enum CapturedExternalImageData {
+    NativeTexture(gl::GLuint),
+    Buffer(Arc<Vec<u8>>),
+}
+
+#[cfg(feature = "capture")]
 struct DummyExternalImageHandler {
-    data: FastHashMap<(ExternalImageId, u8), Vec<u8>>,
+    data: FastHashMap<(ExternalImageId, u8), (CapturedExternalImageData, TexelRect)>,
 }
 
 #[cfg(feature = "capture")]
 impl ExternalImageHandler for DummyExternalImageHandler {
     fn lock(&mut self, key: ExternalImageId, channel_index: u8) -> ExternalImage {
-        let slice = &self.data[&(key, channel_index)];
+        let (ref captured_data, ref uv) = self.data[&(key, channel_index)];
         ExternalImage {
-            u0: 0.0,
-            v0: 0.0,
-            u1: 1.0,
-            v1: 1.0,
-            source: ExternalImageSource::RawData(slice),
+            uv: *uv,
+            source: match *captured_data {
+                CapturedExternalImageData::NativeTexture(tid) => ExternalImageSource::NativeTexture(tid),
+                CapturedExternalImageData::Buffer(ref arc) => ExternalImageSource::RawData(&*arc),
+            }
         }
     }
     fn unlock(&mut self, _key: ExternalImageId, _channel_index: u8) {}
 }
 
 #[cfg(feature = "capture")]
 impl OutputImageHandler for () {
     fn lock(&mut self, _: PipelineId) -> Option<(u32, DeviceIntSize)> {
@@ -4833,18 +4909,38 @@ impl Renderer {
             texture.get_dimensions(),
         );
 
         let mut file = fs::File::create(root.join(&short_path))
             .expect(&format!("Unable to create {}", short_path));
         let bytes_per_layer = (rect.size.width * rect.size.height * bytes_per_pixel) as usize;
         let mut data = vec![0; bytes_per_layer];
 
+        //TODO: instead of reading from an FBO with `read_pixels*`, we could
+        // read from textures directly with `get_tex_image*`.
+
         for layer_id in 0 .. texture.get_layer_count() {
             device.attach_read_texture(texture, layer_id);
+            #[cfg(feature = "png")]
+            {
+                let mut png_data;
+                let (data_ref, format) = match texture.get_format() {
+                    ImageFormat::RGBAF32 => {
+                        png_data = vec![0; (rect.size.width * rect.size.height * 4) as usize];
+                        device.read_pixels_into(rect, ReadPixelsFormat::Rgba8, &mut png_data);
+                        (&png_data, ReadPixelsFormat::Rgba8)
+                    }
+                    fm => (&data, ReadPixelsFormat::Standard(fm)),
+                };
+                CaptureConfig::save_png(
+                    root.join(format!("textures/{}-{}.png", name, layer_id)),
+                    (rect.size.width, rect.size.height), format,
+                    data_ref,
+                );
+            }
             device.read_pixels_into(rect, read_format, &mut data);
             file.write_all(&data)
                 .unwrap();
         }
 
         PlainTexture {
             data: short_path,
             size: (rect.size.width, rect.size.height, texture.get_layer_count()),
@@ -4856,64 +4952,107 @@ impl Renderer {
 
     fn load_texture(texture: &mut Texture, plain: &PlainTexture, root: &PathBuf, device: &mut Device) -> Vec<u8> {
         use std::fs::File;
         use std::io::Read;
 
         let mut texels = Vec::new();
         assert_eq!(plain.format, texture.get_format());
         File::open(root.join(&plain.data))
-            .unwrap()
+            .expect(&format!("Unable to open texture at {}", plain.data))
             .read_to_end(&mut texels)
             .unwrap();
 
         device.init_texture(
             texture, plain.size.0, plain.size.1,
             plain.filter, plain.render_target,
             plain.size.2, Some(texels.as_slice()),
         );
 
         texels
     }
 
-    fn save_capture(&mut self, config: CaptureConfig, deferred_images: Vec<ExternalCaptureImage>) {
+    fn save_capture(
+        &mut self,
+        config: CaptureConfig,
+        deferred_images: Vec<ExternalCaptureImage>,
+    ) {
         use std::fs;
         use std::io::Write;
         use api::{CaptureBits, ExternalImageData};
 
         self.device.begin_frame();
+        let _gm = self.gpu_profile.start_marker("read GPU data");
         self.device.bind_read_target_impl(self.capture.read_fbo);
 
         if !deferred_images.is_empty() {
             info!("saving external images");
+            let mut arc_map = FastHashMap::<*const u8, String>::default();
+            let mut tex_map = FastHashMap::<u32, String>::default();
             let handler = self.external_image_handler
                 .as_mut()
                 .expect("Unable to lock the external image handler!");
             for def in &deferred_images {
+                info!("\t{}", def.short_path);
                 let ExternalImageData { id, channel_index, image_type } = def.external;
-                let data = match handler.lock(id, channel_index).source {
-                    ExternalImageSource::RawData(data) => data.to_vec(),
+                let ext_image = handler.lock(id, channel_index);
+                let (data, short_path) = match ext_image.source {
+                    ExternalImageSource::RawData(data) => {
+                        let arc_id = arc_map.len() + 1;
+                        match arc_map.entry(data.as_ptr()) {
+                            Entry::Occupied(e) => {
+                                (None, e.get().clone())
+                            }
+                            Entry::Vacant(e) => {
+                                let short_path = format!("externals/d{}.raw", arc_id);
+                                (Some(data.to_vec()), e.insert(short_path).clone())
+                            }
+                        }
+                    }
                     ExternalImageSource::NativeTexture(gl_id) => {
-                        let target = get_external_image_target(image_type).unwrap();
-                        self.device.attach_read_texture_external(gl_id, target, 0);
-                        self.device.read_pixels(&def.descriptor)
+                        let tex_id = tex_map.len() + 1;
+                        match tex_map.entry(gl_id) {
+                            Entry::Occupied(e) => {
+                                (None, e.get().clone())
+                            }
+                            Entry::Vacant(e) => {
+                                let target = match image_type {
+                                    ExternalImageType::TextureHandle(target) => target,
+                                    ExternalImageType::Buffer => unreachable!(),
+                                };
+                                info!("\t\tnative texture of target {:?}", target);
+                                let layer_index = 0; //TODO: what about layered textures?
+                                self.device.attach_read_texture_external(gl_id, target, layer_index);
+                                let data = self.device.read_pixels(&def.descriptor);
+                                let short_path = format!("externals/t{}.raw", tex_id);
+                                (Some(data), e.insert(short_path).clone())
+                            }
+                        }
                     }
                     ExternalImageSource::Invalid => {
-                        // Create a dummy buffer...
-                        let stride = def.descriptor.compute_stride();
-                        let total_size = def.descriptor.height * stride;
-                        vec![0xFF; total_size as usize]
+                        info!("\t\tinvalid source!");
+                        (None, String::new())
                     }
                 };
-                handler.unlock(id, channel_index);
-
-                fs::File::create(config.root.join(&def.short_path))
-                    .expect(&format!("Unable to create {}", def.short_path))
-                    .write_all(&data)
-                    .unwrap();
+                if let Some(bytes) = data {
+                    fs::File::create(config.root.join(&short_path))
+                        .expect(&format!("Unable to create {}", short_path))
+                        .write_all(&bytes)
+                        .unwrap();
+                }
+                let plain = PlainExternalImage {
+                    data: short_path,
+                    id: def.external.id,
+                    channel_index: def.external.channel_index,
+                    uv: ext_image.uv,
+                };
+                config.serialize(&plain, &def.short_path);
+            }
+            for def in &deferred_images {
+                handler.unlock(def.external.id, def.external.channel_index);
             }
         }
 
         if config.bits.contains(CaptureBits::FRAME) {
             let path_textures = config.root.join("textures");
             if !path_textures.is_dir() {
                 fs::create_dir(&path_textures).unwrap();
             }
@@ -4939,76 +5078,124 @@ impl Renderer {
             config.serialize(&plain_self, "renderer");
         }
 
         self.device.bind_read_target(None);
         self.device.end_frame();
         info!("done.");
     }
 
-    fn load_capture(&mut self, root: PathBuf) {
-        let renderer = match CaptureConfig::deserialize::<PlainRenderer, _>(&root, "renderer") {
-            Some(r) => r,
-            None => return,
-        };
-
-        self.device.begin_frame();
-        info!("loading cached textures");
-
-        for texture in self.texture_resolver.cache_texture_map.drain(..) {
-            self.device.delete_texture(texture);
-        }
-        for texture in renderer.textures {
-            info!("\t{}", texture.data);
-            let mut t = self.device.create_texture(TextureTarget::Array, texture.format);
-            Self::load_texture(&mut t, &texture, &root, &mut self.device);
-            self.texture_resolver.cache_texture_map.push(t);
-        }
-
-        info!("loading gpu cache");
-        Self::load_texture(
-            &mut self.gpu_cache_texture.texture,
-            &renderer.gpu_cache,
-            &root,
-            &mut self.device,
-        );
-        match self.gpu_cache_texture.bus {
-            CacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
-                rows.clear();
-                cpu_blocks.clear();
-            }
-            CacheBus::Scatter { .. } => {}
-        }
-
-        info!("loading external images");
+    fn load_capture(
+        &mut self, root: PathBuf, plain_externals: Vec<PlainExternalImage>
+    ) {
+        use std::fs::File;
+        use std::io::Read;
+        use std::slice;
+
+        info!("loading external buffer-backed images");
         assert!(self.texture_resolver.external_images.is_empty());
+        let mut raw_map = FastHashMap::<String, Arc<Vec<u8>>>::default();
         let mut image_handler = DummyExternalImageHandler {
             data: FastHashMap::default(),
         };
-
-        for ExternalCaptureImage { short_path, external, descriptor } in renderer.external_images {
-            let target = match get_external_image_target(external.image_type) {
-                Some(target) => target,
-                None => continue,
+        // Note: this is a `SCENE`-level population of the external image handler.
+        // It puts both external buffers and textures into the map, but the
+        // texture entries are overwritten later in this function if we are
+        // at the `FRAME` level.
+        for plain_ext in plain_externals {
+            let data = match raw_map.entry(plain_ext.data) {
+                Entry::Occupied(e) => e.get().clone(),
+                Entry::Vacant(e) => {
+                    let mut buffer = Vec::new();
+                    File::open(root.join(e.key()))
+                        .expect(&format!("Unable to open {}", e.key()))
+                        .read_to_end(&mut buffer)
+                        .unwrap();
+                    e.insert(Arc::new(buffer)).clone()
+                }
             };
-            //TODO: provide a way to query both the layer count and the filter from external images
-            let (layer_count, filter) = (1, TextureFilter::Linear);
-            let plain = PlainTexture {
-                data: short_path,
-                size: (descriptor.width, descriptor.height, layer_count),
-                format: descriptor.format,
-                filter,
-                render_target: None,
-            };
-
-            let mut t = self.device.create_texture(target, plain.format);
-            let data = Self::load_texture(&mut t, &plain, &root, &mut self.device);
-            let key = (external.id, external.channel_index);
-            self.capture.owned_external_images.insert(key, t.into_external());
-            image_handler.data.insert(key, data);
+            let key = (plain_ext.id, plain_ext.channel_index);
+            let value = (CapturedExternalImageData::Buffer(data), plain_ext.uv);
+            image_handler.data.insert(key, value);
         }
 
-        self.device.end_frame();
+        if let Some(renderer) = CaptureConfig::deserialize::<PlainRenderer, _>(&root, "renderer") {
+            info!("loading cached textures");
+            self.device.begin_frame();
+
+            for texture in self.texture_resolver.cache_texture_map.drain(..) {
+                self.device.delete_texture(texture);
+            }
+            for texture in renderer.textures {
+                info!("\t{}", texture.data);
+                let mut t = self.device.create_texture(TextureTarget::Array, texture.format);
+                Self::load_texture(&mut t, &texture, &root, &mut self.device);
+                self.texture_resolver.cache_texture_map.push(t);
+            }
+
+            info!("loading gpu cache");
+            let gpu_cache_data = Self::load_texture(
+                &mut self.gpu_cache_texture.texture,
+                &renderer.gpu_cache,
+                &root,
+                &mut self.device,
+            );
+            match self.gpu_cache_texture.bus {
+                CacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
+                    let dim = self.gpu_cache_texture.texture.get_dimensions();
+                    let blocks = unsafe {
+                        slice::from_raw_parts(
+                            gpu_cache_data.as_ptr() as *const GpuBlockData,
+                            gpu_cache_data.len() / mem::size_of::<GpuBlockData>(),
+                        )
+                    };
+                    // fill up the CPU cache from the contents we just loaded
+                    rows.clear();
+                    cpu_blocks.clear();
+                    rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
+                    cpu_blocks.extend_from_slice(blocks);
+                }
+                CacheBus::Scatter { .. } => {}
+            }
+
+            info!("loading external texture-backed images");
+            let mut native_map = FastHashMap::<String, gl::GLuint>::default();
+            for ExternalCaptureImage { short_path, external, descriptor } in renderer.external_images {
+                let target = match external.image_type {
+                    ExternalImageType::TextureHandle(target) => target,
+                    ExternalImageType::Buffer => continue,
+                };
+                let plain_ext = CaptureConfig::deserialize::<PlainExternalImage, _>(&root, &short_path)
+                    .expect(&format!("Unable to read {}.ron", short_path));
+                let key = (external.id, external.channel_index);
+
+                let tid = match native_map.entry(plain_ext.data) {
+                    Entry::Occupied(e) => e.get().clone(),
+                    Entry::Vacant(e) => {
+                        //TODO: provide a way to query both the layer count and the filter from external images
+                        let (layer_count, filter) = (1, TextureFilter::Linear);
+                        let plain_tex = PlainTexture {
+                            data: e.key().clone(),
+                            size: (descriptor.width, descriptor.height, layer_count),
+                            format: descriptor.format,
+                            filter,
+                            render_target: None,
+                        };
+                        let mut t = self.device.create_texture(target, plain_tex.format);
+                        Self::load_texture(&mut t, &plain_tex, &root, &mut self.device);
+                        let extex = t.into_external();
+                        self.capture.owned_external_images.insert(key, extex.clone());
+                        e.insert(extex.internal_id()).clone()
+                    }
+                };
+
+                let value = (CapturedExternalImageData::NativeTexture(tid), plain_ext.uv);
+                image_handler.data.insert(key, value);
+            }
+
+            self.device.end_frame();
+        }
+
+        self.output_image_handler = Some(Box::new(()) as Box<_>);
         self.external_image_handler = Some(Box::new(image_handler) as Box<_>);
-        self.output_image_handler = Some(Box::new(()) as Box<_>);
         info!("done.");
     }
 }
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -10,17 +10,17 @@ use api::{ExternalImageData, ExternalIma
 use api::{FontInstanceOptions, FontInstancePlatformOptions, FontVariation};
 use api::{GlyphDimensions, GlyphKey, IdNamespace};
 use api::{ImageData, ImageDescriptor, ImageKey, ImageRendering};
 use api::{TileOffset, TileSize};
 #[cfg(feature = "capture")]
 use api::{NativeFontHandle};
 use app_units::Au;
 #[cfg(feature = "capture")]
-use capture::{ExternalCaptureImage};
+use capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage};
 use device::TextureFilter;
 use frame::FrameId;
 use glyph_cache::GlyphCache;
 #[cfg(feature = "capture")]
 use glyph_cache::{CachedGlyphInfo, PlainGlyphCacheOwn, PlainGlyphCacheRef, PlainCachedGlyphInfo};
 use glyph_rasterizer::{FontInstance, GlyphFormat, GlyphRasterizer, GlyphRequest};
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
 use internal_types::{FastHashMap, FastHashSet, SourceTexture, TextureUpdateList};
@@ -50,27 +50,42 @@ pub struct GlyphFetchResult {
 // They are converted to normalized ST
 // values in the vertex shader. The reason
 // for this is that the texture may change
 // dimensions (e.g. the pages in a texture
 // atlas can grow). When this happens, by
 // storing the coordinates as texel values
 // we don't need to go through and update
 // various CPU-side structures.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct CacheItem {
     pub texture_id: SourceTexture,
     pub uv_rect_handle: GpuCacheHandle,
+    pub uv_rect: DeviceUintRect,
+    pub texture_layer: i32,
+}
+
+impl CacheItem {
+    pub fn invalid() -> Self {
+        CacheItem {
+            texture_id: SourceTexture::Invalid,
+            uv_rect_handle: GpuCacheHandle::new(),
+            uv_rect: DeviceUintRect::zero(),
+            texture_layer: 0,
+        }
+    }
 }
 
 #[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub struct ImageProperties {
     pub descriptor: ImageDescriptor,
     pub external_image: Option<ExternalImageData>,
     pub tiling: Option<TileSize>,
+    pub epoch: Epoch,
 }
 
 #[derive(Debug, Copy, Clone, PartialEq)]
 enum State {
     Idle,
     AddResources,
     QueryResources,
 }
@@ -280,33 +295,33 @@ impl ResourceCache {
 
     fn should_tile(limit: u32, descriptor: &ImageDescriptor, data: &ImageData) -> bool {
         let size_check = descriptor.width > limit || descriptor.height > limit;
         match *data {
             ImageData::Raw(_) | ImageData::Blob(_) => size_check,
             ImageData::External(info) => {
                 // External handles already represent existing textures so it does
                 // not make sense to tile them into smaller ones.
-                info.image_type == ExternalImageType::ExternalBuffer && size_check
+                info.image_type == ExternalImageType::Buffer && size_check
             }
         }
     }
 
     // Request the texture cache item for a cacheable render
     // task. If the item is already cached, the texture cache
     // handle will be returned. Otherwise, the user supplied
     // closure will be invoked to generate the render task
     // chain that is required to draw this task.
     pub fn request_render_task<F>(
         &mut self,
         key: RenderTaskCacheKey,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         f: F,
-    ) -> CacheItem where F: FnMut(&mut RenderTaskTree) -> (RenderTaskId, [f32; 3]) {
+    ) -> CacheItem where F: FnMut(&mut RenderTaskTree) -> (RenderTaskId, [f32; 3], bool) {
         self.cached_render_tasks.request_render_task(
             key,
             &mut self.texture_cache,
             gpu_cache,
             render_tasks,
             f
         )
     }
@@ -742,34 +757,30 @@ impl ResourceCache {
         }
     }
 
     pub fn get_image_properties(&self, image_key: ImageKey) -> Option<ImageProperties> {
         let image_template = &self.resources.image_templates.get(image_key);
 
         image_template.map(|image_template| {
             let external_image = match image_template.data {
-                ImageData::External(ext_image) => {
-                    match ext_image.image_type {
-                        ExternalImageType::Texture2DHandle |
-                        ExternalImageType::Texture2DArrayHandle |
-                        ExternalImageType::TextureRectHandle |
-                        ExternalImageType::TextureExternalHandle => Some(ext_image),
-                        // external buffer uses resource_cache.
-                        ExternalImageType::ExternalBuffer => None,
-                    }
-                }
+                ImageData::External(ext_image) => match ext_image.image_type {
+                    ExternalImageType::TextureHandle(_) => Some(ext_image),
+                    // external buffer uses resource_cache.
+                    ExternalImageType::Buffer => None,
+                },
                 // raw and blob image are all using resource_cache.
                 ImageData::Raw(..) | ImageData::Blob(..) => None,
             };
 
             ImageProperties {
                 descriptor: image_template.descriptor,
                 external_image,
                 tiling: image_template.tiling,
+                epoch: image_template.epoch,
             }
         })
     }
 
     pub fn get_tiled_image_map(&self) -> TiledImageMap {
         self.resources
             .image_templates
             .images
@@ -985,16 +996,17 @@ enum PlainFontTemplate {
     Native(NativeFontHandle),
 }
 
 #[cfg(feature = "capture")]
 #[derive(Serialize, Deserialize)]
 struct PlainImageTemplate {
     data: String,
     descriptor: ImageDescriptor,
+    epoch: Epoch,
     tiling: Option<TileSize>,
 }
 
 #[cfg(feature = "capture")]
 #[derive(Serialize, Deserialize)]
 pub struct PlainResources {
     font_templates: FastHashMap<FontKey, PlainFontTemplate>,
     font_instances: FastHashMap<FontInstanceKey, FontInstance>,
@@ -1021,16 +1033,18 @@ pub struct PlainCacheOwn {
     textures: TextureCache,
 }
 
 #[cfg(feature = "capture")]
 impl ResourceCache {
     pub fn save_capture(
         &mut self, root: &PathBuf
     ) -> (PlainResources, Vec<ExternalCaptureImage>) {
+        #[cfg(feature = "png")]
+        use device::ReadPixelsFormat;
         use std::fs;
         use std::io::Write;
 
         info!("saving resource cache");
         let res = &self.resources;
         if !root.is_dir() {
             fs::create_dir_all(root).unwrap()
         }
@@ -1041,16 +1055,20 @@ impl ResourceCache {
         let path_images = root.join("images");
         if !path_images.is_dir() {
             fs::create_dir(&path_images).unwrap();
         }
         let path_blobs = root.join("blobs");
         if !path_blobs.is_dir() {
             fs::create_dir(&path_blobs).unwrap();
         }
+        let path_externals = root.join("externals");
+        if !path_externals.is_dir() {
+            fs::create_dir(&path_externals).unwrap();
+        }
 
         info!("\tfont templates");
         let mut font_paths = FastHashMap::default();
         for template in res.font_templates.values() {
             let data: &[u8] = match *template {
                 FontTemplate::Raw(ref arc, _) => arc,
                 FontTemplate::Native(_) => continue,
             };
@@ -1066,73 +1084,87 @@ impl ResourceCache {
                 .write_all(data)
                 .unwrap();
             entry.insert(short_path);
         }
 
         info!("\timage templates");
         let mut image_paths = FastHashMap::default();
         let mut other_paths = FastHashMap::default();
+        let mut num_blobs = 0;
         let mut external_images = Vec::new();
         for (&key, template) in res.image_templates.images.iter() {
             let desc = &template.descriptor;
             match template.data {
                 ImageData::Raw(ref arc) => {
                     let image_id = image_paths.len() + 1;
                     let entry = match image_paths.entry(arc.as_ptr()) {
                         Entry::Occupied(_) => continue,
                         Entry::Vacant(e) => e,
                     };
 
-                    //TODO: option to save as PNG:
-                    // https://github.com/servo/webrender/issues/2234
+                    #[cfg(feature = "png")]
+                    CaptureConfig::save_png(
+                        root.join(format!("images/{}.png", image_id)),
+                        (desc.width, desc.height),
+                        ReadPixelsFormat::Standard(desc.format),
+                        &arc,
+                    );
                     let file_name = format!("{}.raw", image_id);
                     let short_path = format!("images/{}", file_name);
                     fs::File::create(path_images.join(file_name))
                         .expect(&format!("Unable to create {}", short_path))
                         .write_all(&*arc)
                         .unwrap();
                     entry.insert(short_path);
                 }
                 ImageData::Blob(_) => {
                     assert_eq!(template.tiling, None);
                     let request = BlobImageRequest {
                         key,
                         //TODO: support tiled blob images
                         // https://github.com/servo/webrender/issues/2236
                         tile: None,
                     };
-
                     let renderer = self.blob_image_renderer.as_mut().unwrap();
                     renderer.request(
                         &self.resources,
                         request,
                         &BlobImageDescriptor {
                             width: desc.width,
                             height: desc.height,
                             offset: DevicePoint::zero(),
                             format: desc.format,
                         },
                         None,
                     );
                     let result = renderer.resolve(request)
                         .expect("Blob resolve failed");
                     assert_eq!((result.width, result.height), (desc.width, desc.height));
+                    assert_eq!(result.data.len(), desc.compute_total_size() as usize);
 
-                    let file_name = format!("{}.raw", other_paths.len() + 1);
+                    num_blobs += 1;
+                    #[cfg(feature = "png")]
+                    CaptureConfig::save_png(
+                        root.join(format!("blobs/{}.png", num_blobs)),
+                        (desc.width, desc.height),
+                        ReadPixelsFormat::Standard(desc.format),
+                        &result.data,
+                    );
+                    let file_name = format!("{}.raw", num_blobs);
                     let short_path = format!("blobs/{}", file_name);
                     let full_path = path_blobs.clone().join(&file_name);
                     fs::File::create(full_path)
                         .expect(&format!("Unable to create {}", short_path))
                         .write_all(&result.data)
                         .unwrap();
                     other_paths.insert(key, short_path);
                 }
                 ImageData::External(ref ext) => {
-                    let short_path = format!("blobs/{}.raw", other_paths.len() + 1);
+                    let short_path = format!("externals/{}", external_images.len() + 1);
                     other_paths.insert(key, short_path.clone());
                     external_images.push(ExternalCaptureImage {
                         short_path,
                         descriptor: desc.clone(),
                         external: ext.clone(),
                     });
                 }
             }
@@ -1161,16 +1193,17 @@ impl ResourceCache {
                 .map(|(key, template)| {
                     (*key, PlainImageTemplate {
                         data: match template.data {
                             ImageData::Raw(ref arc) => image_paths[&arc.as_ptr()].clone(),
                             _ => other_paths[key].clone(),
                         },
                         descriptor: template.descriptor.clone(),
                         tiling: template.tiling,
+                        epoch: template.epoch,
                     })
                 })
                 .collect(),
         };
 
         (resources, external_images)
     }
 
@@ -1238,17 +1271,17 @@ impl ResourceCache {
         }
     }
 
     pub fn load_capture(
         &mut self,
         resources: PlainResources,
         caches: Option<PlainCacheOwn>,
         root: &PathBuf,
-    ) {
+    ) -> Vec<PlainExternalImage> {
         use std::fs::File;
         use std::io::Read;
 
         info!("loading resource cache");
         //TODO: instead of filling the local path to Arc<data> map as we process
         // each of the resource types, we could go through all of the local paths
         // and fill out the map as the first step.
         let mut raw_map = FastHashMap::<String, Arc<Vec<u8>>>::default();
@@ -1343,34 +1376,51 @@ impl ResourceCache {
                 }
             };
 
             self.glyph_rasterizer.add_font(key, template.clone());
             res.font_templates.insert(key, template);
         }
 
         info!("\timage templates...");
+        let mut external_images = Vec::new();
         for (key, template) in resources.image_templates {
-            let arc = match raw_map.entry(template.data) {
-                Entry::Occupied(e) => {
-                    e.get().clone()
+            let data = match CaptureConfig::deserialize::<PlainExternalImage, _>(root, &template.data) {
+                Some(plain) => {
+                    let ext_data = ExternalImageData {
+                        id: plain.id,
+                        channel_index: plain.channel_index,
+                        image_type: ExternalImageType::Buffer,
+                    };
+                    external_images.push(plain);
+                    ImageData::External(ext_data)
                 }
-                Entry::Vacant(e) => {
-                    let mut buffer = Vec::new();
-                    File::open(root.join(e.key()))
-                        .expect(&format!("Unable to open {}", e.key()))
-                        .read_to_end(&mut buffer)
-                        .unwrap();
-                    e.insert(Arc::new(buffer))
-                        .clone()
+                None => {
+                    let arc = match raw_map.entry(template.data) {
+                        Entry::Occupied(e) => {
+                            e.get().clone()
+                        }
+                        Entry::Vacant(e) => {
+                            let mut buffer = Vec::new();
+                            File::open(root.join(e.key()))
+                                .expect(&format!("Unable to open {}", e.key()))
+                                .read_to_end(&mut buffer)
+                                .unwrap();
+                            e.insert(Arc::new(buffer))
+                                .clone()
+                        }
+                    };
+                    ImageData::Raw(arc)
                 }
             };
 
             res.image_templates.images.insert(key, ImageResource {
-                data: ImageData::Raw(arc),
+                data,
                 descriptor: template.descriptor,
                 tiling: template.tiling,
-                epoch: Epoch(0),
+                epoch: template.epoch,
                 dirty_rect: None,
             });
         }
+
+        external_images
     }
 }
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -400,19 +400,31 @@ impl TextureCache {
     // tries to get a handle that was not requested this frame.
     pub fn get(&self, handle: &TextureCacheHandle) -> CacheItem {
         match handle.entry {
             Some(ref handle) => {
                 let entry = self.entries
                     .get_opt(handle)
                     .expect("BUG: was dropped from cache or not updated!");
                 debug_assert_eq!(entry.last_access, self.frame_id);
+                let (layer_index, origin) = match entry.kind {
+                    EntryKind::Standalone { .. } => {
+                        (0, DeviceUintPoint::zero())
+                    }
+                    EntryKind::Cache {
+                        layer_index,
+                        origin,
+                        ..
+                    } => (layer_index, origin),
+                };
                 CacheItem {
                     uv_rect_handle: entry.uv_rect_handle,
                     texture_id: SourceTexture::TextureCache(entry.texture_id),
+                    uv_rect: DeviceUintRect::new(origin, entry.size),
+                    texture_layer: layer_index as i32,
                 }
             }
             None => panic!("BUG: handle not requested earlier in frame"),
         }
     }
 
     // A more detailed version of get(). This allows access to the actual
     // device rect of the cache allocation.
@@ -1046,23 +1058,20 @@ impl TextureUpdate {
         layer_index: i32,
         dirty_rect: Option<DeviceUintRect>,
     ) -> TextureUpdate {
         let data_src = match data {
             ImageData::Blob(..) => {
                 panic!("The vector image should have been rasterized.");
             }
             ImageData::External(ext_image) => match ext_image.image_type {
-                ExternalImageType::Texture2DHandle |
-                ExternalImageType::Texture2DArrayHandle |
-                ExternalImageType::TextureRectHandle |
-                ExternalImageType::TextureExternalHandle => {
+                ExternalImageType::TextureHandle(_) => {
                     panic!("External texture handle should not go through texture_cache.");
                 }
-                ExternalImageType::ExternalBuffer => TextureUpdateSource::External {
+                ExternalImageType::Buffer => TextureUpdateSource::External {
                     id: ext_image.id,
                     channel_index: ext_image.channel_index,
                 },
             },
             ImageData::Raw(bytes) => {
                 let finish = descriptor.offset +
                     descriptor.width * descriptor.format.bytes_per_pixel() +
                     (descriptor.height - 1) * descriptor.compute_stride();
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -1,30 +1,30 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ClipId, ColorF, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::{DevicePixelScale, DeviceUintPoint, DeviceUintRect, DeviceUintSize};
 use api::{DocumentLayer, FilterOp, ImageFormat};
 use api::{LayerRect, MixBlendMode, PipelineId};
-use batch::{AlphaBatcher, ClipBatcher};
+use batch::{AlphaBatcher, ClipBatcher, resolve_image};
 use clip::{ClipStore};
 use clip_scroll_tree::{ClipScrollTree};
 use device::Texture;
 use gpu_cache::{GpuCache, GpuCacheUpdateList};
 use gpu_types::{BlurDirection, BlurInstance, BrushInstance, ClipChainRectIndex};
 use gpu_types::{ClipScrollNodeData, ClipScrollNodeIndex};
 use gpu_types::{PrimitiveInstance};
 use internal_types::{FastHashMap, RenderPassIndex, SourceTexture};
 use picture::{PictureKind};
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveStore};
 use prim_store::{BrushMaskKind, BrushKind, DeferredResolve, EdgeAaSegmentMask};
 use profiler::FrameProfileCounters;
-use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKind};
+use render_task::{BlitSource, RenderTaskAddress, RenderTaskId, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, RenderTaskLocation, RenderTaskTree};
 use resource_cache::{ResourceCache};
 use std::{cmp, usize, f32, i32};
 use texture_allocator::GuillotineAllocator;
 
 const MIN_TARGET_SIZE: u32 = 2048;
 
 #[derive(Debug)]
@@ -95,23 +95,31 @@ pub trait RenderTarget {
     fn build(
         &mut self,
         _ctx: &RenderTargetContext,
         _gpu_cache: &mut GpuCache,
         _render_tasks: &mut RenderTaskTree,
         _deferred_resolves: &mut Vec<DeferredResolve>,
     ) {
     }
+    // TODO(gw): It's a bit odd that we need the deferred resolves and mutable
+    //           GPU cache here. They are typically used by the build step
+    //           above. They are used for the blit jobs to allow resolve_image
+    //           to be called. It's a bit of extra overhead to store the image
+    //           key here and then resolve them in the build step separately.
+    //           BUT: if/when we add more texture cache target jobs, we might
+    //           want to tidy this up.
     fn add_task(
         &mut self,
         task_id: RenderTaskId,
         ctx: &RenderTargetContext,
-        gpu_cache: &GpuCache,
+        gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         clip_store: &ClipStore,
+        deferred_resolves: &mut Vec<DeferredResolve>,
     );
     fn used_rect(&self) -> DeviceIntRect;
     fn needs_depth(&self) -> bool;
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub enum RenderTargetKind {
@@ -154,26 +162,28 @@ impl<T: RenderTarget> RenderTargetList<T
             target.build(ctx, gpu_cache, render_tasks, deferred_resolves);
         }
     }
 
     fn add_task(
         &mut self,
         task_id: RenderTaskId,
         ctx: &RenderTargetContext,
-        gpu_cache: &GpuCache,
+        gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         clip_store: &ClipStore,
+        deferred_resolves: &mut Vec<DeferredResolve>,
     ) {
         self.targets.last_mut().unwrap().add_task(
             task_id,
             ctx,
             gpu_cache,
             render_tasks,
             clip_store,
+            deferred_resolves,
         );
     }
 
     fn allocate(
         &mut self,
         alloc_size: DeviceUintSize,
     ) -> (DeviceUintPoint, RenderTargetIndex) {
         let existing_origin = self.targets
@@ -228,25 +238,40 @@ pub struct FrameOutput {
 }
 
 #[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub struct ScalingInfo {
     pub src_task_id: RenderTaskId,
     pub dest_task_id: RenderTaskId,
 }
 
+// Defines where the source data for a blit job can be found.
+#[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
+pub enum BlitJobSource {
+    Texture(SourceTexture, i32, DeviceIntRect),
+    RenderTask(RenderTaskId),
+}
+
+// Information required to do a blit from a source to a target.
+#[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
+pub struct BlitJob {
+    pub source: BlitJobSource,
+    pub target_rect: DeviceIntRect,
+}
+
 /// A render target represents a number of rendering operations on a surface.
 #[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub struct ColorRenderTarget {
     pub alpha_batcher: AlphaBatcher,
     // List of blur operations to apply for this render target.
     pub vertical_blurs: Vec<BlurInstance>,
     pub horizontal_blurs: Vec<BlurInstance>,
     pub readbacks: Vec<DeviceIntRect>,
     pub scalings: Vec<ScalingInfo>,
+    pub blits: Vec<BlitJob>,
     // List of frame buffer outputs for this render target.
     pub outputs: Vec<FrameOutput>,
     allocator: Option<TextureAllocator>,
     alpha_tasks: Vec<RenderTaskId>,
 }
 
 impl RenderTarget for ColorRenderTarget {
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint> {
@@ -261,16 +286,17 @@ impl RenderTarget for ColorRenderTarget 
         screen_size: DeviceIntSize,
     ) -> Self {
         ColorRenderTarget {
             alpha_batcher: AlphaBatcher::new(screen_size),
             vertical_blurs: Vec::new(),
             horizontal_blurs: Vec::new(),
             readbacks: Vec::new(),
             scalings: Vec::new(),
+            blits: Vec::new(),
             allocator: size.map(TextureAllocator::new),
             outputs: Vec::new(),
             alpha_tasks: Vec::new(),
         }
     }
 
     fn build(
         &mut self,
@@ -287,19 +313,20 @@ impl RenderTarget for ColorRenderTarget 
             deferred_resolves,
         );
     }
 
     fn add_task(
         &mut self,
         task_id: RenderTaskId,
         ctx: &RenderTargetContext,
-        _: &GpuCache,
+        gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         _: &ClipStore,
+        deferred_resolves: &mut Vec<DeferredResolve>,
     ) {
         let task = &render_tasks[task_id];
 
         match task.kind {
             RenderTaskKind::VerticalBlur(ref info) => {
                 info.add_instances(
                     &mut self.vertical_blurs,
                     task_id,
@@ -349,16 +376,61 @@ impl RenderTarget for ColorRenderTarget 
                 self.readbacks.push(device_rect);
             }
             RenderTaskKind::Scaling(..) => {
                 self.scalings.push(ScalingInfo {
                     src_task_id: task.children[0],
                     dest_task_id: task_id,
                 });
             }
+            RenderTaskKind::Blit(ref task_info) => {
+                match task_info.source {
+                    BlitSource::Image { key } => {
+                        // Get the cache item for the source texture.
+                        let cache_item = resolve_image(
+                            key.image_key,
+                            key.image_rendering,
+                            key.tile_offset,
+                            ctx.resource_cache,
+                            gpu_cache,
+                            deferred_resolves,
+                        );
+
+                        // Work out a source rect to copy from the texture, depending on whether
+                        // a sub-rect is present or not.
+                        // TODO(gw): We have much type confusion below - f32, i32 and u32 for
+                        //           various representations of the texel rects. We should make
+                        //           this consistent!
+                        let source_rect = key.texel_rect.map_or(cache_item.uv_rect.to_i32(), |sub_rect| {
+                            DeviceIntRect::new(
+                                DeviceIntPoint::new(
+                                    cache_item.uv_rect.origin.x as i32 + sub_rect.origin.x,
+                                    cache_item.uv_rect.origin.y as i32 + sub_rect.origin.y,
+                                ),
+                                sub_rect.size,
+                            )
+                        });
+
+                        // Store the blit job for the renderer to execute, including
+                        // the allocated destination rect within this target.
+                        let (target_rect, _) = task.get_target_rect();
+                        self.blits.push(BlitJob {
+                            source: BlitJobSource::Texture(
+                                cache_item.texture_id,
+                                cache_item.texture_layer,
+                                source_rect,
+                            ),
+                            target_rect,
+                        });
+                    }
+                    BlitSource::RenderTask { .. } => {
+                        panic!("BUG: render task blit jobs to render tasks not supported");
+                    }
+                }
+            }
         }
     }
 
     fn used_rect(&self) -> DeviceIntRect {
         self.allocator
             .as_ref()
             .expect("bug: used_rect called on framebuffer")
             .used_rect
@@ -402,35 +474,37 @@ impl RenderTarget for AlphaRenderTarget 
             allocator: TextureAllocator::new(size.expect("bug: alpha targets need size")),
         }
     }
 
     fn add_task(
         &mut self,
         task_id: RenderTaskId,
         ctx: &RenderTargetContext,
-        gpu_cache: &GpuCache,
+        gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         clip_store: &ClipStore,
+        _: &mut Vec<DeferredResolve>,
     ) {
         let task = &render_tasks[task_id];
 
         match task.clear_mode {
             ClearMode::Zero => {
                 self.zero_clears.push(task_id);
             }
             ClearMode::One => {}
             ClearMode::Transparent => {
                 panic!("bug: invalid clear mode for alpha task");
             }
         }
 
         match task.kind {
-            RenderTaskKind::Readback(..) => {
-                panic!("Should not be added to alpha target!");
+            RenderTaskKind::Readback(..) |
+            RenderTaskKind::Blit(..) => {
+                panic!("BUG: should not be added to alpha target!");
             }
             RenderTaskKind::VerticalBlur(ref info) => {
                 info.add_instances(
                     &mut self.vertical_blurs,
                     task_id,
                     task.children[0],
                     BlurDirection::Vertical,
                     render_tasks,
@@ -538,25 +612,27 @@ impl RenderTarget for AlphaRenderTarget 
     fn needs_depth(&self) -> bool {
         false
     }
 }
 
 #[cfg_attr(feature = "capture", derive(Deserialize, Serialize))]
 pub struct TextureCacheRenderTarget {
     pub horizontal_blurs: Vec<BlurInstance>,
+    pub blits: Vec<BlitJob>,
 }
 
 impl TextureCacheRenderTarget {
     fn new(
         _size: Option<DeviceUintSize>,
         _screen_size: DeviceIntSize,
     ) -> Self {
         TextureCacheRenderTarget {
             horizontal_blurs: Vec::new(),
+            blits: Vec::new(),
         }
     }
 
     fn add_task(
         &mut self,
         task_id: RenderTaskId,
         render_tasks: &RenderTaskTree,
     ) {
@@ -567,16 +643,34 @@ impl TextureCacheRenderTarget {
                 info.add_instances(
                     &mut self.horizontal_blurs,
                     task_id,
                     task.children[0],
                     BlurDirection::Horizontal,
                     render_tasks,
                 );
             }
+            RenderTaskKind::Blit(ref task_info) => {
+                match task_info.source {
+                    BlitSource::Image { .. } => {
+                        // reading/writing from the texture cache at the same time
+                        // is undefined behavior.
+                        panic!("bug: a single blit cannot be to/from texture cache");
+                    }
+                    BlitSource::RenderTask { task_id } => {
+                        // Add a blit job to copy from an existing render
+                        // task to this target.
+                        let (target_rect, _) = task.get_target_rect();
+                        self.blits.push(BlitJob {
+                            source: BlitJobSource::RenderTask(task_id),
+                            target_rect,
+                        });
+                    }
+                }
+            }
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Picture(..) |
             RenderTaskKind::CacheMask(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::Scaling(..) => {
                 panic!("BUG: unexpected task kind for texture cache target");
             }
         }
@@ -653,17 +747,24 @@ impl RenderPass {
     ) {
         profile_scope!("RenderPass::build");
 
         match self.kind {
             RenderPassKind::MainFramebuffer(ref mut target) => {
                 for &task_id in &self.tasks {
                     assert_eq!(render_tasks[task_id].target_kind(), RenderTargetKind::Color);
                     render_tasks[task_id].pass_index = Some(pass_index);
-                    target.add_task(task_id, ctx, gpu_cache, render_tasks, clip_store);
+                    target.add_task(
+                        task_id,
+                        ctx,
+                        gpu_cache,
+                        render_tasks,
+                        clip_store,
+                        deferred_resolves,
+                    );
                 }
                 target.build(ctx, gpu_cache, render_tasks, deferred_resolves);
             }
             RenderPassKind::OffScreen { ref mut color, ref mut alpha, ref mut texture_cache } => {
                 // Step through each task, adding to batches as appropriate.
                 for &task_id in &self.tasks {
                     let (target_kind, texture_target) = {
                         let task = &mut render_tasks[task_id];
@@ -701,18 +802,32 @@ impl RenderPass {
                                 .entry(texture_target)
                                 .or_insert(
                                     TextureCacheRenderTarget::new(None, DeviceIntSize::zero())
                                 );
                             texture.add_task(task_id, render_tasks);
                         }
                         None => {
                             match target_kind {
-                                RenderTargetKind::Color => color.add_task(task_id, ctx, gpu_cache, render_tasks, clip_store),
-                                RenderTargetKind::Alpha => alpha.add_task(task_id, ctx, gpu_cache, render_tasks, clip_store),
+                                RenderTargetKind::Color => color.add_task(
+                                    task_id,
+                                    ctx,
+                                    gpu_cache,
+                                    render_tasks,
+                                    clip_store,
+                                    deferred_resolves,
+                                ),
+                                RenderTargetKind::Alpha => alpha.add_task(
+                                    task_id,
+                                    ctx,
+                                    gpu_cache,
+                                    render_tasks,
+                                    clip_store,
+                                    deferred_resolves,
+                                ),
                             }
                         }
                     }
                 }
 
                 color.build(ctx, gpu_cache, render_tasks, deferred_resolves);
                 alpha.build(ctx, gpu_cache, render_tasks, deferred_resolves);
             }
@@ -756,24 +871,22 @@ pub struct Frame {
     pub profile_counters: FrameProfileCounters,
 
     pub node_data: Vec<ClipScrollNodeData>,
     pub clip_chain_local_clip_rects: Vec<LayerRect>,
     pub render_tasks: RenderTaskTree,
 
     // List of updates that need to be pushed to the
     // gpu resource cache.
-    #[cfg_attr(feature = "capture", serde(skip))]
     pub gpu_cache_updates: Option<GpuCacheUpdateList>,
 
     // List of textures that we don't know about yet
     // from the backend thread. The render thread
     // will use a callback to resolve these and
     // patch the data structures.
-    #[cfg_attr(feature = "capture", serde(skip))]
     pub deferred_resolves: Vec<DeferredResolve>,
 
     // True if this frame contains any render tasks
     // that write to the texture cache.
     pub has_texture_cache_tasks: bool,
 
     // True if this frame has been drawn by the
     // renderer.
--- a/gfx/webrender_api/src/api.rs
+++ b/gfx/webrender_api/src/api.rs
@@ -8,16 +8,17 @@ use {FontInstancePlatformOptions, FontKe
 use {ImageDescriptor, ImageKey, ItemTag, LayoutPoint, LayoutSize, LayoutTransform, LayoutVector2D};
 use {NativeFontHandle, WorldPoint};
 use app_units::Au;
 use channel::{self, MsgSender, Payload, PayloadSender, PayloadSenderHelperMethods};
 use std::cell::Cell;
 use std::fmt;
 use std::marker::PhantomData;
 use std::path::PathBuf;
+use std::u32;
 
 pub type TileSize = u16;
 /// Documents are rendered in the ascending order of their associated layer values.
 pub type DocumentLayer = i8;
 
 /// The resource updates for a given transaction (they must be applied in the same frame).
 #[derive(Clone, Deserialize, Serialize)]
 pub struct ResourceUpdates {
@@ -425,16 +426,17 @@ bitflags!{
 }
 
 /// Information about a loaded capture of each document
 /// that is returned by `RenderBackend`.
 #[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct CapturedDocument {
     pub document_id: DocumentId,
     pub root_pipeline_id: Option<PipelineId>,
+    pub window_size: DeviceUintSize,
 }
 
 #[derive(Clone, Deserialize, Serialize)]
 pub enum DebugCommand {
     /// Display the frame profiler on screen.
     EnableProfiler(bool),
     /// Display all texture cache pages on screen.
     EnableTextureCacheDebug(bool),
@@ -515,16 +517,22 @@ impl fmt::Debug for ApiMsg {
         })
     }
 }
 
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
 pub struct Epoch(pub u32);
 
+impl Epoch {
+    pub fn invalid() -> Epoch {
+        Epoch(u32::MAX)
+    }
+}
+
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd, Deserialize, Serialize)]
 pub struct IdNamespace(pub u32);
 
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub struct DocumentId(pub IdNamespace, pub u32);
 
--- a/gfx/webrender_api/src/color.rs
+++ b/gfx/webrender_api/src/color.rs
@@ -42,22 +42,22 @@ pub struct ColorF {
     pub r: f32,
     pub g: f32,
     pub b: f32,
     pub a: f32,
 }
 
 impl ColorF {
     /// Constructs a new `ColorF` from its components.
-    pub fn new(r: f32, g: f32, b: f32, a: f32) -> ColorF {
+    pub fn new(r: f32, g: f32, b: f32, a: f32) -> Self {
         ColorF { r, g, b, a }
     }
 
     /// Multiply the RGB channels (but not alpha) with a given factor.
-    pub fn scale_rgb(&self, scale: f32) -> ColorF {
+    pub fn scale_rgb(&self, scale: f32) -> Self {
         ColorF {
             r: self.r * scale,
             g: self.g * scale,
             b: self.b * scale,
             a: self.a,
         }
     }
 
--- a/gfx/webrender_api/src/image.rs
+++ b/gfx/webrender_api/src/image.rs
@@ -8,40 +8,43 @@ use IdNamespace;
 use font::{FontInstanceKey, FontKey, FontTemplate};
 use std::sync::Arc;
 
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub struct ImageKey(pub IdNamespace, pub u32);
 
 impl ImageKey {
-    pub fn new(namespace: IdNamespace, key: u32) -> ImageKey {
+    pub const DUMMY: Self = ImageKey(IdNamespace(0), 0);
+
+    pub fn new(namespace: IdNamespace, key: u32) -> Self {
         ImageKey(namespace, key)
     }
-
-    pub fn dummy() -> ImageKey {
-        ImageKey(IdNamespace(0), 0)
-    }
 }
 
 /// An arbitrary identifier for an external image provided by the
 /// application. It must be a unique identifier for each external
 /// image.
 #[repr(C)]
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
 pub struct ExternalImageId(pub u64);
 
+#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
+pub enum TextureTarget {
+    Default = 0,
+    Array = 1,
+    Rect = 2,
+    External = 3,
+}
+
 #[repr(u32)]
 #[derive(Debug, Copy, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)]
 pub enum ExternalImageType {
-    Texture2DHandle,       // gl TEXTURE_2D handle
-    Texture2DArrayHandle,  // gl TEXTURE_2D_ARRAY handle
-    TextureRectHandle,     // gl TEXTURE_RECT handle
-    TextureExternalHandle, // gl TEXTURE_EXTERNAL handle
-    ExternalBuffer,
+    TextureHandle(TextureTarget),
+    Buffer,
 }
 
 #[repr(C)]
 #[derive(Debug, Copy, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)]
 pub struct ExternalImageData {
     pub id: ExternalImageId,
     pub channel_index: u8,
     pub image_type: ExternalImageType,
@@ -88,58 +91,59 @@ impl ImageDescriptor {
             is_opaque,
         }
     }
 
     pub fn compute_stride(&self) -> u32 {
         self.stride
             .unwrap_or(self.width * self.format.bytes_per_pixel())
     }
+
+    pub fn compute_total_size(&self) -> u32 {
+        self.compute_stride() * self.height
+    }
 }
 
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub enum ImageData {
     Raw(Arc<Vec<u8>>),
     Blob(BlobImageData),
     External(ExternalImageData),
 }
 
 impl ImageData {
-    pub fn new(bytes: Vec<u8>) -> ImageData {
+    pub fn new(bytes: Vec<u8>) -> Self {
         ImageData::Raw(Arc::new(bytes))
     }
 
-    pub fn new_shared(bytes: Arc<Vec<u8>>) -> ImageData {
+    pub fn new_shared(bytes: Arc<Vec<u8>>) -> Self {
         ImageData::Raw(bytes)
     }
 
-    pub fn new_blob_image(commands: Vec<u8>) -> ImageData {
+    pub fn new_blob_image(commands: Vec<u8>) -> Self {
         ImageData::Blob(commands)
     }
 
     #[inline]
     pub fn is_blob(&self) -> bool {
         match self {
             &ImageData::Blob(_) => true,
             _ => false,
         }
     }
 
     #[inline]
     pub fn uses_texture_cache(&self) -> bool {
-        match self {
-            &ImageData::External(ext_data) => match ext_data.image_type {
-                ExternalImageType::Texture2DHandle => false,
-                ExternalImageType::Texture2DArrayHandle => false,
-                ExternalImageType::TextureRectHandle => false,
-                ExternalImageType::TextureExternalHandle => false,
-                ExternalImageType::ExternalBuffer => true,
+        match *self {
+            ImageData::External(ref ext_data) => match ext_data.image_type {
+                ExternalImageType::TextureHandle(_) => false,
+                ExternalImageType::Buffer => true,
             },
-            &ImageData::Blob(_) => true,
-            &ImageData::Raw(_) => true,
+            ImageData::Blob(_) => true,
+            ImageData::Raw(_) => true,
         }
     }
 }
 
 pub trait BlobImageResources {
     fn get_font_data(&self, key: FontKey) -> &FontTemplate;
     fn get_image(&self, key: ImageKey) -> Option<(&ImageData, &ImageDescriptor)>;
 }
--- a/gfx/webrender_api/src/units.rs
+++ b/gfx/webrender_api/src/units.rs
@@ -109,8 +109,35 @@ pub type LayerSizeAu = TypedSize2D<Au, L
 
 pub fn as_scroll_parent_rect(rect: &LayerRect) -> ScrollLayerRect {
     ScrollLayerRect::from_untyped(&rect.to_untyped())
 }
 
 pub fn as_scroll_parent_vector(vector: &LayerVector2D) -> ScrollLayerVector2D {
     ScrollLayerVector2D::from_untyped(&vector.to_untyped())
 }
+
+/// Stores two points (`uv0`, `uv1`) in texel space. Texel
+/// coordinates are used because the texture atlas may grow:
+/// keeping texel coords here and normalizing the UVs in the
+/// vertex shader means nothing needs to be updated on the
+/// CPU when the texture size changes.
+#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
+pub struct TexelRect {
+    pub uv0: DevicePoint,
+    pub uv1: DevicePoint,
+}
+
+impl TexelRect {
+    pub fn new(u0: f32, v0: f32, u1: f32, v1: f32) -> Self {
+        TexelRect {
+            uv0: DevicePoint::new(u0, v0),
+            uv1: DevicePoint::new(u1, v1),
+        }
+    }
+
+    pub fn invalid() -> Self {
+        TexelRect {
+            uv0: DevicePoint::new(-1.0, -1.0),
+            uv1: DevicePoint::new(-1.0, -1.0),
+        }
+    }
+}
--- a/gfx/webrender_bindings/Cargo.toml
+++ b/gfx/webrender_bindings/Cargo.toml
@@ -4,17 +4,17 @@ version = "0.1.0"
 authors = ["The Mozilla Project Developers"]
 license = "MPL-2.0"
 
 [dependencies]
 rayon = "0.8"
 thread_profiler = "0.1.1"
 euclid = "0.16"
 app_units = "0.6"
-gleam = "0.4.19"
+gleam = "0.4.20"
 log = "0.3"
 
 [dependencies.webrender]
 path = "../webrender"
 version = "0.56.1"
 default-features = false
 
 [target.'cfg(target_os = "windows")'.dependencies]